// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Authors:
 *	Roman Kisel <romank@linux.microsoft.com>
 *	Saurabh Sengar <ssengar@linux.microsoft.com>
 *	Naman Jain <namjain@linux.microsoft.com>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/anon_inodes.h>
#include <linux/cpuhotplug.h>
#include <linux/count_zeros.h>
#include <linux/entry-virt.h>
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <asm/debugreg.h>
#include <asm/mshyperv.h>
#include <trace/events/ipi.h>
#include <uapi/asm/mtrr.h>
#include <uapi/linux/mshv.h>
#include <hyperv/hvhdk.h>

#include "../../kernel/fpu/legacy.h"
#include "mshv.h"
#include "mshv_vtl.h"
#include "hyperv_vmbus.h"

MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");

#define MSHV_ENTRY_REASON_LOWER_VTL_CALL	0x1
#define MSHV_ENTRY_REASON_INTERRUPT		0x2
#define MSHV_ENTRY_REASON_INTERCEPT		0x3

#define MSHV_REAL_OFF_SHIFT	16
#define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
#define MSHV_RUN_PAGE_OFFSET	0
#define MSHV_REG_PAGE_OFFSET	1
#define VTL2_VMBUS_SINT_INDEX	7

static struct device *mem_dev;

static struct tasklet_struct msg_dpc;
static wait_queue_head_t fd_wait_queue;
static bool has_message;
static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
static DEFINE_MUTEX(flag_lock);
static bool __read_mostly mshv_has_reg_page;

/*
 * The hvcall code is a u16; allocate a bitmap with one bit per possible
 * call code, i.e. (1 << 16) bits == (U16_MAX + 1) / 8 bytes.
 */
#define MAX_BITMAP_SIZE	((U16_MAX + 1) / 8)

struct mshv_vtl_hvcall_fd {
	u8 allow_bitmap[MAX_BITMAP_SIZE];
	bool allow_map_initialized;
	/*
	 * Used to protect hvcall setup in IOCTLs
	 */
	struct mutex init_mutex;
	struct miscdevice *dev;
};

struct mshv_vtl_poll_file {
	struct file *file;
	wait_queue_entry_t wait;
	wait_queue_head_t *wqh;
	poll_table pt;
	int cpu;
};

struct mshv_vtl {
	struct device *module_dev;
	u64 id;
};

struct mshv_vtl_per_cpu {
	struct mshv_vtl_run *run;
	struct page *reg_page;
};

/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
union hv_synic_overlay_page_msr {
	u64 as_uint64;
	struct {
		u64 enabled: 1;
		u64 reserved: 11;
		u64 pfn: 52;
	} __packed;
};

static struct mutex mshv_vtl_poll_file_lock;
static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
static union hv_register_vsm_capabilities mshv_vsm_capabilities;

static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);

static const union hv_input_vtl input_vtl_zero;
static const union hv_input_vtl input_vtl_normal = {
	.use_target_vtl = 1,
};

static const struct file_operations mshv_vtl_fops;
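/*
 * Back the MSHV_CREATE_VTL ioctl: allocate a struct mshv_vtl and wrap it in
 * an anonymous-inode file so that each lower-VTL instance is managed through
 * its own file descriptor.
 */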
static long
mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
{
	struct mshv_vtl *vtl;
	struct file *file;
	int fd;

	vtl = kzalloc_obj(*vtl);
	if (!vtl)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		kfree(vtl);
		return fd;
	}
	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops, vtl, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		kfree(vtl);
		return PTR_ERR(file);
	}
	vtl->module_dev = module_dev;
	fd_install(fd, file);

	return fd;
}

static long
mshv_ioctl_check_extension(void __user *user_arg)
{
	u32 arg;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	switch (arg) {
	case MSHV_CAP_CORE_API_STABLE:
		return 0;
	case MSHV_CAP_REGISTER_PAGE:
		return mshv_has_reg_page;
	case MSHV_CAP_VTL_RETURN_ACTION:
		return mshv_vsm_capabilities.return_action_available;
	case MSHV_CAP_DR6_SHARED:
		return mshv_vsm_capabilities.dr6_shared;
	}

	return -EOPNOTSUPP;
}

static long
mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	struct miscdevice *misc = filp->private_data;

	switch (ioctl) {
	case MSHV_CHECK_EXTENSION:
		return mshv_ioctl_check_extension((void __user *)arg);
	case MSHV_CREATE_VTL:
		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
	}

	return -ENOTTY;
}

static const struct file_operations mshv_dev_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= mshv_dev_ioctl,
	.llseek		= noop_llseek,
};

static struct miscdevice mshv_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "mshv",
	.fops = &mshv_dev_fops,
	.mode = 0600,
};

static struct mshv_vtl_run *mshv_vtl_this_run(void)
{
	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
}

static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
}

static struct page *mshv_vtl_cpu_reg_page(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
}

static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
{
	struct hv_register_assoc reg_assoc = {};
	union hv_synic_overlay_page_msr overlay = {};
	struct page *reg_page;

	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
	if (!reg_page) {
		WARN(1, "failed to allocate register page\n");
		return;
	}

	overlay.enabled = 1;
	overlay.pfn = page_to_hvpfn(reg_page);
	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
	reg_assoc.value.reg64 = overlay.as_uint64;

	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				     1, input_vtl_zero, &reg_assoc)) {
		WARN(1, "failed to set up the register page\n");
		__free_page(reg_page);
		return;
	}

	per_cpu->reg_page = reg_page;
	mshv_has_reg_page = true;
}

static void mshv_vtl_synic_enable_regs(unsigned int cpu)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();

	/* Enable intercepts */
	if (!mshv_vsm_capabilities.intercept_page_available)
		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			   sint.as_uint64);

	/* The VTL2 host VSP SINT is (un)masked when user mode requests it. */
}
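/*
 * Cache the VSM code-page offsets and capabilities reported by the
 * hypervisor; both registers are read with a single
 * hv_call_get_vp_registers() call targeting VTL0.
 */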
static int mshv_vtl_get_vsm_regs(void)
{
	struct hv_register_assoc registers[2];
	int ret, count = 2;

	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;

	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				       count, input_vtl_zero, registers);
	if (ret)
		return ret;

	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;

	return ret;
}

static int mshv_vtl_configure_vsm_partition(struct device *dev)
{
	union hv_register_vsm_partition_config config;
	struct hv_register_assoc reg_assoc;

	config.as_uint64 = 0;
	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
	config.enable_vtl_protection = 1;
	config.zero_memory_on_reset = 1;
	config.intercept_vp_startup = 1;
	config.intercept_cpuid_unimplemented = 1;

	if (mshv_vsm_capabilities.intercept_page_available) {
		dev_dbg(dev, "using intercept page\n");
		config.intercept_page = 1;
	}

	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
	reg_assoc.value.reg64 = config.as_uint64;

	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_zero, &reg_assoc);
}

static void mshv_vtl_vmbus_isr(void)
{
	struct hv_per_cpu_context *per_cpu;
	struct hv_message *msg;
	u32 message_type;
	union hv_synic_event_flags *event_flags;
	struct eventfd_ctx *eventfd;
	u16 i;

	per_cpu = this_cpu_ptr(hv_context.cpu_context);
	if (smp_processor_id() == 0) {
		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
		message_type = READ_ONCE(msg->header.message_type);
		if (message_type != HVMSG_NONE)
			tasklet_schedule(&msg_dpc);
	}

	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
			VTL2_VMBUS_SINT_INDEX;
	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
		if (!sync_test_and_clear_bit(i, event_flags->flags))
			continue;
		rcu_read_lock();
		eventfd = READ_ONCE(flag_eventfds[i]);
		if (eventfd)
			eventfd_signal(eventfd);
		rcu_read_unlock();
	}

	vmbus_isr();
}

static int mshv_vtl_alloc_context(unsigned int cpu)
{
	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);

	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!per_cpu->run)
		return -ENOMEM;

	if (mshv_vsm_capabilities.intercept_page_available)
		mshv_vtl_configure_reg_page(per_cpu);

	mshv_vtl_synic_enable_regs(cpu);

	return 0;
}

static int mshv_vtl_cpuhp_online;

static int hv_vtl_setup_synic(void)
{
	int ret;

	/* Install our ISR first so packets destined for user space are filtered out. */
	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
				mshv_vtl_alloc_context, NULL);
	if (ret < 0) {
		hv_setup_vmbus_handler(vmbus_isr);
		return ret;
	}

	mshv_vtl_cpuhp_online = ret;

	return 0;
}

static void hv_vtl_remove_synic(void)
{
	cpuhp_remove_state(mshv_vtl_cpuhp_online);
	hv_setup_vmbus_handler(vmbus_isr);
}

static int vtl_get_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int vtl_set_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}
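/*
 * MSHV_ADD_VTL0_MEMORY: register a VTL0 RAM range [start_pfn, last_pfn)
 * with devm_memremap_pages() so that struct pages back the lower VTL's
 * memory for the lifetime of VTL2.
 */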
static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
{
	struct mshv_vtl_ram_disposition vtl0_mem;
	struct dev_pagemap *pgmap;
	void *addr;

	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
		return -EFAULT;
	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
		dev_err(vtl->module_dev, "range start pfn (%llx) >= last pfn (%llx)\n",
			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
		return -EINVAL;
	}

	pgmap = kzalloc_obj(*pgmap);
	if (!pgmap)
		return -ENOMEM;

	/*
	 * vtl0_mem.last_pfn is excluded from the pagemap range by design:
	 * it is not reserved or wasted, it simply marks 'start_pfn + size',
	 * i.e. the first PFN past the end of the range.
	 */
	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_GENERIC;

	/*
	 * Determine the highest page order that can be used for the given
	 * memory range. This works best when the range is aligned, i.e. both
	 * the start and the length. Clamp to MAX_FOLIO_ORDER to avoid a WARN
	 * in memremap_pages() when the range alignment exceeds the maximum
	 * supported folio order for this kernel config.
	 */
	pgmap->vmemmap_shift = min(count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn),
				   MAX_FOLIO_ORDER);
	dev_dbg(vtl->module_dev,
		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);

	addr = devm_memremap_pages(mem_dev, pgmap);
	if (IS_ERR(addr)) {
		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
		kfree(pgmap);
		return PTR_ERR(addr);
	}

	/*
	 * Don't free pgmap, since it has to stick around until the memory
	 * is unmapped, which will never happen as there is no scenario
	 * where VTL0 can be released/shutdown without bringing down VTL2.
	 */
	return 0;
}

static void mshv_vtl_cancel(int cpu)
{
	int here = get_cpu();

	if (here != cpu) {
		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
			smp_send_reschedule(cpu);
	} else {
		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
	}
	put_cpu();
}

static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);

	mshv_vtl_cancel(poll_file->cpu);

	return 0;
}

static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
{
	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);

	WARN_ON(poll_file->wqh);
	poll_file->wqh = wqh;
	add_wait_queue(wqh, &poll_file->wait);
}
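/*
 * MSHV_SET_POLL_FILE: associate a pollable file with a CPU. When the file
 * signals readiness, the wait-queue callback (mshv_vtl_poll_file_wake())
 * cancels any in-progress lower-VTL run on that CPU so user space regains
 * control.
 */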
static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
{
	struct file *file, *old_file;
	struct mshv_vtl_poll_file *poll_file;
	struct mshv_vtl_set_poll_file input;

	if (copy_from_user(&input, user_input, sizeof(input)))
		return -EFAULT;

	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
		return -EINVAL;
	/*
	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel
	 * driver runs; the CPU is expected to remain online after the above
	 * cpu_online() check.
	 */

	file = fget(input.fd);
	if (!file)
		return -EBADFD;

	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, input.cpu);

	mutex_lock(&mshv_vtl_poll_file_lock);

	if (poll_file->wqh)
		remove_wait_queue(poll_file->wqh, &poll_file->wait);
	poll_file->wqh = NULL;

	old_file = poll_file->file;
	poll_file->file = file;
	poll_file->cpu = input.cpu;

	init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
	init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
	vfs_poll(file, &poll_file->pt);

	mutex_unlock(&mshv_vtl_poll_file_lock);

	if (old_file)
		fput(old_file);

	return 0;
}
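/*
 * Static table mapping register names to their corresponding actions:
 * debug registers are accessed via native_{get,set}_debugreg(), MTRR
 * registers via rdmsrl()/wrmsrl(). Registers not listed here are
 * forwarded to the hypervisor by the callers of mshv_vtl_get_set_reg().
 */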
static const struct {
	enum hv_register_name reg_name;
	int debug_reg_num;	/* -1 if not a debug register */
	u32 msr_addr;		/* 0 if not an MSR */
} reg_table[] = {
	/* Debug registers */
	{HV_X64_REGISTER_DR0, 0, 0},
	{HV_X64_REGISTER_DR1, 1, 0},
	{HV_X64_REGISTER_DR2, 2, 0},
	{HV_X64_REGISTER_DR3, 3, 0},
	{HV_X64_REGISTER_DR6, 6, 0},
	/* MTRR MSRs */
	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
};

static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
{
	u64 *reg64;
	enum hv_register_name gpr_name;
	int i;

	gpr_name = regs->name;
	reg64 = &regs->value.reg64;

	/* Search for the register in the table */
	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
		if (reg_table[i].reg_name != gpr_name)
			continue;
		if (reg_table[i].debug_reg_num != -1) {
			/* Handle debug registers */
			if (gpr_name == HV_X64_REGISTER_DR6 &&
			    !mshv_vsm_capabilities.dr6_shared)
				goto hypercall;
			if (set)
				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
			else
				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
		} else {
			/* Handle MSRs */
			if (set)
				wrmsrl(reg_table[i].msr_addr, *reg64);
			else
				rdmsrl(reg_table[i].msr_addr, *reg64);
		}
		return 0;
	}

hypercall:
	return 1;
}
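/*
 * Switch this CPU to the lower VTL, first propagating any pending VTL
 * return actions from the shared run page into the VP assist page.
 */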
static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
{
	struct hv_vp_assist_page *hvp;

	hvp = hv_vp_assist_page[smp_processor_id()];

	/*
	 * Process any signal-event return actions set directly in the
	 * run page.
	 */
	if (mshv_vsm_capabilities.return_action_available) {
		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);

		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);

		/*
		 * The hypervisor will take care of clearing out the actions
		 * set in the assist page.
		 */
		memcpy(hvp->vtl_ret_actions,
		       mshv_vtl_this_run()->vtl_ret_actions,
		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
	}

	mshv_vtl_return_call(vtl0);
}

static bool mshv_vtl_process_intercept(void)
{
	struct hv_per_cpu_context *mshv_cpu;
	void *synic_message_page;
	struct hv_message *msg;
	u32 message_type;

	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	synic_message_page = mshv_cpu->hyp_synic_message_page;
	if (unlikely(!synic_message_page))
		return true;

	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
	message_type = READ_ONCE(msg->header.message_type);
	if (message_type == HVMSG_NONE)
		return true;

	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);

	return false;
}

static int mshv_vtl_ioctl_return_to_lower_vtl(void)
{
	preempt_disable();
	for (;;) {
		unsigned long irq_flags;
		struct hv_vp_assist_page *hvp;
		int ret;

		if (__xfer_to_guest_mode_work_pending()) {
			preempt_enable();
			ret = xfer_to_guest_mode_handle_work();
			if (ret)
				return ret;
			preempt_disable();
		}

		local_irq_save(irq_flags);
		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
			local_irq_restore(irq_flags);
			preempt_enable();
			return -EINTR;
		}

		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
		local_irq_restore(irq_flags);

		hvp = hv_vp_assist_page[smp_processor_id()];
		this_cpu_inc(num_vtl0_transitions);
		switch (hvp->vtl_entry_reason) {
		case MSHV_ENTRY_REASON_INTERRUPT:
			if (!mshv_vsm_capabilities.intercept_page_available &&
			    likely(!mshv_vtl_process_intercept()))
				goto done;
			break;

		case MSHV_ENTRY_REASON_INTERCEPT:
			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
			       sizeof(hvp->intercept_message));
			goto done;

		default:
			panic("unknown entry reason: %d\n", hvp->vtl_entry_reason);
		}
	}

done:
	preempt_enable();

	return 0;
}

static long
mshv_vtl_ioctl_get_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, false);
	if (!ret)
		goto copy_args;	/* no hypercall needed */
	ret = vtl_get_vp_register(&reg);
	if (ret)
		return ret;

copy_args:
	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
		ret = -EFAULT;

	return ret;
}
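/*
 * MSHV_SET_VP_REGISTERS: the mirror image of mshv_vtl_ioctl_get_regs();
 * a register that can be written directly on this CPU skips the hypercall.
 */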
static long
mshv_vtl_ioctl_set_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, true);
	if (!ret)
		return ret;	/* no hypercall needed */
	ret = vtl_set_vp_register(&reg);

	return ret;
}

static long
mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	long ret;
	struct mshv_vtl *vtl = filp->private_data;

	switch (ioctl) {
	case MSHV_SET_POLL_FILE:
		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
		break;
	case MSHV_GET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
		break;
	case MSHV_SET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
		break;
	case MSHV_RETURN_TO_LOWER_VTL:
		ret = mshv_vtl_ioctl_return_to_lower_vtl();
		break;
	case MSHV_ADD_VTL0_MEMORY:
		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
		break;
	default:
		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
		ret = -ENOTTY;
	}

	return ret;
}

static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
{
	struct page *page;
	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;

	if (!cpu_online(cpu))
		return VM_FAULT_SIGBUS;
	/*
	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel
	 * driver runs; the CPU is expected to remain online after the above
	 * cpu_online() check.
	 */

	if (real_off == MSHV_RUN_PAGE_OFFSET) {
		page = virt_to_page(mshv_vtl_cpu_run(cpu));
	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
		if (!mshv_has_reg_page)
			return VM_FAULT_SIGBUS;
		page = mshv_vtl_cpu_reg_page(cpu);
	} else {
		return VM_FAULT_NOPAGE;
	}

	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct mshv_vtl_vm_ops = {
	.fault = mshv_vtl_fault,
};

static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_vm_ops;

	return 0;
}

static int mshv_vtl_release(struct inode *inode, struct file *filp)
{
	struct mshv_vtl *vtl = filp->private_data;

	kfree(vtl);

	return 0;
}

static const struct file_operations mshv_vtl_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= mshv_vtl_ioctl,
	.release	= mshv_vtl_release,
	.mmap		= mshv_vtl_mmap,
};

static void mshv_vtl_synic_mask_vmbus_sint(void *info)
{
	union hv_synic_sint sint;
	const u8 *mask = info;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = (*mask != 0);
	sint.auto_eoi = hv_recommend_using_aeoi();

	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
		   sint.as_uint64);

	if (!sint.masked)
		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
	else
		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
}
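/*
 * Runs on VMBUS_CONNECT_CPU via smp_call_function_single(): copy the
 * pending VTL2 VMBus SINT message into the caller's buffer and signal
 * end-of-message to the hypervisor.
 */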
static void mshv_vtl_read_remote(void *buffer)
{
	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
					VTL2_VMBUS_SINT_INDEX;
	u32 message_type = READ_ONCE(msg->header.message_type);

	WRITE_ONCE(has_message, false);
	if (message_type == HVMSG_NONE)
		return;

	memcpy(buffer, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);
}

static bool vtl_synic_mask_vmbus_sint_masked = true;

static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
{
	struct hv_message msg = {};
	int ret;

	if (size < sizeof(msg))
		return -EINVAL;

	for (;;) {
		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
		if (msg.header.message_type != HVMSG_NONE)
			break;

		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
			return 0; /* EOF */

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(fd_wait_queue,
					       READ_ONCE(has_message) ||
					       READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
		if (ret)
			return ret;
	}

	if (copy_to_user(arg, &msg, sizeof(msg)))
		return -EFAULT;

	return sizeof(msg);
}

static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(filp, &fd_wait_queue, wait);
	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}

static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}

static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
{
	struct mshv_vtl_sint_post_msg message;
	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];

	if (copy_from_user(&message, arg, sizeof(message)))
		return -EFAULT;
	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EINVAL;
	if (copy_from_user(payload, (void __user *)message.payload_ptr,
			   message.payload_size))
		return -EFAULT;

	return hv_post_message((union hv_connection_id)message.connection_id,
			       message.message_type, (void *)payload,
			       message.payload_size);
}

static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
{
	u64 input, status;
	struct mshv_vtl_signal_event signal_event;

	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
		return -EFAULT;

	input = signal_event.connection_id | ((u64)signal_event.flag << 32);

	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);

	return hv_result_to_errno(status);
}

static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
{
	struct mshv_vtl_set_eventfd set_eventfd;
	struct eventfd_ctx *eventfd, *old_eventfd;

	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
		return -EFAULT;
	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
		return -EINVAL;

	eventfd = NULL;
	if (set_eventfd.fd >= 0) {
		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);
	}

	guard(mutex)(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);

	if (old_eventfd) {
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}

	return 0;
}
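/*
 * MSHV_SINT_PAUSE_MESSAGE_STREAM: mask or unmask the VTL2 VMBus SINT on
 * every CPU. Masking also wakes any reader blocked in mshv_vtl_sint_read()
 * so it can observe EOF.
 */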
static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
{
	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
	struct mshv_sint_mask mask;

	if (copy_from_user(&mask, arg, sizeof(mask)))
		return -EFAULT;
	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
	on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
	if (mask.mask)
		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);

	return 0;
}

static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case MSHV_SINT_POST_MESSAGE:
		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
	case MSHV_SINT_SIGNAL_EVENT:
		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
	case MSHV_SINT_SET_EVENTFD:
		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}

static const struct file_operations mshv_vtl_sint_ops = {
	.owner		= THIS_MODULE,
	.read		= mshv_vtl_sint_read,
	.poll		= mshv_vtl_sint_poll,
	.unlocked_ioctl	= mshv_vtl_sint_ioctl,
};

static struct miscdevice mshv_vtl_sint_dev = {
	.name = "mshv_sint",
	.fops = &mshv_vtl_sint_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
{
	struct miscdevice *dev = f->private_data;
	struct mshv_vtl_hvcall_fd *fd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	fd = vzalloc(sizeof(*fd));
	if (!fd)
		return -ENOMEM;
	fd->dev = dev;
	f->private_data = fd;
	mutex_init(&fd->init_mutex);

	return 0;
}

static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
{
	struct mshv_vtl_hvcall_fd *fd;

	fd = f->private_data;
	if (fd) {
		vfree(fd);
		f->private_data = NULL;
	}

	return 0;
}

static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
{
	struct mshv_vtl_hvcall_setup hvcall_setup;

	guard(mutex)(&fd->init_mutex);

	if (fd->allow_map_initialized) {
		dev_err(fd->dev->this_device,
			"Hypercall allow map has already been set, pid %d\n",
			current->pid);
		return -EINVAL;
	}

	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
			   sizeof(struct mshv_vtl_hvcall_setup))) {
		return -EFAULT;
	}
	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
		return -EINVAL;

	if (copy_from_user(&fd->allow_bitmap,
			   (void __user *)hvcall_setup.allow_bitmap_ptr,
			   hvcall_setup.bitmap_array_size)) {
		return -EFAULT;
	}

	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
		 current->pid);
	fd->allow_map_initialized = true;
	return 0;
}

static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
{
	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
}
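/*
 * Dispatch a hypercall on behalf of user space. A hypothetical user-space
 * sketch (illustration only; assumes the UAPI definitions in
 * <uapi/linux/mshv.h>, error handling elided):
 *
 *	struct mshv_vtl_hvcall args = {
 *		.control = ...,			// call code in the low 16 bits
 *		.input_size = ...,
 *		.input_ptr = (__u64)in_buf,
 *		.output_size = ...,
 *		.output_ptr = (__u64)out_buf,
 *	};
 *	ioctl(hvcall_fd, MSHV_HVCALL, &args);	// args.status holds the HV status
 */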
static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
				struct mshv_vtl_hvcall __user *hvcall_user)
{
	struct mshv_vtl_hvcall hvcall;
	void *in, *out;
	int ret;

	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
		return -EFAULT;
	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;
	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;

	/*
	 * By default, no hypercalls are allowed.
	 * User mode has to set up the allow bitmap once, via MSHV_HVCALL_SETUP.
	 */
	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
		dev_err(fd->dev->this_device,
			"Hypercall with control data %#llx isn't allowed\n",
			hvcall.control);
		return -EPERM;
	}

	/*
	 * This may create a problem for the Confidential VM (CVM) use case,
	 * where the Hyper-V driver's per-cpu input and output pages
	 * (hyperv_pcpu_input_arg and hyperv_pcpu_output_arg) must be used
	 * for making a hypervisor call.
	 *
	 * TODO: Take care of this when CVM support is added.
	 */
	in = (void *)__get_free_page(GFP_KERNEL);
	out = (void *)__get_free_page(GFP_KERNEL);
	if (!in || !out) {
		ret = -ENOMEM;
		goto free_pages;
	}

	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
		ret = -EFAULT;
		goto free_pages;
	}

	hvcall.status = hv_do_hypercall(hvcall.control, in, out);

	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
		ret = -EFAULT;
		goto free_pages;
	}
	ret = put_user(hvcall.status, &hvcall_user->status);
free_pages:
	free_page((unsigned long)in);
	free_page((unsigned long)out);

	return ret;
}

static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct mshv_vtl_hvcall_fd *fd = f->private_data;

	switch (cmd) {
	case MSHV_HVCALL_SETUP:
		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
	case MSHV_HVCALL:
		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
	default:
		break;
	}

	return -ENOIOCTLCMD;
}

static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
	.owner		= THIS_MODULE,
	.open		= mshv_vtl_hvcall_dev_open,
	.release	= mshv_vtl_hvcall_dev_release,
	.unlocked_ioctl	= mshv_vtl_hvcall_dev_ioctl,
};

static struct miscdevice mshv_vtl_hvcall_dev = {
	.name = "mshv_hvcall",
	.nodename = "mshv_hvcall",
	.fops = &mshv_vtl_hvcall_dev_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};
static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
{
	pid_t pid = task_pid_vnr(current);
	uid_t uid = current_uid().val;
	int ret = 0;

	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);

	if (capable(CAP_SYS_ADMIN)) {
		filp->private_data = inodep;
	} else {
		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d\n",
		       __func__, pid, uid);
		ret = -EPERM;
	}

	return ret;
}

/*
 * Check that the faulting address can be served with a mapping of the given
 * size: the virtual and file offsets must be congruent modulo the mapping
 * size, and the size-aligned virtual range must lie within the VMA. On
 * success, *pfn is set to the first PFN of the aligned block.
 */
static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
{
	unsigned long mask = size - 1;
	unsigned long start = vmf->address & ~mask;
	unsigned long end = start + size;
	bool is_valid;

	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
		   start >= vmf->vma->vm_start &&
		   end <= vmf->vma->vm_end;

	if (is_valid)
		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);

	return is_valid;
}

static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	unsigned long pfn = vmf->pgoff;
	vm_fault_t ret = VM_FAULT_FALLBACK;

	switch (order) {
	case 0:
		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);

	case PMD_ORDER:
		if (can_fault(vmf, PMD_SIZE, &pfn))
			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	case PUD_ORDER:
		if (can_fault(vmf, PUD_SIZE, &pfn))
			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	default:
		return VM_FAULT_SIGBUS;
	}
}

static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}

static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
	.fault		= mshv_vtl_low_fault,
	.huge_fault	= mshv_vtl_low_huge_fault,
};

static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_low_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);

	return 0;
}

static const struct file_operations mshv_vtl_low_file_ops = {
	.owner	= THIS_MODULE,
	.open	= mshv_vtl_low_open,
	.mmap	= mshv_vtl_low_mmap,
};

static struct miscdevice mshv_vtl_low = {
	.name = "mshv_vtl_low",
	.nodename = "mshv_vtl_low",
	.fops = &mshv_vtl_low_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};
static int __init mshv_vtl_init(void)
{
	int ret;
	struct device *dev;

	/*
	 * This creates /dev/mshv which provides functionality to create VTLs
	 * and partitions.
	 */
	ret = misc_register(&mshv_dev);
	if (ret) {
		pr_err("mshv device register failed: %d\n", ret);
		return ret;
	}
	dev = mshv_dev.this_device;

	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
	init_waitqueue_head(&fd_wait_queue);

	if (mshv_vtl_get_vsm_regs()) {
		dev_emerg(dev, "Unable to get VSM capabilities!\n");
		ret = -ENODEV;
		goto free_dev;
	}
	if (mshv_vtl_configure_vsm_partition(dev)) {
		dev_emerg(dev, "VSM configuration failed!\n");
		ret = -ENODEV;
		goto free_dev;
	}

	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
	ret = hv_vtl_setup_synic();
	if (ret)
		goto free_dev;

	/*
	 * The mshv_sint device adds VMBus relay ioctl support,
	 * providing a channel for VTL0 to communicate with VTL2.
	 */
	ret = misc_register(&mshv_vtl_sint_dev);
	if (ret)
		goto free_synic;

	/*
	 * The mshv_hvcall device adds an interface that enables user space
	 * to issue hypercalls directly.
	 */
	ret = misc_register(&mshv_vtl_hvcall_dev);
	if (ret)
		goto free_sint;

	/*
	 * The mshv_vtl_low device maps the VTL0 address space into a
	 * user-mode process in VTL2; it implements mmap() to that end.
	 */
	ret = misc_register(&mshv_vtl_low);
	if (ret)
		goto free_hvcall;

	/*
	 * The "mshv vtl mem dev" device is used later to set up VTL0 memory.
	 */
	mem_dev = kzalloc_obj(*mem_dev);
	if (!mem_dev) {
		ret = -ENOMEM;
		goto free_low;
	}

	mutex_init(&mshv_vtl_poll_file_lock);

	device_initialize(mem_dev);
	dev_set_name(mem_dev, "mshv vtl mem dev");
	ret = device_add(mem_dev);
	if (ret) {
		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
		goto free_mem;
	}

	return 0;

free_mem:
	kfree(mem_dev);
free_low:
	misc_deregister(&mshv_vtl_low);
free_hvcall:
	misc_deregister(&mshv_vtl_hvcall_dev);
free_sint:
	misc_deregister(&mshv_vtl_sint_dev);
free_synic:
	hv_vtl_remove_synic();
free_dev:
	misc_deregister(&mshv_dev);

	return ret;
}

static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}

module_init(mshv_vtl_init);
module_exit(mshv_vtl_exit);