1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2023, Microsoft Corporation. 4 * 5 * Author: 6 * Roman Kisel <romank@linux.microsoft.com> 7 * Saurabh Sengar <ssengar@linux.microsoft.com> 8 * Naman Jain <namjain@linux.microsoft.com> 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/miscdevice.h> 14 #include <linux/anon_inodes.h> 15 #include <linux/cpuhotplug.h> 16 #include <linux/count_zeros.h> 17 #include <linux/entry-virt.h> 18 #include <linux/eventfd.h> 19 #include <linux/poll.h> 20 #include <linux/file.h> 21 #include <linux/vmalloc.h> 22 #include <asm/debugreg.h> 23 #include <asm/mshyperv.h> 24 #include <trace/events/ipi.h> 25 #include <uapi/asm/mtrr.h> 26 #include <uapi/linux/mshv.h> 27 #include <hyperv/hvhdk.h> 28 29 #include "../../kernel/fpu/legacy.h" 30 #include "mshv.h" 31 #include "mshv_vtl.h" 32 #include "hyperv_vmbus.h" 33 34 MODULE_AUTHOR("Microsoft"); 35 MODULE_LICENSE("GPL"); 36 MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver"); 37 38 #define MSHV_ENTRY_REASON_LOWER_VTL_CALL 0x1 39 #define MSHV_ENTRY_REASON_INTERRUPT 0x2 40 #define MSHV_ENTRY_REASON_INTERCEPT 0x3 41 42 #define MSHV_REAL_OFF_SHIFT 16 43 #define MSHV_PG_OFF_CPU_MASK (BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1) 44 #define MSHV_RUN_PAGE_OFFSET 0 45 #define MSHV_REG_PAGE_OFFSET 1 46 #define VTL2_VMBUS_SINT_INDEX 7 47 48 static struct device *mem_dev; 49 50 static struct tasklet_struct msg_dpc; 51 static wait_queue_head_t fd_wait_queue; 52 static bool has_message; 53 static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT]; 54 static DEFINE_MUTEX(flag_lock); 55 static bool __read_mostly mshv_has_reg_page; 56 57 /* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */ 58 #define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8) 59 60 struct mshv_vtl_hvcall_fd { 61 u8 allow_bitmap[MAX_BITMAP_SIZE]; 62 bool allow_map_initialized; 63 /* 64 * Used to protect hvcall setup in IOCTLs 65 */ 66 struct mutex init_mutex; 67 struct miscdevice *dev; 68 }; 69 70 struct mshv_vtl_poll_file { 71 struct file *file; 72 wait_queue_entry_t wait; 73 wait_queue_head_t *wqh; 74 poll_table pt; 75 int cpu; 76 }; 77 78 struct mshv_vtl { 79 struct device *module_dev; 80 u64 id; 81 }; 82 83 struct mshv_vtl_per_cpu { 84 struct mshv_vtl_run *run; 85 struct page *reg_page; 86 }; 87 88 /* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */ 89 union hv_synic_overlay_page_msr { 90 u64 as_uint64; 91 struct { 92 u64 enabled: 1; 93 u64 reserved: 11; 94 u64 pfn: 52; 95 } __packed; 96 }; 97 98 static struct mutex mshv_vtl_poll_file_lock; 99 static union hv_register_vsm_page_offsets mshv_vsm_page_offsets; 100 static union hv_register_vsm_capabilities mshv_vsm_capabilities; 101 102 static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file); 103 static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions); 104 static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu); 105 106 static const union hv_input_vtl input_vtl_zero; 107 static const union hv_input_vtl input_vtl_normal = { 108 .use_target_vtl = 1, 109 }; 110 111 static const struct file_operations mshv_vtl_fops; 112 113 static long 114 mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev) 115 { 116 struct mshv_vtl *vtl; 117 struct file *file; 118 int fd; 119 120 vtl = kzalloc_obj(*vtl); 121 if (!vtl) 122 return -ENOMEM; 123 124 fd = get_unused_fd_flags(O_CLOEXEC); 125 if (fd < 0) { 126 kfree(vtl); 127 return fd; 128 } 129 file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops, 130 vtl, O_RDWR); 131 if (IS_ERR(file)) { 132 kfree(vtl); 133 return PTR_ERR(file); 134 } 135 vtl->module_dev = module_dev; 136 fd_install(fd, file); 137 138 return fd; 139 } 140 141 static long 142 mshv_ioctl_check_extension(void __user *user_arg) 143 { 144 u32 arg; 145 146 if (copy_from_user(&arg, user_arg, sizeof(arg))) 147 return -EFAULT; 148 149 switch (arg) { 150 case MSHV_CAP_CORE_API_STABLE: 151 return 0; 152 case MSHV_CAP_REGISTER_PAGE: 153 return mshv_has_reg_page; 154 case MSHV_CAP_VTL_RETURN_ACTION: 155 return mshv_vsm_capabilities.return_action_available; 156 case MSHV_CAP_DR6_SHARED: 157 return mshv_vsm_capabilities.dr6_shared; 158 } 159 160 return -EOPNOTSUPP; 161 } 162 163 static long 164 mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 165 { 166 struct miscdevice *misc = filp->private_data; 167 168 switch (ioctl) { 169 case MSHV_CHECK_EXTENSION: 170 return mshv_ioctl_check_extension((void __user *)arg); 171 case MSHV_CREATE_VTL: 172 return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device); 173 } 174 175 return -ENOTTY; 176 } 177 178 static const struct file_operations mshv_dev_fops = { 179 .owner = THIS_MODULE, 180 .unlocked_ioctl = mshv_dev_ioctl, 181 .llseek = noop_llseek, 182 }; 183 184 static struct miscdevice mshv_dev = { 185 .minor = MISC_DYNAMIC_MINOR, 186 .name = "mshv", 187 .fops = &mshv_dev_fops, 188 .mode = 0600, 189 }; 190 191 static struct mshv_vtl_run *mshv_vtl_this_run(void) 192 { 193 return *this_cpu_ptr(&mshv_vtl_per_cpu.run); 194 } 195 196 static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu) 197 { 198 return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu); 199 } 200 201 static struct page *mshv_vtl_cpu_reg_page(int cpu) 202 { 203 return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu); 204 } 205 206 static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) 207 { 208 struct hv_register_assoc reg_assoc = {}; 209 union hv_synic_overlay_page_msr overlay = {}; 210 struct page *reg_page; 211 212 reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL); 213 if (!reg_page) { 214 WARN(1, "failed to allocate register page\n"); 215 return; 216 } 217 218 overlay.enabled = 1; 219 overlay.pfn = page_to_hvpfn(reg_page); 220 reg_assoc.name = HV_X64_REGISTER_REG_PAGE; 221 reg_assoc.value.reg64 = overlay.as_uint64; 222 223 if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, 224 1, input_vtl_zero, ®_assoc)) { 225 WARN(1, "failed to setup register page\n"); 226 __free_page(reg_page); 227 return; 228 } 229 230 per_cpu->reg_page = reg_page; 231 mshv_has_reg_page = true; 232 } 233 234 static void mshv_vtl_synic_enable_regs(unsigned int cpu) 235 { 236 union hv_synic_sint sint; 237 238 sint.as_uint64 = 0; 239 sint.vector = HYPERVISOR_CALLBACK_VECTOR; 240 sint.masked = false; 241 sint.auto_eoi = hv_recommend_using_aeoi(); 242 243 /* Enable intercepts */ 244 if (!mshv_vsm_capabilities.intercept_page_available) 245 hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX, 246 sint.as_uint64); 247 248 /* VTL2 Host VSP SINT is (un)masked when the user mode requests that */ 249 } 250 251 static int mshv_vtl_get_vsm_regs(void) 252 { 253 struct hv_register_assoc registers[2]; 254 int ret, count = 2; 255 256 registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS; 257 registers[1].name = HV_REGISTER_VSM_CAPABILITIES; 258 259 ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, 260 count, input_vtl_zero, registers); 261 if (ret) 262 return ret; 263 264 mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64; 265 mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64; 266 267 return ret; 268 } 269 270 static int mshv_vtl_configure_vsm_partition(struct device *dev) 271 { 272 union hv_register_vsm_partition_config config; 273 struct hv_register_assoc reg_assoc; 274 275 config.as_uint64 = 0; 276 config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK; 277 config.enable_vtl_protection = 1; 278 config.zero_memory_on_reset = 1; 279 config.intercept_vp_startup = 1; 280 config.intercept_cpuid_unimplemented = 1; 281 282 if (mshv_vsm_capabilities.intercept_page_available) { 283 dev_dbg(dev, "using intercept page\n"); 284 config.intercept_page = 1; 285 } 286 287 reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG; 288 reg_assoc.value.reg64 = config.as_uint64; 289 290 return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, 291 1, input_vtl_zero, ®_assoc); 292 } 293 294 static void mshv_vtl_vmbus_isr(void) 295 { 296 struct hv_per_cpu_context *per_cpu; 297 struct hv_message *msg; 298 u32 message_type; 299 union hv_synic_event_flags *event_flags; 300 struct eventfd_ctx *eventfd; 301 u16 i; 302 303 per_cpu = this_cpu_ptr(hv_context.cpu_context); 304 if (smp_processor_id() == 0) { 305 msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX; 306 message_type = READ_ONCE(msg->header.message_type); 307 if (message_type != HVMSG_NONE) 308 tasklet_schedule(&msg_dpc); 309 } 310 311 event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page + 312 VTL2_VMBUS_SINT_INDEX; 313 for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) { 314 if (!sync_test_and_clear_bit(i, event_flags->flags)) 315 continue; 316 rcu_read_lock(); 317 eventfd = READ_ONCE(flag_eventfds[i]); 318 if (eventfd) 319 eventfd_signal(eventfd); 320 rcu_read_unlock(); 321 } 322 323 vmbus_isr(); 324 } 325 326 static int mshv_vtl_alloc_context(unsigned int cpu) 327 { 328 struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu); 329 330 per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 331 if (!per_cpu->run) 332 return -ENOMEM; 333 334 if (mshv_vsm_capabilities.intercept_page_available) 335 mshv_vtl_configure_reg_page(per_cpu); 336 337 mshv_vtl_synic_enable_regs(cpu); 338 339 return 0; 340 } 341 342 static int mshv_vtl_cpuhp_online; 343 344 static int hv_vtl_setup_synic(void) 345 { 346 int ret; 347 348 /* Use our isr to first filter out packets destined for userspace */ 349 hv_setup_vmbus_handler(mshv_vtl_vmbus_isr); 350 351 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online", 352 mshv_vtl_alloc_context, NULL); 353 if (ret < 0) { 354 hv_setup_vmbus_handler(vmbus_isr); 355 return ret; 356 } 357 358 mshv_vtl_cpuhp_online = ret; 359 360 return 0; 361 } 362 363 static void hv_vtl_remove_synic(void) 364 { 365 cpuhp_remove_state(mshv_vtl_cpuhp_online); 366 hv_setup_vmbus_handler(vmbus_isr); 367 } 368 369 static int vtl_get_vp_register(struct hv_register_assoc *reg) 370 { 371 return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, 372 1, input_vtl_normal, reg); 373 } 374 375 static int vtl_set_vp_register(struct hv_register_assoc *reg) 376 { 377 return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, 378 1, input_vtl_normal, reg); 379 } 380 381 static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg) 382 { 383 struct mshv_vtl_ram_disposition vtl0_mem; 384 struct dev_pagemap *pgmap; 385 void *addr; 386 387 if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem))) 388 return -EFAULT; 389 /* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */ 390 if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) { 391 dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n", 392 vtl0_mem.start_pfn, vtl0_mem.last_pfn); 393 return -EFAULT; 394 } 395 396 pgmap = kzalloc_obj(*pgmap); 397 if (!pgmap) 398 return -ENOMEM; 399 400 pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn); 401 pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1; 402 pgmap->nr_range = 1; 403 pgmap->type = MEMORY_DEVICE_GENERIC; 404 405 /* 406 * Determine the highest page order that can be used for the given memory range. 407 * This works best when the range is aligned; i.e. both the start and the length. 408 */ 409 pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn); 410 dev_dbg(vtl->module_dev, 411 "Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n", 412 vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift); 413 414 addr = devm_memremap_pages(mem_dev, pgmap); 415 if (IS_ERR(addr)) { 416 dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr)); 417 kfree(pgmap); 418 return -EFAULT; 419 } 420 421 /* Don't free pgmap, since it has to stick around until the memory 422 * is unmapped, which will never happen as there is no scenario 423 * where VTL0 can be released/shutdown without bringing down VTL2. 424 */ 425 return 0; 426 } 427 428 static void mshv_vtl_cancel(int cpu) 429 { 430 int here = get_cpu(); 431 432 if (here != cpu) { 433 if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1)) 434 smp_send_reschedule(cpu); 435 } else { 436 WRITE_ONCE(mshv_vtl_this_run()->cancel, 1); 437 } 438 put_cpu(); 439 } 440 441 static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) 442 { 443 struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait); 444 445 mshv_vtl_cancel(poll_file->cpu); 446 447 return 0; 448 } 449 450 static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) 451 { 452 struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt); 453 454 WARN_ON(poll_file->wqh); 455 poll_file->wqh = wqh; 456 add_wait_queue(wqh, &poll_file->wait); 457 } 458 459 static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input) 460 { 461 struct file *file, *old_file; 462 struct mshv_vtl_poll_file *poll_file; 463 struct mshv_vtl_set_poll_file input; 464 465 if (copy_from_user(&input, user_input, sizeof(input))) 466 return -EFAULT; 467 468 if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu)) 469 return -EINVAL; 470 /* 471 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists. 472 * CPU is expected to remain online after above cpu_online() check. 473 */ 474 475 file = NULL; 476 file = fget(input.fd); 477 if (!file) 478 return -EBADFD; 479 480 poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu)); 481 if (!poll_file) 482 return -EINVAL; 483 484 mutex_lock(&mshv_vtl_poll_file_lock); 485 486 if (poll_file->wqh) 487 remove_wait_queue(poll_file->wqh, &poll_file->wait); 488 poll_file->wqh = NULL; 489 490 old_file = poll_file->file; 491 poll_file->file = file; 492 poll_file->cpu = input.cpu; 493 494 if (file) { 495 init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake); 496 init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc); 497 vfs_poll(file, &poll_file->pt); 498 } 499 500 mutex_unlock(&mshv_vtl_poll_file_lock); 501 502 if (old_file) 503 fput(old_file); 504 505 return 0; 506 } 507 508 /* Static table mapping register names to their corresponding actions */ 509 static const struct { 510 enum hv_register_name reg_name; 511 int debug_reg_num; /* -1 if not a debug register */ 512 u32 msr_addr; /* 0 if not an MSR */ 513 } reg_table[] = { 514 /* Debug registers */ 515 {HV_X64_REGISTER_DR0, 0, 0}, 516 {HV_X64_REGISTER_DR1, 1, 0}, 517 {HV_X64_REGISTER_DR2, 2, 0}, 518 {HV_X64_REGISTER_DR3, 3, 0}, 519 {HV_X64_REGISTER_DR6, 6, 0}, 520 /* MTRR MSRs */ 521 {HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap}, 522 {HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType}, 523 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)}, 524 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)}, 525 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)}, 526 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)}, 527 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)}, 528 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)}, 529 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)}, 530 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)}, 531 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)}, 532 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)}, 533 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)}, 534 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)}, 535 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)}, 536 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)}, 537 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)}, 538 {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)}, 539 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)}, 540 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)}, 541 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)}, 542 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)}, 543 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)}, 544 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)}, 545 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)}, 546 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)}, 547 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)}, 548 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)}, 549 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)}, 550 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)}, 551 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)}, 552 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)}, 553 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)}, 554 {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)}, 555 {HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000}, 556 {HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000}, 557 {HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000}, 558 {HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000}, 559 {HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000}, 560 {HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000}, 561 {HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000}, 562 {HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000}, 563 {HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000}, 564 {HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000}, 565 {HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000}, 566 }; 567 568 static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set) 569 { 570 u64 *reg64; 571 enum hv_register_name gpr_name; 572 int i; 573 574 gpr_name = regs->name; 575 reg64 = ®s->value.reg64; 576 577 /* Search for the register in the table */ 578 for (i = 0; i < ARRAY_SIZE(reg_table); i++) { 579 if (reg_table[i].reg_name != gpr_name) 580 continue; 581 if (reg_table[i].debug_reg_num != -1) { 582 /* Handle debug registers */ 583 if (gpr_name == HV_X64_REGISTER_DR6 && 584 !mshv_vsm_capabilities.dr6_shared) 585 goto hypercall; 586 if (set) 587 native_set_debugreg(reg_table[i].debug_reg_num, *reg64); 588 else 589 *reg64 = native_get_debugreg(reg_table[i].debug_reg_num); 590 } else { 591 /* Handle MSRs */ 592 if (set) 593 wrmsrl(reg_table[i].msr_addr, *reg64); 594 else 595 rdmsrl(reg_table[i].msr_addr, *reg64); 596 } 597 return 0; 598 } 599 600 hypercall: 601 return 1; 602 } 603 604 static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0) 605 { 606 struct hv_vp_assist_page *hvp; 607 608 hvp = hv_vp_assist_page[smp_processor_id()]; 609 610 /* 611 * Process signal event direct set in the run page, if any. 612 */ 613 if (mshv_vsm_capabilities.return_action_available) { 614 u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size); 615 616 WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0); 617 618 /* 619 * Hypervisor will take care of clearing out the actions 620 * set in the assist page. 621 */ 622 memcpy(hvp->vtl_ret_actions, 623 mshv_vtl_this_run()->vtl_ret_actions, 624 min_t(u32, offset, sizeof(hvp->vtl_ret_actions))); 625 } 626 627 mshv_vtl_return_call(vtl0); 628 } 629 630 static bool mshv_vtl_process_intercept(void) 631 { 632 struct hv_per_cpu_context *mshv_cpu; 633 void *synic_message_page; 634 struct hv_message *msg; 635 u32 message_type; 636 637 mshv_cpu = this_cpu_ptr(hv_context.cpu_context); 638 synic_message_page = mshv_cpu->hyp_synic_message_page; 639 if (unlikely(!synic_message_page)) 640 return true; 641 642 msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX; 643 message_type = READ_ONCE(msg->header.message_type); 644 if (message_type == HVMSG_NONE) 645 return true; 646 647 memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg)); 648 vmbus_signal_eom(msg, message_type); 649 650 return false; 651 } 652 653 static int mshv_vtl_ioctl_return_to_lower_vtl(void) 654 { 655 preempt_disable(); 656 for (;;) { 657 unsigned long irq_flags; 658 struct hv_vp_assist_page *hvp; 659 int ret; 660 661 if (__xfer_to_guest_mode_work_pending()) { 662 preempt_enable(); 663 ret = xfer_to_guest_mode_handle_work(); 664 if (ret) 665 return ret; 666 preempt_disable(); 667 } 668 669 local_irq_save(irq_flags); 670 if (READ_ONCE(mshv_vtl_this_run()->cancel)) { 671 local_irq_restore(irq_flags); 672 preempt_enable(); 673 return -EINTR; 674 } 675 676 mshv_vtl_return(&mshv_vtl_this_run()->cpu_context); 677 local_irq_restore(irq_flags); 678 679 hvp = hv_vp_assist_page[smp_processor_id()]; 680 this_cpu_inc(num_vtl0_transitions); 681 switch (hvp->vtl_entry_reason) { 682 case MSHV_ENTRY_REASON_INTERRUPT: 683 if (!mshv_vsm_capabilities.intercept_page_available && 684 likely(!mshv_vtl_process_intercept())) 685 goto done; 686 break; 687 688 case MSHV_ENTRY_REASON_INTERCEPT: 689 WARN_ON(!mshv_vsm_capabilities.intercept_page_available); 690 memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message, 691 sizeof(hvp->intercept_message)); 692 goto done; 693 694 default: 695 panic("unknown entry reason: %d", hvp->vtl_entry_reason); 696 } 697 } 698 699 done: 700 preempt_enable(); 701 702 return 0; 703 } 704 705 static long 706 mshv_vtl_ioctl_get_regs(void __user *user_args) 707 { 708 struct mshv_vp_registers args; 709 struct hv_register_assoc reg; 710 long ret; 711 712 if (copy_from_user(&args, user_args, sizeof(args))) 713 return -EFAULT; 714 715 /* This IOCTL supports processing only one register at a time. */ 716 if (args.count != 1) 717 return -EINVAL; 718 719 if (copy_from_user(®, (void __user *)args.regs_ptr, 720 sizeof(reg))) 721 return -EFAULT; 722 723 ret = mshv_vtl_get_set_reg(®, false); 724 if (!ret) 725 goto copy_args; /* No need of hypercall */ 726 ret = vtl_get_vp_register(®); 727 if (ret) 728 return ret; 729 730 copy_args: 731 if (copy_to_user((void __user *)args.regs_ptr, ®, sizeof(reg))) 732 ret = -EFAULT; 733 734 return ret; 735 } 736 737 static long 738 mshv_vtl_ioctl_set_regs(void __user *user_args) 739 { 740 struct mshv_vp_registers args; 741 struct hv_register_assoc reg; 742 long ret; 743 744 if (copy_from_user(&args, user_args, sizeof(args))) 745 return -EFAULT; 746 747 /* This IOCTL supports processing only one register at a time. */ 748 if (args.count != 1) 749 return -EINVAL; 750 751 if (copy_from_user(®, (void __user *)args.regs_ptr, sizeof(reg))) 752 return -EFAULT; 753 754 ret = mshv_vtl_get_set_reg(®, true); 755 if (!ret) 756 return ret; /* No need of hypercall */ 757 ret = vtl_set_vp_register(®); 758 759 return ret; 760 } 761 762 static long 763 mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 764 { 765 long ret; 766 struct mshv_vtl *vtl = filp->private_data; 767 768 switch (ioctl) { 769 case MSHV_SET_POLL_FILE: 770 ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg); 771 break; 772 case MSHV_GET_VP_REGISTERS: 773 ret = mshv_vtl_ioctl_get_regs((void __user *)arg); 774 break; 775 case MSHV_SET_VP_REGISTERS: 776 ret = mshv_vtl_ioctl_set_regs((void __user *)arg); 777 break; 778 case MSHV_RETURN_TO_LOWER_VTL: 779 ret = mshv_vtl_ioctl_return_to_lower_vtl(); 780 break; 781 case MSHV_ADD_VTL0_MEMORY: 782 ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg); 783 break; 784 default: 785 dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl); 786 ret = -ENOTTY; 787 } 788 789 return ret; 790 } 791 792 static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) 793 { 794 struct page *page; 795 int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK; 796 int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT; 797 798 if (!cpu_online(cpu)) 799 return VM_FAULT_SIGBUS; 800 /* 801 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists. 802 * CPU is expected to remain online after above cpu_online() check. 803 */ 804 805 if (real_off == MSHV_RUN_PAGE_OFFSET) { 806 page = virt_to_page(mshv_vtl_cpu_run(cpu)); 807 } else if (real_off == MSHV_REG_PAGE_OFFSET) { 808 if (!mshv_has_reg_page) 809 return VM_FAULT_SIGBUS; 810 page = mshv_vtl_cpu_reg_page(cpu); 811 } else { 812 return VM_FAULT_NOPAGE; 813 } 814 815 get_page(page); 816 vmf->page = page; 817 818 return 0; 819 } 820 821 static const struct vm_operations_struct mshv_vtl_vm_ops = { 822 .fault = mshv_vtl_fault, 823 }; 824 825 static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma) 826 { 827 vma->vm_ops = &mshv_vtl_vm_ops; 828 829 return 0; 830 } 831 832 static int mshv_vtl_release(struct inode *inode, struct file *filp) 833 { 834 struct mshv_vtl *vtl = filp->private_data; 835 836 kfree(vtl); 837 838 return 0; 839 } 840 841 static const struct file_operations mshv_vtl_fops = { 842 .owner = THIS_MODULE, 843 .unlocked_ioctl = mshv_vtl_ioctl, 844 .release = mshv_vtl_release, 845 .mmap = mshv_vtl_mmap, 846 }; 847 848 static void mshv_vtl_synic_mask_vmbus_sint(void *info) 849 { 850 union hv_synic_sint sint; 851 const u8 *mask = info; 852 853 sint.as_uint64 = 0; 854 sint.vector = HYPERVISOR_CALLBACK_VECTOR; 855 sint.masked = (*mask != 0); 856 sint.auto_eoi = hv_recommend_using_aeoi(); 857 858 hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX, 859 sint.as_uint64); 860 861 if (!sint.masked) 862 pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id()); 863 else 864 pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id()); 865 } 866 867 static void mshv_vtl_read_remote(void *buffer) 868 { 869 struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context); 870 struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page + 871 VTL2_VMBUS_SINT_INDEX; 872 u32 message_type = READ_ONCE(msg->header.message_type); 873 874 WRITE_ONCE(has_message, false); 875 if (message_type == HVMSG_NONE) 876 return; 877 878 memcpy(buffer, msg, sizeof(*msg)); 879 vmbus_signal_eom(msg, message_type); 880 } 881 882 static bool vtl_synic_mask_vmbus_sint_masked = true; 883 884 static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset) 885 { 886 struct hv_message msg = {}; 887 int ret; 888 889 if (size < sizeof(msg)) 890 return -EINVAL; 891 892 for (;;) { 893 smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true); 894 if (msg.header.message_type != HVMSG_NONE) 895 break; 896 897 if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked)) 898 return 0; /* EOF */ 899 900 if (filp->f_flags & O_NONBLOCK) 901 return -EAGAIN; 902 903 ret = wait_event_interruptible(fd_wait_queue, 904 READ_ONCE(has_message) || 905 READ_ONCE(vtl_synic_mask_vmbus_sint_masked)); 906 if (ret) 907 return ret; 908 } 909 910 if (copy_to_user(arg, &msg, sizeof(msg))) 911 return -EFAULT; 912 913 return sizeof(msg); 914 } 915 916 static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait) 917 { 918 __poll_t mask = 0; 919 920 poll_wait(filp, &fd_wait_queue, wait); 921 if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked)) 922 mask |= EPOLLIN | EPOLLRDNORM; 923 924 return mask; 925 } 926 927 static void mshv_vtl_sint_on_msg_dpc(unsigned long data) 928 { 929 WRITE_ONCE(has_message, true); 930 wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN); 931 } 932 933 static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg) 934 { 935 struct mshv_vtl_sint_post_msg message; 936 u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT]; 937 938 if (copy_from_user(&message, arg, sizeof(message))) 939 return -EFAULT; 940 if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) 941 return -EINVAL; 942 if (copy_from_user(payload, (void __user *)message.payload_ptr, 943 message.payload_size)) 944 return -EFAULT; 945 946 return hv_post_message((union hv_connection_id)message.connection_id, 947 message.message_type, (void *)payload, 948 message.payload_size); 949 } 950 951 static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg) 952 { 953 u64 input, status; 954 struct mshv_vtl_signal_event signal_event; 955 956 if (copy_from_user(&signal_event, arg, sizeof(signal_event))) 957 return -EFAULT; 958 959 input = signal_event.connection_id | ((u64)signal_event.flag << 32); 960 961 status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input); 962 963 return hv_result_to_errno(status); 964 } 965 966 static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg) 967 { 968 struct mshv_vtl_set_eventfd set_eventfd; 969 struct eventfd_ctx *eventfd, *old_eventfd; 970 971 if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd))) 972 return -EFAULT; 973 if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT) 974 return -EINVAL; 975 976 eventfd = NULL; 977 if (set_eventfd.fd >= 0) { 978 eventfd = eventfd_ctx_fdget(set_eventfd.fd); 979 if (IS_ERR(eventfd)) 980 return PTR_ERR(eventfd); 981 } 982 983 guard(mutex)(&flag_lock); 984 old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]); 985 WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd); 986 987 if (old_eventfd) { 988 synchronize_rcu(); 989 eventfd_ctx_put(old_eventfd); 990 } 991 992 return 0; 993 } 994 995 static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg) 996 { 997 static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex); 998 struct mshv_sint_mask mask; 999 1000 if (copy_from_user(&mask, arg, sizeof(mask))) 1001 return -EFAULT; 1002 guard(mutex)(&vtl2_vmbus_sint_mask_mutex); 1003 on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1); 1004 WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0); 1005 if (mask.mask) 1006 wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN); 1007 1008 return 0; 1009 } 1010 1011 static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 1012 { 1013 switch (cmd) { 1014 case MSHV_SINT_POST_MESSAGE: 1015 return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg); 1016 case MSHV_SINT_SIGNAL_EVENT: 1017 return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg); 1018 case MSHV_SINT_SET_EVENTFD: 1019 return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg); 1020 case MSHV_SINT_PAUSE_MESSAGE_STREAM: 1021 return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg); 1022 default: 1023 return -ENOIOCTLCMD; 1024 } 1025 } 1026 1027 static const struct file_operations mshv_vtl_sint_ops = { 1028 .owner = THIS_MODULE, 1029 .read = mshv_vtl_sint_read, 1030 .poll = mshv_vtl_sint_poll, 1031 .unlocked_ioctl = mshv_vtl_sint_ioctl, 1032 }; 1033 1034 static struct miscdevice mshv_vtl_sint_dev = { 1035 .name = "mshv_sint", 1036 .fops = &mshv_vtl_sint_ops, 1037 .mode = 0600, 1038 .minor = MISC_DYNAMIC_MINOR, 1039 }; 1040 1041 static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f) 1042 { 1043 struct miscdevice *dev = f->private_data; 1044 struct mshv_vtl_hvcall_fd *fd; 1045 1046 if (!capable(CAP_SYS_ADMIN)) 1047 return -EPERM; 1048 1049 fd = vzalloc(sizeof(*fd)); 1050 if (!fd) 1051 return -ENOMEM; 1052 fd->dev = dev; 1053 f->private_data = fd; 1054 mutex_init(&fd->init_mutex); 1055 1056 return 0; 1057 } 1058 1059 static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f) 1060 { 1061 struct mshv_vtl_hvcall_fd *fd; 1062 1063 fd = f->private_data; 1064 if (fd) { 1065 vfree(fd); 1066 f->private_data = NULL; 1067 } 1068 1069 return 0; 1070 } 1071 1072 static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd, 1073 struct mshv_vtl_hvcall_setup __user *hvcall_setup_user) 1074 { 1075 struct mshv_vtl_hvcall_setup hvcall_setup; 1076 1077 guard(mutex)(&fd->init_mutex); 1078 1079 if (fd->allow_map_initialized) { 1080 dev_err(fd->dev->this_device, 1081 "Hypercall allow map has already been set, pid %d\n", 1082 current->pid); 1083 return -EINVAL; 1084 } 1085 1086 if (copy_from_user(&hvcall_setup, hvcall_setup_user, 1087 sizeof(struct mshv_vtl_hvcall_setup))) { 1088 return -EFAULT; 1089 } 1090 if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap)) 1091 return -EINVAL; 1092 1093 if (copy_from_user(&fd->allow_bitmap, 1094 (void __user *)hvcall_setup.allow_bitmap_ptr, 1095 hvcall_setup.bitmap_array_size)) { 1096 return -EFAULT; 1097 } 1098 1099 dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n", 1100 current->pid); 1101 fd->allow_map_initialized = true; 1102 return 0; 1103 } 1104 1105 static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code) 1106 { 1107 return test_bit(call_code, (unsigned long *)fd->allow_bitmap); 1108 } 1109 1110 static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd, 1111 struct mshv_vtl_hvcall __user *hvcall_user) 1112 { 1113 struct mshv_vtl_hvcall hvcall; 1114 void *in, *out; 1115 int ret; 1116 1117 if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall))) 1118 return -EFAULT; 1119 if (hvcall.input_size > HV_HYP_PAGE_SIZE) 1120 return -EINVAL; 1121 if (hvcall.output_size > HV_HYP_PAGE_SIZE) 1122 return -EINVAL; 1123 1124 /* 1125 * By default, all hypercalls are not allowed. 1126 * The user mode code has to set up the allow bitmap once. 1127 */ 1128 1129 if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) { 1130 dev_err(fd->dev->this_device, 1131 "Hypercall with control data %#llx isn't allowed\n", 1132 hvcall.control); 1133 return -EPERM; 1134 } 1135 1136 /* 1137 * This may create a problem for Confidential VM (CVM) usecase where we need to use 1138 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and 1139 * hyperv_pcpu_output_arg) for making a hypervisor call. 1140 * 1141 * TODO: Take care of this when CVM support is added. 1142 */ 1143 in = (void *)__get_free_page(GFP_KERNEL); 1144 out = (void *)__get_free_page(GFP_KERNEL); 1145 1146 if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) { 1147 ret = -EFAULT; 1148 goto free_pages; 1149 } 1150 1151 hvcall.status = hv_do_hypercall(hvcall.control, in, out); 1152 1153 if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) { 1154 ret = -EFAULT; 1155 goto free_pages; 1156 } 1157 ret = put_user(hvcall.status, &hvcall_user->status); 1158 free_pages: 1159 free_page((unsigned long)in); 1160 free_page((unsigned long)out); 1161 1162 return ret; 1163 } 1164 1165 static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 1166 { 1167 struct mshv_vtl_hvcall_fd *fd = f->private_data; 1168 1169 switch (cmd) { 1170 case MSHV_HVCALL_SETUP: 1171 return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg); 1172 case MSHV_HVCALL: 1173 return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg); 1174 default: 1175 break; 1176 } 1177 1178 return -ENOIOCTLCMD; 1179 } 1180 1181 static const struct file_operations mshv_vtl_hvcall_dev_file_ops = { 1182 .owner = THIS_MODULE, 1183 .open = mshv_vtl_hvcall_dev_open, 1184 .release = mshv_vtl_hvcall_dev_release, 1185 .unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl, 1186 }; 1187 1188 static struct miscdevice mshv_vtl_hvcall_dev = { 1189 .name = "mshv_hvcall", 1190 .nodename = "mshv_hvcall", 1191 .fops = &mshv_vtl_hvcall_dev_file_ops, 1192 .mode = 0600, 1193 .minor = MISC_DYNAMIC_MINOR, 1194 }; 1195 1196 static int mshv_vtl_low_open(struct inode *inodep, struct file *filp) 1197 { 1198 pid_t pid = task_pid_vnr(current); 1199 uid_t uid = current_uid().val; 1200 int ret = 0; 1201 1202 pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid); 1203 1204 if (capable(CAP_SYS_ADMIN)) { 1205 filp->private_data = inodep; 1206 } else { 1207 pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d", 1208 __func__, pid, uid); 1209 ret = -EPERM; 1210 } 1211 1212 return ret; 1213 } 1214 1215 static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn) 1216 { 1217 unsigned long mask = size - 1; 1218 unsigned long start = vmf->address & ~mask; 1219 unsigned long end = start + size; 1220 bool is_valid; 1221 1222 is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) && 1223 start >= vmf->vma->vm_start && 1224 end <= vmf->vma->vm_end; 1225 1226 if (is_valid) 1227 *pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT); 1228 1229 return is_valid; 1230 } 1231 1232 static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order) 1233 { 1234 unsigned long pfn = vmf->pgoff; 1235 vm_fault_t ret = VM_FAULT_FALLBACK; 1236 1237 switch (order) { 1238 case 0: 1239 return vmf_insert_mixed(vmf->vma, vmf->address, pfn); 1240 1241 case PMD_ORDER: 1242 if (can_fault(vmf, PMD_SIZE, &pfn)) 1243 ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); 1244 return ret; 1245 1246 case PUD_ORDER: 1247 if (can_fault(vmf, PUD_SIZE, &pfn)) 1248 ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); 1249 return ret; 1250 1251 default: 1252 return VM_FAULT_SIGBUS; 1253 } 1254 } 1255 1256 static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf) 1257 { 1258 return mshv_vtl_low_huge_fault(vmf, 0); 1259 } 1260 1261 static const struct vm_operations_struct mshv_vtl_low_vm_ops = { 1262 .fault = mshv_vtl_low_fault, 1263 .huge_fault = mshv_vtl_low_huge_fault, 1264 }; 1265 1266 static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma) 1267 { 1268 vma->vm_ops = &mshv_vtl_low_vm_ops; 1269 vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP); 1270 1271 return 0; 1272 } 1273 1274 static const struct file_operations mshv_vtl_low_file_ops = { 1275 .owner = THIS_MODULE, 1276 .open = mshv_vtl_low_open, 1277 .mmap = mshv_vtl_low_mmap, 1278 }; 1279 1280 static struct miscdevice mshv_vtl_low = { 1281 .name = "mshv_vtl_low", 1282 .nodename = "mshv_vtl_low", 1283 .fops = &mshv_vtl_low_file_ops, 1284 .mode = 0600, 1285 .minor = MISC_DYNAMIC_MINOR, 1286 }; 1287 1288 static int __init mshv_vtl_init(void) 1289 { 1290 int ret; 1291 struct device *dev = mshv_dev.this_device; 1292 1293 /* 1294 * This creates /dev/mshv which provides functionality to create VTLs and partitions. 1295 */ 1296 ret = misc_register(&mshv_dev); 1297 if (ret) { 1298 dev_err(dev, "mshv device register failed: %d\n", ret); 1299 goto free_dev; 1300 } 1301 1302 tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0); 1303 init_waitqueue_head(&fd_wait_queue); 1304 1305 if (mshv_vtl_get_vsm_regs()) { 1306 dev_emerg(dev, "Unable to get VSM capabilities !!\n"); 1307 ret = -ENODEV; 1308 goto free_dev; 1309 } 1310 if (mshv_vtl_configure_vsm_partition(dev)) { 1311 dev_emerg(dev, "VSM configuration failed !!\n"); 1312 ret = -ENODEV; 1313 goto free_dev; 1314 } 1315 1316 mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset); 1317 ret = hv_vtl_setup_synic(); 1318 if (ret) 1319 goto free_dev; 1320 1321 /* 1322 * mshv_sint device adds VMBus relay ioctl support. 1323 * This provides a channel for VTL0 to communicate with VTL2. 1324 */ 1325 ret = misc_register(&mshv_vtl_sint_dev); 1326 if (ret) 1327 goto free_synic; 1328 1329 /* 1330 * mshv_hvcall device adds interface to enable userspace for direct hypercalls support. 1331 */ 1332 ret = misc_register(&mshv_vtl_hvcall_dev); 1333 if (ret) 1334 goto free_sint; 1335 1336 /* 1337 * mshv_vtl_low device is used to map VTL0 address space to a user-mode process in VTL2. 1338 * It implements mmap() to allow a user-mode process in VTL2 to map to the address of VTL0. 1339 */ 1340 ret = misc_register(&mshv_vtl_low); 1341 if (ret) 1342 goto free_hvcall; 1343 1344 /* 1345 * "mshv vtl mem dev" device is later used to setup VTL0 memory. 1346 */ 1347 mem_dev = kzalloc_obj(*mem_dev); 1348 if (!mem_dev) { 1349 ret = -ENOMEM; 1350 goto free_low; 1351 } 1352 1353 mutex_init(&mshv_vtl_poll_file_lock); 1354 1355 device_initialize(mem_dev); 1356 dev_set_name(mem_dev, "mshv vtl mem dev"); 1357 ret = device_add(mem_dev); 1358 if (ret) { 1359 dev_err(dev, "mshv vtl mem dev add: %d\n", ret); 1360 goto free_mem; 1361 } 1362 1363 return 0; 1364 1365 free_mem: 1366 kfree(mem_dev); 1367 free_low: 1368 misc_deregister(&mshv_vtl_low); 1369 free_hvcall: 1370 misc_deregister(&mshv_vtl_hvcall_dev); 1371 free_sint: 1372 misc_deregister(&mshv_vtl_sint_dev); 1373 free_synic: 1374 hv_vtl_remove_synic(); 1375 free_dev: 1376 misc_deregister(&mshv_dev); 1377 1378 return ret; 1379 } 1380 1381 static void __exit mshv_vtl_exit(void) 1382 { 1383 device_del(mem_dev); 1384 kfree(mem_dev); 1385 misc_deregister(&mshv_vtl_low); 1386 misc_deregister(&mshv_vtl_hvcall_dev); 1387 misc_deregister(&mshv_vtl_sint_dev); 1388 hv_vtl_remove_synic(); 1389 misc_deregister(&mshv_dev); 1390 } 1391 1392 module_init(mshv_vtl_init); 1393 module_exit(mshv_vtl_exit); 1394