// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Author:
 *   Roman Kisel <romank@linux.microsoft.com>
 *   Saurabh Sengar <ssengar@linux.microsoft.com>
 *   Naman Jain <namjain@linux.microsoft.com>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/anon_inodes.h>
#include <linux/cpuhotplug.h>
#include <linux/count_zeros.h>
#include <linux/entry-virt.h>
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <asm/debugreg.h>
#include <asm/mshyperv.h>
#include <trace/events/ipi.h>
#include <uapi/asm/mtrr.h>
#include <uapi/linux/mshv.h>
#include <hyperv/hvhdk.h>

#include "../../kernel/fpu/legacy.h"
#include "mshv.h"
#include "mshv_vtl.h"
#include "hyperv_vmbus.h"

MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");

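/*
 * Entry reasons reported in the VP assist page when control comes back to
 * VTL2 after a call into the lower VTL; see mshv_vtl_ioctl_return_to_lower_vtl().
 */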
#define MSHV_ENTRY_REASON_LOWER_VTL_CALL	0x1
#define MSHV_ENTRY_REASON_INTERRUPT		0x2
#define MSHV_ENTRY_REASON_INTERCEPT		0x3

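/*
 * mmap() page offsets for the VTL fd returned by MSHV_CREATE_VTL: the low
 * 16 bits of the page offset select the CPU, the bits above select which
 * per-CPU page (run page or register page) is mapped; see mshv_vtl_fault().
 */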
#define MSHV_REAL_OFF_SHIFT	16
#define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
#define MSHV_RUN_PAGE_OFFSET	0
#define MSHV_REG_PAGE_OFFSET	1
#define VTL2_VMBUS_SINT_INDEX	7

static struct device *mem_dev;

static struct tasklet_struct msg_dpc;
static wait_queue_head_t fd_wait_queue;
static bool has_message;
static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
static DEFINE_MUTEX(flag_lock);
static bool __read_mostly mshv_has_reg_page;

/* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */
#define MAX_BITMAP_SIZE	((U16_MAX + 1) / 8)

struct mshv_vtl_hvcall_fd {
	u8 allow_bitmap[MAX_BITMAP_SIZE];
	bool allow_map_initialized;
	/*
	 * Used to protect hvcall setup in IOCTLs
	 */
	struct mutex init_mutex;
	struct miscdevice *dev;
};

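/*
 * Per-CPU file polled on behalf of the VMM: when the polled file becomes
 * ready, the wait queue callback cancels the pending VTL0 run on that CPU;
 * see mshv_vtl_ioctl_set_poll_file() and mshv_vtl_poll_file_wake().
 */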
struct mshv_vtl_poll_file {
	struct file *file;
	wait_queue_entry_t wait;
	wait_queue_head_t *wqh;
	poll_table pt;
	int cpu;
};

struct mshv_vtl {
	struct device *module_dev;
	u64 id;
};

struct mshv_vtl_per_cpu {
	struct mshv_vtl_run *run;
	struct page *reg_page;
};

/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
union hv_synic_overlay_page_msr {
	u64 as_uint64;
	struct {
		u64 enabled: 1;
		u64 reserved: 11;
		u64 pfn: 52;
	} __packed;
};

static struct mutex mshv_vtl_poll_file_lock;
static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
static union hv_register_vsm_capabilities mshv_vsm_capabilities;

static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);

static const union hv_input_vtl input_vtl_zero;
static const union hv_input_vtl input_vtl_normal = {
	.use_target_vtl = 1,
};

static const struct file_operations mshv_vtl_fops;

static long
mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
{
	struct mshv_vtl *vtl;
	struct file *file;
	int fd;

	vtl = kzalloc(sizeof(*vtl), GFP_KERNEL);
	if (!vtl)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		kfree(vtl);
		return fd;
	}
	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
				  vtl, O_RDWR);
	if (IS_ERR(file)) {
		kfree(vtl);
		return PTR_ERR(file);
	}
	vtl->module_dev = module_dev;
	fd_install(fd, file);

	return fd;
}

static long
mshv_ioctl_check_extension(void __user *user_arg)
{
	u32 arg;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	switch (arg) {
	case MSHV_CAP_CORE_API_STABLE:
		return 0;
	case MSHV_CAP_REGISTER_PAGE:
		return mshv_has_reg_page;
	case MSHV_CAP_VTL_RETURN_ACTION:
		return mshv_vsm_capabilities.return_action_available;
	case MSHV_CAP_DR6_SHARED:
		return mshv_vsm_capabilities.dr6_shared;
	}

	return -EOPNOTSUPP;
}

static long
mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	struct miscdevice *misc = filp->private_data;

	switch (ioctl) {
	case MSHV_CHECK_EXTENSION:
		return mshv_ioctl_check_extension((void __user *)arg);
	case MSHV_CREATE_VTL:
		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
	}

	return -ENOTTY;
}

static const struct file_operations mshv_dev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = mshv_dev_ioctl,
	.llseek = noop_llseek,
};

static struct miscdevice mshv_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "mshv",
	.fops = &mshv_dev_fops,
	.mode = 0600,
};

static struct mshv_vtl_run *mshv_vtl_this_run(void)
{
	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
}

static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
}

static struct page *mshv_vtl_cpu_reg_page(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
}

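/*
 * Allocate and register the per-VP register overlay page. On success the page
 * is advertised through MSHV_CAP_REGISTER_PAGE and can be mapped by userspace
 * via mmap() at MSHV_REG_PAGE_OFFSET (see mshv_vtl_fault()).
 */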
static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
{
	struct hv_register_assoc reg_assoc = {};
	union hv_synic_overlay_page_msr overlay = {};
	struct page *reg_page;

	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
	if (!reg_page) {
		WARN(1, "failed to allocate register page\n");
		return;
	}

	overlay.enabled = 1;
	overlay.pfn = page_to_hvpfn(reg_page);
	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
	reg_assoc.value.reg64 = overlay.as_uint64;

	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				     1, input_vtl_zero, &reg_assoc)) {
		WARN(1, "failed to setup register page\n");
		__free_page(reg_page);
		return;
	}

	per_cpu->reg_page = reg_page;
	mshv_has_reg_page = true;
}

static void mshv_vtl_synic_enable_regs(unsigned int cpu)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();

	/* Enable intercepts */
	if (!mshv_vsm_capabilities.intercept_page_available)
		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			   sint.as_uint64);

	/* VTL2 Host VSP SINT is (un)masked when the user mode requests that */
}

static int mshv_vtl_get_vsm_regs(void)
{
	struct hv_register_assoc registers[2];
	int ret, count = 2;

	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;

	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				       count, input_vtl_zero, registers);
	if (ret)
		return ret;

	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;

	return ret;
}

static int mshv_vtl_configure_vsm_partition(struct device *dev)
{
	union hv_register_vsm_partition_config config;
	struct hv_register_assoc reg_assoc;

	config.as_uint64 = 0;
	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
	config.enable_vtl_protection = 1;
	config.zero_memory_on_reset = 1;
	config.intercept_vp_startup = 1;
	config.intercept_cpuid_unimplemented = 1;

	if (mshv_vsm_capabilities.intercept_page_available) {
		dev_dbg(dev, "using intercept page\n");
		config.intercept_page = 1;
	}

	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
	reg_assoc.value.reg64 = config.as_uint64;

	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_zero, &reg_assoc);
}

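/*
 * VMBus interrupt handler for VTL2: messages destined for the userspace relay
 * (VTL2_VMBUS_SINT_INDEX) schedule the message tasklet on CPU0, event flags on
 * that SINT are forwarded to any registered eventfds, and everything else is
 * handed to the regular vmbus_isr().
 */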
static void mshv_vtl_vmbus_isr(void)
{
	struct hv_per_cpu_context *per_cpu;
	struct hv_message *msg;
	u32 message_type;
	union hv_synic_event_flags *event_flags;
	struct eventfd_ctx *eventfd;
	u16 i;

	per_cpu = this_cpu_ptr(hv_context.cpu_context);
	if (smp_processor_id() == 0) {
		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
		message_type = READ_ONCE(msg->header.message_type);
		if (message_type != HVMSG_NONE)
			tasklet_schedule(&msg_dpc);
	}

	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
			VTL2_VMBUS_SINT_INDEX;
	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
		if (!sync_test_and_clear_bit(i, event_flags->flags))
			continue;
		rcu_read_lock();
		eventfd = READ_ONCE(flag_eventfds[i]);
		if (eventfd)
			eventfd_signal(eventfd);
		rcu_read_unlock();
	}

	vmbus_isr();
}

static int mshv_vtl_alloc_context(unsigned int cpu)
{
	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);

	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!per_cpu->run)
		return -ENOMEM;

	if (mshv_vsm_capabilities.intercept_page_available)
		mshv_vtl_configure_reg_page(per_cpu);

	mshv_vtl_synic_enable_regs(cpu);

	return 0;
}

static int mshv_vtl_cpuhp_online;

static int hv_vtl_setup_synic(void)
{
	int ret;

	/* Use our isr to first filter out packets destined for userspace */
	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
				mshv_vtl_alloc_context, NULL);
	if (ret < 0) {
		hv_setup_vmbus_handler(vmbus_isr);
		return ret;
	}

	mshv_vtl_cpuhp_online = ret;

	return 0;
}

static void hv_vtl_remove_synic(void)
{
	cpuhp_remove_state(mshv_vtl_cpuhp_online);
	hv_setup_vmbus_handler(vmbus_isr);
}

static int vtl_get_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int vtl_set_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
{
	struct mshv_vtl_ram_disposition vtl0_mem;
	struct dev_pagemap *pgmap;
	void *addr;

	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
		return -EFAULT;
	/* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */
	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
		dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
		return -EFAULT;
	}

	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_GENERIC;

	/*
	 * Determine the highest page order that can be used for the given memory range.
	 * This works best when the range is aligned; i.e. both the start and the length.
	 */
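	/*
	 * For example, start_pfn == 0x200 and last_pfn == 0x400 give a shift
	 * of 9, i.e. the range is backed by order-9 (2 MiB) pages.
	 */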
	pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
	dev_dbg(vtl->module_dev,
		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);

	addr = devm_memremap_pages(mem_dev, pgmap);
	if (IS_ERR(addr)) {
		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
		kfree(pgmap);
		return -EFAULT;
	}

	/* Don't free pgmap, since it has to stick around until the memory
	 * is unmapped, which will never happen as there is no scenario
	 * where VTL0 can be released/shutdown without bringing down VTL2.
	 */
	return 0;
}

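/*
 * Cancel the VTL0 run pending on the given CPU: set the cancel flag in that
 * CPU's run page and, for a remote CPU, send a reschedule IPI so it returns
 * to VTL2 and notices the flag.
 */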
static void mshv_vtl_cancel(int cpu)
{
	int here = get_cpu();

	if (here != cpu) {
		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
			smp_send_reschedule(cpu);
	} else {
		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
	}
	put_cpu();
}

static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);

	mshv_vtl_cancel(poll_file->cpu);

	return 0;
}

static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
{
	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);

	WARN_ON(poll_file->wqh);
	poll_file->wqh = wqh;
	add_wait_queue(wqh, &poll_file->wait);
}

static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
{
	struct file *file, *old_file;
	struct mshv_vtl_poll_file *poll_file;
	struct mshv_vtl_set_poll_file input;

	if (copy_from_user(&input, user_input, sizeof(input)))
		return -EFAULT;

	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
		return -EINVAL;
	/*
	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
	 * CPU is expected to remain online after above cpu_online() check.
	 */

	file = NULL;
	file = fget(input.fd);
	if (!file)
		return -EBADFD;

	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
	if (!poll_file)
		return -EINVAL;

	mutex_lock(&mshv_vtl_poll_file_lock);

	if (poll_file->wqh)
		remove_wait_queue(poll_file->wqh, &poll_file->wait);
	poll_file->wqh = NULL;

	old_file = poll_file->file;
	poll_file->file = file;
	poll_file->cpu = input.cpu;

	if (file) {
		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
		vfs_poll(file, &poll_file->pt);
	}

	mutex_unlock(&mshv_vtl_poll_file_lock);

	if (old_file)
		fput(old_file);

	return 0;
}

/* Static table mapping register names to their corresponding actions */
static const struct {
	enum hv_register_name reg_name;
	int debug_reg_num;	/* -1 if not a debug register */
	u32 msr_addr;		/* 0 if not an MSR */
} reg_table[] = {
	/* Debug registers */
	{HV_X64_REGISTER_DR0, 0, 0},
	{HV_X64_REGISTER_DR1, 1, 0},
	{HV_X64_REGISTER_DR2, 2, 0},
	{HV_X64_REGISTER_DR3, 3, 0},
	{HV_X64_REGISTER_DR6, 6, 0},
	/* MTRR MSRs */
	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
};

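/*
 * Get or set a single VP register locally when it is backed by a debug
 * register or an MSR listed in reg_table. Returns 0 when handled here,
 * 1 when the caller must fall back to the get/set VP register hypercall.
 */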
static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
{
	u64 *reg64;
	enum hv_register_name gpr_name;
	int i;

	gpr_name = regs->name;
	reg64 = &regs->value.reg64;

	/* Search for the register in the table */
	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
		if (reg_table[i].reg_name != gpr_name)
			continue;
		if (reg_table[i].debug_reg_num != -1) {
			/* Handle debug registers */
			if (gpr_name == HV_X64_REGISTER_DR6 &&
			    !mshv_vsm_capabilities.dr6_shared)
				goto hypercall;
			if (set)
				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
			else
				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
		} else {
			/* Handle MSRs */
			if (set)
				wrmsrl(reg_table[i].msr_addr, *reg64);
			else
				rdmsrl(reg_table[i].msr_addr, *reg64);
		}
		return 0;
	}

hypercall:
	return 1;
}

static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
{
	struct hv_vp_assist_page *hvp;

	hvp = hv_vp_assist_page[smp_processor_id()];

	/*
	 * Process signal event direct set in the run page, if any.
	 */
	if (mshv_vsm_capabilities.return_action_available) {
		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);

		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);

		/*
		 * Hypervisor will take care of clearing out the actions
		 * set in the assist page.
		 */
		memcpy(hvp->vtl_ret_actions,
		       mshv_vtl_this_run()->vtl_ret_actions,
		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
	}

	mshv_vtl_return_call(vtl0);
}

static bool mshv_vtl_process_intercept(void)
{
	struct hv_per_cpu_context *mshv_cpu;
	void *synic_message_page;
	struct hv_message *msg;
	u32 message_type;

	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	synic_message_page = mshv_cpu->hyp_synic_message_page;
	if (unlikely(!synic_message_page))
		return true;

	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
	message_type = READ_ONCE(msg->header.message_type);
	if (message_type == HVMSG_NONE)
		return true;

	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);

	return false;
}

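/*
 * Run the lower VTL on the current CPU until either an intercept has to be
 * delivered to userspace (it is copied into the run page's exit_message) or
 * the run is cancelled or interrupted by pending work or signals.
 */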
static int mshv_vtl_ioctl_return_to_lower_vtl(void)
{
	preempt_disable();
	for (;;) {
		unsigned long irq_flags;
		struct hv_vp_assist_page *hvp;
		int ret;

		if (__xfer_to_guest_mode_work_pending()) {
			preempt_enable();
			ret = xfer_to_guest_mode_handle_work();
			if (ret)
				return ret;
			preempt_disable();
		}

		local_irq_save(irq_flags);
		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
			local_irq_restore(irq_flags);
			preempt_enable();
			return -EINTR;
		}

		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
		local_irq_restore(irq_flags);

		hvp = hv_vp_assist_page[smp_processor_id()];
		this_cpu_inc(num_vtl0_transitions);
		switch (hvp->vtl_entry_reason) {
		case MSHV_ENTRY_REASON_INTERRUPT:
			if (!mshv_vsm_capabilities.intercept_page_available &&
			    likely(!mshv_vtl_process_intercept()))
				goto done;
			break;

		case MSHV_ENTRY_REASON_INTERCEPT:
			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
			       sizeof(hvp->intercept_message));
			goto done;

		default:
			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
		}
	}

done:
	preempt_enable();

	return 0;
}

static long
mshv_vtl_ioctl_get_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
			   sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, false);
	if (!ret)
		goto copy_args; /* No need of hypercall */
	ret = vtl_get_vp_register(&reg);
	if (ret)
		return ret;

copy_args:
	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
		ret = -EFAULT;

	return ret;
}

static long
mshv_vtl_ioctl_set_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, true);
	if (!ret)
		return ret; /* No need of hypercall */
	ret = vtl_set_vp_register(&reg);

	return ret;
}

static long
mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	long ret;
	struct mshv_vtl *vtl = filp->private_data;

	switch (ioctl) {
	case MSHV_SET_POLL_FILE:
		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
		break;
	case MSHV_GET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
		break;
	case MSHV_SET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
		break;
	case MSHV_RETURN_TO_LOWER_VTL:
		ret = mshv_vtl_ioctl_return_to_lower_vtl();
		break;
	case MSHV_ADD_VTL0_MEMORY:
		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
		break;
	default:
		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
		ret = -ENOTTY;
	}

	return ret;
}

static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
{
	struct page *page;
	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;

	if (!cpu_online(cpu))
		return VM_FAULT_SIGBUS;
	/*
	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
	 * CPU is expected to remain online after above cpu_online() check.
	 */

	if (real_off == MSHV_RUN_PAGE_OFFSET) {
		page = virt_to_page(mshv_vtl_cpu_run(cpu));
	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
		if (!mshv_has_reg_page)
			return VM_FAULT_SIGBUS;
		page = mshv_vtl_cpu_reg_page(cpu);
	} else {
		return VM_FAULT_NOPAGE;
	}

	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct mshv_vtl_vm_ops = {
	.fault = mshv_vtl_fault,
};

static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_vm_ops;

	return 0;
}

static int mshv_vtl_release(struct inode *inode, struct file *filp)
{
	struct mshv_vtl *vtl = filp->private_data;

	kfree(vtl);

	return 0;
}

static const struct file_operations mshv_vtl_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = mshv_vtl_ioctl,
	.release = mshv_vtl_release,
	.mmap = mshv_vtl_mmap,
};

static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = (*mask != 0);
	sint.auto_eoi = hv_recommend_using_aeoi();

	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
		   sint.as_uint64);

	if (!sint.masked)
		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
	else
		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
}

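/*
 * Runs on VMBUS_CONNECT_CPU via smp_call_function_single(): copy the next
 * message pending on the VTL2 VMBus SINT into the supplied buffer and signal
 * end of message so the hypervisor can deliver the next one.
 */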
static void mshv_vtl_read_remote(void *buffer)
{
	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
					VTL2_VMBUS_SINT_INDEX;
	u32 message_type = READ_ONCE(msg->header.message_type);

	WRITE_ONCE(has_message, false);
	if (message_type == HVMSG_NONE)
		return;

	memcpy(buffer, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);
}

static bool vtl_synic_mask_vmbus_sint_masked = true;

static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
{
	struct hv_message msg = {};
	int ret;

	if (size < sizeof(msg))
		return -EINVAL;

	for (;;) {
		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
		if (msg.header.message_type != HVMSG_NONE)
			break;

		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
			return 0; /* EOF */

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(fd_wait_queue,
					       READ_ONCE(has_message) ||
					       READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
		if (ret)
			return ret;
	}

	if (copy_to_user(arg, &msg, sizeof(msg)))
		return -EFAULT;

	return sizeof(msg);
}

static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(filp, &fd_wait_queue, wait);
	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}

static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}

static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
{
	struct mshv_vtl_sint_post_msg message;
	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];

	if (copy_from_user(&message, arg, sizeof(message)))
		return -EFAULT;
	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EINVAL;
	if (copy_from_user(payload, (void __user *)message.payload_ptr,
			   message.payload_size))
		return -EFAULT;

	return hv_post_message((union hv_connection_id)message.connection_id,
			       message.message_type, (void *)payload,
			       message.payload_size);
}

static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
{
	u64 input, status;
	struct mshv_vtl_signal_event signal_event;

	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
		return -EFAULT;

	input = signal_event.connection_id | ((u64)signal_event.flag << 32);

	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);

	return hv_result_to_errno(status);
}

static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
{
	struct mshv_vtl_set_eventfd set_eventfd;
	struct eventfd_ctx *eventfd, *old_eventfd;

	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
		return -EFAULT;
	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
		return -EINVAL;

	eventfd = NULL;
	if (set_eventfd.fd >= 0) {
		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);
	}

	guard(mutex)(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);

	if (old_eventfd) {
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}

	return 0;
}

static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
{
	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
	struct mshv_sint_mask mask;

	if (copy_from_user(&mask, arg, sizeof(mask)))
		return -EFAULT;
	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
	on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
	if (mask.mask)
		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);

	return 0;
}

static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case MSHV_SINT_POST_MESSAGE:
		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
	case MSHV_SINT_SIGNAL_EVENT:
		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
	case MSHV_SINT_SET_EVENTFD:
		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}

static const struct file_operations mshv_vtl_sint_ops = {
	.owner = THIS_MODULE,
	.read = mshv_vtl_sint_read,
	.poll = mshv_vtl_sint_poll,
	.unlocked_ioctl = mshv_vtl_sint_ioctl,
};

static struct miscdevice mshv_vtl_sint_dev = {
	.name = "mshv_sint",
	.fops = &mshv_vtl_sint_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
{
	struct miscdevice *dev = f->private_data;
	struct mshv_vtl_hvcall_fd *fd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	fd = vzalloc(sizeof(*fd));
	if (!fd)
		return -ENOMEM;
	fd->dev = dev;
	f->private_data = fd;
	mutex_init(&fd->init_mutex);

	return 0;
}

static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
{
	struct mshv_vtl_hvcall_fd *fd;

	fd = f->private_data;
	if (fd) {
		vfree(fd);
		f->private_data = NULL;
	}

	return 0;
}

static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
{
	struct mshv_vtl_hvcall_setup hvcall_setup;

	guard(mutex)(&fd->init_mutex);

	if (fd->allow_map_initialized) {
		dev_err(fd->dev->this_device,
			"Hypercall allow map has already been set, pid %d\n",
			current->pid);
		return -EINVAL;
	}

	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
			   sizeof(struct mshv_vtl_hvcall_setup))) {
		return -EFAULT;
	}
	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
		return -EINVAL;

	if (copy_from_user(&fd->allow_bitmap,
			   (void __user *)hvcall_setup.allow_bitmap_ptr,
			   hvcall_setup.bitmap_array_size)) {
		return -EFAULT;
	}

	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
		 current->pid);
	fd->allow_map_initialized = true;
	return 0;
}

static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
{
	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
}

static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
				struct mshv_vtl_hvcall __user *hvcall_user)
{
	struct mshv_vtl_hvcall hvcall;
	void *in, *out;
	int ret;

	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
		return -EFAULT;
	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;
	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;

	/*
	 * By default, all hypercalls are not allowed.
	 * The user mode code has to set up the allow bitmap once.
	 */

	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
		dev_err(fd->dev->this_device,
			"Hypercall with control data %#llx isn't allowed\n",
			hvcall.control);
		return -EPERM;
	}

	/*
	 * This may create a problem for Confidential VM (CVM) usecase where we need to use
	 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
	 * hyperv_pcpu_output_arg) for making a hypervisor call.
	 *
	 * TODO: Take care of this when CVM support is added.
	 */
	in = (void *)__get_free_page(GFP_KERNEL);
	out = (void *)__get_free_page(GFP_KERNEL);
	if (!in || !out) {
		/* free_page() ignores a zero address, so the common exit path is safe. */
		ret = -ENOMEM;
		goto free_pages;
	}

	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
		ret = -EFAULT;
		goto free_pages;
	}

	hvcall.status = hv_do_hypercall(hvcall.control, in, out);

	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
		ret = -EFAULT;
		goto free_pages;
	}
	ret = put_user(hvcall.status, &hvcall_user->status);
free_pages:
	free_page((unsigned long)in);
	free_page((unsigned long)out);

	return ret;
}

static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct mshv_vtl_hvcall_fd *fd = f->private_data;

	switch (cmd) {
	case MSHV_HVCALL_SETUP:
		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
	case MSHV_HVCALL:
		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
	default:
		break;
	}

	return -ENOIOCTLCMD;
}

static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
	.owner = THIS_MODULE,
	.open = mshv_vtl_hvcall_dev_open,
	.release = mshv_vtl_hvcall_dev_release,
	.unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
};

static struct miscdevice mshv_vtl_hvcall_dev = {
	.name = "mshv_hvcall",
	.nodename = "mshv_hvcall",
	.fops = &mshv_vtl_hvcall_dev_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
{
	pid_t pid = task_pid_vnr(current);
	uid_t uid = current_uid().val;
	int ret = 0;

	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);

	if (capable(CAP_SYS_ADMIN)) {
		filp->private_data = inodep;
	} else {
		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
		       __func__, pid, uid);
		ret = -EPERM;
	}

	return ret;
}

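/*
 * For mshv_vtl_low mappings the file page offset is used directly as the page
 * frame number to insert. Check that a huge mapping of the given size fits
 * inside the VMA and that the faulting address and the page offset are
 * consistently aligned; on success return the PFN rounded down to that
 * alignment.
 */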
static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
{
	unsigned long mask = size - 1;
	unsigned long start = vmf->address & ~mask;
	unsigned long end = start + size;
	bool is_valid;

	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
		   start >= vmf->vma->vm_start &&
		   end <= vmf->vma->vm_end;

	if (is_valid)
		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);

	return is_valid;
}

static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	unsigned long pfn = vmf->pgoff;
	vm_fault_t ret = VM_FAULT_FALLBACK;

	switch (order) {
	case 0:
		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);

	case PMD_ORDER:
		if (can_fault(vmf, PMD_SIZE, &pfn))
			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	case PUD_ORDER:
		if (can_fault(vmf, PUD_SIZE, &pfn))
			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	default:
		return VM_FAULT_SIGBUS;
	}
}

static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}

static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
	.fault = mshv_vtl_low_fault,
	.huge_fault = mshv_vtl_low_huge_fault,
};

static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_low_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);

	return 0;
}

static const struct file_operations mshv_vtl_low_file_ops = {
	.owner = THIS_MODULE,
	.open = mshv_vtl_low_open,
	.mmap = mshv_vtl_low_mmap,
};

static struct miscdevice mshv_vtl_low = {
	.name = "mshv_vtl_low",
	.nodename = "mshv_vtl_low",
	.fops = &mshv_vtl_low_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int __init mshv_vtl_init(void)
{
	int ret;
	struct device *dev = mshv_dev.this_device;

	/*
	 * This creates /dev/mshv which provides functionality to create VTLs and partitions.
	 */
	ret = misc_register(&mshv_dev);
	if (ret) {
		dev_err(dev, "mshv device register failed: %d\n", ret);
		goto free_dev;
	}

	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
	init_waitqueue_head(&fd_wait_queue);

	if (mshv_vtl_get_vsm_regs()) {
		dev_emerg(dev, "Unable to get VSM capabilities !!\n");
		ret = -ENODEV;
		goto free_dev;
	}
	if (mshv_vtl_configure_vsm_partition(dev)) {
		dev_emerg(dev, "VSM configuration failed !!\n");
		ret = -ENODEV;
		goto free_dev;
	}

	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
	ret = hv_vtl_setup_synic();
	if (ret)
		goto free_dev;

	/*
	 * mshv_sint device adds VMBus relay ioctl support.
	 * This provides a channel for VTL0 to communicate with VTL2.
	 */
	ret = misc_register(&mshv_vtl_sint_dev);
	if (ret)
		goto free_synic;

	/*
	 * mshv_hvcall device adds interface to enable userspace for direct hypercalls support.
	 */
	ret = misc_register(&mshv_vtl_hvcall_dev);
	if (ret)
		goto free_sint;

	/*
	 * mshv_vtl_low device is used to map VTL0 address space to a user-mode process in VTL2.
	 * It implements mmap() to allow a user-mode process in VTL2 to map to the address of VTL0.
	 */
	ret = misc_register(&mshv_vtl_low);
	if (ret)
		goto free_hvcall;

	/*
	 * "mshv vtl mem dev" device is later used to setup VTL0 memory.
	 */
	mem_dev = kzalloc(sizeof(*mem_dev), GFP_KERNEL);
	if (!mem_dev) {
		ret = -ENOMEM;
		goto free_low;
	}

	mutex_init(&mshv_vtl_poll_file_lock);

	device_initialize(mem_dev);
	dev_set_name(mem_dev, "mshv vtl mem dev");
	ret = device_add(mem_dev);
	if (ret) {
		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
		goto free_mem;
	}

	return 0;

free_mem:
	kfree(mem_dev);
free_low:
	misc_deregister(&mshv_vtl_low);
free_hvcall:
	misc_deregister(&mshv_vtl_hvcall_dev);
free_sint:
	misc_deregister(&mshv_vtl_sint_dev);
free_synic:
	hv_vtl_remove_synic();
free_dev:
	misc_deregister(&mshv_dev);

	return ret;
}

static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}

module_init(mshv_vtl_init);
module_exit(mshv_vtl_exit);