// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Author:
 *   Roman Kisel <romank@linux.microsoft.com>
 *   Saurabh Sengar <ssengar@linux.microsoft.com>
 *   Naman Jain <namjain@linux.microsoft.com>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/anon_inodes.h>
#include <linux/cpuhotplug.h>
#include <linux/count_zeros.h>
#include <linux/entry-virt.h>
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <asm/debugreg.h>
#include <asm/mshyperv.h>
#include <trace/events/ipi.h>
#include <uapi/asm/mtrr.h>
#include <uapi/linux/mshv.h>
#include <hyperv/hvhdk.h>

#include "../../kernel/fpu/legacy.h"
#include "mshv.h"
#include "mshv_vtl.h"
#include "hyperv_vmbus.h"

MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");

#define MSHV_ENTRY_REASON_LOWER_VTL_CALL	0x1
#define MSHV_ENTRY_REASON_INTERRUPT		0x2
#define MSHV_ENTRY_REASON_INTERCEPT		0x3

#define MSHV_REAL_OFF_SHIFT	16
#define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
#define MSHV_RUN_PAGE_OFFSET	0
#define MSHV_REG_PAGE_OFFSET	1
#define VTL2_VMBUS_SINT_INDEX	7
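
/*
 * mmap() offset encoding for the per-CPU pages (see mshv_vtl_fault()): the
 * low 16 bits of the page offset select the CPU and the bits above select
 * the page kind. For example, the run page of CPU 3 is mapped at file offset
 * (((MSHV_RUN_PAGE_OFFSET << MSHV_REAL_OFF_SHIFT) | 3) << PAGE_SHIFT).
 */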

static struct device *mem_dev;

static struct tasklet_struct msg_dpc;
static wait_queue_head_t fd_wait_queue;
static bool has_message;
static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
static DEFINE_MUTEX(flag_lock);
static bool __read_mostly mshv_has_reg_page;

/* The hvcall code is a u16; allocate a bitmap of (1 << 16) bits to accommodate it. */
#define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8)

struct mshv_vtl_hvcall_fd {
	u8 allow_bitmap[MAX_BITMAP_SIZE];
	bool allow_map_initialized;
	/*
	 * Used to protect hvcall setup in IOCTLs
	 */
	struct mutex init_mutex;
	struct miscdevice *dev;
};

struct mshv_vtl_poll_file {
	struct file *file;
	wait_queue_entry_t wait;
	wait_queue_head_t *wqh;
	poll_table pt;
	int cpu;
};

struct mshv_vtl {
	struct device *module_dev;
	u64 id;
};

struct mshv_vtl_per_cpu {
	struct mshv_vtl_run *run;
	struct page *reg_page;
};

/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
union hv_synic_overlay_page_msr {
	u64 as_uint64;
	struct {
		u64 enabled: 1;
		u64 reserved: 11;
		u64 pfn: 52;
	} __packed;
};

static struct mutex mshv_vtl_poll_file_lock;
static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
static union hv_register_vsm_capabilities mshv_vsm_capabilities;

static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);

static const union hv_input_vtl input_vtl_zero;
static const union hv_input_vtl input_vtl_normal = {
	.use_target_vtl = 1,
};

static const struct file_operations mshv_vtl_fops;

static long
mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
{
	struct mshv_vtl *vtl;
	struct file *file;
	int fd;

	vtl = kzalloc(sizeof(*vtl), GFP_KERNEL);
	if (!vtl)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		kfree(vtl);
		return fd;
	}
	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
				  vtl, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		kfree(vtl);
		return PTR_ERR(file);
	}
	vtl->module_dev = module_dev;
	fd_install(fd, file);

	return fd;
}

static long
mshv_ioctl_check_extension(void __user *user_arg)
{
	u32 arg;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	switch (arg) {
	case MSHV_CAP_CORE_API_STABLE:
		return 0;
	case MSHV_CAP_REGISTER_PAGE:
		return mshv_has_reg_page;
	case MSHV_CAP_VTL_RETURN_ACTION:
		return mshv_vsm_capabilities.return_action_available;
	case MSHV_CAP_DR6_SHARED:
		return mshv_vsm_capabilities.dr6_shared;
	}

	return -EOPNOTSUPP;
}

static long
mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	struct miscdevice *misc = filp->private_data;

	switch (ioctl) {
	case MSHV_CHECK_EXTENSION:
		return mshv_ioctl_check_extension((void __user *)arg);
	case MSHV_CREATE_VTL:
		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
	}

	return -ENOTTY;
}

static const struct file_operations mshv_dev_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= mshv_dev_ioctl,
	.llseek		= noop_llseek,
};

static struct miscdevice mshv_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "mshv",
	.fops = &mshv_dev_fops,
	.mode = 0600,
};

static struct mshv_vtl_run *mshv_vtl_this_run(void)
{
	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
}

static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
}

static struct page *mshv_vtl_cpu_reg_page(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
}

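/*
 * Map in the hypervisor-maintained register page overlay for this VP so
 * that frequently accessed VTL0 VP registers can be read and written by
 * user mode through the MSHV_REG_PAGE_OFFSET mapping without issuing a
 * get/set-register hypercall per access.
 */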
static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
{
	struct hv_register_assoc reg_assoc = {};
	union hv_synic_overlay_page_msr overlay = {};
	struct page *reg_page;

	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
	if (!reg_page) {
		WARN(1, "failed to allocate register page\n");
		return;
	}

	overlay.enabled = 1;
	overlay.pfn = page_to_hvpfn(reg_page);
	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
	reg_assoc.value.reg64 = overlay.as_uint64;

	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				     1, input_vtl_zero, &reg_assoc)) {
		WARN(1, "failed to setup register page\n");
		__free_page(reg_page);
		return;
	}

	per_cpu->reg_page = reg_page;
	mshv_has_reg_page = true;
}

static void mshv_vtl_synic_enable_regs(unsigned int cpu)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();

	/* Enable intercepts */
	if (!mshv_vsm_capabilities.intercept_page_available)
		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			   sint.as_uint64);

	/* The VTL2 host VSP SINT is (un)masked when user mode requests it. */
}

static int mshv_vtl_get_vsm_regs(void)
{
	struct hv_register_assoc registers[2];
	int ret, count = 2;

	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;

	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				       count, input_vtl_zero, registers);
	if (ret)
		return ret;

	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;

	return ret;
}

static int mshv_vtl_configure_vsm_partition(struct device *dev)
{
	union hv_register_vsm_partition_config config;
	struct hv_register_assoc reg_assoc;

	config.as_uint64 = 0;
	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
	config.enable_vtl_protection = 1;
	config.zero_memory_on_reset = 1;
	config.intercept_vp_startup = 1;
	config.intercept_cpuid_unimplemented = 1;

	if (mshv_vsm_capabilities.intercept_page_available) {
		dev_dbg(dev, "using intercept page\n");
		config.intercept_page = 1;
	}

	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
	reg_assoc.value.reg64 = config.as_uint64;

	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_zero, &reg_assoc);
}

static void mshv_vtl_vmbus_isr(void)
{
	struct hv_per_cpu_context *per_cpu;
	struct hv_message *msg;
	u32 message_type;
	union hv_synic_event_flags *event_flags;
	struct eventfd_ctx *eventfd;
	u16 i;

	per_cpu = this_cpu_ptr(hv_context.cpu_context);
	if (smp_processor_id() == 0) {
		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
		message_type = READ_ONCE(msg->header.message_type);
		if (message_type != HVMSG_NONE)
			tasklet_schedule(&msg_dpc);
	}

	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
			VTL2_VMBUS_SINT_INDEX;
	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
		if (!sync_test_and_clear_bit(i, event_flags->flags))
			continue;
		rcu_read_lock();
		eventfd = READ_ONCE(flag_eventfds[i]);
		if (eventfd)
			eventfd_signal(eventfd);
		rcu_read_unlock();
	}

	vmbus_isr();
}

static int mshv_vtl_alloc_context(unsigned int cpu)
{
	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);

	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!per_cpu->run)
		return -ENOMEM;

	if (mshv_vsm_capabilities.intercept_page_available)
		mshv_vtl_configure_reg_page(per_cpu);

	mshv_vtl_synic_enable_regs(cpu);

	return 0;
}

static int mshv_vtl_cpuhp_online;

static int hv_vtl_setup_synic(void)
{
	int ret;

	/* Use our ISR to first filter out packets destined for user space. */
	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
				mshv_vtl_alloc_context, NULL);
	if (ret < 0) {
		hv_setup_vmbus_handler(vmbus_isr);
		return ret;
	}

	mshv_vtl_cpuhp_online = ret;

	return 0;
}

static void hv_vtl_remove_synic(void)
{
	cpuhp_remove_state(mshv_vtl_cpuhp_online);
	hv_setup_vmbus_handler(vmbus_isr);
}

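/*
 * Helpers that access a single register of the lower VTL's VP context:
 * input_vtl_normal has use_target_vtl set (with target_vtl == 0), so the
 * hypercall applies to VTL0 rather than to the caller's VTL.
 */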
static int vtl_get_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int vtl_set_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
{
	struct mshv_vtl_ram_disposition vtl0_mem;
	struct dev_pagemap *pgmap;
	void *addr;

	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
		return -EFAULT;
	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
		dev_err(vtl->module_dev, "range start pfn (%llx) >= end pfn (%llx)\n",
			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
		return -EFAULT;
	}

	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	/*
	 * vtl0_mem.last_pfn is excluded from the pagemap range for VTL0 by design.
	 * last_pfn is not reserved or wasted; it simply equals 'start_pfn + size'
	 * of the pagemap range.
	 */
	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_GENERIC;

	/*
	 * Determine the highest page order that can be used for the given memory
	 * range. This works best when the range is aligned, i.e. when both the start
	 * and the length are aligned. Clamp to MAX_FOLIO_ORDER to avoid a WARN in
	 * memremap_pages() when the range alignment exceeds the maximum folio order
	 * supported by this kernel config.
	 */
	pgmap->vmemmap_shift = min(count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn),
				   MAX_FOLIO_ORDER);
	dev_dbg(vtl->module_dev,
		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);

	addr = devm_memremap_pages(mem_dev, pgmap);
	if (IS_ERR(addr)) {
		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
		kfree(pgmap);
		return PTR_ERR(addr);
	}

	/* Don't free pgmap, since it has to stick around until the memory
	 * is unmapped, which will never happen as there is no scenario
	 * where VTL0 can be released/shutdown without bringing down VTL2.
	 */
	return 0;
}

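/*
 * Request cancellation of a pending or in-progress VTL0 run on @cpu. The
 * cancel flag is checked in mshv_vtl_ioctl_return_to_lower_vtl() with
 * interrupts disabled; a reschedule IPI kicks a remote CPU out of VTL0 so
 * that it observes the flag.
 */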
static void mshv_vtl_cancel(int cpu)
{
	int here = get_cpu();

	if (here != cpu) {
		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
			smp_send_reschedule(cpu);
	} else {
		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
	}
	put_cpu();
}

static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);

	mshv_vtl_cancel(poll_file->cpu);

	return 0;
}

static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
{
	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);

	WARN_ON(poll_file->wqh);
	poll_file->wqh = wqh;
	add_wait_queue(wqh, &poll_file->wait);
}

static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
{
	struct file *file, *old_file;
	struct mshv_vtl_poll_file *poll_file;
	struct mshv_vtl_set_poll_file input;

	if (copy_from_user(&input, user_input, sizeof(input)))
		return -EFAULT;

	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
		return -EINVAL;
	/*
	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel driver
	 * runs. The CPU is expected to remain online after the above cpu_online()
	 * check.
	 */

	file = fget(input.fd);
	if (!file)
		return -EBADFD;

	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));

	mutex_lock(&mshv_vtl_poll_file_lock);

	if (poll_file->wqh)
		remove_wait_queue(poll_file->wqh, &poll_file->wait);
	poll_file->wqh = NULL;

	old_file = poll_file->file;
	poll_file->file = file;
	poll_file->cpu = input.cpu;

	if (file) {
		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
		vfs_poll(file, &poll_file->pt);
	}

	mutex_unlock(&mshv_vtl_poll_file_lock);

	if (old_file)
		fput(old_file);

	return 0;
}

/* Static table mapping register names to their corresponding actions. */
static const struct {
	enum hv_register_name reg_name;
	int debug_reg_num;	/* -1 if not a debug register */
	u32 msr_addr;		/* 0 if not an MSR */
} reg_table[] = {
	/* Debug registers */
	{HV_X64_REGISTER_DR0, 0, 0},
	{HV_X64_REGISTER_DR1, 1, 0},
	{HV_X64_REGISTER_DR2, 2, 0},
	{HV_X64_REGISTER_DR3, 3, 0},
	{HV_X64_REGISTER_DR6, 6, 0},
	/* MTRR MSRs */
	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
};

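/*
 * Access a register locally where the architecture allows it (debug
 * registers and MTRR MSRs from the table above). Returns 0 when the access
 * was completed here and 1 when the caller must fall back to a
 * get/set-register hypercall.
 */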
static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
{
	u64 *reg64;
	enum hv_register_name gpr_name;
	int i;

	gpr_name = regs->name;
	reg64 = &regs->value.reg64;

	/* Search for the register in the table. */
	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
		if (reg_table[i].reg_name != gpr_name)
			continue;
		if (reg_table[i].debug_reg_num != -1) {
			/* Handle debug registers. */
			if (gpr_name == HV_X64_REGISTER_DR6 &&
			    !mshv_vsm_capabilities.dr6_shared)
				goto hypercall;
			if (set)
				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
			else
				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
		} else {
			/* Handle MSRs. */
			if (set)
				wrmsrl(reg_table[i].msr_addr, *reg64);
			else
				rdmsrl(reg_table[i].msr_addr, *reg64);
		}
		return 0;
	}

hypercall:
	return 1;
}

static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
{
	struct hv_vp_assist_page *hvp;

	hvp = hv_vp_assist_page[smp_processor_id()];

	/*
	 * Process signal event direct set in the run page, if any.
	 */
	if (mshv_vsm_capabilities.return_action_available) {
		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);

		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);

		/*
		 * The hypervisor will take care of clearing out the actions
		 * set in the assist page.
		 */
		memcpy(hvp->vtl_ret_actions,
		       mshv_vtl_this_run()->vtl_ret_actions,
		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
	}

	mshv_vtl_return_call(vtl0);
}

static bool mshv_vtl_process_intercept(void)
{
	struct hv_per_cpu_context *mshv_cpu;
	void *synic_message_page;
	struct hv_message *msg;
	u32 message_type;

	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	synic_message_page = mshv_cpu->hyp_synic_message_page;
	if (unlikely(!synic_message_page))
		return true;

	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
	message_type = READ_ONCE(msg->header.message_type);
	if (message_type == HVMSG_NONE)
		return true;

	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);

	return false;
}

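/*
 * The main run loop: handle any pending task work, check for cancellation,
 * then drop to VTL0 with interrupts disabled. On return to VTL2, dispatch
 * on the entry reason recorded in the VP assist page, looping until an
 * intercept needs to be forwarded to user mode.
 */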
static int mshv_vtl_ioctl_return_to_lower_vtl(void)
{
	preempt_disable();
	for (;;) {
		unsigned long irq_flags;
		struct hv_vp_assist_page *hvp;
		int ret;

		if (__xfer_to_guest_mode_work_pending()) {
			preempt_enable();
			ret = xfer_to_guest_mode_handle_work();
			if (ret)
				return ret;
			preempt_disable();
		}

		local_irq_save(irq_flags);
		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
			local_irq_restore(irq_flags);
			preempt_enable();
			return -EINTR;
		}

		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
		local_irq_restore(irq_flags);

		hvp = hv_vp_assist_page[smp_processor_id()];
		this_cpu_inc(num_vtl0_transitions);
		switch (hvp->vtl_entry_reason) {
		case MSHV_ENTRY_REASON_INTERRUPT:
			if (!mshv_vsm_capabilities.intercept_page_available &&
			    likely(!mshv_vtl_process_intercept()))
				goto done;
			break;

		case MSHV_ENTRY_REASON_INTERCEPT:
			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
			       sizeof(hvp->intercept_message));
			goto done;

		default:
			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
		}
	}

done:
	preempt_enable();

	return 0;
}
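
/*
 * A hypothetical user-mode sketch of driving the loop above (assumes a VTL
 * fd obtained via MSHV_CREATE_VTL, and the offset encoding described at the
 * MSHV_*_PAGE_OFFSET definitions; the calling thread must be pinned to
 * `cpu` since the ioctl runs on the current CPU):
 *
 *	struct mshv_vtl_run *run;
 *
 *	run = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, vtl_fd,
 *		   (((__u64)MSHV_RUN_PAGE_OFFSET << MSHV_REAL_OFF_SHIFT) | cpu)
 *			<< PAGE_SHIFT);
 *	for (;;) {
 *		if (ioctl(vtl_fd, MSHV_RETURN_TO_LOWER_VTL) < 0 && errno == EINTR)
 *			continue;	// the run was cancelled
 *		// inspect run->exit_message and emulate the intercept
 *	}
 */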

static long
mshv_vtl_ioctl_get_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
			   sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, false);
	if (!ret)
		goto copy_args;	/* No need for a hypercall */
	ret = vtl_get_vp_register(&reg);
	if (ret)
		return ret;

copy_args:
	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
		ret = -EFAULT;

	return ret;
}

static long
mshv_vtl_ioctl_set_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, true);
	if (!ret)
		return ret;	/* No need for a hypercall */
	ret = vtl_set_vp_register(&reg);

	return ret;
}

static long
mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	long ret;
	struct mshv_vtl *vtl = filp->private_data;

	switch (ioctl) {
	case MSHV_SET_POLL_FILE:
		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
		break;
	case MSHV_GET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
		break;
	case MSHV_SET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
		break;
	case MSHV_RETURN_TO_LOWER_VTL:
		ret = mshv_vtl_ioctl_return_to_lower_vtl();
		break;
	case MSHV_ADD_VTL0_MEMORY:
		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
		break;
	default:
		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
		ret = -ENOTTY;
	}

	return ret;
}

static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
{
	struct page *page;
	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;

	if (!cpu_online(cpu))
		return VM_FAULT_SIGBUS;
	/*
	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel driver
	 * runs. The CPU is expected to remain online after the above cpu_online()
	 * check.
	 */

	if (real_off == MSHV_RUN_PAGE_OFFSET) {
		page = virt_to_page(mshv_vtl_cpu_run(cpu));
	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
		if (!mshv_has_reg_page)
			return VM_FAULT_SIGBUS;
		page = mshv_vtl_cpu_reg_page(cpu);
	} else {
		return VM_FAULT_NOPAGE;
	}

	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct mshv_vtl_vm_ops = {
	.fault = mshv_vtl_fault,
};

static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_vm_ops;

	return 0;
}

static int mshv_vtl_release(struct inode *inode, struct file *filp)
{
	struct mshv_vtl *vtl = filp->private_data;

	kfree(vtl);

	return 0;
}

static const struct file_operations mshv_vtl_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= mshv_vtl_ioctl,
	.release	= mshv_vtl_release,
	.mmap		= mshv_vtl_mmap,
};

static void mshv_vtl_synic_mask_vmbus_sint(void *info)
{
	union hv_synic_sint sint;
	const u8 *mask = info;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = (*mask != 0);
	sint.auto_eoi = hv_recommend_using_aeoi();

	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
		   sint.as_uint64);

	if (!sint.masked)
		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
	else
		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
}

static void mshv_vtl_read_remote(void *buffer)
{
	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
					VTL2_VMBUS_SINT_INDEX;
	u32 message_type = READ_ONCE(msg->header.message_type);

	WRITE_ONCE(has_message, false);
	if (message_type == HVMSG_NONE)
		return;

	memcpy(buffer, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);
}

static bool vtl_synic_mask_vmbus_sint_masked = true;

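/*
 * Messages for the VTL2 VMBus SINT are consumed on VMBUS_CONNECT_CPU, so
 * readers bounce through smp_call_function_single() to copy the message
 * out and signal EOM on that CPU.
 */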
static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
{
	struct hv_message msg = {};
	int ret;

	if (size < sizeof(msg))
		return -EINVAL;

	for (;;) {
		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
		if (msg.header.message_type != HVMSG_NONE)
			break;

		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
			return 0;	/* EOF */

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(fd_wait_queue,
					       READ_ONCE(has_message) ||
					       READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
		if (ret)
			return ret;
	}

	if (copy_to_user(arg, &msg, sizeof(msg)))
		return -EFAULT;

	return sizeof(msg);
}

static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(filp, &fd_wait_queue, wait);
	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}

static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}

static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
{
	struct mshv_vtl_sint_post_msg message;
	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];

	if (copy_from_user(&message, arg, sizeof(message)))
		return -EFAULT;
	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EINVAL;
	if (copy_from_user(payload, (void __user *)message.payload_ptr,
			   message.payload_size))
		return -EFAULT;

	return hv_post_message((union hv_connection_id)message.connection_id,
			       message.message_type, (void *)payload,
			       message.payload_size);
}

static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
{
	u64 input, status;
	struct mshv_vtl_signal_event signal_event;

	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
		return -EFAULT;

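	/*
	 * Fast hypercall input layout for HVCALL_SIGNAL_EVENT: connection ID
	 * in the low 32 bits, event flag number in the high 32 bits.
	 */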
	input = signal_event.connection_id | ((u64)signal_event.flag << 32);

	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);

	return hv_result_to_errno(status);
}

static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
{
	struct mshv_vtl_set_eventfd set_eventfd;
	struct eventfd_ctx *eventfd, *old_eventfd;

	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
		return -EFAULT;
	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
		return -EINVAL;

	eventfd = NULL;
	if (set_eventfd.fd >= 0) {
		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);
	}

	guard(mutex)(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);

	if (old_eventfd) {
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}

	return 0;
}

static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
{
	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
	struct mshv_sint_mask mask;

	if (copy_from_user(&mask, arg, sizeof(mask)))
		return -EFAULT;
	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
	on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
	if (mask.mask)
		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);

	return 0;
}

static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case MSHV_SINT_POST_MESSAGE:
		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
	case MSHV_SINT_SIGNAL_EVENT:
		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
	case MSHV_SINT_SET_EVENTFD:
		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}

static const struct file_operations mshv_vtl_sint_ops = {
	.owner		= THIS_MODULE,
	.read		= mshv_vtl_sint_read,
	.poll		= mshv_vtl_sint_poll,
	.unlocked_ioctl	= mshv_vtl_sint_ioctl,
};

static struct miscdevice mshv_vtl_sint_dev = {
	.name	= "mshv_sint",
	.fops	= &mshv_vtl_sint_ops,
	.mode	= 0600,
	.minor	= MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
{
	struct miscdevice *dev = f->private_data;
	struct mshv_vtl_hvcall_fd *fd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	fd = vzalloc(sizeof(*fd));
	if (!fd)
		return -ENOMEM;
	fd->dev = dev;
	f->private_data = fd;
	mutex_init(&fd->init_mutex);

	return 0;
}

static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
{
	struct mshv_vtl_hvcall_fd *fd;

	fd = f->private_data;
	if (fd) {
		vfree(fd);
		f->private_data = NULL;
	}

	return 0;
}

static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
{
	struct mshv_vtl_hvcall_setup hvcall_setup;

	guard(mutex)(&fd->init_mutex);

	if (fd->allow_map_initialized) {
		dev_err(fd->dev->this_device,
			"Hypercall allow map has already been set, pid %d\n",
			current->pid);
		return -EINVAL;
	}

	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
			   sizeof(struct mshv_vtl_hvcall_setup))) {
		return -EFAULT;
	}
	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
		return -EINVAL;

	if (copy_from_user(&fd->allow_bitmap,
			   (void __user *)hvcall_setup.allow_bitmap_ptr,
			   hvcall_setup.bitmap_array_size)) {
		return -EFAULT;
	}

	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
		 current->pid);
	fd->allow_map_initialized = true;
	return 0;
}
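
/*
 * A hypothetical user-mode sketch of setting up the allow list (field names
 * as consumed by MSHV_HVCALL_SETUP above; hvcall_fd is an open
 * /dev/mshv_hvcall):
 *
 *	__u8 bitmap[(1 << 16) / 8] = { 0 };
 *	struct mshv_vtl_hvcall_setup setup = {
 *		.bitmap_array_size = sizeof(bitmap),
 *		.allow_bitmap_ptr = (__u64)(uintptr_t)bitmap,
 *	};
 *
 *	bitmap[call_code / 8] |= 1 << (call_code % 8);	// allow one hvcall code
 *	ioctl(hvcall_fd, MSHV_HVCALL_SETUP, &setup);
 */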

static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
{
	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
}

static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
				struct mshv_vtl_hvcall __user *hvcall_user)
{
	struct mshv_vtl_hvcall hvcall;
	void *in, *out;
	int ret;

	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
		return -EFAULT;
	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;
	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;

	/*
	 * By default, no hypercalls are allowed.
	 * The user mode code has to set up the allow bitmap once.
	 */

	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
		dev_err(fd->dev->this_device,
			"Hypercall with control data %#llx isn't allowed\n",
			hvcall.control);
		return -EPERM;
	}

	/*
	 * This may create a problem for the Confidential VM (CVM) use case, where
	 * we need to use the Hyper-V driver's per-cpu input and output pages
	 * (hyperv_pcpu_input_arg and hyperv_pcpu_output_arg) for making a
	 * hypervisor call.
	 *
	 * TODO: Take care of this when CVM support is added.
	 */
	in = (void *)__get_free_page(GFP_KERNEL);
	if (!in)
		return -ENOMEM;
	out = (void *)__get_free_page(GFP_KERNEL);
	if (!out) {
		free_page((unsigned long)in);
		return -ENOMEM;
	}

	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
		ret = -EFAULT;
		goto free_pages;
	}

	hvcall.status = hv_do_hypercall(hvcall.control, in, out);

	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
		ret = -EFAULT;
		goto free_pages;
	}
	ret = put_user(hvcall.status, &hvcall_user->status);
free_pages:
	free_page((unsigned long)in);
	free_page((unsigned long)out);

	return ret;
}

static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct mshv_vtl_hvcall_fd *fd = f->private_data;

	switch (cmd) {
	case MSHV_HVCALL_SETUP:
		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
	case MSHV_HVCALL:
		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
	default:
		break;
	}

	return -ENOIOCTLCMD;
}

static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
	.owner		= THIS_MODULE,
	.open		= mshv_vtl_hvcall_dev_open,
	.release	= mshv_vtl_hvcall_dev_release,
	.unlocked_ioctl	= mshv_vtl_hvcall_dev_ioctl,
};

static struct miscdevice mshv_vtl_hvcall_dev = {
	.name		= "mshv_hvcall",
	.nodename	= "mshv_hvcall",
	.fops		= &mshv_vtl_hvcall_dev_file_ops,
	.mode		= 0600,
	.minor		= MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
{
	pid_t pid = task_pid_vnr(current);
	uid_t uid = current_uid().val;
	int ret = 0;

	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);

	if (capable(CAP_SYS_ADMIN)) {
		filp->private_data = inodep;
	} else {
		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
		       __func__, pid, uid);
		ret = -EPERM;
	}

	return ret;
}

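/*
 * Check that a huge mapping of @size at the faulting address fits in the
 * VMA and that the virtual address and the file offset are equally aligned
 * at @size, i.e. that backing the fault with one huge page honours the
 * pgoff-to-address mapping. On success, *pfn is set to the aligned base PFN.
 */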
static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
{
	unsigned long mask = size - 1;
	unsigned long start = vmf->address & ~mask;
	unsigned long end = start + size;
	bool is_valid;

	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
		   start >= vmf->vma->vm_start &&
		   end <= vmf->vma->vm_end;

	if (is_valid)
		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);

	return is_valid;
}

static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	unsigned long pfn = vmf->pgoff;
	vm_fault_t ret = VM_FAULT_FALLBACK;

	switch (order) {
	case 0:
		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);

	case PMD_ORDER:
		if (can_fault(vmf, PMD_SIZE, &pfn))
			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	case PUD_ORDER:
		if (can_fault(vmf, PUD_SIZE, &pfn))
			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	default:
		return VM_FAULT_SIGBUS;
	}
}

static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}

static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
	.fault		= mshv_vtl_low_fault,
	.huge_fault	= mshv_vtl_low_huge_fault,
};

static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_low_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);

	return 0;
}

static const struct file_operations mshv_vtl_low_file_ops = {
	.owner	= THIS_MODULE,
	.open	= mshv_vtl_low_open,
	.mmap	= mshv_vtl_low_mmap,
};

static struct miscdevice mshv_vtl_low = {
	.name		= "mshv_vtl_low",
	.nodename	= "mshv_vtl_low",
	.fops		= &mshv_vtl_low_file_ops,
	.mode		= 0600,
	.minor		= MISC_DYNAMIC_MINOR,
};

static int __init mshv_vtl_init(void)
{
	struct device *dev;
	int ret;

	/*
	 * This creates /dev/mshv, which provides the functionality to create VTLs
	 * and partitions.
	 */
	ret = misc_register(&mshv_dev);
	if (ret) {
		pr_err("mshv device register failed: %d\n", ret);
		return ret;
	}
	/* Valid only after a successful misc_register(). */
	dev = mshv_dev.this_device;

	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
	init_waitqueue_head(&fd_wait_queue);

	if (mshv_vtl_get_vsm_regs()) {
		dev_emerg(dev, "Unable to get VSM capabilities!\n");
		ret = -ENODEV;
		goto free_dev;
	}
	if (mshv_vtl_configure_vsm_partition(dev)) {
		dev_emerg(dev, "VSM configuration failed!\n");
		ret = -ENODEV;
		goto free_dev;
	}

	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
	ret = hv_vtl_setup_synic();
	if (ret)
		goto free_dev;

	/*
	 * The mshv_sint device adds VMBus relay ioctl support.
	 * This provides a channel for VTL0 to communicate with VTL2.
	 */
	ret = misc_register(&mshv_vtl_sint_dev);
	if (ret)
		goto free_synic;

	/*
	 * The mshv_hvcall device adds an interface that enables userspace to make
	 * direct hypercalls.
	 */
	ret = misc_register(&mshv_vtl_hvcall_dev);
	if (ret)
		goto free_sint;

	/*
	 * The mshv_vtl_low device is used to map the VTL0 address space into a
	 * user-mode process in VTL2. It implements mmap() to allow a user-mode
	 * process in VTL2 to map addresses belonging to VTL0.
	 */
	ret = misc_register(&mshv_vtl_low);
	if (ret)
		goto free_hvcall;

	/*
	 * The "mshv vtl mem dev" device is later used to set up the VTL0 memory.
	 */
	mem_dev = kzalloc(sizeof(*mem_dev), GFP_KERNEL);
	if (!mem_dev) {
		ret = -ENOMEM;
		goto free_low;
	}

	mutex_init(&mshv_vtl_poll_file_lock);

	device_initialize(mem_dev);
	dev_set_name(mem_dev, "mshv vtl mem dev");
	ret = device_add(mem_dev);
	if (ret) {
		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
		goto free_mem;
	}

	return 0;

free_mem:
	kfree(mem_dev);
free_low:
	misc_deregister(&mshv_vtl_low);
free_hvcall:
	misc_deregister(&mshv_vtl_hvcall_dev);
free_sint:
	misc_deregister(&mshv_vtl_sint_dev);
free_synic:
	hv_vtl_remove_synic();
free_dev:
	misc_deregister(&mshv_dev);

	return ret;
}

static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}

module_init(mshv_vtl_init);
module_exit(mshv_vtl_exit);