xref: /linux/drivers/hv/mshv_vtl_main.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Microsoft Corporation.
4  *
5  * Author:
6  *   Roman Kisel <romank@linux.microsoft.com>
7  *   Saurabh Sengar <ssengar@linux.microsoft.com>
8  *   Naman Jain <namjain@linux.microsoft.com>
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/miscdevice.h>
14 #include <linux/anon_inodes.h>
15 #include <linux/cpuhotplug.h>
16 #include <linux/count_zeros.h>
17 #include <linux/entry-virt.h>
18 #include <linux/eventfd.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <asm/debugreg.h>
23 #include <asm/mshyperv.h>
24 #include <trace/events/ipi.h>
25 #include <uapi/asm/mtrr.h>
26 #include <uapi/linux/mshv.h>
27 #include <hyperv/hvhdk.h>
28 
29 #include "../../kernel/fpu/legacy.h"
30 #include "mshv.h"
31 #include "mshv_vtl.h"
32 #include "hyperv_vmbus.h"
33 
34 MODULE_AUTHOR("Microsoft");
35 MODULE_LICENSE("GPL");
36 MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");
37 
/* VTL2 entry reasons reported in the VP assist page after a VTL0 exit. */
#define MSHV_ENTRY_REASON_LOWER_VTL_CALL     0x1
#define MSHV_ENTRY_REASON_INTERRUPT          0x2
#define MSHV_ENTRY_REASON_INTERCEPT         0x3

/*
 * mmap() offset encoding for the VTL fd: the low 16 bits of pgoff select
 * the CPU, the remaining high bits select which per-cpu page is mapped
 * (run page or register page) — see mshv_vtl_fault().
 */
#define MSHV_REAL_OFF_SHIFT	16
#define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
#define MSHV_RUN_PAGE_OFFSET	0
#define MSHV_REG_PAGE_OFFSET	1
/* SINT used by VTL2 for VMBus messages and event flags. */
#define VTL2_VMBUS_SINT_INDEX	7
47 
/* Device the VTL0 memory pagemaps are attached to (devm_memremap_pages()). */
static struct device *mem_dev;

/* Deferred (tasklet) handling of VMBus messages noticed in the ISR. */
static struct tasklet_struct msg_dpc;
/* Wakes readers/pollers of the SINT device when a message is pending. */
static wait_queue_head_t fd_wait_queue;
static bool has_message;
/* Eventfd signalled when the matching synic event flag fires; RCU-read in the ISR. */
static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
/* Serializes writers of flag_eventfds[]. */
static DEFINE_MUTEX(flag_lock);
/* True once any CPU has a hypervisor register overlay page configured. */
static bool __read_mostly mshv_has_reg_page;

/* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */
#define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8)
59 
/*
 * Per-open state of the hypercall passthrough device: one bit per
 * hypercall code, set if user mode may issue that call through this fd.
 * The map can be installed exactly once (see mshv_vtl_hvcall_do_setup()).
 */
struct mshv_vtl_hvcall_fd {
	u8 allow_bitmap[MAX_BITMAP_SIZE];
	bool allow_map_initialized;
	/*
	 * Used to protect hvcall setup in IOCTLs
	 */
	struct mutex init_mutex;
	struct miscdevice *dev;
};
69 
/* Per-cpu registration of a user-supplied file whose poll events cancel VTL0 runs. */
struct mshv_vtl_poll_file {
	struct file *file;		/* file being polled (MSHV_SET_POLL_FILE) */
	wait_queue_entry_t wait;	/* our entry on the file's waitqueue */
	wait_queue_head_t *wqh;		/* waitqueue head the entry is queued on, if any */
	poll_table pt;
	int cpu;			/* CPU whose VTL0 run is cancelled on wakeup */
};
77 
/* Per-fd state created by MSHV_CREATE_VTL. */
struct mshv_vtl {
	struct device *module_dev;	/* device used for diagnostics */
	u64 id;
};

/* Per-cpu pages exposed to user mode through mmap() on the VTL fd. */
struct mshv_vtl_per_cpu {
	struct mshv_vtl_run *run;	/* run page: cancel flag, cpu context, exit message */
	struct page *reg_page;		/* hypervisor register overlay page, NULL if unset */
};

/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
union hv_synic_overlay_page_msr {
	u64 as_uint64;
	struct {
		u64 enabled: 1;
		u64 reserved: 11;
		u64 pfn: 52;
	} __packed;
};
97 
/* Protects per-cpu poll-file registration (MSHV_SET_POLL_FILE). */
static struct mutex mshv_vtl_poll_file_lock;
/* VSM state read from the hypervisor once at init (mshv_vtl_get_vsm_regs()). */
static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
static union hv_register_vsm_capabilities mshv_vsm_capabilities;

static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);

/* Register hypercall targets: all-zero input VTL vs. explicit target VTL. */
static const union hv_input_vtl input_vtl_zero;
static const union hv_input_vtl input_vtl_normal = {
	.use_target_vtl = 1,
};

static const struct file_operations mshv_vtl_fops;
112 
113 static long
114 mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
115 {
116 	struct mshv_vtl *vtl;
117 	struct file *file;
118 	int fd;
119 
120 	vtl = kzalloc_obj(*vtl);
121 	if (!vtl)
122 		return -ENOMEM;
123 
124 	fd = get_unused_fd_flags(O_CLOEXEC);
125 	if (fd < 0) {
126 		kfree(vtl);
127 		return fd;
128 	}
129 	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
130 				  vtl, O_RDWR);
131 	if (IS_ERR(file)) {
132 		kfree(vtl);
133 		return PTR_ERR(file);
134 	}
135 	vtl->module_dev = module_dev;
136 	fd_install(fd, file);
137 
138 	return fd;
139 }
140 
141 static long
142 mshv_ioctl_check_extension(void __user *user_arg)
143 {
144 	u32 arg;
145 
146 	if (copy_from_user(&arg, user_arg, sizeof(arg)))
147 		return -EFAULT;
148 
149 	switch (arg) {
150 	case MSHV_CAP_CORE_API_STABLE:
151 		return 0;
152 	case MSHV_CAP_REGISTER_PAGE:
153 		return mshv_has_reg_page;
154 	case MSHV_CAP_VTL_RETURN_ACTION:
155 		return mshv_vsm_capabilities.return_action_available;
156 	case MSHV_CAP_DR6_SHARED:
157 		return mshv_vsm_capabilities.dr6_shared;
158 	}
159 
160 	return -EOPNOTSUPP;
161 }
162 
163 static long
164 mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
165 {
166 	struct miscdevice *misc = filp->private_data;
167 
168 	switch (ioctl) {
169 	case MSHV_CHECK_EXTENSION:
170 		return mshv_ioctl_check_extension((void __user *)arg);
171 	case MSHV_CREATE_VTL:
172 		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
173 	}
174 
175 	return -ENOTTY;
176 }
177 
/* fops of the top-level /dev/mshv control device. */
static const struct file_operations mshv_dev_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= mshv_dev_ioctl,
	.llseek		= noop_llseek,
};

/* Dynamic misc device node: /dev/mshv, root-only by default. */
static struct miscdevice mshv_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "mshv",
	.fops = &mshv_dev_fops,
	.mode = 0600,
};
190 
/* Run page of the current CPU. */
static struct mshv_vtl_run *mshv_vtl_this_run(void)
{
	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
}

/* Run page of the given CPU. */
static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
}

/* Register overlay page of the given CPU (NULL until configured). */
static struct page *mshv_vtl_cpu_reg_page(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
}
205 
206 static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
207 {
208 	struct hv_register_assoc reg_assoc = {};
209 	union hv_synic_overlay_page_msr overlay = {};
210 	struct page *reg_page;
211 
212 	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
213 	if (!reg_page) {
214 		WARN(1, "failed to allocate register page\n");
215 		return;
216 	}
217 
218 	overlay.enabled = 1;
219 	overlay.pfn = page_to_hvpfn(reg_page);
220 	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
221 	reg_assoc.value.reg64 = overlay.as_uint64;
222 
223 	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
224 				     1, input_vtl_zero, &reg_assoc)) {
225 		WARN(1, "failed to setup register page\n");
226 		__free_page(reg_page);
227 		return;
228 	}
229 
230 	per_cpu->reg_page = reg_page;
231 	mshv_has_reg_page = true;
232 }
233 
234 static void mshv_vtl_synic_enable_regs(unsigned int cpu)
235 {
236 	union hv_synic_sint sint;
237 
238 	sint.as_uint64 = 0;
239 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
240 	sint.masked = false;
241 	sint.auto_eoi = hv_recommend_using_aeoi();
242 
243 	/* Enable intercepts */
244 	if (!mshv_vsm_capabilities.intercept_page_available)
245 		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
246 			   sint.as_uint64);
247 
248 	/* VTL2 Host VSP SINT is (un)masked when the user mode requests that */
249 }
250 
251 static int mshv_vtl_get_vsm_regs(void)
252 {
253 	struct hv_register_assoc registers[2];
254 	int ret, count = 2;
255 
256 	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
257 	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;
258 
259 	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
260 				       count, input_vtl_zero, registers);
261 	if (ret)
262 		return ret;
263 
264 	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
265 	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;
266 
267 	return ret;
268 }
269 
270 static int mshv_vtl_configure_vsm_partition(struct device *dev)
271 {
272 	union hv_register_vsm_partition_config config;
273 	struct hv_register_assoc reg_assoc;
274 
275 	config.as_uint64 = 0;
276 	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
277 	config.enable_vtl_protection = 1;
278 	config.zero_memory_on_reset = 1;
279 	config.intercept_vp_startup = 1;
280 	config.intercept_cpuid_unimplemented = 1;
281 
282 	if (mshv_vsm_capabilities.intercept_page_available) {
283 		dev_dbg(dev, "using intercept page\n");
284 		config.intercept_page = 1;
285 	}
286 
287 	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
288 	reg_assoc.value.reg64 = config.as_uint64;
289 
290 	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
291 				       1, input_vtl_zero, &reg_assoc);
292 }
293 
/*
 * VMBus interrupt handler for VTL2. Schedules the message tasklet when a
 * message is pending on the VTL2 VMBus SINT (only CPU0 scans the message
 * page), signals a registered eventfd for each set event flag, then
 * chains to the stock vmbus_isr().
 */
static void mshv_vtl_vmbus_isr(void)
{
	struct hv_per_cpu_context *per_cpu;
	struct hv_message *msg;
	u32 message_type;
	union hv_synic_event_flags *event_flags;
	struct eventfd_ctx *eventfd;
	u16 i;

	per_cpu = this_cpu_ptr(hv_context.cpu_context);
	if (smp_processor_id() == 0) {
		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
		message_type = READ_ONCE(msg->header.message_type);
		if (message_type != HVMSG_NONE)
			tasklet_schedule(&msg_dpc);
	}

	/* Clear each set event flag and signal the matching user-mode eventfd. */
	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
			VTL2_VMBUS_SINT_INDEX;
	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
		if (!sync_test_and_clear_bit(i, event_flags->flags))
			continue;
		/* flag_eventfds[] is updated under RCU; see the SET_EVENTFD ioctl. */
		rcu_read_lock();
		eventfd = READ_ONCE(flag_eventfds[i]);
		if (eventfd)
			eventfd_signal(eventfd);
		rcu_read_unlock();
	}

	vmbus_isr();
}
325 
326 static int mshv_vtl_alloc_context(unsigned int cpu)
327 {
328 	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);
329 
330 	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
331 	if (!per_cpu->run)
332 		return -ENOMEM;
333 
334 	if (mshv_vsm_capabilities.intercept_page_available)
335 		mshv_vtl_configure_reg_page(per_cpu);
336 
337 	mshv_vtl_synic_enable_regs(cpu);
338 
339 	return 0;
340 }
341 
342 static int mshv_vtl_cpuhp_online;
343 
344 static int hv_vtl_setup_synic(void)
345 {
346 	int ret;
347 
348 	/* Use our isr to first filter out packets destined for userspace */
349 	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);
350 
351 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
352 				mshv_vtl_alloc_context, NULL);
353 	if (ret < 0) {
354 		hv_setup_vmbus_handler(vmbus_isr);
355 		return ret;
356 	}
357 
358 	mshv_vtl_cpuhp_online = ret;
359 
360 	return 0;
361 }
362 
/* Undo hv_vtl_setup_synic(): remove the cpuhp state and restore vmbus_isr. */
static void hv_vtl_remove_synic(void)
{
	cpuhp_remove_state(mshv_vtl_cpuhp_online);
	hv_setup_vmbus_handler(vmbus_isr);
}
368 
/* Read one VP register of the target VTL (input_vtl_normal) via hypercall. */
static int vtl_get_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}
374 
/* Write one VP register of the target VTL (input_vtl_normal) via hypercall. */
static int vtl_set_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}
380 
381 static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
382 {
383 	struct mshv_vtl_ram_disposition vtl0_mem;
384 	struct dev_pagemap *pgmap;
385 	void *addr;
386 
387 	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
388 		return -EFAULT;
389 	/* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */
390 	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
391 		dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
392 			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
393 		return -EFAULT;
394 	}
395 
396 	pgmap = kzalloc_obj(*pgmap);
397 	if (!pgmap)
398 		return -ENOMEM;
399 
400 	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
401 	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
402 	pgmap->nr_range = 1;
403 	pgmap->type = MEMORY_DEVICE_GENERIC;
404 
405 	/*
406 	 * Determine the highest page order that can be used for the given memory range.
407 	 * This works best when the range is aligned; i.e. both the start and the length.
408 	 */
409 	pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
410 	dev_dbg(vtl->module_dev,
411 		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
412 		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
413 
414 	addr = devm_memremap_pages(mem_dev, pgmap);
415 	if (IS_ERR(addr)) {
416 		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
417 		kfree(pgmap);
418 		return -EFAULT;
419 	}
420 
421 	/* Don't free pgmap, since it has to stick around until the memory
422 	 * is unmapped, which will never happen as there is no scenario
423 	 * where VTL0 can be released/shutdown without bringing down VTL2.
424 	 */
425 	return 0;
426 }
427 
/*
 * Cancel a pending or in-progress VTL0 run on @cpu. For a remote CPU the
 * flag is set atomically and a reschedule IPI is sent only on the 0 -> 1
 * transition; locally a plain store suffices because the flag is
 * re-checked with interrupts off before the next entry into VTL0.
 */
static void mshv_vtl_cancel(int cpu)
{
	int here = get_cpu();

	if (here != cpu) {
		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
			smp_send_reschedule(cpu);
	} else {
		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
	}
	put_cpu();
}
440 
/* Waitqueue callback: a poll event on the registered file cancels that CPU's run. */
static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);

	mshv_vtl_cancel(poll_file->cpu);

	return 0;
}
449 
/* poll_table callback: record the waitqueue head and enqueue our wait entry. */
static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
{
	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);

	/* Only a single waitqueue per registered file is supported. */
	WARN_ON(poll_file->wqh);
	poll_file->wqh = wqh;
	add_wait_queue(wqh, &poll_file->wait);
}
458 
459 static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
460 {
461 	struct file *file, *old_file;
462 	struct mshv_vtl_poll_file *poll_file;
463 	struct mshv_vtl_set_poll_file input;
464 
465 	if (copy_from_user(&input, user_input, sizeof(input)))
466 		return -EFAULT;
467 
468 	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
469 		return -EINVAL;
470 	/*
471 	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
472 	 * CPU is expected to remain online after above cpu_online() check.
473 	 */
474 
475 	file = NULL;
476 	file = fget(input.fd);
477 	if (!file)
478 		return -EBADFD;
479 
480 	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
481 	if (!poll_file)
482 		return -EINVAL;
483 
484 	mutex_lock(&mshv_vtl_poll_file_lock);
485 
486 	if (poll_file->wqh)
487 		remove_wait_queue(poll_file->wqh, &poll_file->wait);
488 	poll_file->wqh = NULL;
489 
490 	old_file = poll_file->file;
491 	poll_file->file = file;
492 	poll_file->cpu = input.cpu;
493 
494 	if (file) {
495 		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
496 		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
497 		vfs_poll(file, &poll_file->pt);
498 	}
499 
500 	mutex_unlock(&mshv_vtl_poll_file_lock);
501 
502 	if (old_file)
503 		fput(old_file);
504 
505 	return 0;
506 }
507 
/*
 * Static table mapping register names to their corresponding actions.
 * Registers listed here can be accessed directly on this CPU (debug
 * registers via native_{get,set}_debugreg(), MTRRs via rd/wrmsr)
 * instead of going through a hypercall; see mshv_vtl_get_set_reg().
 */
static const struct {
	enum hv_register_name reg_name;
	int debug_reg_num;  /* -1 if not a debug register */
	u32 msr_addr;       /* 0 if not an MSR */
} reg_table[] = {
	/* Debug registers */
	{HV_X64_REGISTER_DR0, 0, 0},
	{HV_X64_REGISTER_DR1, 1, 0},
	{HV_X64_REGISTER_DR2, 2, 0},
	{HV_X64_REGISTER_DR3, 3, 0},
	{HV_X64_REGISTER_DR6, 6, 0},
	/* MTRR MSRs */
	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
};
567 
568 static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
569 {
570 	u64 *reg64;
571 	enum hv_register_name gpr_name;
572 	int i;
573 
574 	gpr_name = regs->name;
575 	reg64 = &regs->value.reg64;
576 
577 	/* Search for the register in the table */
578 	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
579 		if (reg_table[i].reg_name != gpr_name)
580 			continue;
581 		if (reg_table[i].debug_reg_num != -1) {
582 			/* Handle debug registers */
583 			if (gpr_name == HV_X64_REGISTER_DR6 &&
584 			    !mshv_vsm_capabilities.dr6_shared)
585 				goto hypercall;
586 			if (set)
587 				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
588 			else
589 				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
590 		} else {
591 			/* Handle MSRs */
592 			if (set)
593 				wrmsrl(reg_table[i].msr_addr, *reg64);
594 			else
595 				rdmsrl(reg_table[i].msr_addr, *reg64);
596 		}
597 		return 0;
598 	}
599 
600 hypercall:
601 	return 1;
602 }
603 
/*
 * Drop this CPU into VTL0, first flushing any pending VTL-return action
 * buffer from the run page into the VP assist page so the hypervisor
 * processes it on the way out. Called with interrupts disabled (see
 * mshv_vtl_ioctl_return_to_lower_vtl()).
 */
static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
{
	struct hv_vp_assist_page *hvp;

	hvp = hv_vp_assist_page[smp_processor_id()];

	/*
	 * Process signal event direct set in the run page, if any.
	 */
	if (mshv_vsm_capabilities.return_action_available) {
		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);

		/* Consume the buffer: zero the size before copying the contents. */
		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);

		/*
		 * Hypervisor will take care of clearing out the actions
		 * set in the assist page.
		 */
		memcpy(hvp->vtl_ret_actions,
		       mshv_vtl_this_run()->vtl_ret_actions,
		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
	}

	mshv_vtl_return_call(vtl0);
}
629 
630 static bool mshv_vtl_process_intercept(void)
631 {
632 	struct hv_per_cpu_context *mshv_cpu;
633 	void *synic_message_page;
634 	struct hv_message *msg;
635 	u32 message_type;
636 
637 	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
638 	synic_message_page = mshv_cpu->hyp_synic_message_page;
639 	if (unlikely(!synic_message_page))
640 		return true;
641 
642 	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
643 	message_type = READ_ONCE(msg->header.message_type);
644 	if (message_type == HVMSG_NONE)
645 		return true;
646 
647 	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
648 	vmbus_signal_eom(msg, message_type);
649 
650 	return false;
651 }
652 
/*
 * MSHV_RETURN_TO_LOWER_VTL: run VTL0 on this CPU until something needs
 * user-mode attention. Loops entering VTL0 and dispatching on the entry
 * reason; interrupts handled entirely in the kernel loop again, while
 * intercepts fill exit_message and return 0. Returns -EINTR when the run
 * was cancelled, or an error from pending-work handling.
 */
static int mshv_vtl_ioctl_return_to_lower_vtl(void)
{
	preempt_disable();
	for (;;) {
		unsigned long irq_flags;
		struct hv_vp_assist_page *hvp;
		int ret;

		/* Handle signals/rescheduling before dropping to VTL0. */
		if (__xfer_to_guest_mode_work_pending()) {
			preempt_enable();
			ret = xfer_to_guest_mode_handle_work();
			if (ret)
				return ret;
			preempt_disable();
		}

		/* Cancellation is re-checked with interrupts off right before entry. */
		local_irq_save(irq_flags);
		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
			local_irq_restore(irq_flags);
			preempt_enable();
			return -EINTR;
		}

		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
		local_irq_restore(irq_flags);

		hvp = hv_vp_assist_page[smp_processor_id()];
		this_cpu_inc(num_vtl0_transitions);
		switch (hvp->vtl_entry_reason) {
		case MSHV_ENTRY_REASON_INTERRUPT:
			/* Without an intercept page, intercepts arrive via the SINT. */
			if (!mshv_vsm_capabilities.intercept_page_available &&
			    likely(!mshv_vtl_process_intercept()))
				goto done;
			break;

		case MSHV_ENTRY_REASON_INTERCEPT:
			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
			       sizeof(hvp->intercept_message));
			goto done;

		default:
			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
		}
	}

done:
	preempt_enable();

	return 0;
}
704 
705 static long
706 mshv_vtl_ioctl_get_regs(void __user *user_args)
707 {
708 	struct mshv_vp_registers args;
709 	struct hv_register_assoc reg;
710 	long ret;
711 
712 	if (copy_from_user(&args, user_args, sizeof(args)))
713 		return -EFAULT;
714 
715 	/*  This IOCTL supports processing only one register at a time. */
716 	if (args.count != 1)
717 		return -EINVAL;
718 
719 	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
720 			   sizeof(reg)))
721 		return -EFAULT;
722 
723 	ret = mshv_vtl_get_set_reg(&reg, false);
724 	if (!ret)
725 		goto copy_args; /* No need of hypercall */
726 	ret = vtl_get_vp_register(&reg);
727 	if (ret)
728 		return ret;
729 
730 copy_args:
731 	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
732 		ret = -EFAULT;
733 
734 	return ret;
735 }
736 
737 static long
738 mshv_vtl_ioctl_set_regs(void __user *user_args)
739 {
740 	struct mshv_vp_registers args;
741 	struct hv_register_assoc reg;
742 	long ret;
743 
744 	if (copy_from_user(&args, user_args, sizeof(args)))
745 		return -EFAULT;
746 
747 	/*  This IOCTL supports processing only one register at a time. */
748 	if (args.count != 1)
749 		return -EINVAL;
750 
751 	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
752 		return -EFAULT;
753 
754 	ret = mshv_vtl_get_set_reg(&reg, true);
755 	if (!ret)
756 		return ret; /* No need of hypercall */
757 	ret = vtl_set_vp_register(&reg);
758 
759 	return ret;
760 }
761 
762 static long
763 mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
764 {
765 	long ret;
766 	struct mshv_vtl *vtl = filp->private_data;
767 
768 	switch (ioctl) {
769 	case MSHV_SET_POLL_FILE:
770 		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
771 		break;
772 	case MSHV_GET_VP_REGISTERS:
773 		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
774 		break;
775 	case MSHV_SET_VP_REGISTERS:
776 		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
777 		break;
778 	case MSHV_RETURN_TO_LOWER_VTL:
779 		ret = mshv_vtl_ioctl_return_to_lower_vtl();
780 		break;
781 	case MSHV_ADD_VTL0_MEMORY:
782 		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
783 		break;
784 	default:
785 		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
786 		ret = -ENOTTY;
787 	}
788 
789 	return ret;
790 }
791 
792 static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
793 {
794 	struct page *page;
795 	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
796 	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;
797 
798 	if (!cpu_online(cpu))
799 		return VM_FAULT_SIGBUS;
800 	/*
801 	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
802 	 * CPU is expected to remain online after above cpu_online() check.
803 	 */
804 
805 	if (real_off == MSHV_RUN_PAGE_OFFSET) {
806 		page = virt_to_page(mshv_vtl_cpu_run(cpu));
807 	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
808 		if (!mshv_has_reg_page)
809 			return VM_FAULT_SIGBUS;
810 		page = mshv_vtl_cpu_reg_page(cpu);
811 	} else {
812 		return VM_FAULT_NOPAGE;
813 	}
814 
815 	get_page(page);
816 	vmf->page = page;
817 
818 	return 0;
819 }
820 
static const struct vm_operations_struct mshv_vtl_vm_ops = {
	.fault = mshv_vtl_fault,
};

/* Pages are populated lazily through mshv_vtl_fault(). */
static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_vm_ops;

	return 0;
}
831 
/* Free the per-fd state allocated by mshv_ioctl_create_vtl(). */
static int mshv_vtl_release(struct inode *inode, struct file *filp)
{
	struct mshv_vtl *vtl = filp->private_data;

	kfree(vtl);

	return 0;
}

/* fops of a VTL fd. */
static const struct file_operations mshv_vtl_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = mshv_vtl_ioctl,
	.release = mshv_vtl_release,
	.mmap = mshv_vtl_mmap,
};
847 
848 static void mshv_vtl_synic_mask_vmbus_sint(void *info)
849 {
850 	union hv_synic_sint sint;
851 	const u8 *mask = info;
852 
853 	sint.as_uint64 = 0;
854 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
855 	sint.masked = (*mask != 0);
856 	sint.auto_eoi = hv_recommend_using_aeoi();
857 
858 	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
859 		   sint.as_uint64);
860 
861 	if (!sint.masked)
862 		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
863 	else
864 		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
865 }
866 
/*
 * Runs on VMBUS_CONNECT_CPU via smp_call_function_single(): copies the
 * pending VTL2 VMBus message, if any, into @buffer and signals EOM.
 * has_message is cleared before the message slot is inspected.
 */
static void mshv_vtl_read_remote(void *buffer)
{
	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
					VTL2_VMBUS_SINT_INDEX;
	u32 message_type = READ_ONCE(msg->header.message_type);

	WRITE_ONCE(has_message, false);
	if (message_type == HVMSG_NONE)
		return;

	memcpy(buffer, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);
}
881 
/* Stream state: while masked, reads return EOF instead of blocking. */
static bool vtl_synic_mask_vmbus_sint_masked = true;

/*
 * Read one hv_message from the VTL2 VMBus SINT. Messages are fetched
 * (and EOM'd) on VMBUS_CONNECT_CPU. Blocks until a message arrives
 * unless O_NONBLOCK is set; a masked stream reads as EOF (0).
 */
static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
{
	struct hv_message msg = {};
	int ret;

	if (size < sizeof(msg))
		return -EINVAL;

	for (;;) {
		/* Fetch the pending message, if any, on the connect CPU. */
		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
		if (msg.header.message_type != HVMSG_NONE)
			break;

		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
			return 0; /* EOF */

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(fd_wait_queue,
					       READ_ONCE(has_message) ||
						READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
		if (ret)
			return ret;
	}

	if (copy_to_user(arg, &msg, sizeof(msg)))
		return -EFAULT;

	return sizeof(msg);
}
915 
916 static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
917 {
918 	__poll_t mask = 0;
919 
920 	poll_wait(filp, &fd_wait_queue, wait);
921 	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
922 		mask |= EPOLLIN | EPOLLRDNORM;
923 
924 	return mask;
925 }
926 
/* Tasklet body: record that a message is pending and wake readers/pollers. */
static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}
932 
933 static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
934 {
935 	struct mshv_vtl_sint_post_msg message;
936 	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];
937 
938 	if (copy_from_user(&message, arg, sizeof(message)))
939 		return -EFAULT;
940 	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
941 		return -EINVAL;
942 	if (copy_from_user(payload, (void __user *)message.payload_ptr,
943 			   message.payload_size))
944 		return -EFAULT;
945 
946 	return hv_post_message((union hv_connection_id)message.connection_id,
947 			       message.message_type, (void *)payload,
948 			       message.payload_size);
949 }
950 
951 static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
952 {
953 	u64 input, status;
954 	struct mshv_vtl_signal_event signal_event;
955 
956 	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
957 		return -EFAULT;
958 
959 	input = signal_event.connection_id | ((u64)signal_event.flag << 32);
960 
961 	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);
962 
963 	return hv_result_to_errno(status);
964 }
965 
/*
 * MSHV_SINT_SET_EVENTFD: associate an eventfd with a synic event flag,
 * or clear the association when the supplied fd is negative. The ISR
 * reads flag_eventfds[] under rcu_read_lock(), so a displaced eventfd is
 * only released after a grace period.
 */
static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
{
	struct mshv_vtl_set_eventfd set_eventfd;
	struct eventfd_ctx *eventfd, *old_eventfd;

	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
		return -EFAULT;
	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
		return -EINVAL;

	/* A negative fd clears the slot. */
	eventfd = NULL;
	if (set_eventfd.fd >= 0) {
		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);
	}

	guard(mutex)(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);

	if (old_eventfd) {
		/* Ensure no ISR still references the old eventfd before dropping it. */
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}

	return 0;
}
994 
995 static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
996 {
997 	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
998 	struct mshv_sint_mask mask;
999 
1000 	if (copy_from_user(&mask, arg, sizeof(mask)))
1001 		return -EFAULT;
1002 	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
1003 	on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
1004 	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
1005 	if (mask.mask)
1006 		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
1007 
1008 	return 0;
1009 }
1010 
1011 static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1012 {
1013 	switch (cmd) {
1014 	case MSHV_SINT_POST_MESSAGE:
1015 		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
1016 	case MSHV_SINT_SIGNAL_EVENT:
1017 		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
1018 	case MSHV_SINT_SET_EVENTFD:
1019 		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
1020 	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
1021 		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
1022 	default:
1023 		return -ENOIOCTLCMD;
1024 	}
1025 }
1026 
/*
 * File operations for the VMBus-relay SINT device (/dev/mshv_sint):
 * message reads, poll for pending messages, and the SINT ioctls above.
 */
static const struct file_operations mshv_vtl_sint_ops = {
	.owner = THIS_MODULE,
	.read = mshv_vtl_sint_read,
	.poll = mshv_vtl_sint_poll,
	.unlocked_ioctl = mshv_vtl_sint_ioctl,
};
1033 
/* /dev/mshv_sint misc device; root-only access (mode 0600). */
static struct miscdevice mshv_vtl_sint_dev = {
	.name = "mshv_sint",
	.fops = &mshv_vtl_sint_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};
1040 
1041 static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
1042 {
1043 	struct miscdevice *dev = f->private_data;
1044 	struct mshv_vtl_hvcall_fd *fd;
1045 
1046 	if (!capable(CAP_SYS_ADMIN))
1047 		return -EPERM;
1048 
1049 	fd = vzalloc(sizeof(*fd));
1050 	if (!fd)
1051 		return -ENOMEM;
1052 	fd->dev = dev;
1053 	f->private_data = fd;
1054 	mutex_init(&fd->init_mutex);
1055 
1056 	return 0;
1057 }
1058 
1059 static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
1060 {
1061 	struct mshv_vtl_hvcall_fd *fd;
1062 
1063 	fd = f->private_data;
1064 	if (fd) {
1065 		vfree(fd);
1066 		f->private_data = NULL;
1067 	}
1068 
1069 	return 0;
1070 }
1071 
1072 static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
1073 				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
1074 {
1075 	struct mshv_vtl_hvcall_setup hvcall_setup;
1076 
1077 	guard(mutex)(&fd->init_mutex);
1078 
1079 	if (fd->allow_map_initialized) {
1080 		dev_err(fd->dev->this_device,
1081 			"Hypercall allow map has already been set, pid %d\n",
1082 			current->pid);
1083 		return -EINVAL;
1084 	}
1085 
1086 	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
1087 			   sizeof(struct mshv_vtl_hvcall_setup))) {
1088 		return -EFAULT;
1089 	}
1090 	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
1091 		return -EINVAL;
1092 
1093 	if (copy_from_user(&fd->allow_bitmap,
1094 			   (void __user *)hvcall_setup.allow_bitmap_ptr,
1095 			   hvcall_setup.bitmap_array_size)) {
1096 		return -EFAULT;
1097 	}
1098 
1099 	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
1100 		 current->pid);
1101 	fd->allow_map_initialized = true;
1102 	return 0;
1103 }
1104 
1105 static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
1106 {
1107 	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
1108 }
1109 
1110 static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
1111 				struct mshv_vtl_hvcall __user *hvcall_user)
1112 {
1113 	struct mshv_vtl_hvcall hvcall;
1114 	void *in, *out;
1115 	int ret;
1116 
1117 	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
1118 		return -EFAULT;
1119 	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
1120 		return -EINVAL;
1121 	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
1122 		return -EINVAL;
1123 
1124 	/*
1125 	 * By default, all hypercalls are not allowed.
1126 	 * The user mode code has to set up the allow bitmap once.
1127 	 */
1128 
1129 	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
1130 		dev_err(fd->dev->this_device,
1131 			"Hypercall with control data %#llx isn't allowed\n",
1132 			hvcall.control);
1133 		return -EPERM;
1134 	}
1135 
1136 	/*
1137 	 * This may create a problem for Confidential VM (CVM) usecase where we need to use
1138 	 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
1139 	 * hyperv_pcpu_output_arg) for making a hypervisor call.
1140 	 *
1141 	 * TODO: Take care of this when CVM support is added.
1142 	 */
1143 	in = (void *)__get_free_page(GFP_KERNEL);
1144 	out = (void *)__get_free_page(GFP_KERNEL);
1145 
1146 	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
1147 		ret = -EFAULT;
1148 		goto free_pages;
1149 	}
1150 
1151 	hvcall.status = hv_do_hypercall(hvcall.control, in, out);
1152 
1153 	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
1154 		ret = -EFAULT;
1155 		goto free_pages;
1156 	}
1157 	ret = put_user(hvcall.status, &hvcall_user->status);
1158 free_pages:
1159 	free_page((unsigned long)in);
1160 	free_page((unsigned long)out);
1161 
1162 	return ret;
1163 }
1164 
1165 static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1166 {
1167 	struct mshv_vtl_hvcall_fd *fd = f->private_data;
1168 
1169 	switch (cmd) {
1170 	case MSHV_HVCALL_SETUP:
1171 		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
1172 	case MSHV_HVCALL:
1173 		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
1174 	default:
1175 		break;
1176 	}
1177 
1178 	return -ENOIOCTLCMD;
1179 }
1180 
/* File operations for the direct-hypercall device (/dev/mshv_hvcall). */
static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
	.owner = THIS_MODULE,
	.open = mshv_vtl_hvcall_dev_open,
	.release = mshv_vtl_hvcall_dev_release,
	.unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
};
1187 
/* /dev/mshv_hvcall misc device; root-only access (mode 0600). */
static struct miscdevice mshv_vtl_hvcall_dev = {
	.name = "mshv_hvcall",
	.nodename = "mshv_hvcall",
	.fops = &mshv_vtl_hvcall_dev_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};
1195 
1196 static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
1197 {
1198 	pid_t pid = task_pid_vnr(current);
1199 	uid_t uid = current_uid().val;
1200 	int ret = 0;
1201 
1202 	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);
1203 
1204 	if (capable(CAP_SYS_ADMIN)) {
1205 		filp->private_data = inodep;
1206 	} else {
1207 		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
1208 		       __func__, pid, uid);
1209 		ret = -EPERM;
1210 	}
1211 
1212 	return ret;
1213 }
1214 
1215 static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
1216 {
1217 	unsigned long mask = size - 1;
1218 	unsigned long start = vmf->address & ~mask;
1219 	unsigned long end = start + size;
1220 	bool is_valid;
1221 
1222 	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
1223 		start >= vmf->vma->vm_start &&
1224 		end <= vmf->vma->vm_end;
1225 
1226 	if (is_valid)
1227 		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);
1228 
1229 	return is_valid;
1230 }
1231 
1232 static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
1233 {
1234 	unsigned long pfn = vmf->pgoff;
1235 	vm_fault_t ret = VM_FAULT_FALLBACK;
1236 
1237 	switch (order) {
1238 	case 0:
1239 		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
1240 
1241 	case PMD_ORDER:
1242 		if (can_fault(vmf, PMD_SIZE, &pfn))
1243 			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1244 		return ret;
1245 
1246 	case PUD_ORDER:
1247 		if (can_fault(vmf, PUD_SIZE, &pfn))
1248 			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1249 		return ret;
1250 
1251 	default:
1252 		return VM_FAULT_SIGBUS;
1253 	}
1254 }
1255 
/* Order-0 (single page) fault: delegate to the common huge-fault path. */
static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}
1260 
/* VMA callbacks for /dev/mshv_vtl_low mappings (see mshv_vtl_low_mmap()). */
static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
	.fault = mshv_vtl_low_fault,
	.huge_fault = mshv_vtl_low_huge_fault,
};
1265 
1266 static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
1267 {
1268 	vma->vm_ops = &mshv_vtl_low_vm_ops;
1269 	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);
1270 
1271 	return 0;
1272 }
1273 
/* File operations for the VTL0 address-space device (/dev/mshv_vtl_low). */
static const struct file_operations mshv_vtl_low_file_ops = {
	.owner		= THIS_MODULE,
	.open		= mshv_vtl_low_open,
	.mmap		= mshv_vtl_low_mmap,
};
1279 
/* /dev/mshv_vtl_low misc device; root-only access (mode 0600). */
static struct miscdevice mshv_vtl_low = {
	.name = "mshv_vtl_low",
	.nodename = "mshv_vtl_low",
	.fops = &mshv_vtl_low_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};
1287 
1288 static int __init mshv_vtl_init(void)
1289 {
1290 	int ret;
1291 	struct device *dev = mshv_dev.this_device;
1292 
1293 	/*
1294 	 * This creates /dev/mshv which provides functionality to create VTLs and partitions.
1295 	 */
1296 	ret = misc_register(&mshv_dev);
1297 	if (ret) {
1298 		dev_err(dev, "mshv device register failed: %d\n", ret);
1299 		goto free_dev;
1300 	}
1301 
1302 	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
1303 	init_waitqueue_head(&fd_wait_queue);
1304 
1305 	if (mshv_vtl_get_vsm_regs()) {
1306 		dev_emerg(dev, "Unable to get VSM capabilities !!\n");
1307 		ret = -ENODEV;
1308 		goto free_dev;
1309 	}
1310 	if (mshv_vtl_configure_vsm_partition(dev)) {
1311 		dev_emerg(dev, "VSM configuration failed !!\n");
1312 		ret = -ENODEV;
1313 		goto free_dev;
1314 	}
1315 
1316 	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
1317 	ret = hv_vtl_setup_synic();
1318 	if (ret)
1319 		goto free_dev;
1320 
1321 	/*
1322 	 * mshv_sint device adds VMBus relay ioctl support.
1323 	 * This provides a channel for VTL0 to communicate with VTL2.
1324 	 */
1325 	ret = misc_register(&mshv_vtl_sint_dev);
1326 	if (ret)
1327 		goto free_synic;
1328 
1329 	/*
1330 	 * mshv_hvcall device adds interface to enable userspace for direct hypercalls support.
1331 	 */
1332 	ret = misc_register(&mshv_vtl_hvcall_dev);
1333 	if (ret)
1334 		goto free_sint;
1335 
1336 	/*
1337 	 * mshv_vtl_low device is used to map VTL0 address space to a user-mode process in VTL2.
1338 	 * It implements mmap() to allow a user-mode process in VTL2 to map to the address of VTL0.
1339 	 */
1340 	ret = misc_register(&mshv_vtl_low);
1341 	if (ret)
1342 		goto free_hvcall;
1343 
1344 	/*
1345 	 * "mshv vtl mem dev" device is later used to setup VTL0 memory.
1346 	 */
1347 	mem_dev = kzalloc_obj(*mem_dev);
1348 	if (!mem_dev) {
1349 		ret = -ENOMEM;
1350 		goto free_low;
1351 	}
1352 
1353 	mutex_init(&mshv_vtl_poll_file_lock);
1354 
1355 	device_initialize(mem_dev);
1356 	dev_set_name(mem_dev, "mshv vtl mem dev");
1357 	ret = device_add(mem_dev);
1358 	if (ret) {
1359 		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
1360 		goto free_mem;
1361 	}
1362 
1363 	return 0;
1364 
1365 free_mem:
1366 	kfree(mem_dev);
1367 free_low:
1368 	misc_deregister(&mshv_vtl_low);
1369 free_hvcall:
1370 	misc_deregister(&mshv_vtl_hvcall_dev);
1371 free_sint:
1372 	misc_deregister(&mshv_vtl_sint_dev);
1373 free_synic:
1374 	hv_vtl_remove_synic();
1375 free_dev:
1376 	misc_deregister(&mshv_dev);
1377 
1378 	return ret;
1379 }
1380 
/* Module teardown: unwind mshv_vtl_init() registrations in reverse order. */
static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	/*
	 * NOTE(review): mem_dev was device_initialize()d in init; driver-model
	 * convention is put_device() with a release() callback rather than a
	 * direct kfree() — confirm this shortcut is intentional.
	 */
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}
1391 
1392 module_init(mshv_vtl_init);
1393 module_exit(mshv_vtl_exit);
1394