xref: /linux/drivers/hv/mshv_vtl_main.c (revision feb06d2690bb826fd33798a99ce5cff8d07b38f9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Microsoft Corporation.
4  *
5  * Author:
6  *   Roman Kisel <romank@linux.microsoft.com>
7  *   Saurabh Sengar <ssengar@linux.microsoft.com>
8  *   Naman Jain <namjain@linux.microsoft.com>
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/miscdevice.h>
14 #include <linux/anon_inodes.h>
15 #include <linux/cpuhotplug.h>
16 #include <linux/count_zeros.h>
17 #include <linux/entry-virt.h>
18 #include <linux/eventfd.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <asm/debugreg.h>
23 #include <asm/mshyperv.h>
24 #include <trace/events/ipi.h>
25 #include <uapi/asm/mtrr.h>
26 #include <uapi/linux/mshv.h>
27 #include <hyperv/hvhdk.h>
28 
29 #include "../../kernel/fpu/legacy.h"
30 #include "mshv.h"
31 #include "mshv_vtl.h"
32 #include "hyperv_vmbus.h"
33 
34 MODULE_AUTHOR("Microsoft");
35 MODULE_LICENSE("GPL");
36 MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");
37 
38 #define MSHV_ENTRY_REASON_LOWER_VTL_CALL     0x1
39 #define MSHV_ENTRY_REASON_INTERRUPT          0x2
40 #define MSHV_ENTRY_REASON_INTERCEPT          0x3
41 
42 #define MSHV_REAL_OFF_SHIFT	16
43 #define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
44 #define MSHV_RUN_PAGE_OFFSET	0
45 #define MSHV_REG_PAGE_OFFSET	1
46 #define VTL2_VMBUS_SINT_INDEX	7
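/*
 * mmap page-offset layout, as decoded by mshv_vtl_fault() below: the low
 * MSHV_REAL_OFF_SHIFT bits select a CPU and the bits above select the
 * per-CPU page, so e.g. the register page for a CPU is mapped at byte
 * offset ((MSHV_REG_PAGE_OFFSET << MSHV_REAL_OFF_SHIFT) | cpu) * PAGE_SIZE.
 */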
47 
48 static struct device *mem_dev;
49 
50 static struct tasklet_struct msg_dpc;
51 static wait_queue_head_t fd_wait_queue;
52 static bool has_message;
53 static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
54 static DEFINE_MUTEX(flag_lock);
55 static bool __read_mostly mshv_has_reg_page;
56 
57 /* The hvcall code is a u16; allocate a bitmap of (1 << 16) bits to accommodate every call code */
58 #define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8)
59 
60 struct mshv_vtl_hvcall_fd {
61 	u8 allow_bitmap[MAX_BITMAP_SIZE];
62 	bool allow_map_initialized;
63 	/*
64 	 * Used to protect hvcall setup in IOCTLs
65 	 */
66 	struct mutex init_mutex;
67 	struct miscdevice *dev;
68 };
69 
70 struct mshv_vtl_poll_file {
71 	struct file *file;
72 	wait_queue_entry_t wait;
73 	wait_queue_head_t *wqh;
74 	poll_table pt;
75 	int cpu;
76 };
77 
78 struct mshv_vtl {
79 	struct device *module_dev;
80 	u64 id;
81 };
82 
83 struct mshv_vtl_per_cpu {
84 	struct mshv_vtl_run *run;
85 	struct page *reg_page;
86 };
87 
88 /* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
89 union hv_synic_overlay_page_msr {
90 	u64 as_uint64;
91 	struct {
92 		u64 enabled: 1;
93 		u64 reserved: 11;
94 		u64 pfn: 52;
95 	} __packed;
96 };
97 
98 static struct mutex mshv_vtl_poll_file_lock;
99 static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
100 static union hv_register_vsm_capabilities mshv_vsm_capabilities;
101 
102 static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
103 static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
104 static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);
105 
106 static const union hv_input_vtl input_vtl_zero;
107 static const union hv_input_vtl input_vtl_normal = {
108 	.use_target_vtl = 1,
109 };
110 
111 static const struct file_operations mshv_vtl_fops;
112 
113 static long
114 mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
115 {
116 	struct mshv_vtl *vtl;
117 	struct file *file;
118 	int fd;
119 
120 	vtl = kzalloc(sizeof(*vtl), GFP_KERNEL);
121 	if (!vtl)
122 		return -ENOMEM;
123 
124 	fd = get_unused_fd_flags(O_CLOEXEC);
125 	if (fd < 0) {
126 		kfree(vtl);
127 		return fd;
128 	}
129 	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
130 				  vtl, O_RDWR);
131 	if (IS_ERR(file)) {
132 		kfree(vtl);
133 		return PTR_ERR(file);
134 	}
135 	vtl->module_dev = module_dev;
136 	fd_install(fd, file);
137 
138 	return fd;
139 }
140 
141 static long
142 mshv_ioctl_check_extension(void __user *user_arg)
143 {
144 	u32 arg;
145 
146 	if (copy_from_user(&arg, user_arg, sizeof(arg)))
147 		return -EFAULT;
148 
149 	switch (arg) {
150 	case MSHV_CAP_CORE_API_STABLE:
151 		return 0;
152 	case MSHV_CAP_REGISTER_PAGE:
153 		return mshv_has_reg_page;
154 	case MSHV_CAP_VTL_RETURN_ACTION:
155 		return mshv_vsm_capabilities.return_action_available;
156 	case MSHV_CAP_DR6_SHARED:
157 		return mshv_vsm_capabilities.dr6_shared;
158 	}
159 
160 	return -EOPNOTSUPP;
161 }
162 
163 static long
164 mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
165 {
166 	struct miscdevice *misc = filp->private_data;
167 
168 	switch (ioctl) {
169 	case MSHV_CHECK_EXTENSION:
170 		return mshv_ioctl_check_extension((void __user *)arg);
171 	case MSHV_CREATE_VTL:
172 		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
173 	}
174 
175 	return -ENOTTY;
176 }
177 
178 static const struct file_operations mshv_dev_fops = {
179 	.owner		= THIS_MODULE,
180 	.unlocked_ioctl	= mshv_dev_ioctl,
181 	.llseek		= noop_llseek,
182 };
183 
184 static struct miscdevice mshv_dev = {
185 	.minor = MISC_DYNAMIC_MINOR,
186 	.name = "mshv",
187 	.fops = &mshv_dev_fops,
188 	.mode = 0600,
189 };
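/*
 * Rough user-space usage sketch (illustrative only; the actual ioctl
 * argument types are defined in <uapi/linux/mshv.h>):
 *
 *	int mshv = open("/dev/mshv", O_RDWR);
 *	__u32 cap = MSHV_CAP_CORE_API_STABLE;
 *	long supported = ioctl(mshv, MSHV_CHECK_EXTENSION, &cap);
 *	int vtl_fd = ioctl(mshv, MSHV_CREATE_VTL, NULL);	// fd backed by mshv_vtl_fops
 */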
190 
191 static struct mshv_vtl_run *mshv_vtl_this_run(void)
192 {
193 	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
194 }
195 
196 static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
197 {
198 	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
199 }
200 
201 static struct page *mshv_vtl_cpu_reg_page(int cpu)
202 {
203 	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
204 }
205 
206 static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
207 {
208 	struct hv_register_assoc reg_assoc = {};
209 	union hv_synic_overlay_page_msr overlay = {};
210 	struct page *reg_page;
211 
212 	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
213 	if (!reg_page) {
214 		WARN(1, "failed to allocate register page\n");
215 		return;
216 	}
217 
218 	overlay.enabled = 1;
219 	overlay.pfn = page_to_hvpfn(reg_page);
220 	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
221 	reg_assoc.value.reg64 = overlay.as_uint64;
222 
223 	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
224 				     1, input_vtl_zero, &reg_assoc)) {
225 		WARN(1, "failed to setup register page\n");
226 		__free_page(reg_page);
227 		return;
228 	}
229 
230 	per_cpu->reg_page = reg_page;
231 	mshv_has_reg_page = true;
232 }
233 
234 static void mshv_vtl_synic_enable_regs(unsigned int cpu)
235 {
236 	union hv_synic_sint sint;
237 
238 	sint.as_uint64 = 0;
239 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
240 	sint.masked = false;
241 	sint.auto_eoi = hv_recommend_using_aeoi();
242 
243 	/* Enable intercepts */
244 	if (!mshv_vsm_capabilities.intercept_page_available)
245 		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
246 			   sint.as_uint64);
247 
248 	/* The VTL2 host VSP SINT is (un)masked when user mode requests it */
249 }
250 
251 static int mshv_vtl_get_vsm_regs(void)
252 {
253 	struct hv_register_assoc registers[2];
254 	int ret, count = 2;
255 
256 	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
257 	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;
258 
259 	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
260 				       count, input_vtl_zero, registers);
261 	if (ret)
262 		return ret;
263 
264 	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
265 	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;
266 
267 	return ret;
268 }
269 
270 static int mshv_vtl_configure_vsm_partition(struct device *dev)
271 {
272 	union hv_register_vsm_partition_config config;
273 	struct hv_register_assoc reg_assoc;
274 
275 	config.as_uint64 = 0;
276 	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
277 	config.enable_vtl_protection = 1;
278 	config.zero_memory_on_reset = 1;
279 	config.intercept_vp_startup = 1;
280 	config.intercept_cpuid_unimplemented = 1;
281 
282 	if (mshv_vsm_capabilities.intercept_page_available) {
283 		dev_dbg(dev, "using intercept page\n");
284 		config.intercept_page = 1;
285 	}
286 
287 	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
288 	reg_assoc.value.reg64 = config.as_uint64;
289 
290 	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
291 				       1, input_vtl_zero, &reg_assoc);
292 }
293 
294 static void mshv_vtl_vmbus_isr(void)
295 {
296 	struct hv_per_cpu_context *per_cpu;
297 	struct hv_message *msg;
298 	u32 message_type;
299 	union hv_synic_event_flags *event_flags;
300 	struct eventfd_ctx *eventfd;
301 	u16 i;
302 
303 	per_cpu = this_cpu_ptr(hv_context.cpu_context);
304 	if (smp_processor_id() == 0) {
305 		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
306 		message_type = READ_ONCE(msg->header.message_type);
307 		if (message_type != HVMSG_NONE)
308 			tasklet_schedule(&msg_dpc);
309 	}
310 
311 	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
312 			VTL2_VMBUS_SINT_INDEX;
313 	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
314 		if (!sync_test_and_clear_bit(i, event_flags->flags))
315 			continue;
316 		rcu_read_lock();
317 		eventfd = READ_ONCE(flag_eventfds[i]);
318 		if (eventfd)
319 			eventfd_signal(eventfd);
320 		rcu_read_unlock();
321 	}
322 
323 	vmbus_isr();
324 }
325 
326 static int mshv_vtl_alloc_context(unsigned int cpu)
327 {
328 	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);
329 
330 	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
331 	if (!per_cpu->run)
332 		return -ENOMEM;
333 
334 	if (mshv_vsm_capabilities.intercept_page_available)
335 		mshv_vtl_configure_reg_page(per_cpu);
336 
337 	mshv_vtl_synic_enable_regs(cpu);
338 
339 	return 0;
340 }
341 
342 static int mshv_vtl_cpuhp_online;
343 
344 static int hv_vtl_setup_synic(void)
345 {
346 	int ret;
347 
348 	/* Use our isr to first filter out packets destined for userspace */
349 	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);
350 
351 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
352 				mshv_vtl_alloc_context, NULL);
353 	if (ret < 0) {
354 		hv_setup_vmbus_handler(vmbus_isr);
355 		return ret;
356 	}
357 
358 	mshv_vtl_cpuhp_online = ret;
359 
360 	return 0;
361 }
362 
363 static void hv_vtl_remove_synic(void)
364 {
365 	cpuhp_remove_state(mshv_vtl_cpuhp_online);
366 	hv_setup_vmbus_handler(vmbus_isr);
367 }
368 
369 static int vtl_get_vp_register(struct hv_register_assoc *reg)
370 {
371 	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
372 					1, input_vtl_normal, reg);
373 }
374 
375 static int vtl_set_vp_register(struct hv_register_assoc *reg)
376 {
377 	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
378 					1, input_vtl_normal, reg);
379 }
380 
381 static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
382 {
383 	struct mshv_vtl_ram_disposition vtl0_mem;
384 	struct dev_pagemap *pgmap;
385 	void *addr;
386 
387 	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
388 		return -EFAULT;
389 	/* vtl0_mem.last_pfn is excluded from the pagemap range for VTL0 by design */
390 	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
391 		dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
392 			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
393 		return -EFAULT;
394 	}
395 
396 	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
397 	if (!pgmap)
398 		return -ENOMEM;
399 
400 	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
401 	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
402 	pgmap->nr_range = 1;
403 	pgmap->type = MEMORY_DEVICE_GENERIC;
404 
405 	/*
406 	 * Determine the highest page order that can be used for the given memory range.
407 	 * This works best when the range is aligned; i.e. both the start and the length.
408 	 */
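	/*
	 * Illustrative example: start_pfn = 0x800 and last_pfn = 0x1800 give
	 * count_trailing_zeros(0x800 | 0x1800) = 11, i.e. order-11 mappings.
	 */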
409 	pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
410 	dev_dbg(vtl->module_dev,
411 		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
412 		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
413 
414 	addr = devm_memremap_pages(mem_dev, pgmap);
415 	if (IS_ERR(addr)) {
416 		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
417 		kfree(pgmap);
418 		return -EFAULT;
419 	}
420 
421 	/* Don't free pgmap, since it has to stick around until the memory
422 	 * is unmapped, which will never happen as there is no scenario
423 	 * where VTL0 can be released/shutdown without bringing down VTL2.
424 	 */
425 	return 0;
426 }
427 
428 static void mshv_vtl_cancel(int cpu)
429 {
430 	int here = get_cpu();
431 
432 	if (here != cpu) {
433 		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
434 			smp_send_reschedule(cpu);
435 	} else {
436 		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
437 	}
438 	put_cpu();
439 }
440 
441 static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
442 {
443 	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);
444 
445 	mshv_vtl_cancel(poll_file->cpu);
446 
447 	return 0;
448 }
449 
450 static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
451 {
452 	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);
453 
454 	WARN_ON(poll_file->wqh);
455 	poll_file->wqh = wqh;
456 	add_wait_queue(wqh, &poll_file->wait);
457 }
458 
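/*
 * Associate a pollable file with a CPU.  When the file becomes ready,
 * mshv_vtl_poll_file_wake() runs as the wait-queue callback and calls
 * mshv_vtl_cancel() for that CPU, which sets run->cancel (kicking the CPU
 * with a reschedule IPI if needed) so that a pending
 * MSHV_RETURN_TO_LOWER_VTL ioctl returns to user space with -EINTR.
 */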
459 static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
460 {
461 	struct file *file, *old_file;
462 	struct mshv_vtl_poll_file *poll_file;
463 	struct mshv_vtl_set_poll_file input;
464 
465 	if (copy_from_user(&input, user_input, sizeof(input)))
466 		return -EFAULT;
467 
468 	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
469 		return -EINVAL;
470 	/*
471 	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel driver runs.
472 	 * The CPU is expected to remain online after the above cpu_online() check.
473 	 */
474 
475 	file = NULL;
476 	file = fget(input.fd);
477 	if (!file)
478 		return -EBADFD;
479 
480 	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
481 	if (!poll_file)
482 		return -EINVAL;
483 
484 	mutex_lock(&mshv_vtl_poll_file_lock);
485 
486 	if (poll_file->wqh)
487 		remove_wait_queue(poll_file->wqh, &poll_file->wait);
488 	poll_file->wqh = NULL;
489 
490 	old_file = poll_file->file;
491 	poll_file->file = file;
492 	poll_file->cpu = input.cpu;
493 
494 	if (file) {
495 		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
496 		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
497 		vfs_poll(file, &poll_file->pt);
498 	}
499 
500 	mutex_unlock(&mshv_vtl_poll_file_lock);
501 
502 	if (old_file)
503 		fput(old_file);
504 
505 	return 0;
506 }
507 
508 /* Static table mapping register names to their corresponding actions */
509 static const struct {
510 	enum hv_register_name reg_name;
511 	int debug_reg_num;  /* -1 if not a debug register */
512 	u32 msr_addr;       /* 0 if not an MSR */
513 } reg_table[] = {
514 	/* Debug registers */
515 	{HV_X64_REGISTER_DR0, 0, 0},
516 	{HV_X64_REGISTER_DR1, 1, 0},
517 	{HV_X64_REGISTER_DR2, 2, 0},
518 	{HV_X64_REGISTER_DR3, 3, 0},
519 	{HV_X64_REGISTER_DR6, 6, 0},
520 	/* MTRR MSRs */
521 	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
522 	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
523 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
524 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
525 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
526 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
527 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
528 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
529 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
530 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
531 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
532 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
533 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
534 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
535 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
536 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
537 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
538 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
539 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
540 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
541 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
542 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
543 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
544 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
545 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
546 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
547 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
548 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
549 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
550 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
551 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
552 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
553 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
554 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
555 	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
556 	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
557 	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
558 	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
559 	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
560 	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
561 	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
562 	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
563 	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
564 	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
565 	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
566 };
567 
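/*
 * Fast path for register access: the debug registers and MTRR MSRs listed
 * in reg_table above are read/written directly on this CPU (DR6 only when
 * mshv_vsm_capabilities.dr6_shared is set); anything else returns 1 so the
 * callers fall back to the get/set VP register hypercall.
 */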
568 static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
569 {
570 	u64 *reg64;
571 	enum hv_register_name gpr_name;
572 	int i;
573 
574 	gpr_name = regs->name;
575 	reg64 = &regs->value.reg64;
576 
577 	/* Search for the register in the table */
578 	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
579 		if (reg_table[i].reg_name != gpr_name)
580 			continue;
581 		if (reg_table[i].debug_reg_num != -1) {
582 			/* Handle debug registers */
583 			if (gpr_name == HV_X64_REGISTER_DR6 &&
584 			    !mshv_vsm_capabilities.dr6_shared)
585 				goto hypercall;
586 			if (set)
587 				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
588 			else
589 				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
590 		} else {
591 			/* Handle MSRs */
592 			if (set)
593 				wrmsrl(reg_table[i].msr_addr, *reg64);
594 			else
595 				rdmsrl(reg_table[i].msr_addr, *reg64);
596 		}
597 		return 0;
598 	}
599 
600 hypercall:
601 	return 1;
602 }
603 
604 static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
605 {
606 	struct hv_vp_assist_page *hvp;
607 
608 	hvp = hv_vp_assist_page[smp_processor_id()];
609 
610 	/*
611 	 * Process signal events set directly in the run page, if any.
612 	 */
613 	if (mshv_vsm_capabilities.return_action_available) {
614 		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);
615 
616 		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);
617 
618 		/*
619 		 * Hypervisor will take care of clearing out the actions
620 		 * set in the assist page.
621 		 */
622 		memcpy(hvp->vtl_ret_actions,
623 		       mshv_vtl_this_run()->vtl_ret_actions,
624 		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
625 	}
626 
627 	mshv_vtl_return_call(vtl0);
628 }
629 
630 static bool mshv_vtl_process_intercept(void)
631 {
632 	struct hv_per_cpu_context *mshv_cpu;
633 	void *synic_message_page;
634 	struct hv_message *msg;
635 	u32 message_type;
636 
637 	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
638 	synic_message_page = mshv_cpu->hyp_synic_message_page;
639 	if (unlikely(!synic_message_page))
640 		return true;
641 
642 	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
643 	message_type = READ_ONCE(msg->header.message_type);
644 	if (message_type == HVMSG_NONE)
645 		return true;
646 
647 	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
648 	vmbus_signal_eom(msg, message_type);
649 
650 	return false;
651 }
652 
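/*
 * Run the lower VTL until there is work for user space.  Each iteration
 * switches to VTL0 via mshv_vtl_return(); on re-entry the VP assist page's
 * vtl_entry_reason says why.  INTERRUPT keeps looping unless a SynIC
 * intercept message is pending (only checked when no intercept page is
 * available); INTERCEPT copies the intercept message from the assist page
 * into the run page's exit_message and returns to user space.
 */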
653 static int mshv_vtl_ioctl_return_to_lower_vtl(void)
654 {
655 	preempt_disable();
656 	for (;;) {
657 		unsigned long irq_flags;
658 		struct hv_vp_assist_page *hvp;
659 		int ret;
660 
661 		if (__xfer_to_guest_mode_work_pending()) {
662 			preempt_enable();
663 			ret = xfer_to_guest_mode_handle_work();
664 			if (ret)
665 				return ret;
666 			preempt_disable();
667 		}
668 
669 		local_irq_save(irq_flags);
670 		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
671 			local_irq_restore(irq_flags);
672 			preempt_enable();
673 			return -EINTR;
674 		}
675 
676 		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
677 		local_irq_restore(irq_flags);
678 
679 		hvp = hv_vp_assist_page[smp_processor_id()];
680 		this_cpu_inc(num_vtl0_transitions);
681 		switch (hvp->vtl_entry_reason) {
682 		case MSHV_ENTRY_REASON_INTERRUPT:
683 			if (!mshv_vsm_capabilities.intercept_page_available &&
684 			    likely(!mshv_vtl_process_intercept()))
685 				goto done;
686 			break;
687 
688 		case MSHV_ENTRY_REASON_INTERCEPT:
689 			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
690 			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
691 			       sizeof(hvp->intercept_message));
692 			goto done;
693 
694 		default:
695 			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
696 		}
697 	}
698 
699 done:
700 	preempt_enable();
701 
702 	return 0;
703 }
704 
705 static long
706 mshv_vtl_ioctl_get_regs(void __user *user_args)
707 {
708 	struct mshv_vp_registers args;
709 	struct hv_register_assoc reg;
710 	long ret;
711 
712 	if (copy_from_user(&args, user_args, sizeof(args)))
713 		return -EFAULT;
714 
715 	/*  This IOCTL supports processing only one register at a time. */
716 	if (args.count != 1)
717 		return -EINVAL;
718 
719 	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
720 			   sizeof(reg)))
721 		return -EFAULT;
722 
723 	ret = mshv_vtl_get_set_reg(&reg, false);
724 	if (!ret)
725 		goto copy_args; /* No need of hypercall */
726 	ret = vtl_get_vp_register(&reg);
727 	if (ret)
728 		return ret;
729 
730 copy_args:
731 	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
732 		ret = -EFAULT;
733 
734 	return ret;
735 }
736 
737 static long
738 mshv_vtl_ioctl_set_regs(void __user *user_args)
739 {
740 	struct mshv_vp_registers args;
741 	struct hv_register_assoc reg;
742 	long ret;
743 
744 	if (copy_from_user(&args, user_args, sizeof(args)))
745 		return -EFAULT;
746 
747 	/*  This IOCTL supports processing only one register at a time. */
748 	if (args.count != 1)
749 		return -EINVAL;
750 
751 	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
752 		return -EFAULT;
753 
754 	ret = mshv_vtl_get_set_reg(&reg, true);
755 	if (!ret)
756 		return ret; /* No need of hypercall */
757 	ret = vtl_set_vp_register(&reg);
758 
759 	return ret;
760 }
761 
762 static long
763 mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
764 {
765 	long ret;
766 	struct mshv_vtl *vtl = filp->private_data;
767 
768 	switch (ioctl) {
769 	case MSHV_SET_POLL_FILE:
770 		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
771 		break;
772 	case MSHV_GET_VP_REGISTERS:
773 		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
774 		break;
775 	case MSHV_SET_VP_REGISTERS:
776 		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
777 		break;
778 	case MSHV_RETURN_TO_LOWER_VTL:
779 		ret = mshv_vtl_ioctl_return_to_lower_vtl();
780 		break;
781 	case MSHV_ADD_VTL0_MEMORY:
782 		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
783 		break;
784 	default:
785 		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
786 		ret = -ENOTTY;
787 	}
788 
789 	return ret;
790 }
791 
792 static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
793 {
794 	struct page *page;
795 	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
796 	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;
797 
798 	if (!cpu_online(cpu))
799 		return VM_FAULT_SIGBUS;
800 	/*
801 	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this kernel driver runs.
802 	 * The CPU is expected to remain online after the above cpu_online() check.
803 	 */
804 
805 	if (real_off == MSHV_RUN_PAGE_OFFSET) {
806 		page = virt_to_page(mshv_vtl_cpu_run(cpu));
807 	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
808 		if (!mshv_has_reg_page)
809 			return VM_FAULT_SIGBUS;
810 		page = mshv_vtl_cpu_reg_page(cpu);
811 	} else {
812 		return VM_FAULT_NOPAGE;
813 	}
814 
815 	get_page(page);
816 	vmf->page = page;
817 
818 	return 0;
819 }
820 
821 static const struct vm_operations_struct mshv_vtl_vm_ops = {
822 	.fault = mshv_vtl_fault,
823 };
824 
825 static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
826 {
827 	vma->vm_ops = &mshv_vtl_vm_ops;
828 
829 	return 0;
830 }
831 
832 static int mshv_vtl_release(struct inode *inode, struct file *filp)
833 {
834 	struct mshv_vtl *vtl = filp->private_data;
835 
836 	kfree(vtl);
837 
838 	return 0;
839 }
840 
841 static const struct file_operations mshv_vtl_fops = {
842 	.owner = THIS_MODULE,
843 	.unlocked_ioctl = mshv_vtl_ioctl,
844 	.release = mshv_vtl_release,
845 	.mmap = mshv_vtl_mmap,
846 };
847 
848 static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask)
849 {
850 	union hv_synic_sint sint;
851 
852 	sint.as_uint64 = 0;
853 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
854 	sint.masked = (*mask != 0);
855 	sint.auto_eoi = hv_recommend_using_aeoi();
856 
857 	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
858 		   sint.as_uint64);
859 
860 	if (!sint.masked)
861 		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
862 	else
863 		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
864 }
865 
866 static void mshv_vtl_read_remote(void *buffer)
867 {
868 	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
869 	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
870 					VTL2_VMBUS_SINT_INDEX;
871 	u32 message_type = READ_ONCE(msg->header.message_type);
872 
873 	WRITE_ONCE(has_message, false);
874 	if (message_type == HVMSG_NONE)
875 		return;
876 
877 	memcpy(buffer, msg, sizeof(*msg));
878 	vmbus_signal_eom(msg, message_type);
879 }
880 
881 static bool vtl_synic_mask_vmbus_sint_masked = true;
882 
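/*
 * Read one VMBus message destined for user space.  Messages arrive on the
 * VTL2 VMBus SINT (VTL2_VMBUS_SINT_INDEX) of VMBUS_CONNECT_CPU, so they are
 * fetched there via smp_call_function_single() and copied out one
 * struct hv_message at a time.  A masked SINT stream reads as EOF.
 */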
883 static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
884 {
885 	struct hv_message msg = {};
886 	int ret;
887 
888 	if (size < sizeof(msg))
889 		return -EINVAL;
890 
891 	for (;;) {
892 		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
893 		if (msg.header.message_type != HVMSG_NONE)
894 			break;
895 
896 		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
897 			return 0; /* EOF */
898 
899 		if (filp->f_flags & O_NONBLOCK)
900 			return -EAGAIN;
901 
902 		ret = wait_event_interruptible(fd_wait_queue,
903 					       READ_ONCE(has_message) ||
904 						READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
905 		if (ret)
906 			return ret;
907 	}
908 
909 	if (copy_to_user(arg, &msg, sizeof(msg)))
910 		return -EFAULT;
911 
912 	return sizeof(msg);
913 }
914 
915 static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
916 {
917 	__poll_t mask = 0;
918 
919 	poll_wait(filp, &fd_wait_queue, wait);
920 	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
921 		mask |= EPOLLIN | EPOLLRDNORM;
922 
923 	return mask;
924 }
925 
926 static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
927 {
928 	WRITE_ONCE(has_message, true);
929 	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
930 }
931 
932 static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
933 {
934 	struct mshv_vtl_sint_post_msg message;
935 	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];
936 
937 	if (copy_from_user(&message, arg, sizeof(message)))
938 		return -EFAULT;
939 	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
940 		return -EINVAL;
941 	if (copy_from_user(payload, (void __user *)message.payload_ptr,
942 			   message.payload_size))
943 		return -EFAULT;
944 
945 	return hv_post_message((union hv_connection_id)message.connection_id,
946 			       message.message_type, (void *)payload,
947 			       message.payload_size);
948 }
949 
950 static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
951 {
952 	u64 input, status;
953 	struct mshv_vtl_signal_event signal_event;
954 
955 	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
956 		return -EFAULT;
957 
958 	input = signal_event.connection_id | ((u64)signal_event.flag << 32);
959 
960 	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);
961 
962 	return hv_result_to_errno(status);
963 }
964 
965 static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
966 {
967 	struct mshv_vtl_set_eventfd set_eventfd;
968 	struct eventfd_ctx *eventfd, *old_eventfd;
969 
970 	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
971 		return -EFAULT;
972 	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
973 		return -EINVAL;
974 
975 	eventfd = NULL;
976 	if (set_eventfd.fd >= 0) {
977 		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
978 		if (IS_ERR(eventfd))
979 			return PTR_ERR(eventfd);
980 	}
981 
982 	guard(mutex)(&flag_lock);
983 	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
984 	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);
985 
986 	if (old_eventfd) {
987 		synchronize_rcu();
988 		eventfd_ctx_put(old_eventfd);
989 	}
990 
991 	return 0;
992 }
993 
994 static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
995 {
996 	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
997 	struct mshv_sint_mask mask;
998 
999 	if (copy_from_user(&mask, arg, sizeof(mask)))
1000 		return -EFAULT;
1001 	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
1002 	on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
1003 	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
1004 	if (mask.mask)
1005 		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
1006 
1007 	return 0;
1008 }
1009 
1010 static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1011 {
1012 	switch (cmd) {
1013 	case MSHV_SINT_POST_MESSAGE:
1014 		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
1015 	case MSHV_SINT_SIGNAL_EVENT:
1016 		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
1017 	case MSHV_SINT_SET_EVENTFD:
1018 		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
1019 	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
1020 		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
1021 	default:
1022 		return -ENOIOCTLCMD;
1023 	}
1024 }
1025 
1026 static const struct file_operations mshv_vtl_sint_ops = {
1027 	.owner = THIS_MODULE,
1028 	.read = mshv_vtl_sint_read,
1029 	.poll = mshv_vtl_sint_poll,
1030 	.unlocked_ioctl = mshv_vtl_sint_ioctl,
1031 };
1032 
1033 static struct miscdevice mshv_vtl_sint_dev = {
1034 	.name = "mshv_sint",
1035 	.fops = &mshv_vtl_sint_ops,
1036 	.mode = 0600,
1037 	.minor = MISC_DYNAMIC_MINOR,
1038 };
1039 
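/*
 * /dev/mshv_hvcall: user space first issues MSHV_HVCALL_SETUP with an allow
 * bitmap (one bit per 16-bit hypercall call code) and may then use
 * MSHV_HVCALL to make individual allowed hypercalls, with input and output
 * buffers of at most one page each.
 */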
1040 static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
1041 {
1042 	struct miscdevice *dev = f->private_data;
1043 	struct mshv_vtl_hvcall_fd *fd;
1044 
1045 	if (!capable(CAP_SYS_ADMIN))
1046 		return -EPERM;
1047 
1048 	fd = vzalloc(sizeof(*fd));
1049 	if (!fd)
1050 		return -ENOMEM;
1051 	fd->dev = dev;
1052 	f->private_data = fd;
1053 	mutex_init(&fd->init_mutex);
1054 
1055 	return 0;
1056 }
1057 
1058 static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
1059 {
1060 	struct mshv_vtl_hvcall_fd *fd;
1061 
1062 	fd = f->private_data;
1063 	if (fd) {
1064 		vfree(fd);
1065 		f->private_data = NULL;
1066 	}
1067 
1068 	return 0;
1069 }
1070 
1071 static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
1072 				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
1073 {
1074 	struct mshv_vtl_hvcall_setup hvcall_setup;
1075 
1076 	guard(mutex)(&fd->init_mutex);
1077 
1078 	if (fd->allow_map_initialized) {
1079 		dev_err(fd->dev->this_device,
1080 			"Hypercall allow map has already been set, pid %d\n",
1081 			current->pid);
1082 		return -EINVAL;
1083 	}
1084 
1085 	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
1086 			   sizeof(struct mshv_vtl_hvcall_setup))) {
1087 		return -EFAULT;
1088 	}
1089 	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
1090 		return -EINVAL;
1091 
1092 	if (copy_from_user(&fd->allow_bitmap,
1093 			   (void __user *)hvcall_setup.allow_bitmap_ptr,
1094 			   hvcall_setup.bitmap_array_size)) {
1095 		return -EFAULT;
1096 	}
1097 
1098 	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
1099 		 current->pid);
1100 	fd->allow_map_initialized = true;
1101 	return 0;
1102 }
1103 
1104 static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
1105 {
1106 	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
1107 }
1108 
1109 static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
1110 				struct mshv_vtl_hvcall __user *hvcall_user)
1111 {
1112 	struct mshv_vtl_hvcall hvcall;
1113 	void *in, *out;
1114 	int ret;
1115 
1116 	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
1117 		return -EFAULT;
1118 	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
1119 		return -EINVAL;
1120 	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
1121 		return -EINVAL;
1122 
1123 	/*
1124 	 * By default, no hypercalls are allowed.
1125 	 * User-mode code has to set up the allow bitmap first, and may do so only once.
1126 	 */
1127 
1128 	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
1129 		dev_err(fd->dev->this_device,
1130 			"Hypercall with control data %#llx isn't allowed\n",
1131 			hvcall.control);
1132 		return -EPERM;
1133 	}
1134 
1135 	/*
1136 	 * This may create a problem for Confidential VM (CVM) usecase where we need to use
1137 	 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
1138 	 * hyperv_pcpu_output_arg) for making a hypervisor call.
1139 	 *
1140 	 * TODO: Take care of this when CVM support is added.
1141 	 */
1142 	in = (void *)__get_free_page(GFP_KERNEL);
1143 	out = (void *)__get_free_page(GFP_KERNEL);
1144 
1145 	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
1146 		ret = -EFAULT;
1147 		goto free_pages;
1148 	}
1149 
1150 	hvcall.status = hv_do_hypercall(hvcall.control, in, out);
1151 
1152 	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
1153 		ret = -EFAULT;
1154 		goto free_pages;
1155 	}
1156 	ret = put_user(hvcall.status, &hvcall_user->status);
1157 free_pages:
1158 	free_page((unsigned long)in);
1159 	free_page((unsigned long)out);
1160 
1161 	return ret;
1162 }
1163 
1164 static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1165 {
1166 	struct mshv_vtl_hvcall_fd *fd = f->private_data;
1167 
1168 	switch (cmd) {
1169 	case MSHV_HVCALL_SETUP:
1170 		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
1171 	case MSHV_HVCALL:
1172 		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
1173 	default:
1174 		break;
1175 	}
1176 
1177 	return -ENOIOCTLCMD;
1178 }
1179 
1180 static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
1181 	.owner = THIS_MODULE,
1182 	.open = mshv_vtl_hvcall_dev_open,
1183 	.release = mshv_vtl_hvcall_dev_release,
1184 	.unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
1185 };
1186 
1187 static struct miscdevice mshv_vtl_hvcall_dev = {
1188 	.name = "mshv_hvcall",
1189 	.nodename = "mshv_hvcall",
1190 	.fops = &mshv_vtl_hvcall_dev_file_ops,
1191 	.mode = 0600,
1192 	.minor = MISC_DYNAMIC_MINOR,
1193 };
1194 
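/*
 * /dev/mshv_vtl_low: mmap() of this device maps lower-VTL (VTL0) physical
 * pages into a VTL2 user-mode process.  The mmap offset, in pages, is used
 * directly as the PFN, and suitably aligned ranges are inserted as PMD- or
 * PUD-sized mappings via the huge_fault handler below.
 */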
1195 static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
1196 {
1197 	pid_t pid = task_pid_vnr(current);
1198 	uid_t uid = current_uid().val;
1199 	int ret = 0;
1200 
1201 	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);
1202 
1203 	if (capable(CAP_SYS_ADMIN)) {
1204 		filp->private_data = inodep;
1205 	} else {
1206 		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
1207 		       __func__, pid, uid);
1208 		ret = -EPERM;
1209 	}
1210 
1211 	return ret;
1212 }
1213 
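/*
 * Check whether a huge mapping of 'size' bytes may be inserted at the
 * faulting address: the address and the file offset must be congruent
 * modulo 'size', and the aligned range must lie entirely within the VMA.
 * On success the PFN is rounded down to the same alignment.
 */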
1214 static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
1215 {
1216 	unsigned long mask = size - 1;
1217 	unsigned long start = vmf->address & ~mask;
1218 	unsigned long end = start + size;
1219 	bool is_valid;
1220 
1221 	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
1222 		start >= vmf->vma->vm_start &&
1223 		end <= vmf->vma->vm_end;
1224 
1225 	if (is_valid)
1226 		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);
1227 
1228 	return is_valid;
1229 }
1230 
1231 static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
1232 {
1233 	unsigned long pfn = vmf->pgoff;
1234 	vm_fault_t ret = VM_FAULT_FALLBACK;
1235 
1236 	switch (order) {
1237 	case 0:
1238 		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
1239 
1240 	case PMD_ORDER:
1241 		if (can_fault(vmf, PMD_SIZE, &pfn))
1242 			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1243 		return ret;
1244 
1245 	case PUD_ORDER:
1246 		if (can_fault(vmf, PUD_SIZE, &pfn))
1247 			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1248 		return ret;
1249 
1250 	default:
1251 		return VM_FAULT_SIGBUS;
1252 	}
1253 }
1254 
1255 static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
1256 {
1257 	return mshv_vtl_low_huge_fault(vmf, 0);
1258 }
1259 
1260 static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
1261 	.fault = mshv_vtl_low_fault,
1262 	.huge_fault = mshv_vtl_low_huge_fault,
1263 };
1264 
1265 static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
1266 {
1267 	vma->vm_ops = &mshv_vtl_low_vm_ops;
1268 	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);
1269 
1270 	return 0;
1271 }
1272 
1273 static const struct file_operations mshv_vtl_low_file_ops = {
1274 	.owner		= THIS_MODULE,
1275 	.open		= mshv_vtl_low_open,
1276 	.mmap		= mshv_vtl_low_mmap,
1277 };
1278 
1279 static struct miscdevice mshv_vtl_low = {
1280 	.name = "mshv_vtl_low",
1281 	.nodename = "mshv_vtl_low",
1282 	.fops = &mshv_vtl_low_file_ops,
1283 	.mode = 0600,
1284 	.minor = MISC_DYNAMIC_MINOR,
1285 };
1286 
1287 static int __init mshv_vtl_init(void)
1288 {
1289 	int ret;
1290 	struct device *dev = mshv_dev.this_device;
1291 
1292 	/*
1293 	 * This creates /dev/mshv which provides functionality to create VTLs and partitions.
1294 	 */
1295 	ret = misc_register(&mshv_dev);
1296 	if (ret) {
1297 		dev_err(dev, "mshv device register failed: %d\n", ret);
1298 		goto free_dev;
1299 	}
1300 
1301 	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
1302 	init_waitqueue_head(&fd_wait_queue);
1303 
1304 	if (mshv_vtl_get_vsm_regs()) {
1305 		dev_emerg(dev, "Unable to get VSM capabilities !!\n");
1306 		ret = -ENODEV;
1307 		goto free_dev;
1308 	}
1309 	if (mshv_vtl_configure_vsm_partition(dev)) {
1310 		dev_emerg(dev, "VSM configuration failed !!\n");
1311 		ret = -ENODEV;
1312 		goto free_dev;
1313 	}
1314 
1315 	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
1316 	ret = hv_vtl_setup_synic();
1317 	if (ret)
1318 		goto free_dev;
1319 
1320 	/*
1321 	 * mshv_sint device adds VMBus relay ioctl support.
1322 	 * This provides a channel for VTL0 to communicate with VTL2.
1323 	 */
1324 	ret = misc_register(&mshv_vtl_sint_dev);
1325 	if (ret)
1326 		goto free_synic;
1327 
1328 	/*
1329 	 * The mshv_hvcall device adds an interface that lets user space issue hypercalls directly.
1330 	 */
1331 	ret = misc_register(&mshv_vtl_hvcall_dev);
1332 	if (ret)
1333 		goto free_sint;
1334 
1335 	/*
1336 	 * The mshv_vtl_low device is used to map VTL0 address space into a user-mode process in VTL2.
1337 	 * It implements mmap() so that a VTL2 user-mode process can access VTL0 memory directly.
1338 	 */
1339 	ret = misc_register(&mshv_vtl_low);
1340 	if (ret)
1341 		goto free_hvcall;
1342 
1343 	/*
1344 	 * The "mshv vtl mem dev" device is later used to set up VTL0 memory.
1345 	 */
1346 	mem_dev = kzalloc(sizeof(*mem_dev), GFP_KERNEL);
1347 	if (!mem_dev) {
1348 		ret = -ENOMEM;
1349 		goto free_low;
1350 	}
1351 
1352 	mutex_init(&mshv_vtl_poll_file_lock);
1353 
1354 	device_initialize(mem_dev);
1355 	dev_set_name(mem_dev, "mshv vtl mem dev");
1356 	ret = device_add(mem_dev);
1357 	if (ret) {
1358 		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
1359 		goto free_mem;
1360 	}
1361 
1362 	return 0;
1363 
1364 free_mem:
1365 	kfree(mem_dev);
1366 free_low:
1367 	misc_deregister(&mshv_vtl_low);
1368 free_hvcall:
1369 	misc_deregister(&mshv_vtl_hvcall_dev);
1370 free_sint:
1371 	misc_deregister(&mshv_vtl_sint_dev);
1372 free_synic:
1373 	hv_vtl_remove_synic();
1374 free_dev:
1375 	misc_deregister(&mshv_dev);
1376 
1377 	return ret;
1378 }
1379 
1380 static void __exit mshv_vtl_exit(void)
1381 {
1382 	device_del(mem_dev);
1383 	kfree(mem_dev);
1384 	misc_deregister(&mshv_vtl_low);
1385 	misc_deregister(&mshv_vtl_hvcall_dev);
1386 	misc_deregister(&mshv_vtl_sint_dev);
1387 	hv_vtl_remove_synic();
1388 	misc_deregister(&mshv_dev);
1389 }
1390 
1391 module_init(mshv_vtl_init);
1392 module_exit(mshv_vtl_exit);
1393