xref: /linux/drivers/hv/mshv_vtl_main.c (revision 8fd12b03c7c888303c3c45559d8c3e270a916f9f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Microsoft Corporation.
4  *
5  * Author:
6  *   Roman Kisel <romank@linux.microsoft.com>
7  *   Saurabh Sengar <ssengar@linux.microsoft.com>
8  *   Naman Jain <namjain@linux.microsoft.com>
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/miscdevice.h>
14 #include <linux/anon_inodes.h>
15 #include <linux/cpuhotplug.h>
16 #include <linux/count_zeros.h>
17 #include <linux/entry-virt.h>
18 #include <linux/eventfd.h>
19 #include <linux/poll.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <asm/debugreg.h>
23 #include <asm/mshyperv.h>
24 #include <trace/events/ipi.h>
25 #include <uapi/asm/mtrr.h>
26 #include <uapi/linux/mshv.h>
27 #include <hyperv/hvhdk.h>
28 
29 #include "../../kernel/fpu/legacy.h"
30 #include "mshv.h"
31 #include "mshv_vtl.h"
32 #include "hyperv_vmbus.h"
33 
34 MODULE_AUTHOR("Microsoft");
35 MODULE_LICENSE("GPL");
36 MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");
37 
38 #define MSHV_ENTRY_REASON_LOWER_VTL_CALL     0x1
39 #define MSHV_ENTRY_REASON_INTERRUPT          0x2
40 #define MSHV_ENTRY_REASON_INTERCEPT          0x3
41 
42 #define MSHV_REAL_OFF_SHIFT	16
43 #define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
44 #define MSHV_RUN_PAGE_OFFSET	0
45 #define MSHV_REG_PAGE_OFFSET	1
46 #define VTL2_VMBUS_SINT_INDEX	7
47 
48 static struct device *mem_dev;
49 
50 static struct tasklet_struct msg_dpc;
51 static wait_queue_head_t fd_wait_queue;
52 static bool has_message;
53 static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
54 static DEFINE_MUTEX(flag_lock);
55 static bool __read_mostly mshv_has_reg_page;
56 
57 /* The hvcall code is a u16, so allocate a bitmap of (1 << 16) bits (8 KiB) to cover every code */
58 #define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8)
59 
60 struct mshv_vtl_hvcall_fd {
61 	u8 allow_bitmap[MAX_BITMAP_SIZE];
62 	bool allow_map_initialized;
63 	/*
64 	 * Used to protect hvcall setup in IOCTLs
65 	 */
66 	struct mutex init_mutex;
67 	struct miscdevice *dev;
68 };
69 
70 struct mshv_vtl_poll_file {
71 	struct file *file;
72 	wait_queue_entry_t wait;
73 	wait_queue_head_t *wqh;
74 	poll_table pt;
75 	int cpu;
76 };
77 
78 struct mshv_vtl {
79 	struct device *module_dev;
80 	u64 id;
81 };
82 
83 struct mshv_vtl_per_cpu {
84 	struct mshv_vtl_run *run;
85 	struct page *reg_page;
86 };
87 
88 /* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
89 union hv_synic_overlay_page_msr {
90 	u64 as_uint64;
91 	struct {
92 		u64 enabled: 1;
93 		u64 reserved: 11;
94 		u64 pfn: 52;
95 	} __packed;
96 };
97 
98 static struct mutex mshv_vtl_poll_file_lock;
99 static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
100 static union hv_register_vsm_capabilities mshv_vsm_capabilities;
101 
102 static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
103 static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
104 static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);
105 
106 static const union hv_input_vtl input_vtl_zero;
107 static const union hv_input_vtl input_vtl_normal = {
108 	.use_target_vtl = 1,
109 };
110 
111 static const struct file_operations mshv_vtl_fops;
112 
113 static long
114 mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
115 {
116 	struct mshv_vtl *vtl;
117 	struct file *file;
118 	int fd;
119 
120 	vtl = kzalloc_obj(*vtl);
121 	if (!vtl)
122 		return -ENOMEM;
123 
124 	fd = get_unused_fd_flags(O_CLOEXEC);
125 	if (fd < 0) {
126 		kfree(vtl);
127 		return fd;
128 	}
129 	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
130 				  vtl, O_RDWR);
131 	if (IS_ERR(file)) {
132 		kfree(vtl);
133 		return PTR_ERR(file);
134 	}
135 	vtl->module_dev = module_dev;
136 	fd_install(fd, file);
137 
138 	return fd;
139 }
140 
141 static long
142 mshv_ioctl_check_extension(void __user *user_arg)
143 {
144 	u32 arg;
145 
146 	if (copy_from_user(&arg, user_arg, sizeof(arg)))
147 		return -EFAULT;
148 
149 	switch (arg) {
150 	case MSHV_CAP_CORE_API_STABLE:
151 		return 0;
152 	case MSHV_CAP_REGISTER_PAGE:
153 		return mshv_has_reg_page;
154 	case MSHV_CAP_VTL_RETURN_ACTION:
155 		return mshv_vsm_capabilities.return_action_available;
156 	case MSHV_CAP_DR6_SHARED:
157 		return mshv_vsm_capabilities.dr6_shared;
158 	}
159 
160 	return -EOPNOTSUPP;
161 }
162 
163 static long
164 mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
165 {
166 	struct miscdevice *misc = filp->private_data;
167 
168 	switch (ioctl) {
169 	case MSHV_CHECK_EXTENSION:
170 		return mshv_ioctl_check_extension((void __user *)arg);
171 	case MSHV_CREATE_VTL:
172 		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
173 	}
174 
175 	return -ENOTTY;
176 }
177 
178 static const struct file_operations mshv_dev_fops = {
179 	.owner		= THIS_MODULE,
180 	.unlocked_ioctl	= mshv_dev_ioctl,
181 	.llseek		= noop_llseek,
182 };
183 
184 static struct miscdevice mshv_dev = {
185 	.minor = MISC_DYNAMIC_MINOR,
186 	.name = "mshv",
187 	.fops = &mshv_dev_fops,
188 	.mode = 0600,
189 };
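
/*
 * Editor's sketch (not part of the driver): minimal user-space use of the two
 * /dev/mshv ioctls handled above. MSHV_CHECK_EXTENSION returns 0 (or a positive
 * capability value) when the queried capability is present, and MSHV_CREATE_VTL
 * returns a new fd backed by mshv_vtl_fops. Assumes the uapi header is
 * installed as <linux/mshv.h>; error handling is abbreviated.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/mshv.h>

static int example_create_vtl(void)
{
	unsigned int cap = MSHV_CAP_CORE_API_STABLE;
	int mshv, vtl;

	mshv = open("/dev/mshv", O_RDWR | O_CLOEXEC);
	if (mshv < 0)
		return -1;

	if (ioctl(mshv, MSHV_CHECK_EXTENSION, &cap) < 0) {	/* -EOPNOTSUPP if unknown */
		close(mshv);
		return -1;
	}

	vtl = ioctl(mshv, MSHV_CREATE_VTL, NULL);	/* fd for the VTL device */
	close(mshv);
	return vtl;
}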
190 
191 static struct mshv_vtl_run *mshv_vtl_this_run(void)
192 {
193 	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
194 }
195 
196 static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
197 {
198 	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
199 }
200 
201 static struct page *mshv_vtl_cpu_reg_page(int cpu)
202 {
203 	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
204 }
205 
206 static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
207 {
208 	struct hv_register_assoc reg_assoc = {};
209 	union hv_synic_overlay_page_msr overlay = {};
210 	struct page *reg_page;
211 
212 	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
213 	if (!reg_page) {
214 		WARN(1, "failed to allocate register page\n");
215 		return;
216 	}
217 
218 	overlay.enabled = 1;
219 	overlay.pfn = page_to_hvpfn(reg_page);
220 	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
221 	reg_assoc.value.reg64 = overlay.as_uint64;
222 
223 	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
224 				     1, input_vtl_zero, &reg_assoc)) {
225 		WARN(1, "failed to setup register page\n");
226 		__free_page(reg_page);
227 		return;
228 	}
229 
230 	per_cpu->reg_page = reg_page;
231 	mshv_has_reg_page = true;
232 }
233 
234 static void mshv_vtl_synic_enable_regs(unsigned int cpu)
235 {
236 	union hv_synic_sint sint;
237 
238 	sint.as_uint64 = 0;
239 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
240 	sint.masked = false;
241 	sint.auto_eoi = hv_recommend_using_aeoi();
242 
243 	/* Enable intercepts */
244 	if (!mshv_vsm_capabilities.intercept_page_available)
245 		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
246 			   sint.as_uint64);
247 
248 	/* The VTL2 host VSP SINT is masked and unmasked at user-mode request */
249 }
250 
251 static int mshv_vtl_get_vsm_regs(void)
252 {
253 	struct hv_register_assoc registers[2];
254 	int ret, count = 2;
255 
256 	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
257 	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;
258 
259 	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
260 				       count, input_vtl_zero, registers);
261 	if (ret)
262 		return ret;
263 
264 	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
265 	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;
266 
267 	return ret;
268 }
269 
270 static int mshv_vtl_configure_vsm_partition(struct device *dev)
271 {
272 	union hv_register_vsm_partition_config config;
273 	struct hv_register_assoc reg_assoc;
274 
275 	config.as_uint64 = 0;
276 	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
277 	config.enable_vtl_protection = 1;
278 	config.zero_memory_on_reset = 1;
279 	config.intercept_vp_startup = 1;
280 	config.intercept_cpuid_unimplemented = 1;
281 
282 	if (mshv_vsm_capabilities.intercept_page_available) {
283 		dev_dbg(dev, "using intercept page\n");
284 		config.intercept_page = 1;
285 	}
286 
287 	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
288 	reg_assoc.value.reg64 = config.as_uint64;
289 
290 	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
291 				       1, input_vtl_zero, &reg_assoc);
292 }
293 
294 static void mshv_vtl_vmbus_isr(void)
295 {
296 	struct hv_per_cpu_context *per_cpu;
297 	struct hv_message *msg;
298 	u32 message_type;
299 	union hv_synic_event_flags *event_flags;
300 	struct eventfd_ctx *eventfd;
301 	u16 i;
302 
303 	per_cpu = this_cpu_ptr(hv_context.cpu_context);
304 	if (smp_processor_id() == 0) {
305 		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
306 		message_type = READ_ONCE(msg->header.message_type);
307 		if (message_type != HVMSG_NONE)
308 			tasklet_schedule(&msg_dpc);
309 	}
310 
311 	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
312 			VTL2_VMBUS_SINT_INDEX;
313 	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
314 		if (!sync_test_and_clear_bit(i, event_flags->flags))
315 			continue;
316 		rcu_read_lock();
317 		eventfd = READ_ONCE(flag_eventfds[i]);
318 		if (eventfd)
319 			eventfd_signal(eventfd);
320 		rcu_read_unlock();
321 	}
322 
323 	vmbus_isr();
324 }
325 
326 static int mshv_vtl_alloc_context(unsigned int cpu)
327 {
328 	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);
329 
330 	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
331 	if (!per_cpu->run)
332 		return -ENOMEM;
333 
334 	if (mshv_vsm_capabilities.intercept_page_available)
335 		mshv_vtl_configure_reg_page(per_cpu);
336 
337 	mshv_vtl_synic_enable_regs(cpu);
338 
339 	return 0;
340 }
341 
342 static int mshv_vtl_cpuhp_online;
343 
344 static int hv_vtl_setup_synic(void)
345 {
346 	int ret;
347 
348 	/* Use our isr to first filter out packets destined for userspace */
349 	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);
350 
351 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
352 				mshv_vtl_alloc_context, NULL);
353 	if (ret < 0) {
354 		hv_setup_vmbus_handler(vmbus_isr);
355 		return ret;
356 	}
357 
358 	mshv_vtl_cpuhp_online = ret;
359 
360 	return 0;
361 }
362 
363 static void hv_vtl_remove_synic(void)
364 {
365 	cpuhp_remove_state(mshv_vtl_cpuhp_online);
366 	hv_setup_vmbus_handler(vmbus_isr);
367 }
368 
369 static int vtl_get_vp_register(struct hv_register_assoc *reg)
370 {
371 	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
372 					1, input_vtl_normal, reg);
373 }
374 
375 static int vtl_set_vp_register(struct hv_register_assoc *reg)
376 {
377 	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
378 					1, input_vtl_normal, reg);
379 }
380 
381 static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
382 {
383 	struct mshv_vtl_ram_disposition vtl0_mem;
384 	struct dev_pagemap *pgmap;
385 	void *addr;
386 
387 	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
388 		return -EFAULT;
389 	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
390 		dev_err(vtl->module_dev, "range start pfn (%llx) >= end pfn (%llx)\n",
391 			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
392 		return -EFAULT;
393 	}
394 
395 	pgmap = kzalloc_obj(*pgmap);
396 	if (!pgmap)
397 		return -ENOMEM;
398 
399 	/*
400 	 * vtl0_mem.last_pfn is exclusive by design: it equals 'start_pfn + size' of the
401 	 * pagemap range, so the page at last_pfn is neither reserved nor wasted.
402 	 */
403 	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
404 	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
405 	pgmap->nr_range = 1;
406 	pgmap->type = MEMORY_DEVICE_GENERIC;
407 
408 	/*
409 	 * Determine the highest page order that can be used for the given memory range.
410 	 * This works best when the range is aligned; i.e. both the start and the length.
411 	 * Clamp to MAX_FOLIO_ORDER to avoid a WARN in memremap_pages() when the range
412 	 * alignment exceeds the maximum supported folio order for this kernel config.
413 	 */
414 	pgmap->vmemmap_shift = min(count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn),
415 				   MAX_FOLIO_ORDER);
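	/*
	 * Editor's note: e.g. start_pfn = 0x100000 and last_pfn = 0x140000 give
	 * count_trailing_zeros(0x140000) = 18, i.e. order-18 (1 GiB) folios,
	 * subject to the MAX_FOLIO_ORDER clamp.
	 */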
416 	dev_dbg(vtl->module_dev,
417 		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
418 		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
419 
420 	addr = devm_memremap_pages(mem_dev, pgmap);
421 	if (IS_ERR(addr)) {
422 		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
423 		kfree(pgmap);
424 		return PTR_ERR(addr);
425 	}
426 
427 	/* Don't free pgmap, since it has to stick around until the memory
428 	 * is unmapped, which will never happen as there is no scenario
429 	 * where VTL0 can be released/shutdown without bringing down VTL2.
430 	 */
431 	return 0;
432 }
433 
434 static void mshv_vtl_cancel(int cpu)
435 {
436 	int here = get_cpu();
437 
438 	if (here != cpu) {
439 		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
440 			smp_send_reschedule(cpu);
441 	} else {
442 		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
443 	}
444 	put_cpu();
445 }
446 
447 static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
448 {
449 	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);
450 
451 	mshv_vtl_cancel(poll_file->cpu);
452 
453 	return 0;
454 }
455 
456 static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
457 {
458 	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);
459 
460 	WARN_ON(poll_file->wqh);
461 	poll_file->wqh = wqh;
462 	add_wait_queue(wqh, &poll_file->wait);
463 }
464 
465 static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
466 {
467 	struct file *file, *old_file;
468 	struct mshv_vtl_poll_file *poll_file;
469 	struct mshv_vtl_set_poll_file input;
470 
471 	if (copy_from_user(&input, user_input, sizeof(input)))
472 		return -EFAULT;
473 
474 	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
475 		return -EINVAL;
476 	/*
477 	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this driver runs,
478 	 * so the CPU is expected to remain online after the above cpu_online() check.
479 	 */
480 
482 	file = fget(input.fd);
483 	if (!file)
484 		return -EBADFD;
485 
486 	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
487 	if (!poll_file)
488 		return -EINVAL;
489 
490 	mutex_lock(&mshv_vtl_poll_file_lock);
491 
492 	if (poll_file->wqh)
493 		remove_wait_queue(poll_file->wqh, &poll_file->wait);
494 	poll_file->wqh = NULL;
495 
496 	old_file = poll_file->file;
497 	poll_file->file = file;
498 	poll_file->cpu = input.cpu;
499 
500 	if (file) {
501 		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
502 		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
503 		vfs_poll(file, &poll_file->pt);
504 	}
505 
506 	mutex_unlock(&mshv_vtl_poll_file_lock);
507 
508 	if (old_file)
509 		fput(old_file);
510 
511 	return 0;
512 }
513 
514 /* Static table mapping register names to their corresponding actions */
515 static const struct {
516 	enum hv_register_name reg_name;
517 	int debug_reg_num;  /* -1 if not a debug register */
518 	u32 msr_addr;       /* 0 if not an MSR */
519 } reg_table[] = {
520 	/* Debug registers */
521 	{HV_X64_REGISTER_DR0, 0, 0},
522 	{HV_X64_REGISTER_DR1, 1, 0},
523 	{HV_X64_REGISTER_DR2, 2, 0},
524 	{HV_X64_REGISTER_DR3, 3, 0},
525 	{HV_X64_REGISTER_DR6, 6, 0},
526 	/* MTRR MSRs */
527 	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
528 	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
529 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
530 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
531 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
532 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
533 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
534 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
535 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
536 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
537 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
538 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
539 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
540 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
541 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
542 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
543 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
544 	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
545 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
546 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
547 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
548 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
549 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
550 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
551 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
552 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
553 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
554 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
555 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
556 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
557 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
558 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
559 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
560 	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
561 	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
562 	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
563 	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
564 	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
565 	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
566 	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
567 	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
568 	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
569 	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
570 	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
571 	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
572 };
573 
574 static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
575 {
576 	u64 *reg64;
577 	enum hv_register_name gpr_name;
578 	int i;
579 
580 	gpr_name = regs->name;
581 	reg64 = &regs->value.reg64;
582 
583 	/* Search for the register in the table */
584 	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
585 		if (reg_table[i].reg_name != gpr_name)
586 			continue;
587 		if (reg_table[i].debug_reg_num != -1) {
588 			/* Handle debug registers */
589 			if (gpr_name == HV_X64_REGISTER_DR6 &&
590 			    !mshv_vsm_capabilities.dr6_shared)
591 				goto hypercall;
592 			if (set)
593 				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
594 			else
595 				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
596 		} else {
597 			/* Handle MSRs */
598 			if (set)
599 				wrmsrl(reg_table[i].msr_addr, *reg64);
600 			else
601 				rdmsrl(reg_table[i].msr_addr, *reg64);
602 		}
603 		return 0;
604 	}
605 
606 hypercall:
607 	return 1;
608 }
609 
610 static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
611 {
612 	struct hv_vp_assist_page *hvp;
613 
614 	hvp = hv_vp_assist_page[smp_processor_id()];
615 
616 	/*
617 	 * Process any signal events set directly in the run page.
618 	 */
619 	if (mshv_vsm_capabilities.return_action_available) {
620 		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);
621 
622 		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);
623 
624 		/*
625 		 * Hypervisor will take care of clearing out the actions
626 		 * set in the assist page.
627 		 */
628 		memcpy(hvp->vtl_ret_actions,
629 		       mshv_vtl_this_run()->vtl_ret_actions,
630 		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
631 	}
632 
633 	mshv_vtl_return_call(vtl0);
634 }
635 
636 static bool mshv_vtl_process_intercept(void)
637 {
638 	struct hv_per_cpu_context *mshv_cpu;
639 	void *synic_message_page;
640 	struct hv_message *msg;
641 	u32 message_type;
642 
643 	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
644 	synic_message_page = mshv_cpu->hyp_synic_message_page;
645 	if (unlikely(!synic_message_page))
646 		return true;
647 
648 	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
649 	message_type = READ_ONCE(msg->header.message_type);
650 	if (message_type == HVMSG_NONE)
651 		return true;
652 
653 	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
654 	vmbus_signal_eom(msg, message_type);
655 
656 	return false;
657 }
658 
659 static int mshv_vtl_ioctl_return_to_lower_vtl(void)
660 {
661 	preempt_disable();
662 	for (;;) {
663 		unsigned long irq_flags;
664 		struct hv_vp_assist_page *hvp;
665 		int ret;
666 
667 		if (__xfer_to_guest_mode_work_pending()) {
668 			preempt_enable();
669 			ret = xfer_to_guest_mode_handle_work();
670 			if (ret)
671 				return ret;
672 			preempt_disable();
673 		}
674 
675 		local_irq_save(irq_flags);
676 		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
677 			local_irq_restore(irq_flags);
678 			preempt_enable();
679 			return -EINTR;
680 		}
681 
682 		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
683 		local_irq_restore(irq_flags);
684 
685 		hvp = hv_vp_assist_page[smp_processor_id()];
686 		this_cpu_inc(num_vtl0_transitions);
687 		switch (hvp->vtl_entry_reason) {
688 		case MSHV_ENTRY_REASON_INTERRUPT:
689 			if (!mshv_vsm_capabilities.intercept_page_available &&
690 			    likely(!mshv_vtl_process_intercept()))
691 				goto done;
692 			break;
693 
694 		case MSHV_ENTRY_REASON_INTERCEPT:
695 			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
696 			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
697 			       sizeof(hvp->intercept_message));
698 			goto done;
699 
700 		default:
701 			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
702 		}
703 	}
704 
705 done:
706 	preempt_enable();
707 
708 	return 0;
709 }
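
/*
 * Editor's sketch (not part of the driver): the user-mode half of the loop
 * above, run on a thread pinned to the target CPU. The VMM clears the cancel
 * flag in its mapped struct mshv_vtl_run, invokes MSHV_RETURN_TO_LOWER_VTL,
 * and on return finds the intercept message in run->exit_message. -EINTR
 * indicates cancellation via the poll file or a pending signal. vtl_fd and
 * run are assumed to come from MSHV_CREATE_VTL and an mmap() of the per-CPU
 * run page; struct mshv_vtl_run is assumed visible via <linux/mshv.h>.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/mshv.h>

static int run_lower_vtl_once(int vtl_fd, struct mshv_vtl_run *run)
{
	run->cancel = 0;
	if (ioctl(vtl_fd, MSHV_RETURN_TO_LOWER_VTL, NULL) < 0)
		return errno == EINTR ? 0 : -1;	/* cancelled vs. hard error */

	return 1;	/* run->exit_message now holds the intercept message */
}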
710 
711 static long
712 mshv_vtl_ioctl_get_regs(void __user *user_args)
713 {
714 	struct mshv_vp_registers args;
715 	struct hv_register_assoc reg;
716 	long ret;
717 
718 	if (copy_from_user(&args, user_args, sizeof(args)))
719 		return -EFAULT;
720 
721 	/*  This IOCTL supports processing only one register at a time. */
722 	if (args.count != 1)
723 		return -EINVAL;
724 
725 	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
726 			   sizeof(reg)))
727 		return -EFAULT;
728 
729 	ret = mshv_vtl_get_set_reg(&reg, false);
730 	if (!ret)
731 		goto copy_args; /* No need of hypercall */
732 	ret = vtl_get_vp_register(&reg);
733 	if (ret)
734 		return ret;
735 
736 copy_args:
737 	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
738 		ret = -EFAULT;
739 
740 	return ret;
741 }
742 
743 static long
744 mshv_vtl_ioctl_set_regs(void __user *user_args)
745 {
746 	struct mshv_vp_registers args;
747 	struct hv_register_assoc reg;
748 	long ret;
749 
750 	if (copy_from_user(&args, user_args, sizeof(args)))
751 		return -EFAULT;
752 
753 	/*  This IOCTL supports processing only one register at a time. */
754 	if (args.count != 1)
755 		return -EINVAL;
756 
757 	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
758 		return -EFAULT;
759 
760 	ret = mshv_vtl_get_set_reg(&reg, true);
761 	if (!ret)
762 		return ret; /* No need of hypercall */
763 	ret = vtl_set_vp_register(&reg);
764 
765 	return ret;
766 }
767 
768 static long
769 mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
770 {
771 	long ret;
772 	struct mshv_vtl *vtl = filp->private_data;
773 
774 	switch (ioctl) {
775 	case MSHV_SET_POLL_FILE:
776 		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
777 		break;
778 	case MSHV_GET_VP_REGISTERS:
779 		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
780 		break;
781 	case MSHV_SET_VP_REGISTERS:
782 		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
783 		break;
784 	case MSHV_RETURN_TO_LOWER_VTL:
785 		ret = mshv_vtl_ioctl_return_to_lower_vtl();
786 		break;
787 	case MSHV_ADD_VTL0_MEMORY:
788 		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
789 		break;
790 	default:
791 		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
792 		ret = -ENOTTY;
793 	}
794 
795 	return ret;
796 }
797 
798 static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
799 {
800 	struct page *page;
801 	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
802 	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;
803 
804 	if (!cpu_online(cpu))
805 		return VM_FAULT_SIGBUS;
806 	/*
807 	 * CPU hotplug is not supported in VTL2 in OpenHCL, where this driver runs,
808 	 * so the CPU is expected to remain online after the above cpu_online() check.
809 	 */
810 
811 	if (real_off == MSHV_RUN_PAGE_OFFSET) {
812 		page = virt_to_page(mshv_vtl_cpu_run(cpu));
813 	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
814 		if (!mshv_has_reg_page)
815 			return VM_FAULT_SIGBUS;
816 		page = mshv_vtl_cpu_reg_page(cpu);
817 	} else {
818 		return VM_FAULT_NOPAGE;
819 	}
820 
821 	get_page(page);
822 	vmf->page = page;
823 
824 	return 0;
825 }
826 
827 static const struct vm_operations_struct mshv_vtl_vm_ops = {
828 	.fault = mshv_vtl_fault,
829 };
830 
831 static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
832 {
833 	vma->vm_ops = &mshv_vtl_vm_ops;
834 
835 	return 0;
836 }
837 
838 static int mshv_vtl_release(struct inode *inode, struct file *filp)
839 {
840 	struct mshv_vtl *vtl = filp->private_data;
841 
842 	kfree(vtl);
843 
844 	return 0;
845 }
846 
847 static const struct file_operations mshv_vtl_fops = {
848 	.owner = THIS_MODULE,
849 	.unlocked_ioctl = mshv_vtl_ioctl,
850 	.release = mshv_vtl_release,
851 	.mmap = mshv_vtl_mmap,
852 };
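
/*
 * Editor's sketch (not part of the driver): computing the mmap() offset that
 * mshv_vtl_fault() above decodes. The low MSHV_REAL_OFF_SHIFT (16) bits of the
 * page offset select the CPU, and the bits above select the page type:
 * MSHV_RUN_PAGE_OFFSET (0) or MSHV_REG_PAGE_OFFSET (1). A hypothetical helper,
 * assuming vtl_fd came from MSHV_CREATE_VTL:
 */
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_vtl_cpu_page(int vtl_fd, unsigned int cpu, unsigned int type)
{
	long page = sysconf(_SC_PAGESIZE);
	/* pgoff = (type << 16) | cpu, converted to a byte offset */
	off_t off = (off_t)((((uint64_t)type << 16) | cpu) * (uint64_t)page);

	return mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, vtl_fd, off);
}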
853 
854 static void mshv_vtl_synic_mask_vmbus_sint(void *info)
855 {
856 	union hv_synic_sint sint;
857 	const u8 *mask = info;
858 
859 	sint.as_uint64 = 0;
860 	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
861 	sint.masked = (*mask != 0);
862 	sint.auto_eoi = hv_recommend_using_aeoi();
863 
864 	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
865 		   sint.as_uint64);
866 
867 	if (!sint.masked)
868 		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
869 	else
870 		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
871 }
872 
873 static void mshv_vtl_read_remote(void *buffer)
874 {
875 	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
876 	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
877 					VTL2_VMBUS_SINT_INDEX;
878 	u32 message_type = READ_ONCE(msg->header.message_type);
879 
880 	WRITE_ONCE(has_message, false);
881 	if (message_type == HVMSG_NONE)
882 		return;
883 
884 	memcpy(buffer, msg, sizeof(*msg));
885 	vmbus_signal_eom(msg, message_type);
886 }
887 
888 static bool vtl_synic_mask_vmbus_sint_masked = true;
889 
890 static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
891 {
892 	struct hv_message msg = {};
893 	int ret;
894 
895 	if (size < sizeof(msg))
896 		return -EINVAL;
897 
898 	for (;;) {
899 		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
900 		if (msg.header.message_type != HVMSG_NONE)
901 			break;
902 
903 		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
904 			return 0; /* EOF */
905 
906 		if (filp->f_flags & O_NONBLOCK)
907 			return -EAGAIN;
908 
909 		ret = wait_event_interruptible(fd_wait_queue,
910 					       READ_ONCE(has_message) ||
911 						READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
912 		if (ret)
913 			return ret;
914 	}
915 
916 	if (copy_to_user(arg, &msg, sizeof(msg)))
917 		return -EFAULT;
918 
919 	return sizeof(msg);
920 }
921 
922 static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
923 {
924 	__poll_t mask = 0;
925 
926 	poll_wait(filp, &fd_wait_queue, wait);
927 	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
928 		mask |= EPOLLIN | EPOLLRDNORM;
929 
930 	return mask;
931 }
932 
933 static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
934 {
935 	WRITE_ONCE(has_message, true);
936 	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
937 }
938 
939 static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
940 {
941 	struct mshv_vtl_sint_post_msg message;
942 	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];
943 
944 	if (copy_from_user(&message, arg, sizeof(message)))
945 		return -EFAULT;
946 	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
947 		return -EINVAL;
948 	if (copy_from_user(payload, (void __user *)message.payload_ptr,
949 			   message.payload_size))
950 		return -EFAULT;
951 
952 	return hv_post_message((union hv_connection_id)message.connection_id,
953 			       message.message_type, (void *)payload,
954 			       message.payload_size);
955 }
956 
957 static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
958 {
959 	u64 input, status;
960 	struct mshv_vtl_signal_event signal_event;
961 
962 	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
963 		return -EFAULT;
964 
965 	input = signal_event.connection_id | ((u64)signal_event.flag << 32);
966 
967 	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);
968 
969 	return hv_result_to_errno(status);
970 }
971 
972 static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
973 {
974 	struct mshv_vtl_set_eventfd set_eventfd;
975 	struct eventfd_ctx *eventfd, *old_eventfd;
976 
977 	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
978 		return -EFAULT;
979 	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
980 		return -EINVAL;
981 
982 	eventfd = NULL;
983 	if (set_eventfd.fd >= 0) {
984 		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
985 		if (IS_ERR(eventfd))
986 			return PTR_ERR(eventfd);
987 	}
988 
989 	guard(mutex)(&flag_lock);
990 	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
991 	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);
992 
993 	if (old_eventfd) {
994 		synchronize_rcu();
995 		eventfd_ctx_put(old_eventfd);
996 	}
997 
998 	return 0;
999 }
1000 
1001 static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
1002 {
1003 	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
1004 	struct mshv_sint_mask mask;
1005 
1006 	if (copy_from_user(&mask, arg, sizeof(mask)))
1007 		return -EFAULT;
1008 	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
1009 	on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
1010 	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
1011 	if (mask.mask)
1012 		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
1013 
1014 	return 0;
1015 }
1016 
1017 static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1018 {
1019 	switch (cmd) {
1020 	case MSHV_SINT_POST_MESSAGE:
1021 		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
1022 	case MSHV_SINT_SIGNAL_EVENT:
1023 		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
1024 	case MSHV_SINT_SET_EVENTFD:
1025 		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
1026 	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
1027 		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
1028 	default:
1029 		return -ENOIOCTLCMD;
1030 	}
1031 }
1032 
1033 static const struct file_operations mshv_vtl_sint_ops = {
1034 	.owner = THIS_MODULE,
1035 	.read = mshv_vtl_sint_read,
1036 	.poll = mshv_vtl_sint_poll,
1037 	.unlocked_ioctl = mshv_vtl_sint_ioctl,
1038 };
1039 
1040 static struct miscdevice mshv_vtl_sint_dev = {
1041 	.name = "mshv_sint",
1042 	.fops = &mshv_vtl_sint_ops,
1043 	.mode = 0600,
1044 	.minor = MISC_DYNAMIC_MINOR,
1045 };
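
/*
 * Editor's sketch (not part of the driver): draining the VTL2 VMBus SINT
 * message stream exposed by /dev/mshv_sint. Each read() yields exactly one
 * 256-byte struct hv_message, read() returns 0 (EOF) once the SINT has been
 * masked via MSHV_SINT_PAUSE_MESSAGE_STREAM, and poll() raises EPOLLIN when a
 * message is pending. Assumes sint_fd was opened with O_NONBLOCK.
 */
#include <poll.h>
#include <unistd.h>

#define SINT_MSG_SIZE 256	/* sizeof(struct hv_message) */

static int read_one_sint_msg(int sint_fd, void *msg)
{
	struct pollfd pfd = { .fd = sint_fd, .events = POLLIN };
	ssize_t n;

	for (;;) {
		n = read(sint_fd, msg, SINT_MSG_SIZE);
		if (n == SINT_MSG_SIZE)
			return 0;	/* one message copied out */
		if (n == 0)
			return -1;	/* SINT masked: end of stream */
		if (poll(&pfd, 1, -1) < 0)	/* EAGAIN: wait for EPOLLIN */
			return -1;
	}
}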
1046 
1047 static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
1048 {
1049 	struct miscdevice *dev = f->private_data;
1050 	struct mshv_vtl_hvcall_fd *fd;
1051 
1052 	if (!capable(CAP_SYS_ADMIN))
1053 		return -EPERM;
1054 
1055 	fd = vzalloc(sizeof(*fd));
1056 	if (!fd)
1057 		return -ENOMEM;
1058 	fd->dev = dev;
1059 	f->private_data = fd;
1060 	mutex_init(&fd->init_mutex);
1061 
1062 	return 0;
1063 }
1064 
1065 static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
1066 {
1067 	struct mshv_vtl_hvcall_fd *fd;
1068 
1069 	fd = f->private_data;
1070 	if (fd) {
1071 		vfree(fd);
1072 		f->private_data = NULL;
1073 	}
1074 
1075 	return 0;
1076 }
1077 
1078 static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
1079 				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
1080 {
1081 	struct mshv_vtl_hvcall_setup hvcall_setup;
1082 
1083 	guard(mutex)(&fd->init_mutex);
1084 
1085 	if (fd->allow_map_initialized) {
1086 		dev_err(fd->dev->this_device,
1087 			"Hypercall allow map has already been set, pid %d\n",
1088 			current->pid);
1089 		return -EINVAL;
1090 	}
1091 
1092 	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
1093 			   sizeof(struct mshv_vtl_hvcall_setup))) {
1094 		return -EFAULT;
1095 	}
1096 	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
1097 		return -EINVAL;
1098 
1099 	if (copy_from_user(&fd->allow_bitmap,
1100 			   (void __user *)hvcall_setup.allow_bitmap_ptr,
1101 			   hvcall_setup.bitmap_array_size)) {
1102 		return -EFAULT;
1103 	}
1104 
1105 	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
1106 		 current->pid);
1107 	fd->allow_map_initialized = true;
1108 	return 0;
1109 }
1110 
1111 static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
1112 {
1113 	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
1114 }
1115 
1116 static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
1117 				struct mshv_vtl_hvcall __user *hvcall_user)
1118 {
1119 	struct mshv_vtl_hvcall hvcall;
1120 	void *in, *out;
1121 	int ret;
1122 
1123 	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
1124 		return -EFAULT;
1125 	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
1126 		return -EINVAL;
1127 	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
1128 		return -EINVAL;
1129 
1130 	/*
1131 	 * By default, all hypercalls are not allowed.
1132 	 * The user mode code has to set up the allow bitmap once.
1133 	 */
1134 
1135 	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
1136 		dev_err(fd->dev->this_device,
1137 			"Hypercall with control data %#llx isn't allowed\n",
1138 			hvcall.control);
1139 		return -EPERM;
1140 	}
1141 
1142 	/*
1143 	 * This may create a problem for the Confidential VM (CVM) use case, where we need to use
1144 	 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
1145 	 * hyperv_pcpu_output_arg) for making a hypervisor call.
1146 	 *
1147 	 * TODO: Take care of this when CVM support is added.
1148 	 */
1149 	in = (void *)__get_free_page(GFP_KERNEL);
1150 	out = (void *)__get_free_page(GFP_KERNEL);
	if (!in || !out) {
		ret = -ENOMEM;
		goto free_pages;	/* free_page() safely ignores a zero address */
	}
1151 
1152 	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
1153 		ret = -EFAULT;
1154 		goto free_pages;
1155 	}
1156 
1157 	hvcall.status = hv_do_hypercall(hvcall.control, in, out);
1158 
1159 	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
1160 		ret = -EFAULT;
1161 		goto free_pages;
1162 	}
1163 	ret = put_user(hvcall.status, &hvcall_user->status);
1164 free_pages:
1165 	free_page((unsigned long)in);
1166 	free_page((unsigned long)out);
1167 
1168 	return ret;
1169 }
1170 
1171 static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1172 {
1173 	struct mshv_vtl_hvcall_fd *fd = f->private_data;
1174 
1175 	switch (cmd) {
1176 	case MSHV_HVCALL_SETUP:
1177 		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
1178 	case MSHV_HVCALL:
1179 		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
1180 	default:
1181 		break;
1182 	}
1183 
1184 	return -ENOIOCTLCMD;
1185 }
1186 
1187 static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
1188 	.owner = THIS_MODULE,
1189 	.open = mshv_vtl_hvcall_dev_open,
1190 	.release = mshv_vtl_hvcall_dev_release,
1191 	.unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
1192 };
1193 
1194 static struct miscdevice mshv_vtl_hvcall_dev = {
1195 	.name = "mshv_hvcall",
1196 	.nodename = "mshv_hvcall",
1197 	.fops = &mshv_vtl_hvcall_dev_file_ops,
1198 	.mode = 0600,
1199 	.minor = MISC_DYNAMIC_MINOR,
1200 };
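
/*
 * Editor's sketch (not part of the driver): the expected /dev/mshv_hvcall
 * sequence. The allow bitmap (one bit per possible 16-bit hypercall code,
 * indexed by the low word of the control value) is installed once with
 * MSHV_HVCALL_SETUP; MSHV_HVCALL then issues an allowed call. Field names
 * follow the structures referenced above; the call code 0x0046
 * (HVCALL_GET_PARTITION_ID) is purely illustrative, and out_page must be at
 * least 4096 bytes (HV_HYP_PAGE_SIZE).
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/mshv.h>

static int hvcall_get_partition_id(int hvcall_fd, void *out_page)
{
	static uint8_t allow[8192];	/* (U16_MAX + 1) / 8 bytes, zeroed */
	struct mshv_vtl_hvcall_setup setup = {
		.bitmap_array_size = sizeof(allow),
		.allow_bitmap_ptr = (uintptr_t)allow,
	};
	struct mshv_vtl_hvcall call = {
		.control = 0x0046,	/* hvcall code lives in the low word */
		.output_size = 4096,
		.output_ptr = (uintptr_t)out_page,
	};

	allow[0x0046 / 8] |= 1u << (0x0046 % 8);	/* permit this code */

	if (ioctl(hvcall_fd, MSHV_HVCALL_SETUP, &setup) < 0)
		return -1;
	return ioctl(hvcall_fd, MSHV_HVCALL, &call);	/* status in call.status */
}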
1201 
1202 static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
1203 {
1204 	pid_t pid = task_pid_vnr(current);
1205 	uid_t uid = current_uid().val;
1206 	int ret = 0;
1207 
1208 	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);
1209 
1210 	if (capable(CAP_SYS_ADMIN)) {
1211 		filp->private_data = inodep;
1212 	} else {
1213 		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d\n",
1214 		       __func__, pid, uid);
1215 		ret = -EPERM;
1216 	}
1217 
1218 	return ret;
1219 }
1220 
1221 static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
1222 {
1223 	unsigned long mask = size - 1;
1224 	unsigned long start = vmf->address & ~mask;
1225 	unsigned long end = start + size;
1226 	bool is_valid;
1227 
1228 	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
1229 		start >= vmf->vma->vm_start &&
1230 		end <= vmf->vma->vm_end;
1231 
1232 	if (is_valid)
1233 		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);
1234 
1235 	return is_valid;
1236 }
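
/*
 * Editor's note: for a PMD (2 MiB) fault, for instance, the check above
 * requires the low 21 bits of the faulting address to match the low 21 bits
 * of the byte offset implied by vmf->pgoff, and the whole 2 MiB block to lie
 * within the VMA; only then is the PFN rounded down to a 2 MiB boundary so a
 * single huge entry can cover it.
 */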
1237 
1238 static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
1239 {
1240 	unsigned long pfn = vmf->pgoff;
1241 	vm_fault_t ret = VM_FAULT_FALLBACK;
1242 
1243 	switch (order) {
1244 	case 0:
1245 		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
1246 
1247 	case PMD_ORDER:
1248 		if (can_fault(vmf, PMD_SIZE, &pfn))
1249 			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1250 		return ret;
1251 
1252 	case PUD_ORDER:
1253 		if (can_fault(vmf, PUD_SIZE, &pfn))
1254 			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
1255 		return ret;
1256 
1257 	default:
1258 		return VM_FAULT_SIGBUS;
1259 	}
1260 }
1261 
1262 static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
1263 {
1264 	return mshv_vtl_low_huge_fault(vmf, 0);
1265 }
1266 
1267 static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
1268 	.fault = mshv_vtl_low_fault,
1269 	.huge_fault = mshv_vtl_low_huge_fault,
1270 };
1271 
1272 static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
1273 {
1274 	vma->vm_ops = &mshv_vtl_low_vm_ops;
1275 	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);
1276 
1277 	return 0;
1278 }
1279 
1280 static const struct file_operations mshv_vtl_low_file_ops = {
1281 	.owner		= THIS_MODULE,
1282 	.open		= mshv_vtl_low_open,
1283 	.mmap		= mshv_vtl_low_mmap,
1284 };
1285 
1286 static struct miscdevice mshv_vtl_low = {
1287 	.name = "mshv_vtl_low",
1288 	.nodename = "mshv_vtl_low",
1289 	.fops = &mshv_vtl_low_file_ops,
1290 	.mode = 0600,
1291 	.minor = MISC_DYNAMIC_MINOR,
1292 };
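
/*
 * Editor's sketch (not part of the driver): mapping VTL0 memory through
 * /dev/mshv_vtl_low. The fault handlers above use the page offset directly
 * as the PFN, so the mmap() offset is simply the VTL0 guest-physical address.
 * Keeping address, offset, and length 2 MiB or 1 GiB aligned lets
 * huge_fault() install PMD or PUD mappings instead of 4 KiB PTEs.
 */
#include <stdint.h>
#include <sys/mman.h>

static void *map_vtl0_range(int low_fd, uint64_t gpa, size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		    low_fd, (off_t)gpa);
}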
1293 
1294 static int __init mshv_vtl_init(void)
1295 {
1296 	int ret;
1297 	struct device *dev = mshv_dev.this_device;
1298 
1299 	/*
1300 	 * This creates /dev/mshv which provides functionality to create VTLs and partitions.
1301 	 */
1302 	ret = misc_register(&mshv_dev);
1303 	if (ret) {
1304 		dev_err(dev, "mshv device register failed: %d\n", ret);
1305 		return ret;
1306 	}
1307 
1308 	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
1309 	init_waitqueue_head(&fd_wait_queue);
1310 
1311 	if (mshv_vtl_get_vsm_regs()) {
1312 		dev_emerg(dev, "Unable to get VSM capabilities!\n");
1313 		ret = -ENODEV;
1314 		goto free_dev;
1315 	}
1316 	if (mshv_vtl_configure_vsm_partition(dev)) {
1317 		dev_emerg(dev, "VSM configuration failed!\n");
1318 		ret = -ENODEV;
1319 		goto free_dev;
1320 	}
1321 
1322 	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
1323 	ret = hv_vtl_setup_synic();
1324 	if (ret)
1325 		goto free_dev;
1326 
1327 	/*
1328 	 * mshv_sint device adds VMBus relay ioctl support.
1329 	 * This provides a channel for VTL0 to communicate with VTL2.
1330 	 */
1331 	ret = misc_register(&mshv_vtl_sint_dev);
1332 	if (ret)
1333 		goto free_synic;
1334 
1335 	/*
1336 	 * The mshv_hvcall device provides an interface for user space to issue hypercalls directly.
1337 	 */
1338 	ret = misc_register(&mshv_vtl_hvcall_dev);
1339 	if (ret)
1340 		goto free_sint;
1341 
1342 	/*
1343 	 * The mshv_vtl_low device maps the VTL0 address space into a user-mode process in VTL2.
1344 	 * Its mmap() implementation treats the file offset as a VTL0 guest-physical address.
1345 	 */
1346 	ret = misc_register(&mshv_vtl_low);
1347 	if (ret)
1348 		goto free_hvcall;
1349 
1350 	/*
1351 	 * The "mshv vtl mem dev" device is later used to set up VTL0 memory.
1352 	 */
1353 	mem_dev = kzalloc_obj(*mem_dev);
1354 	if (!mem_dev) {
1355 		ret = -ENOMEM;
1356 		goto free_low;
1357 	}
1358 
1359 	mutex_init(&mshv_vtl_poll_file_lock);
1360 
1361 	device_initialize(mem_dev);
1362 	dev_set_name(mem_dev, "mshv vtl mem dev");
1363 	ret = device_add(mem_dev);
1364 	if (ret) {
1365 		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
1366 		goto free_mem;
1367 	}
1368 
1369 	return 0;
1370 
1371 free_mem:
1372 	kfree(mem_dev);
1373 free_low:
1374 	misc_deregister(&mshv_vtl_low);
1375 free_hvcall:
1376 	misc_deregister(&mshv_vtl_hvcall_dev);
1377 free_sint:
1378 	misc_deregister(&mshv_vtl_sint_dev);
1379 free_synic:
1380 	hv_vtl_remove_synic();
1381 free_dev:
1382 	misc_deregister(&mshv_dev);
1383 
1384 	return ret;
1385 }
1386 
1387 static void __exit mshv_vtl_exit(void)
1388 {
1389 	device_del(mem_dev);
1390 	kfree(mem_dev);
1391 	misc_deregister(&mshv_vtl_low);
1392 	misc_deregister(&mshv_vtl_hvcall_dev);
1393 	misc_deregister(&mshv_vtl_sint_dev);
1394 	hv_vtl_remove_synic();
1395 	misc_deregister(&mshv_dev);
1396 }
1397 
1398 module_init(mshv_vtl_init);
1399 module_exit(mshv_vtl_exit);
1400