xref: /linux/drivers/hv/mshv_root.h (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (c) 2023, Microsoft Corporation.
4  */
5 
6 #ifndef _MSHV_ROOT_H_
7 #define _MSHV_ROOT_H_
8 
9 #include <linux/spinlock.h>
10 #include <linux/mutex.h>
11 #include <linux/semaphore.h>
12 #include <linux/sched.h>
13 #include <linux/srcu.h>
14 #include <linux/wait.h>
15 #include <linux/hashtable.h>
16 #include <linux/dev_printk.h>
17 #include <linux/build_bug.h>
18 #include <linux/mmu_notifier.h>
19 #include <uapi/linux/mshv.h>
20 
21 /*
22  * Hypervisor must be between these version numbers (inclusive)
23  * to guarantee compatibility
24  */
25 #define MSHV_HV_MIN_VERSION		(27744)
26 #define MSHV_HV_MAX_VERSION		(27751)
27 
28 static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE);
29 
30 #define MSHV_MAX_VPS			256
31 
32 #define MSHV_PARTITIONS_HASH_BITS	9
33 
34 #define MSHV_PIN_PAGES_BATCH_SIZE	(0x10000000ULL / HV_HYP_PAGE_SIZE)
35 
/* Per-virtual-processor state for a VP managed by the root partition */
struct mshv_vp {
	u32 vp_index;				/* index of this VP within its partition */
	struct mshv_partition *vp_partition;	/* owning partition (back pointer) */
	struct mutex vp_mutex;
	/* Hypervisor-shared pages mapped for this VP */
	struct hv_vp_register_page *vp_register_page;
	struct hv_message *vp_intercept_msg_page;
	void *vp_ghcb_page;		/* GHCB page; presumably used only for SNP-isolated guests — verify */
	struct hv_stats_page *vp_stats_pages[2];	/* mapped via mshv_vp_stats_map() */
	/* Run/suspend state shared between the run path and interrupt handling */
	struct {
		atomic64_t vp_signaled_count;
		struct {
			u64 intercept_suspend: 1;
			u64 root_sched_blocked: 1; /* root scheduler only */
			u64 root_sched_dispatched: 1; /* root scheduler only */
			u64 reserved: 61;
		} flags;
		unsigned int kicked_by_hv;
		wait_queue_head_t vp_suspend_queue;	/* waited on until the VP is signaled */
	} run;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *vp_stats_dentry;	/* debugfs node for per-VP stats */
#endif
};
59 
/*
 * Per-VP logging helpers: prefix every message with "p<pt_id>vp<vp_index>: ".
 * The (v) argument is evaluated exactly once via the __vp local.
 */
#define vp_fmt(fmt) "p%lluvp%u: " fmt
#define vp_devprintk(level, v, fmt, ...) \
do { \
	const struct mshv_vp *__vp = (v); \
	const struct mshv_partition *__pt = __vp->vp_partition; \
	dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \
		    __vp->vp_index, ##__VA_ARGS__); \
} while (0)
#define vp_emerg(v, fmt, ...)	vp_devprintk(emerg, v, fmt, ##__VA_ARGS__)
#define vp_crit(v, fmt, ...)	vp_devprintk(crit, v, fmt, ##__VA_ARGS__)
#define vp_alert(v, fmt, ...)	vp_devprintk(alert, v, fmt, ##__VA_ARGS__)
#define vp_err(v, fmt, ...)	vp_devprintk(err, v, fmt, ##__VA_ARGS__)
#define vp_warn(v, fmt, ...)	vp_devprintk(warn, v, fmt, ##__VA_ARGS__)
#define vp_notice(v, fmt, ...)	vp_devprintk(notice, v, fmt, ##__VA_ARGS__)
#define vp_info(v, fmt, ...)	vp_devprintk(info, v, fmt, ##__VA_ARGS__)
#define vp_dbg(v, fmt, ...)	vp_devprintk(dbg, v, fmt, ##__VA_ARGS__)
76 
/* How a guest-physical region is backed and managed */
enum mshv_region_type {
	MSHV_REGION_TYPE_MEM_PINNED,	/* guest memory, pages pinned in the host */
	MSHV_REGION_TYPE_MEM_MOVABLE,	/* guest memory, tracked via MMU interval notifier */
	MSHV_REGION_TYPE_MMIO		/* MMIO space mapped into the guest */
};
82 
/*
 * One contiguous guest-physical mapping (memory or MMIO), allocated with a
 * flexible array holding the backing page pointers.
 */
struct mshv_mem_region {
	struct hlist_node hnode;	/* entry in partition->pt_mem_regions */
	struct kref mreg_refcount;	/* see mshv_region_get()/mshv_region_put() */
	u64 nr_pages;			/* region length in hypervisor pages */
	u64 start_gfn;			/* first guest frame number */
	u64 start_uaddr;		/* userspace VA backing the region */
	u32 hv_map_flags;		/* flags for hv_call_map_gpa_pages() — presumably; verify */
	struct mshv_partition *partition;	/* owning partition */
	enum mshv_region_type mreg_type;
	struct mmu_interval_notifier mreg_mni;	/* used for MEM_MOVABLE regions — TODO confirm */
	struct mutex mreg_mutex;	/* protects region pages remapping */
	struct page *mreg_pages[];	/* nr_pages entries (flexible array member) */
};
96 
/*
 * Callback registration for guest IRQ acknowledgement; entries live on
 * partition->irq_ack_notifier_list.
 */
struct mshv_irq_ack_notifier {
	struct hlist_node link;
	unsigned int irq_ack_gsi;	/* GSI this notifier fires for */
	void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
};
102 
/*
 * Root-partition-side state for one child partition. Hashed by pt_id into
 * mshv_root.pt_htable; lifetime is governed by pt_ref_count (see
 * mshv_partition_get()/mshv_partition_put()).
 */
struct mshv_partition {
	struct device *pt_module_dev;	/* device used by the pt_*/vp_* log macros */

	struct hlist_node pt_hnode;	/* entry in mshv_root.pt_htable */
	u64 pt_id;			/* hypervisor partition id */
	refcount_t pt_ref_count;
	struct mutex pt_mutex;

	spinlock_t pt_mem_regions_lock;	/* protects pt_mem_regions */
	struct hlist_head pt_mem_regions; // not ordered

	u32 pt_vp_count;
	struct mshv_vp *pt_vp_array[MSHV_MAX_VPS];	/* indexed by vp_index */

	struct mutex pt_irq_lock;
	struct srcu_struct pt_irq_srcu;	/* presumably guards pt_girq_tbl readers — verify */
	struct hlist_head irq_ack_notifier_list;

	struct hlist_head pt_devices;

	/*
	 * MSHV does not support more than one async hypercall in flight
	 * for a single partition. Thus, it is okay to define per partition
	 * async hypercall status.
	 */
	struct completion async_hypercall;
	u64 async_hypercall_status;

	spinlock_t	  pt_irqfds_lock;	/* protects pt_irqfds_list */
	struct hlist_head pt_irqfds_list;
	struct mutex	  irqfds_resampler_lock;
	struct hlist_head irqfds_resampler_list;

	struct hlist_head ioeventfds_list;

	struct mshv_girq_routing_table __rcu *pt_girq_tbl;
	u64 isolation_type;	/* compared against HV_PARTITION_ISOLATION_TYPE_SNP below */
	bool import_completed;
	bool pt_initialized;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *pt_stats_dentry;	/* debugfs: per-partition stats */
	struct dentry *pt_vp_dentry;	/* debugfs: parent dir for per-VP entries */
#endif
};
147 
/*
 * Per-partition logging helpers: prefix every message with "p<pt_id>: ".
 * The (p) argument is evaluated exactly once via the __pt local.
 */
#define pt_fmt(fmt) "p%llu: " fmt
#define pt_devprintk(level, p, fmt, ...) \
do { \
	const struct mshv_partition *__pt = (p); \
	dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \
		    ##__VA_ARGS__); \
} while (0)
#define pt_emerg(p, fmt, ...)	pt_devprintk(emerg, p, fmt, ##__VA_ARGS__)
#define pt_crit(p, fmt, ...)	pt_devprintk(crit, p, fmt, ##__VA_ARGS__)
#define pt_alert(p, fmt, ...)	pt_devprintk(alert, p, fmt, ##__VA_ARGS__)
#define pt_err(p, fmt, ...)	pt_devprintk(err, p, fmt, ##__VA_ARGS__)
#define pt_warn(p, fmt, ...)	pt_devprintk(warn, p, fmt, ##__VA_ARGS__)
#define pt_notice(p, fmt, ...)	pt_devprintk(notice, p, fmt, ##__VA_ARGS__)
#define pt_info(p, fmt, ...)	pt_devprintk(info, p, fmt, ##__VA_ARGS__)
#define pt_dbg(p, fmt, ...)	pt_devprintk(dbg, p, fmt, ##__VA_ARGS__)
163 
/* A local APIC interrupt to be asserted in a guest */
struct mshv_lapic_irq {
	u32 lapic_vector;	/* interrupt vector */
	u64 lapic_apic_id;	/* destination APIC id */
	union hv_interrupt_control lapic_control;
};
169 
170 #define MSHV_MAX_GUEST_IRQS		4096
171 
/* representation of one guest irq entry, either msi or legacy */
struct mshv_guest_irq_ent {
	u32 girq_entry_valid;	/* vfio looks at this */
	u32 guest_irq_num;	/* a unique number for each irq */
	u32 girq_addr_lo;	/* guest irq msi address info */
	u32 girq_addr_hi;	/* upper half of the msi address */
	u32 girq_irq_data;	/* idt vector in some cases */
};
180 
/* RCU-managed guest IRQ routing table (installed as partition->pt_girq_tbl) */
struct mshv_girq_routing_table {
	u32 num_rt_entries;	/* entries in mshv_girq_info_tbl[] */
	struct mshv_guest_irq_ent mshv_girq_info_tbl[];	/* flexible array member */
};
185 
/* Per-cpu SynIC pages shared with the hypervisor (see mshv_root.synic_pages) */
struct hv_synic_pages {
	struct hv_message_page *hyp_synic_message_page;
	struct hv_synic_event_flags_page *synic_event_flags_page;
	struct hv_synic_event_ring_page *synic_event_ring_page;
};
191 
/* Global root-partition state; a single instance is declared below */
struct mshv_root {
	struct hv_synic_pages __percpu *synic_pages;
	spinlock_t pt_ht_lock;	/* protects pt_htable */
	DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS);	/* partitions by pt_id */
	struct hv_partition_property_vmm_capabilities vmm_caps;
};
198 
199 /*
200  * Callback for doorbell events.
201  * NOTE: This is called in interrupt context. Callback
202  * should defer slow and sleeping logic to later.
203  */
204 typedef void (*doorbell_cb_t) (int doorbell_id, void *);
205 
206 /*
207  * port table information
208  */
209 struct port_table_info {
210 	struct rcu_head portbl_rcu;
211 	enum hv_port_type hv_port_type;
212 	union {
213 		struct {
214 			u64 reserved[2];
215 		} hv_port_message;
216 		struct {
217 			u64 reserved[2];
218 		} hv_port_event;
219 		struct {
220 			u64 reserved[2];
221 		} hv_port_monitor;
222 		struct {
223 			doorbell_cb_t doorbell_cb;
224 			void *data;
225 		} hv_port_doorbell;
226 	};
227 };
228 
/* Guest IRQ routing table management */
int mshv_update_routing_table(struct mshv_partition *partition,
			      const struct mshv_user_irq_entry *entries,
			      unsigned int numents);
void mshv_free_routing_table(struct mshv_partition *partition);

/* Returns a copy of the routing entry for irq_num (check girq_entry_valid) */
struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition,
					      u32 irq_num);

void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq,
			 struct mshv_lapic_irq *dest_irq);

void mshv_irqfd_routing_update(struct mshv_partition *partition);

/* Port id allocation/lookup backing struct port_table_info entries */
void mshv_port_table_fini(void);
int mshv_portid_alloc(struct port_table_info *info);
int mshv_portid_lookup(int port_id, struct port_table_info *info);
void mshv_portid_free(int port_id);

/* Doorbell registration; doorbell_cb is invoked in interrupt context */
int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
			   void *data, u64 gpa, u64 val, u64 flags);
void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid);

/* SynIC interrupt service routine and per-cpu init/teardown */
void mshv_isr(void);
int mshv_synic_init(unsigned int cpu);
int mshv_synic_cleanup(unsigned int cpu);
254 
255 static inline bool mshv_partition_encrypted(struct mshv_partition *partition)
256 {
257 	return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP;
258 }
259 
260 struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
261 void mshv_partition_put(struct mshv_partition *partition);
262 struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);
263 
264 static inline bool is_l1vh_parent(u64 partition_id)
265 {
266 	return hv_l1vh_partition() && (partition_id == HV_PARTITION_ID_SELF);
267 }
268 
269 int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
270 		      struct hv_stats_page **stats_pages);
271 void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
272 			 struct hv_stats_page **stats_pages);
273 
274 /* hypercalls */
275 
276 int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
277 int hv_call_create_partition(u64 flags,
278 			     struct hv_partition_creation_properties creation_properties,
279 			     union hv_partition_isolation_properties isolation_properties,
280 			     u64 *partition_id);
281 int hv_call_initialize_partition(u64 partition_id);
282 int hv_call_finalize_partition(u64 partition_id);
283 int hv_call_delete_partition(u64 partition_id);
284 int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs);
285 int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
286 			  u32 flags, struct page **pages);
287 int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
288 			    u32 flags);
289 int hv_call_delete_vp(u64 partition_id, u32 vp_index);
290 int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
291 				     u64 dest_addr,
292 				     union hv_interrupt_control control);
293 int hv_call_clear_virtual_interrupt(u64 partition_id);
294 int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
295 				  union hv_gpa_page_access_state_flags state_flags,
296 				  int *written_total,
297 				  union hv_gpa_page_access_state *states);
298 int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
299 			 struct hv_vp_state_data state_data,
300 			 /* Choose between pages and ret_output */
301 			 u64 page_count, struct page **pages,
302 			 union hv_output_get_vp_state *ret_output);
303 int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
304 			 /* Choose between pages and bytes */
305 			 struct hv_vp_state_data state_data, u64 page_count,
306 			 struct page **pages, u32 num_bytes, u8 *bytes);
307 int hv_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
308 			 union hv_input_vtl input_vtl,
309 			 struct page **state_page);
310 int hv_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
311 			   struct page *state_page,
312 			   union hv_input_vtl input_vtl);
313 int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
314 			u64 connection_partition_id, struct hv_port_info *port_info,
315 			u8 port_vtl, u8 min_connection_vtl, int node);
316 int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
317 int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
318 			 u64 connection_partition_id,
319 			 union hv_connection_id connection_id,
320 			 struct hv_connection_info *connection_info,
321 			 u8 connection_vtl, int node);
322 int hv_call_disconnect_port(u64 connection_partition_id,
323 			    union hv_connection_id connection_id);
324 int hv_call_notify_port_ring_empty(u32 sint_index);
325 int hv_map_stats_page(enum hv_stats_object_type type,
326 		      const union hv_stats_object_identity *identity,
327 		      struct hv_stats_page **addr);
328 int hv_unmap_stats_page(enum hv_stats_object_type type,
329 			struct hv_stats_page *page_addr,
330 			const union hv_stats_object_identity *identity);
331 int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
332 				   u64 page_struct_count, u32 host_access,
333 				   u32 flags, u8 acquire);
334 int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
335 				      void *property_value, size_t property_value_sz);
336 
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* debugfs plumbing for per-partition and per-VP statistics */
int __init mshv_debugfs_init(void);
void mshv_debugfs_exit(void);

int mshv_debugfs_partition_create(struct mshv_partition *partition);
void mshv_debugfs_partition_remove(struct mshv_partition *partition);
int mshv_debugfs_vp_create(struct mshv_vp *vp);
void mshv_debugfs_vp_remove(struct mshv_vp *vp);
#else
/* No-op stubs when debugfs support is compiled out */
static inline int __init mshv_debugfs_init(void)
{
	return 0;
}
static inline void mshv_debugfs_exit(void) { }

static inline int mshv_debugfs_partition_create(struct mshv_partition *partition)
{
	return 0;
}
static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { }
static inline int mshv_debugfs_vp_create(struct mshv_vp *vp)
{
	return 0;
}
static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { }
#endif
363 
extern struct mshv_root mshv_root;
extern enum hv_scheduler_type hv_scheduler_type;
extern u8 * __percpu *hv_synic_eventring_tail;

/*
 * Memory-region lifecycle. Regions are kref-counted (mreg_refcount);
 * mshv_region_create() presumably returns with one reference held — verify
 * against the implementation.
 */
struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
					   u64 uaddr, u32 flags);
int mshv_region_share(struct mshv_mem_region *region);
int mshv_region_unshare(struct mshv_mem_region *region);
int mshv_region_map(struct mshv_mem_region *region);
void mshv_region_invalidate(struct mshv_mem_region *region);
int mshv_region_pin(struct mshv_mem_region *region);
void mshv_region_put(struct mshv_mem_region *region);
int mshv_region_get(struct mshv_mem_region *region);
bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
void mshv_region_movable_fini(struct mshv_mem_region *region);
bool mshv_region_movable_init(struct mshv_mem_region *region);
380 
381 #endif /* _MSHV_ROOT_H_ */
382