1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * Copyright (c) 2023, Microsoft Corporation.
4 */
5
6 #ifndef _MSHV_ROOT_H_
7 #define _MSHV_ROOT_H_
8
9 #include <linux/spinlock.h>
10 #include <linux/mutex.h>
11 #include <linux/semaphore.h>
12 #include <linux/sched.h>
13 #include <linux/srcu.h>
14 #include <linux/wait.h>
15 #include <linux/hashtable.h>
16 #include <linux/dev_printk.h>
17 #include <linux/build_bug.h>
18 #include <linux/mmu_notifier.h>
19 #include <uapi/linux/mshv.h>
20
21 /*
22 * Hypervisor must be between these version numbers (inclusive)
23 * to guarantee compatibility
24 */
25 #define MSHV_HV_MIN_VERSION (27744)
26 #define MSHV_HV_MAX_VERSION (27751)
27
28 static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE);
29
30 #define MSHV_MAX_VPS 256
31
32 #define MSHV_PARTITIONS_HASH_BITS 9
33
34 #define MSHV_PIN_PAGES_BATCH_SIZE (0x10000000ULL / HV_HYP_PAGE_SIZE)
35
36 struct mshv_vp {
37 u32 vp_index;
38 struct mshv_partition *vp_partition;
39 struct mutex vp_mutex;
40 struct hv_vp_register_page *vp_register_page;
41 struct hv_message *vp_intercept_msg_page;
42 void *vp_ghcb_page;
43 struct hv_stats_page *vp_stats_pages[2];
44 struct {
45 atomic64_t vp_signaled_count;
46 struct {
47 u64 intercept_suspend: 1;
48 u64 root_sched_blocked: 1; /* root scheduler only */
49 u64 root_sched_dispatched: 1; /* root scheduler only */
50 u64 reserved: 61;
51 } flags;
52 unsigned int kicked_by_hv;
53 wait_queue_head_t vp_suspend_queue;
54 } run;
55 };
56
/* dev_printk wrappers that prefix messages with "p<id>vp<index>: ". */
#define vp_fmt(fmt) "p%lluvp%u: " fmt
#define vp_devprintk(level, v, fmt, ...) \
do { \
	const struct mshv_vp *__vp = (v); \
	const struct mshv_partition *__pt = __vp->vp_partition; \
	dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \
		    __vp->vp_index, ##__VA_ARGS__); \
} while (0)
#define vp_emerg(v, fmt, ...) vp_devprintk(emerg, v, fmt, ##__VA_ARGS__)
#define vp_crit(v, fmt, ...) vp_devprintk(crit, v, fmt, ##__VA_ARGS__)
#define vp_alert(v, fmt, ...) vp_devprintk(alert, v, fmt, ##__VA_ARGS__)
#define vp_err(v, fmt, ...) vp_devprintk(err, v, fmt, ##__VA_ARGS__)
#define vp_warn(v, fmt, ...) vp_devprintk(warn, v, fmt, ##__VA_ARGS__)
#define vp_notice(v, fmt, ...) vp_devprintk(notice, v, fmt, ##__VA_ARGS__)
#define vp_info(v, fmt, ...) vp_devprintk(info, v, fmt, ##__VA_ARGS__)
#define vp_dbg(v, fmt, ...) vp_devprintk(dbg, v, fmt, ##__VA_ARGS__)
73
/* How a guest memory region is backed: pinned RAM, movable RAM, or MMIO. */
enum mshv_region_type {
	MSHV_REGION_TYPE_MEM_PINNED,
	MSHV_REGION_TYPE_MEM_MOVABLE,
	MSHV_REGION_TYPE_MMIO
};
79
80 struct mshv_mem_region {
81 struct hlist_node hnode;
82 struct kref refcount;
83 u64 nr_pages;
84 u64 start_gfn;
85 u64 start_uaddr;
86 u32 hv_map_flags;
87 struct mshv_partition *partition;
88 enum mshv_region_type type;
89 struct mmu_interval_notifier mni;
90 struct mutex mutex; /* protects region pages remapping */
91 struct page *pages[];
92 };
93
94 struct mshv_irq_ack_notifier {
95 struct hlist_node link;
96 unsigned int irq_ack_gsi;
97 void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
98 };
99
100 struct mshv_partition {
101 struct device *pt_module_dev;
102
103 struct hlist_node pt_hnode;
104 u64 pt_id;
105 refcount_t pt_ref_count;
106 struct mutex pt_mutex;
107
108 spinlock_t pt_mem_regions_lock;
109 struct hlist_head pt_mem_regions; // not ordered
110
111 u32 pt_vp_count;
112 struct mshv_vp *pt_vp_array[MSHV_MAX_VPS];
113
114 struct mutex pt_irq_lock;
115 struct srcu_struct pt_irq_srcu;
116 struct hlist_head irq_ack_notifier_list;
117
118 struct hlist_head pt_devices;
119
120 /*
121 * MSHV does not support more than one async hypercall in flight
122 * for a single partition. Thus, it is okay to define per partition
123 * async hypercall status.
124 */
125 struct completion async_hypercall;
126 u64 async_hypercall_status;
127
128 spinlock_t pt_irqfds_lock;
129 struct hlist_head pt_irqfds_list;
130 struct mutex irqfds_resampler_lock;
131 struct hlist_head irqfds_resampler_list;
132
133 struct hlist_head ioeventfds_list;
134
135 struct mshv_girq_routing_table __rcu *pt_girq_tbl;
136 u64 isolation_type;
137 bool import_completed;
138 bool pt_initialized;
139 };
140
/* dev_printk wrappers that prefix messages with "p<partition id>: ". */
#define pt_fmt(fmt) "p%llu: " fmt
#define pt_devprintk(level, p, fmt, ...) \
do { \
	const struct mshv_partition *__pt = (p); \
	dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \
		    ##__VA_ARGS__); \
} while (0)
#define pt_emerg(p, fmt, ...) pt_devprintk(emerg, p, fmt, ##__VA_ARGS__)
#define pt_crit(p, fmt, ...) pt_devprintk(crit, p, fmt, ##__VA_ARGS__)
#define pt_alert(p, fmt, ...) pt_devprintk(alert, p, fmt, ##__VA_ARGS__)
#define pt_err(p, fmt, ...) pt_devprintk(err, p, fmt, ##__VA_ARGS__)
#define pt_warn(p, fmt, ...) pt_devprintk(warn, p, fmt, ##__VA_ARGS__)
#define pt_notice(p, fmt, ...) pt_devprintk(notice, p, fmt, ##__VA_ARGS__)
#define pt_info(p, fmt, ...) pt_devprintk(info, p, fmt, ##__VA_ARGS__)
#define pt_dbg(p, fmt, ...) pt_devprintk(dbg, p, fmt, ##__VA_ARGS__)
156
157 struct mshv_lapic_irq {
158 u32 lapic_vector;
159 u64 lapic_apic_id;
160 union hv_interrupt_control lapic_control;
161 };
162
163 #define MSHV_MAX_GUEST_IRQS 4096
164
165 /* representation of one guest irq entry, either msi or legacy */
166 struct mshv_guest_irq_ent {
167 u32 girq_entry_valid; /* vfio looks at this */
168 u32 guest_irq_num; /* a unique number for each irq */
169 u32 girq_addr_lo; /* guest irq msi address info */
170 u32 girq_addr_hi;
171 u32 girq_irq_data; /* idt vector in some cases */
172 };
173
174 struct mshv_girq_routing_table {
175 u32 num_rt_entries;
176 struct mshv_guest_irq_ent mshv_girq_info_tbl[];
177 };
178
/* Per-CPU SynIC shared pages mapped from the hypervisor. */
struct hv_synic_pages {
	struct hv_message_page *hyp_synic_message_page;
	struct hv_synic_event_flags_page *synic_event_flags_page;
	struct hv_synic_event_ring_page *synic_event_ring_page;
};
184
185 struct mshv_root {
186 struct hv_synic_pages __percpu *synic_pages;
187 spinlock_t pt_ht_lock;
188 DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS);
189 struct hv_partition_property_vmm_capabilities vmm_caps;
190 };
191
/*
 * Callback for doorbell events.
 * NOTE: This is called in interrupt context. Callback
 * should defer slow and sleeping logic to later.
 */
typedef void (*doorbell_cb_t) (int doorbell_id, void *);
198
199 /*
200 * port table information
201 */
202 struct port_table_info {
203 struct rcu_head portbl_rcu;
204 enum hv_port_type hv_port_type;
205 union {
206 struct {
207 u64 reserved[2];
208 } hv_port_message;
209 struct {
210 u64 reserved[2];
211 } hv_port_event;
212 struct {
213 u64 reserved[2];
214 } hv_port_monitor;
215 struct {
216 doorbell_cb_t doorbell_cb;
217 void *data;
218 } hv_port_doorbell;
219 };
220 };
221
222 int mshv_update_routing_table(struct mshv_partition *partition,
223 const struct mshv_user_irq_entry *entries,
224 unsigned int numents);
225 void mshv_free_routing_table(struct mshv_partition *partition);
226
227 struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition,
228 u32 irq_num);
229
230 void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq,
231 struct mshv_lapic_irq *dest_irq);
232
233 void mshv_irqfd_routing_update(struct mshv_partition *partition);
234
235 void mshv_port_table_fini(void);
236 int mshv_portid_alloc(struct port_table_info *info);
237 int mshv_portid_lookup(int port_id, struct port_table_info *info);
238 void mshv_portid_free(int port_id);
239
240 int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
241 void *data, u64 gpa, u64 val, u64 flags);
242 void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid);
243
244 void mshv_isr(void);
245 int mshv_synic_init(unsigned int cpu);
246 int mshv_synic_cleanup(unsigned int cpu);
247
mshv_partition_encrypted(struct mshv_partition * partition)248 static inline bool mshv_partition_encrypted(struct mshv_partition *partition)
249 {
250 return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP;
251 }
252
253 struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
254 void mshv_partition_put(struct mshv_partition *partition);
255 struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);
256
257 /* hypercalls */
258
259 int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
260 int hv_call_create_partition(u64 flags,
261 struct hv_partition_creation_properties creation_properties,
262 union hv_partition_isolation_properties isolation_properties,
263 u64 *partition_id);
264 int hv_call_initialize_partition(u64 partition_id);
265 int hv_call_finalize_partition(u64 partition_id);
266 int hv_call_delete_partition(u64 partition_id);
267 int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs);
268 int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
269 u32 flags, struct page **pages);
270 int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
271 u32 flags);
272 int hv_call_delete_vp(u64 partition_id, u32 vp_index);
273 int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
274 u64 dest_addr,
275 union hv_interrupt_control control);
276 int hv_call_clear_virtual_interrupt(u64 partition_id);
277 int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
278 union hv_gpa_page_access_state_flags state_flags,
279 int *written_total,
280 union hv_gpa_page_access_state *states);
281 int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
282 struct hv_vp_state_data state_data,
283 /* Choose between pages and ret_output */
284 u64 page_count, struct page **pages,
285 union hv_output_get_vp_state *ret_output);
286 int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
287 /* Choose between pages and bytes */
288 struct hv_vp_state_data state_data, u64 page_count,
289 struct page **pages, u32 num_bytes, u8 *bytes);
290 int hv_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
291 union hv_input_vtl input_vtl,
292 struct page **state_page);
293 int hv_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
294 struct page *state_page,
295 union hv_input_vtl input_vtl);
296 int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
297 u64 connection_partition_id, struct hv_port_info *port_info,
298 u8 port_vtl, u8 min_connection_vtl, int node);
299 int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
300 int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
301 u64 connection_partition_id,
302 union hv_connection_id connection_id,
303 struct hv_connection_info *connection_info,
304 u8 connection_vtl, int node);
305 int hv_call_disconnect_port(u64 connection_partition_id,
306 union hv_connection_id connection_id);
307 int hv_call_notify_port_ring_empty(u32 sint_index);
308 int hv_map_stats_page(enum hv_stats_object_type type,
309 const union hv_stats_object_identity *identity,
310 void **addr);
311 int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
312 const union hv_stats_object_identity *identity);
313 int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
314 u64 page_struct_count, u32 host_access,
315 u32 flags, u8 acquire);
316 int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
317 void *property_value, size_t property_value_sz);
318
319 extern struct mshv_root mshv_root;
320 extern enum hv_scheduler_type hv_scheduler_type;
321 extern u8 * __percpu *hv_synic_eventring_tail;
322
323 struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
324 u64 uaddr, u32 flags);
325 int mshv_region_share(struct mshv_mem_region *region);
326 int mshv_region_unshare(struct mshv_mem_region *region);
327 int mshv_region_map(struct mshv_mem_region *region);
328 void mshv_region_invalidate(struct mshv_mem_region *region);
329 int mshv_region_pin(struct mshv_mem_region *region);
330 void mshv_region_put(struct mshv_mem_region *region);
331 int mshv_region_get(struct mshv_mem_region *region);
332 bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
333 void mshv_region_movable_fini(struct mshv_mem_region *region);
334 bool mshv_region_movable_init(struct mshv_mem_region *region);
335
336 #endif /* _MSHV_ROOT_H_ */
337