/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, Microsoft Corporation.
 */

#ifndef _MSHV_ROOT_H_
#define _MSHV_ROOT_H_

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>
#include <linux/sched.h>
#include <linux/srcu.h>
#include <linux/wait.h>
#include <linux/hashtable.h>
#include <linux/dev_printk.h>
#include <linux/build_bug.h>
#include <linux/mmu_notifier.h>
#include <uapi/linux/mshv.h>

/*
 * Hypervisor must be between these version numbers (inclusive)
 * to guarantee compatibility
 */
#define MSHV_HV_MIN_VERSION		(27744)
#define MSHV_HV_MAX_VERSION		(27751)

/* The UAPI page-size constant must agree with the hypervisor's page size */
static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE);

/* Upper bound on VPs per partition; sizes struct mshv_partition::pt_vp_array */
#define MSHV_MAX_VPS		256

#define MSHV_PARTITIONS_HASH_BITS	9

/* Pin guest memory in batches of pages covering 0x10000000 bytes (256 MiB) */
#define MSHV_PIN_PAGES_BATCH_SIZE	(0x10000000ULL / HV_HYP_PAGE_SIZE)

/*
 * Per-virtual-processor state. VPs are stored in the owning partition's
 * pt_vp_array, indexed by vp_index.
 */
struct mshv_vp {
	u32 vp_index;
	struct mshv_partition *vp_partition;
	struct mutex vp_mutex;
	/*
	 * Pages shared with the hypervisor for this VP; presumably mapped
	 * via hv_map_vp_state_page()/hv_map_stats_page() below — NULL until
	 * mapped (TODO confirm against the .c side).
	 */
	struct hv_vp_register_page *vp_register_page;
	struct hv_message *vp_intercept_msg_page;
	void *vp_ghcb_page;
	struct hv_stats_page *vp_stats_pages[2];
	/* Run/dispatch state used to coordinate VP suspension with the hypervisor */
	struct {
		atomic64_t vp_signaled_count;
		struct {
			u64 intercept_suspend: 1;
			u64 root_sched_blocked: 1; /* root scheduler only */
			u64 root_sched_dispatched: 1; /* root scheduler only */
			u64 reserved: 61;
		} flags;
		unsigned int kicked_by_hv;
		wait_queue_head_t vp_suspend_queue;
	} run;
};

/*
 * dev_<level> printk wrappers for a VP: every message is prefixed with
 * "p<partition id>vp<vp index>: " and routed to the module device.
 */
#define vp_fmt(fmt) "p%lluvp%u: " fmt
#define vp_devprintk(level, v, fmt, ...) \
do { \
	const struct mshv_vp *__vp = (v); \
	const struct mshv_partition *__pt = __vp->vp_partition; \
	dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \
		    __vp->vp_index, ##__VA_ARGS__); \
} while (0)
#define vp_emerg(v, fmt, ...)	vp_devprintk(emerg, v, fmt, ##__VA_ARGS__)
#define vp_crit(v, fmt, ...)	vp_devprintk(crit, v, fmt, ##__VA_ARGS__)
#define vp_alert(v, fmt, ...)	vp_devprintk(alert, v, fmt, ##__VA_ARGS__)
#define vp_err(v, fmt, ...)	vp_devprintk(err, v, fmt, ##__VA_ARGS__)
#define vp_warn(v, fmt, ...)	vp_devprintk(warn, v, fmt, ##__VA_ARGS__)
#define vp_notice(v, fmt, ...)	vp_devprintk(notice, v, fmt, ##__VA_ARGS__)
#define vp_info(v, fmt, ...)	vp_devprintk(info, v, fmt, ##__VA_ARGS__)
#define vp_dbg(v, fmt, ...)	vp_devprintk(dbg, v, fmt, ##__VA_ARGS__)

/* How a guest memory region's backing is managed */
enum mshv_region_type {
	MSHV_REGION_TYPE_MEM_PINNED,
	MSHV_REGION_TYPE_MEM_MOVABLE,
	MSHV_REGION_TYPE_MMIO
};

/*
 * A contiguous guest-physical memory region backed by userspace memory
 * (or MMIO), linked into the partition's pt_mem_regions list and
 * reference-counted via refcount/mshv_region_get()/mshv_region_put().
 */
struct mshv_mem_region {
	struct hlist_node hnode;
	struct kref refcount;
	u64 nr_pages;
	u64 start_gfn;		/* first guest frame number */
	u64 start_uaddr;	/* corresponding userspace virtual address */
	u32 hv_map_flags;
	struct mshv_partition *partition;
	enum mshv_region_type type;
	/* MMU interval notifier — presumably used for MOVABLE regions only; verify */
	struct mmu_interval_notifier mni;
	struct mutex mutex; /* protects region pages remapping */
	/* nr_pages entries; flexible array must stay last */
	struct page *pages[];
};

struct mshv_irq_ack_notifier {
	struct hlist_node link;
	unsigned int irq_ack_gsi;
	/* Called when the guest acks the interrupt for irq_ack_gsi */
	void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
};

/*
 * Root-side state for one child partition. Looked up by pt_id
 * (mshv_partition_find) — presumably hashed into mshv_root.pt_htable
 * via pt_hnode; lifetime managed by pt_ref_count
 * (mshv_partition_get/mshv_partition_put).
 */
struct mshv_partition {
	struct device *pt_module_dev;

	struct hlist_node pt_hnode;
	u64 pt_id;
	refcount_t pt_ref_count;
	struct mutex pt_mutex;

	spinlock_t pt_mem_regions_lock;
	struct hlist_head pt_mem_regions; // not ordered

	u32 pt_vp_count;
	struct mshv_vp *pt_vp_array[MSHV_MAX_VPS];

	struct mutex pt_irq_lock;
	struct srcu_struct pt_irq_srcu;
	struct hlist_head irq_ack_notifier_list;

	struct hlist_head pt_devices;

	/*
	 * MSHV does not support more than one async hypercall in flight
	 * for a single partition. Thus, it is okay to define per partition
	 * async hypercall status.
	 */
	struct completion async_hypercall;
	u64 async_hypercall_status;

	spinlock_t pt_irqfds_lock;
	struct hlist_head pt_irqfds_list;
	struct mutex irqfds_resampler_lock;
	struct hlist_head irqfds_resampler_list;

	struct hlist_head ioeventfds_list;

	/* RCU-protected guest irq routing table; see mshv_update_routing_table() */
	struct mshv_girq_routing_table __rcu *pt_girq_tbl;
	u64 isolation_type;
	bool import_completed;
	bool pt_initialized;
};

/*
 * dev_<level> printk wrappers for a partition: messages are prefixed
 * with "p<partition id>: ".
 */
#define pt_fmt(fmt) "p%llu: " fmt
#define pt_devprintk(level, p, fmt, ...) \
do { \
	const struct mshv_partition *__pt = (p); \
	dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \
		    ##__VA_ARGS__); \
} while (0)
#define pt_emerg(p, fmt, ...)	pt_devprintk(emerg, p, fmt, ##__VA_ARGS__)
#define pt_crit(p, fmt, ...)	pt_devprintk(crit, p, fmt, ##__VA_ARGS__)
#define pt_alert(p, fmt, ...)	pt_devprintk(alert, p, fmt, ##__VA_ARGS__)
#define pt_err(p, fmt, ...)	pt_devprintk(err, p, fmt, ##__VA_ARGS__)
#define pt_warn(p, fmt, ...)	pt_devprintk(warn, p, fmt, ##__VA_ARGS__)
#define pt_notice(p, fmt, ...)	pt_devprintk(notice, p, fmt, ##__VA_ARGS__)
#define pt_info(p, fmt, ...)	pt_devprintk(info, p, fmt, ##__VA_ARGS__)
#define pt_dbg(p, fmt, ...)	pt_devprintk(dbg, p, fmt, ##__VA_ARGS__)

/* A local-APIC interrupt to assert in the guest */
struct mshv_lapic_irq {
	u32 lapic_vector;
	u64 lapic_apic_id;
	union hv_interrupt_control lapic_control;
};

#define MSHV_MAX_GUEST_IRQS		4096

/* representation of one guest irq entry, either msi or legacy */
struct mshv_guest_irq_ent {
	u32 girq_entry_valid; /* vfio looks at this */
	u32 guest_irq_num;    /* a unique number for each irq */
	u32 girq_addr_lo;     /* guest irq msi address info */
	u32 girq_addr_hi;
	u32 girq_irq_data;    /* idt vector in some cases */
};

/* RCU-replaceable routing table; entries follow the header inline */
struct mshv_girq_routing_table {
	u32 num_rt_entries;
	struct mshv_guest_irq_ent mshv_girq_info_tbl[];
};

/* Per-CPU SynIC pages shared with the hypervisor */
struct hv_synic_pages {
	struct hv_message_page *hyp_synic_message_page;
	struct hv_synic_event_flags_page *synic_event_flags_page;
	struct hv_synic_event_ring_page *synic_event_ring_page;
};

/* Global driver state (single instance: the mshv_root extern below) */
struct mshv_root {
	struct hv_synic_pages __percpu *synic_pages;
	spinlock_t pt_ht_lock;	/* protects pt_htable */
	DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS);
	struct hv_partition_property_vmm_capabilities vmm_caps;
};

/*
 * Callback for doorbell events.
 * NOTE: This is called in interrupt context. Callback
 * should defer slow and sleeping logic to later.
 */
typedef void (*doorbell_cb_t) (int doorbell_id, void *);

/*
 * port table information
 */
struct port_table_info {
	struct rcu_head portbl_rcu;
	/* Discriminator for the union below */
	enum hv_port_type hv_port_type;
	union {
		struct {
			u64 reserved[2];
		} hv_port_message;
		struct {
			u64 reserved[2];
		} hv_port_event;
		struct {
			u64 reserved[2];
		} hv_port_monitor;
		struct {
			doorbell_cb_t doorbell_cb;
			void *data;	/* opaque argument passed back to doorbell_cb */
		} hv_port_doorbell;
	};
};

/* Guest interrupt routing */
int mshv_update_routing_table(struct mshv_partition *partition,
			      const struct mshv_user_irq_entry *entries,
			      unsigned int numents);
void mshv_free_routing_table(struct mshv_partition *partition);

/* Returns the entry by value; girq_entry_valid indicates a hit */
struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition,
					      u32 irq_num);

void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq,
			 struct mshv_lapic_irq *dest_irq);

void mshv_irqfd_routing_update(struct mshv_partition *partition);

/* Port-id allocation and lookup for the port table above */
void mshv_port_table_fini(void);
int mshv_portid_alloc(struct port_table_info *info);
int mshv_portid_lookup(int port_id, struct port_table_info *info);
void mshv_portid_free(int port_id);

int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
			   void *data, u64 gpa, u64 val, u64 flags);
void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid);

/* SynIC interrupt service routine and per-CPU setup/teardown hooks */
void mshv_isr(void);
int mshv_synic_init(unsigned int cpu);
int mshv_synic_cleanup(unsigned int cpu);

/* True when the partition uses SNP hardware isolation (memory encrypted) */
static inline bool mshv_partition_encrypted(struct mshv_partition *partition)
{
	return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP;
}

struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
void mshv_partition_put(struct mshv_partition *partition);
/* Caller must be in an RCU read-side critical section */
struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);

/* hypercalls */

int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
int hv_call_create_partition(u64 flags,
			     struct hv_partition_creation_properties creation_properties,
			     union hv_partition_isolation_properties isolation_properties,
			     u64 *partition_id);
int hv_call_initialize_partition(u64 partition_id);
int hv_call_finalize_partition(u64 partition_id);
int hv_call_delete_partition(u64 partition_id);
int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs);
int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
			  u32 flags, struct page **pages);
int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
			    u32 flags);
int hv_call_delete_vp(u64 partition_id, u32 vp_index);
int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
				     u64 dest_addr,
				     union hv_interrupt_control control);
int hv_call_clear_virtual_interrupt(u64 partition_id);
int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
				  union hv_gpa_page_access_state_flags state_flags,
				  int *written_total,
				  union hv_gpa_page_access_state *states);
int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
			 struct hv_vp_state_data state_data,
			 /* Choose between pages and ret_output */
			 u64 page_count, struct page **pages,
			 union hv_output_get_vp_state *ret_output);
int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
			 /* Choose between pages and bytes */
			 struct hv_vp_state_data state_data, u64 page_count,
			 struct page **pages, u32 num_bytes, u8 *bytes);
int hv_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
			 union hv_input_vtl input_vtl,
			 struct page **state_page);
int hv_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
			   struct page *state_page,
			   union hv_input_vtl input_vtl);
int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
			u64 connection_partition_id, struct hv_port_info *port_info,
			u8 port_vtl, u8 min_connection_vtl, int node);
int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
			 u64 connection_partition_id,
			 union hv_connection_id connection_id,
			 struct hv_connection_info *connection_info,
			 u8 connection_vtl, int node);
int hv_call_disconnect_port(u64 connection_partition_id,
			    union hv_connection_id connection_id);
int hv_call_notify_port_ring_empty(u32 sint_index);
int hv_map_stats_page(enum hv_stats_object_type type,
		      const union hv_stats_object_identity *identity,
		      void **addr);
int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
			const union hv_stats_object_identity *identity);
int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
				   u64 page_struct_count, u32 host_access,
				   u32 flags, u8 acquire);
int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
				      void *property_value, size_t property_value_sz);

extern struct mshv_root mshv_root;
extern enum hv_scheduler_type hv_scheduler_type;
extern u8 * __percpu *hv_synic_eventring_tail;

/* Memory-region lifecycle and mapping helpers (see struct mshv_mem_region) */
struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
					   u64 uaddr, u32 flags);
int mshv_region_share(struct mshv_mem_region *region);
int mshv_region_unshare(struct mshv_mem_region *region);
int mshv_region_map(struct mshv_mem_region *region);
void mshv_region_invalidate(struct mshv_mem_region *region);
int mshv_region_pin(struct mshv_mem_region *region);
void mshv_region_put(struct mshv_mem_region *region);
int mshv_region_get(struct mshv_mem_region *region);
bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
void mshv_region_movable_fini(struct mshv_mem_region *region);
bool mshv_region_movable_init(struct mshv_mem_region *region);

#endif /* _MSHV_ROOT_H_ */