/*
 * Copyright (c) 2013  Chris Torek <torek @ torek net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
39 * Copyright 2024 Oxide Computer Company 40 */ 41 42 #ifndef _VIONA_IMPL_H 43 #define _VIONA_IMPL_H 44 45 #include <sys/ddi.h> 46 #include <sys/list.h> 47 #include <sys/sunddi.h> 48 #include <sys/sunndi.h> 49 #include <sys/strsun.h> 50 #include <sys/sysmacros.h> 51 #include <sys/uio.h> 52 53 #include <sys/mac_client.h> 54 #include <sys/mac_provider.h> 55 #include <sys/mac_client_priv.h> 56 #include <sys/neti.h> 57 #include <inet/ip.h> 58 #include <inet/tcp.h> 59 60 #include <sys/vmm_drv.h> 61 #include <sys/viona_io.h> 62 63 struct viona_link; 64 typedef struct viona_link viona_link_t; 65 struct viona_desb; 66 typedef struct viona_desb viona_desb_t; 67 struct viona_net; 68 typedef struct viona_neti viona_neti_t; 69 70 typedef struct viona_transfer_stats { 71 /* Packets transferred successfully */ 72 uint64_t vts_packets; 73 /* Bytes transferred successfully */ 74 uint64_t vts_bytes; 75 /* 76 * Count of transfers which encountered errors, not including 77 * insufficient space in ring. 78 */ 79 uint64_t vts_errors; 80 /* 81 * Count of packets dropped due to insufficient space in the ring or by 82 * order of associated hook. 83 */ 84 uint64_t vts_drops; 85 } viona_transfer_stats_t; 86 87 enum viona_ring_state { 88 VRS_RESET = 0x0, /* just allocated or reset */ 89 VRS_SETUP = 0x1, /* addrs setup and starting worker thread */ 90 VRS_INIT = 0x2, /* worker thread started & waiting to run */ 91 VRS_RUN = 0x3, /* running work routine */ 92 VRS_STOP = 0x4, /* worker is exiting */ 93 }; 94 enum viona_ring_state_flags { 95 VRSF_REQ_START = 0x1, /* start running from INIT state */ 96 VRSF_REQ_STOP = 0x2, /* stop running, clean up, goto RESET state */ 97 VRSF_REQ_PAUSE = 0x4, /* stop running, goto INIT state */ 98 VRSF_RENEW = 0x8, /* ring renewing lease */ 99 }; 100 101 typedef struct viona_vring_tx { 102 /* 103 * Temporary store of kernel-virtual addresses of guest buffers in a 104 * descriptor chain undergoing transmission. 105 * 106 * Length stored in vrt_iov_cnt. 
107 */ 108 struct iovec *vrt_iov; 109 /* 110 * When device is configured to "loan" guest memory for transmitted 111 * packets, rather than allocating and copying them in their entirety, 112 * this holds a ring-sized array of viona_desb_t entries. 113 * 114 * In addition to the desballoc() accounting, those descriptors also 115 * hold a pre-allocated buffer sized to receive the packet headers 116 * (which must be copied despite for TOCTOU reasons). 117 */ 118 viona_desb_t *vrt_desb; 119 /* 120 * Length of vrt_iov 121 */ 122 uint_t vrt_iov_cnt; 123 /* 124 * Length in bytes to leave "empty" in front of the headers for each 125 * transmitted packet. This allows subsequent encapsulation (such as 126 * vlans, VXLAN, etc) to use the space without requiring an additional 127 * allocation and header copy. 128 */ 129 uint_t vrt_header_pad; 130 } viona_vring_tx_t; 131 132 typedef struct viona_vring { 133 viona_link_t *vr_link; 134 135 kmutex_t vr_lock; 136 kcondvar_t vr_cv; 137 uint16_t vr_state; 138 uint16_t vr_state_flags; 139 uint_t vr_xfer_outstanding; 140 kthread_t *vr_worker_thread; 141 vmm_lease_t *vr_lease; 142 143 /* Resources required for transmission on TX ring(s) */ 144 struct viona_vring_tx vr_tx; 145 146 uint_t vr_intr_enabled; 147 uint64_t vr_msi_addr; 148 uint64_t vr_msi_msg; 149 150 /* Internal ring-related state */ 151 kmutex_t vr_a_mutex; /* sync consumers of 'avail' */ 152 kmutex_t vr_u_mutex; /* sync consumers of 'used' */ 153 uint64_t vr_pa; 154 uint16_t vr_size; 155 uint16_t vr_mask; /* cached from vr_size */ 156 uint16_t vr_cur_aidx; /* trails behind 'avail_idx' */ 157 uint16_t vr_cur_uidx; /* drives 'used_idx' */ 158 159 /* Reference to guest pages holding virtqueue */ 160 void **vr_map_pages; 161 vmm_page_t *vr_map_hold; 162 163 /* Per-ring general statistics */ 164 struct viona_transfer_stats vr_stats; 165 166 /* Per-ring error condition statistics */ 167 struct viona_ring_err_stats { 168 uint64_t rs_ndesc_too_high; 169 uint64_t rs_bad_idx; 170 
uint64_t rs_indir_bad_len; 171 uint64_t rs_indir_bad_nest; 172 uint64_t rs_indir_bad_next; 173 uint64_t rs_no_space; 174 uint64_t rs_too_many_desc; 175 uint64_t rs_desc_bad_len; 176 uint64_t rs_len_overflow; 177 178 uint64_t rs_bad_ring_addr; 179 180 uint64_t rs_fail_hcksum; 181 uint64_t rs_fail_hcksum6; 182 uint64_t rs_fail_hcksum_proto; 183 184 uint64_t rs_bad_rx_frame; 185 uint64_t rs_rx_merge_overrun; 186 uint64_t rs_rx_merge_underrun; 187 uint64_t rs_rx_pad_short; 188 uint64_t rs_rx_mcast_check; 189 uint64_t rs_too_short; 190 uint64_t rs_tx_absent; 191 uint64_t rs_tx_gso_fail; 192 193 uint64_t rs_rx_hookdrop; 194 uint64_t rs_tx_hookdrop; 195 } vr_err_stats; 196 } viona_vring_t; 197 198 typedef struct viona_link_params { 199 /* Amount of free space to prepend to TX header mblk */ 200 uint16_t vlp_tx_header_pad; 201 /* Force copying of TX data, rather than "loaning" guest memory */ 202 boolean_t vlp_tx_copy_data; 203 } viona_link_params_t; 204 205 struct viona_link { 206 vmm_hold_t *l_vm_hold; 207 boolean_t l_destroyed; 208 209 viona_vring_t l_vrings[VIONA_VQ_MAX]; 210 211 uint32_t l_features; 212 uint32_t l_features_hw; 213 uint32_t l_cap_csum; 214 viona_link_params_t l_params; 215 216 uint16_t l_notify_ioport; 217 void *l_notify_cookie; 218 219 datalink_id_t l_linkid; 220 mac_handle_t l_mh; 221 mac_client_handle_t l_mch; 222 mac_promisc_handle_t l_mph; 223 mac_unicast_handle_t l_muh; 224 viona_promisc_t l_promisc; 225 226 pollhead_t l_pollhead; 227 228 viona_neti_t *l_neti; 229 230 kmutex_t l_stats_lock; 231 struct viona_link_stats { 232 struct viona_transfer_stats vls_rx; 233 struct viona_transfer_stats vls_tx; 234 } l_stats; 235 }; 236 237 typedef struct viona_nethook { 238 net_handle_t vnh_neti; 239 hook_family_t vnh_family; 240 hook_event_t vnh_event_in; 241 hook_event_t vnh_event_out; 242 hook_event_token_t vnh_token_in; 243 hook_event_token_t vnh_token_out; 244 boolean_t vnh_hooked; 245 } viona_nethook_t; 246 247 struct viona_neti { 248 list_node_t 
vni_node; 249 250 netid_t vni_netid; 251 zoneid_t vni_zid; 252 253 viona_nethook_t vni_nethook; 254 255 kmutex_t vni_lock; /* Protects remaining members */ 256 kcondvar_t vni_ref_change; /* Protected by vni_lock */ 257 uint_t vni_ref; /* Protected by vni_lock */ 258 list_t vni_dev_list; /* Protected by vni_lock */ 259 }; 260 261 typedef struct viona_kstats { 262 kstat_named_t vk_rx_packets; 263 kstat_named_t vk_rx_bytes; 264 kstat_named_t vk_rx_errors; 265 kstat_named_t vk_rx_drops; 266 kstat_named_t vk_tx_packets; 267 kstat_named_t vk_tx_bytes; 268 kstat_named_t vk_tx_errors; 269 kstat_named_t vk_tx_drops; 270 } viona_kstats_t; 271 272 typedef struct used_elem { 273 uint16_t id; 274 uint32_t len; 275 } used_elem_t; 276 277 /* 278 * Helper for performing copies from an array of iovec entries. 279 */ 280 typedef struct iov_bunch { 281 /* 282 * Head of array of iovec entries, which have an iov_len sum covering 283 * ib_remain bytes. 284 */ 285 struct iovec *ib_iov; 286 /* Byte offset in current ib_iov entry */ 287 uint32_t ib_offset; 288 /* 289 * Bytes remaining in entries covered by ib_iov entries, not including 290 * the offset specified by ib_offset 291 */ 292 uint32_t ib_remain; 293 } iov_bunch_t; 294 295 typedef struct viona_soft_state { 296 kmutex_t ss_lock; 297 viona_link_t *ss_link; 298 list_node_t ss_node; 299 kstat_t *ss_kstat; 300 minor_t ss_minor; 301 } viona_soft_state_t; 302 303 #pragma pack(1) 304 struct virtio_desc { 305 uint64_t vd_addr; 306 uint32_t vd_len; 307 uint16_t vd_flags; 308 uint16_t vd_next; 309 }; 310 311 struct virtio_used { 312 uint32_t vu_idx; 313 uint32_t vu_tlen; 314 }; 315 316 struct virtio_net_mrgrxhdr { 317 uint8_t vrh_flags; 318 uint8_t vrh_gso_type; 319 uint16_t vrh_hdr_len; 320 uint16_t vrh_gso_size; 321 uint16_t vrh_csum_start; 322 uint16_t vrh_csum_offset; 323 uint16_t vrh_bufs; 324 }; 325 326 struct virtio_net_hdr { 327 uint8_t vrh_flags; 328 uint8_t vrh_gso_type; 329 uint16_t vrh_hdr_len; 330 uint16_t vrh_gso_size; 331 
uint16_t vrh_csum_start; 332 uint16_t vrh_csum_offset; 333 }; 334 #pragma pack() 335 336 #define VNETHOOK_INTERESTED_IN(neti) \ 337 (neti)->vni_nethook.vnh_event_in.he_interested 338 #define VNETHOOK_INTERESTED_OUT(neti) \ 339 (neti)->vni_nethook.vnh_event_out.he_interested 340 341 342 #define VIONA_PROBE(name) DTRACE_PROBE(viona__##name) 343 #define VIONA_PROBE1(name, arg1, arg2) \ 344 DTRACE_PROBE1(viona__##name, arg1, arg2) 345 #define VIONA_PROBE2(name, arg1, arg2, arg3, arg4) \ 346 DTRACE_PROBE2(viona__##name, arg1, arg2, arg3, arg4) 347 #define VIONA_PROBE3(name, arg1, arg2, arg3, arg4, arg5, arg6) \ 348 DTRACE_PROBE3(viona__##name, arg1, arg2, arg3, arg4, arg5, arg6) 349 #define VIONA_PROBE4(name, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ 350 DTRACE_PROBE4(viona__##name, arg1, arg2, arg3, arg4, arg5, arg6, arg7, \ 351 arg8) 352 #define VIONA_PROBE5(name, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, \ 353 arg9, arg10) \ 354 DTRACE_PROBE5(viona__##name, arg1, arg2, arg3, arg4, arg5, arg6, arg7, \ 355 arg8, arg9, arg10) 356 #define VIONA_PROBE_BAD_RING_ADDR(r, a) \ 357 VIONA_PROBE2(bad_ring_addr, viona_vring_t *, r, void *, (void *)(a)) 358 359 /* Increment one of the named ring error stats */ 360 #define VIONA_RING_STAT_INCR(r, name) \ 361 (((r)->vr_err_stats.rs_ ## name)++) 362 363 #define VIONA_MAX_HDRS_LEN (sizeof (struct ether_vlan_header) + \ 364 IP_MAX_HDR_LENGTH + TCP_MAX_HDR_LENGTH) 365 366 #define VRING_AVAIL_F_NO_INTERRUPT 1 367 #define VRING_USED_F_NO_NOTIFY 1 368 369 #define VRING_DESC_F_NEXT (1 << 0) 370 #define VRING_DESC_F_WRITE (1 << 1) 371 #define VRING_DESC_F_INDIRECT (1 << 2) 372 373 #define VIRTIO_NET_HDR_F_NEEDS_CSUM (1 << 0) 374 #define VIRTIO_NET_HDR_F_DATA_VALID (1 << 1) 375 376 #define VIRTIO_NET_HDR_GSO_NONE 0 377 #define VIRTIO_NET_HDR_GSO_TCPV4 1 378 379 #define VIRTIO_NET_F_CSUM (1 << 0) 380 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) 381 #define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ 382 #define 
VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can accept TSO */ 383 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can accept TSO */ 384 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX bufs */ 385 #define VIRTIO_NET_F_STATUS (1 << 16) /* cfg status field present */ 386 #define VIRTIO_F_RING_NOTIFY_ON_EMPTY (1 << 24) 387 #define VIRTIO_F_RING_INDIRECT_DESC (1 << 28) 388 #define VIRTIO_F_RING_EVENT_IDX (1 << 29) 389 390 /* 391 * Place an upper bound on the size of packets viona is willing to handle, 392 * particularly in the TX case, where guest behavior directs the sizing of 393 * buffer allocations. 394 */ 395 #define VIONA_MAX_PACKET_SIZE UINT16_MAX 396 397 struct viona_ring_params { 398 uint64_t vrp_pa; 399 uint16_t vrp_size; 400 uint16_t vrp_avail_idx; 401 uint16_t vrp_used_idx; 402 }; 403 404 void viona_ring_alloc(viona_link_t *, viona_vring_t *); 405 void viona_ring_free(viona_vring_t *); 406 int viona_ring_get_state(viona_link_t *, uint16_t, struct viona_ring_params *); 407 int viona_ring_set_state(viona_link_t *, uint16_t, 408 const struct viona_ring_params *); 409 int viona_ring_reset(viona_vring_t *, boolean_t); 410 int viona_ring_init(viona_link_t *, uint16_t, const struct viona_ring_params *); 411 boolean_t viona_ring_lease_renew(viona_vring_t *); 412 bool vring_need_bail(const viona_vring_t *); 413 int viona_ring_pause(viona_vring_t *); 414 415 int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *, 416 vmm_page_t **, uint32_t *); 417 void vq_pushchain(viona_vring_t *, uint32_t, uint16_t); 418 void vq_pushchain_many(viona_vring_t *, uint_t, used_elem_t *); 419 420 void viona_intr_ring(viona_vring_t *ring, boolean_t); 421 void viona_ring_set_no_notify(viona_vring_t *, boolean_t); 422 void viona_ring_disable_notify(viona_vring_t *); 423 void viona_ring_enable_notify(viona_vring_t *); 424 uint16_t viona_ring_num_avail(viona_vring_t *); 425 426 void viona_ring_stat_accept(viona_vring_t *, uint32_t); 427 void 
viona_ring_stat_drop(viona_vring_t *); 428 void viona_ring_stat_error(viona_vring_t *); 429 430 bool iov_bunch_copy(iov_bunch_t *, void *, uint32_t); 431 bool iov_bunch_next_chunk(iov_bunch_t *, caddr_t *, uint32_t *); 432 433 void viona_rx_init(void); 434 void viona_rx_fini(void); 435 int viona_rx_set(viona_link_t *, viona_promisc_t); 436 void viona_rx_clear(viona_link_t *); 437 void viona_worker_rx(viona_vring_t *, viona_link_t *); 438 439 extern kmutex_t viona_force_copy_lock; 440 extern uint_t viona_max_header_pad; 441 boolean_t viona_tx_copy_needed(void); 442 void viona_worker_tx(viona_vring_t *, viona_link_t *); 443 void viona_tx_ring_alloc(viona_vring_t *, const uint16_t); 444 void viona_tx_ring_free(viona_vring_t *, const uint16_t); 445 446 void viona_neti_attach(void); 447 void viona_neti_detach(void); 448 viona_neti_t *viona_neti_lookup_by_zid(zoneid_t); 449 void viona_neti_rele(viona_neti_t *); 450 int viona_hook(viona_link_t *, viona_vring_t *, mblk_t **, boolean_t); 451 452 #endif /* _VIONA_IMPL_H */ 453