/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * VIRTIO NETWORK DRIVER
 *
 * Shared definitions for the vioif driver: device-specific configuration
 * register offsets, virtqueue indexes, feature bits, the virtio net header
 * wire format, and the per-instance driver soft state.
 */

#ifndef _VIOIF_H
#define _VIOIF_H

#include "virtio.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * VIRTIO NETWORK CONFIGURATION REGISTERS
 *
 * These are offsets into the device-specific configuration space available
 * through the virtio_dev_*() family of functions.  The trailing comment on
 * each definition gives the register width in bits and whether it is
 * read-only (R) or read-write (R/W).
 */
#define VIRTIO_NET_CONFIG_MAC		0x00	/* 48 R/W */
#define VIRTIO_NET_CONFIG_STATUS	0x06	/* 16 R */
#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	0x08	/* 16 R */
#define VIRTIO_NET_CONFIG_MTU		0x0A	/* 16 R */

/*
 * VIRTIO NETWORK VIRTQUEUES
 *
 * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is
 * negotiated with the device.
 */
#define VIRTIO_NET_VIRTQ_RX		0
#define VIRTIO_NET_VIRTQ_TX		1
#define VIRTIO_NET_VIRTQ_CONTROL	2

/*
 * VIRTIO NETWORK FEATURE BITS
 *
 * Note that these definitions are grouped by function rather than by bit
 * position, so the bit numbers below do not appear in strictly increasing
 * order.
 */

/*
 * CSUM, GUEST_CSUM:
 *	Partial checksum support.  These features signal that the device will
 *	accept packets with partial checksums (CSUM), and that the driver will
 *	accept packets with partial checksums (GUEST_CSUM).  These features
 *	combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the
 *	"csum_start" and "csum_offset" fields, in the virtio net header.
 */
#define VIRTIO_NET_F_CSUM		(1ULL << 0)
#define VIRTIO_NET_F_GUEST_CSUM		(1ULL << 1)

/*
 * MTU:
 *	The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU.  If
 *	this is not negotiated, we allow the largest possible MTU that our
 *	buffer allocations support in case jumbo frames are tacitly supported
 *	by the device.  The default MTU is always 1500.
 */
#define VIRTIO_NET_F_MTU		(1ULL << 3)

/*
 * MAC:
 *	The device has an assigned primary MAC address.  If this feature bit is
 *	not set, the driver must provide a locally assigned MAC address.  See
 *	IEEE 802, "48-bit universal LAN MAC addresses" for more details on
 *	assignment.
 */
#define VIRTIO_NET_F_MAC		(1ULL << 5)

/*
 * GUEST_TSO4, GUEST_TSO6, GUEST_UFO:
 *	Inbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large
 *	combined TCP (v4 or v6) packets, or reassembled UDP fragments.
 */
#define VIRTIO_NET_F_GUEST_TSO4		(1ULL << 7)
#define VIRTIO_NET_F_GUEST_TSO6		(1ULL << 8)
#define VIRTIO_NET_F_GUEST_UFO		(1ULL << 10)

/*
 * GUEST_ECN:
 *	Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6.
 *	This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN
 *	bit in the "gso_type" of the virtio net header.  This bit tells the
 *	driver that the Explicit Congestion Notification (ECN) bit was set in
 *	the original TCP packets.
 */
#define VIRTIO_NET_F_GUEST_ECN		(1ULL << 9)

/*
 * HOST_TSO4, HOST_TSO6, HOST_UFO:
 *	Outbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_CSUM and signal that the device will accept large combined
 *	TCP (v4 or v6) packets that require segmentation offload, or large
 *	combined UDP packets that require fragmentation offload.
 */
#define VIRTIO_NET_F_HOST_TSO4		(1ULL << 11)
#define VIRTIO_NET_F_HOST_TSO6		(1ULL << 12)
#define VIRTIO_NET_F_HOST_UFO		(1ULL << 14)

/*
 * HOST_ECN:
 *	Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6.
 *	This feature means the device will accept packets that both require
 *	segmentation offload and have the Explicit Congestion Notification
 *	(ECN) bit set.  If this feature is not present, the device must not
 *	send large segments that require ECN to be set.
 */
#define VIRTIO_NET_F_HOST_ECN		(1ULL << 13)

/*
 * GSO:
 *	The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6,
 *	and HOST_ECN.  This is only useful for legacy devices; newer devices
 *	should be using the more specific bits above.
 */
#define VIRTIO_NET_F_GSO		(1ULL << 6)

/*
 * MRG_RXBUF:
 *	This feature allows the receipt of large packets without needing to
 *	allocate large buffers.  The "virtio_net_hdr" will include an extra
 *	value: the number of buffers to gang together.
 */
#define VIRTIO_NET_F_MRG_RXBUF		(1ULL << 15)

/*
 * STATUS:
 *	The VIRTIO_NET_CONFIG_STATUS configuration register is available, which
 *	allows the driver to read the link state from the device.
 */
#define VIRTIO_NET_F_STATUS		(1ULL << 16)

/*
 * CTRL_VQ, CTRL_RX, CTRL_VLAN:
 *	These features signal that the device exposes the control queue
 *	(VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the
 *	control queue supports extra commands (CTRL_RX, CTRL_VLAN).
 *	CTRL_RX_EXTRA presumably gates additional receive-mode control
 *	commands -- NOTE(review): not described in the comment above; confirm
 *	against the virtio specification before relying on it.
 */
#define VIRTIO_NET_F_CTRL_VQ		(1ULL << 17)
#define VIRTIO_NET_F_CTRL_RX		(1ULL << 18)
#define VIRTIO_NET_F_CTRL_VLAN		(1ULL << 19)
#define VIRTIO_NET_F_CTRL_RX_EXTRA	(1ULL << 20)

/*
 * These features are supported by the driver and we will request them from the
 * device.  Note that we do not currently request GUEST_CSUM, as the driver
 * does not presently support receiving frames with any offload features from
 * the device.
 */
#define VIRTIO_NET_WANTED_FEATURES	(VIRTIO_NET_F_CSUM |		\
					VIRTIO_NET_F_GSO |		\
					VIRTIO_NET_F_HOST_TSO4 |	\
					VIRTIO_NET_F_HOST_TSO6 |	\
					VIRTIO_NET_F_HOST_ECN |		\
					VIRTIO_NET_F_MAC |		\
					VIRTIO_NET_F_MTU)

/*
 * VIRTIO NETWORK HEADER
 *
 * This structure appears at the start of each transmit or receive packet
 * buffer.  It is the wire format shared with the device, hence __packed.
 */
struct virtio_net_hdr {
	uint8_t vnh_flags;		/* VIRTIO_NET_HDR_F_* flags */
	uint8_t vnh_gso_type;		/* VIRTIO_NET_HDR_GSO_* type */
	uint16_t vnh_hdr_len;
	uint16_t vnh_gso_size;
	uint16_t vnh_csum_start;
	uint16_t vnh_csum_offset;
} __packed;

/*
 * VIRTIO NETWORK HEADER: FLAGS (vnh_flags)
 */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	0x01

/*
 * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type)
 *
 * Each of these is an offload type, except for the ECN value which is
 * logically OR-ed with one of the other types.
 */
#define VIRTIO_NET_HDR_GSO_NONE		0
#define VIRTIO_NET_HDR_GSO_TCPV4	1
#define VIRTIO_NET_HDR_GSO_UDP		3
#define VIRTIO_NET_HDR_GSO_TCPV6	4
#define VIRTIO_NET_HDR_GSO_ECN		0x80


/*
 * DRIVER PARAMETERS
 */

/*
 * At attach, we allocate a fixed pool of buffers for receipt and transmission
 * of frames.  The maximum number of buffers of each type that we will allocate
 * is specified here.  If the ring size is smaller than this number, we will
 * use the ring size instead.
 */
#define VIRTIO_NET_TX_BUFS		256
#define VIRTIO_NET_RX_BUFS		256

/*
 * The virtio net header and the first buffer segment share the same DMA
 * allocation.  We round up the virtio header size to a multiple of 4 and add 2
 * bytes so that the IP header, which starts immediately after the 14 or 18
 * byte Ethernet header, is then correctly aligned:
 *
 *   0      10     16   18                      32/36
 *   | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ...
 *
 * Note that for this to work correctly, the DMA allocation must also be 4 byte
 * aligned.
 */
#define VIOIF_HEADER_ALIGN		4
#define VIOIF_HEADER_SKIP		(P2ROUNDUP( \
					sizeof (struct virtio_net_hdr), \
					VIOIF_HEADER_ALIGN) + 2)

/*
 * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says
 * we must be able to accept a 1514 byte packet, or if any segmentation offload
 * features have been negotiated a 65550 byte packet.  To keep things simple,
 * we'll assume segmentation offload is possible in most cases.  In addition to
 * the packet payload, we need to account for the Ethernet header and the
 * virtio_net_hdr.
 */
#define VIOIF_RX_DATA_SIZE		65550
#define VIOIF_RX_BUF_SIZE		(VIOIF_RX_DATA_SIZE + \
					sizeof (struct ether_header) + \
					VIOIF_HEADER_SKIP)

/*
 * If we assume that a large allocation will probably have mostly 4K page sized
 * cookies, 64 segments allows us 256KB for a single frame.  We're in control
 * of the allocation we use for receive buffers, so this value only has an
 * impact on the length of chain we're able to create for external transmit
 * buffer mappings.
 */
#define VIOIF_MAX_SEGS			64

/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * there.  Bigger packets are mapped, packets with multiple
 * cookies are mapped as indirect buffers.
 */
#define VIOIF_TX_INLINE_SIZE		(2 * 1024)


/*
 * TYPE DEFINITIONS
 */

/* Forward declaration; the full definition appears below. */
typedef struct vioif vioif_t;

/*
 * Receive buffers are allocated in advance as a combination of DMA memory and
 * a descriptor chain.  Receive buffers can be loaned to the networking stack
 * to avoid copying, and this object contains the free routine to pass to
 * desballoc().
 *
 * When receive buffers are not in use, they are linked into the per-instance
 * free list, "vif_rxbufs" via "rb_link".
 * Under normal conditions, we expect
 * the free list to be empty much of the time; most buffers will be in the ring
 * or on loan.
 */
typedef struct vioif_rxbuf {
	vioif_t *rb_vioif;		/* back-pointer to owning instance */
	frtn_t rb_frtn;			/* free routine for desballoc() */

	virtio_dma_t *rb_dma;		/* pre-allocated DMA memory */
	virtio_chain_t *rb_chain;	/* pre-allocated descriptor chain */

	list_node_t rb_link;		/* linkage for "vif_rxbufs" */
} vioif_rxbuf_t;

/*
 * Transmit buffers are also allocated in advance.  DMA memory is allocated for
 * the virtio net header, and to hold small packets.  Larger packets are mapped
 * from storage loaned to the driver by the network stack.
 *
 * When transmit buffers are not in use, they are linked into the per-instance
 * free list, "vif_txbufs" via "tb_link".
 */
typedef struct vioif_txbuf {
	mblk_t *tb_mp;

	/*
	 * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio
	 * net header, and to hold copied (rather than mapped) packet data.
	 */
	virtio_dma_t *tb_dma;
	virtio_chain_t *tb_chain;

	/*
	 * External buffer mapping.  The capacity is fixed at allocation time,
	 * and "tb_ndmaext" tracks the current number of mappings.
	 */
	virtio_dma_t **tb_dmaext;
	uint_t tb_dmaext_capacity;
	uint_t tb_ndmaext;

	list_node_t tb_link;		/* linkage for "vif_txbufs" */
} vioif_txbuf_t;

/*
 * Instance run state; see the "vif_runstate" comment in struct vioif for the
 * transitions between these values.
 */
typedef enum vioif_runstate {
	VIOIF_RUNSTATE_STOPPED = 1,
	VIOIF_RUNSTATE_STOPPING,
	VIOIF_RUNSTATE_RUNNING
} vioif_runstate_t;

/*
 * Per-instance driver object.
 */
struct vioif {
	dev_info_t *vif_dip;
	virtio_t *vif_virtio;

	kmutex_t vif_mutex;

	/*
	 * The NIC is considered RUNNING between the mc_start(9E) and
	 * mc_stop(9E) calls.  Otherwise it is STOPPING (while draining
	 * resources) then STOPPED.  When not RUNNING, we will drop incoming
	 * frames and refuse to insert more receive buffers into the receive
	 * queue.
	 */
	vioif_runstate_t vif_runstate;

	mac_handle_t vif_mac_handle;

	virtio_queue_t *vif_rx_vq;
	virtio_queue_t *vif_tx_vq;

	/* TX virtqueue management resources */
	boolean_t vif_tx_corked;
	boolean_t vif_tx_drain;
	timeout_id_t vif_tx_reclaim_tid;

	/*
	 * Configured offload features:
	 */
	unsigned int vif_tx_csum:1;
	unsigned int vif_tx_tso4:1;
	unsigned int vif_tx_tso6:1;

	/*
	 * For debugging, it is useful to know whether the MAC address we
	 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
	 * was otherwise generated or set from within the guest.
	 */
	unsigned int vif_mac_from_host:1;

	uint_t vif_mtu;
	uint_t vif_mtu_max;
	uint8_t vif_mac[ETHERADDRL];

	/*
	 * Receive buffer free list and accounting:
	 */
	list_t vif_rxbufs;
	uint_t vif_nrxbufs_alloc;
	uint_t vif_nrxbufs_onloan;
	uint_t vif_nrxbufs_onloan_max;
	uint_t vif_rxbufs_capacity;
	vioif_rxbuf_t *vif_rxbufs_mem;

	/*
	 * Transmit buffer free list and accounting:
	 */
	list_t vif_txbufs;
	uint_t vif_ntxbufs_alloc;
	uint_t vif_txbufs_capacity;
	vioif_txbuf_t *vif_txbufs_mem;

	/*
	 * These copy size thresholds are exposed as private MAC properties so
	 * that they can be tuned without rebooting.
	 */
	uint_t vif_rxcopy_thresh;
	uint_t vif_txcopy_thresh;

	/*
	 * Statistics visible through mac:
	 */
	uint64_t vif_ipackets;
	uint64_t vif_opackets;
	uint64_t vif_rbytes;
	uint64_t vif_obytes;
	uint64_t vif_brdcstxmt;
	uint64_t vif_brdcstrcv;
	uint64_t vif_multixmt;
	uint64_t vif_multircv;
	uint64_t vif_norecvbuf;
	uint64_t vif_notxbuf;
	uint64_t vif_ierrors;
	uint64_t vif_oerrors;

	/*
	 * Internal debugging statistics:
	 */
	uint64_t vif_rxfail_dma_handle;
	uint64_t vif_rxfail_dma_buffer;
	uint64_t vif_rxfail_dma_bind;
	uint64_t vif_rxfail_chain_undersize;
	uint64_t vif_rxfail_no_descriptors;
	uint64_t vif_txfail_dma_handle;
	uint64_t vif_txfail_dma_bind;
	uint64_t vif_txfail_indirect_limit;

	uint64_t vif_stat_tx_reclaim;
};

#ifdef __cplusplus
}
#endif

#endif /* _VIOIF_H */