1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2021 Joyent, Inc. 14 */ 15 16 /* 17 * VIRTIO NETWORK DRIVER 18 */ 19 20 #ifndef _VIOIF_H 21 #define _VIOIF_H 22 23 #include "virtio.h" 24 25 #ifdef __cplusplus 26 extern "C" { 27 #endif 28 29 /* 30 * VIRTIO NETWORK CONFIGURATION REGISTERS 31 * 32 * These are offsets into the device-specific configuration space available 33 * through the virtio_dev_*() family of functions. 34 */ 35 #define VIRTIO_NET_CONFIG_MAC 0x00 /* 48 R/W */ 36 #define VIRTIO_NET_CONFIG_STATUS 0x06 /* 16 R */ 37 #define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS 0x08 /* 16 R */ 38 #define VIRTIO_NET_CONFIG_MTU 0x0A /* 16 R */ 39 40 /* 41 * VIRTIO NETWORK VIRTQUEUES 42 * 43 * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is 44 * negotiated with the device. 45 */ 46 #define VIRTIO_NET_VIRTQ_RX 0 47 #define VIRTIO_NET_VIRTQ_TX 1 48 #define VIRTIO_NET_VIRTQ_CONTROL 2 49 50 /* 51 * VIRTIO NETWORK FEATURE BITS 52 */ 53 54 /* 55 * CSUM, GUEST_CSUM: 56 * Partial checksum support. These features signal that the device will 57 * accept packets with partial checksums (CSUM), and that the driver will 58 * accept packets with partial checksums (GUEST_CSUM). These features 59 * combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the 60 * "csum_start" and "csum_offset" fields, in the virtio net header. 61 */ 62 #define VIRTIO_NET_F_CSUM (1ULL << 0) 63 #define VIRTIO_NET_F_GUEST_CSUM (1ULL << 1) 64 65 /* 66 * MTU: 67 * The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU. If 68 * this is not negotiated, we allow the largest possible MTU that our 69 * buffer allocations support in case jumbo frames are tacitly supported 70 * by the device. The default MTU is always 1500. 71 */ 72 #define VIRTIO_NET_F_MTU (1ULL << 3) 73 74 /* 75 * MAC: 76 * The device has an assigned primary MAC address. If this feature bit is 77 * not set, the driver must provide a locally assigned MAC address. See 78 * IEEE 802, "48-bit universal LAN MAC addresses" for more details on 79 * assignment. 80 */ 81 #define VIRTIO_NET_F_MAC (1ULL << 5) 82 83 /* 84 * GUEST_TSO4, GUEST_TSO6, GUEST_UFO: 85 * Inbound segmentation offload support. These features depend on having 86 * VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large 87 * combined TCP (v4 or v6) packets, or reassembled UDP fragments. 88 */ 89 #define VIRTIO_NET_F_GUEST_TSO4 (1ULL << 7) 90 #define VIRTIO_NET_F_GUEST_TSO6 (1ULL << 8) 91 #define VIRTIO_NET_F_GUEST_UFO (1ULL << 10) 92 93 /* 94 * GUEST_ECN: 95 * Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6. 96 * This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN 97 * bit in the "gso_type" of the virtio net header. This bit tells the 98 * driver that the Explicit Congestion Notification (ECN) bit was set in 99 * the original TCP packets. 100 */ 101 #define VIRTIO_NET_F_GUEST_ECN (1ULL << 9) 102 103 /* 104 * HOST_TSO4, HOST_TSO6, HOST_UFO: 105 * Outbound segmentation offload support. These features depend on having 106 * VIRTIO_NET_F_CSUM and signal that the device will accept large combined 107 * TCP (v4 or v6) packets that require segmentation offload, or large 108 * combined UDP packets that require fragmentation offload. 109 */ 110 #define VIRTIO_NET_F_HOST_TSO4 (1ULL << 11) 111 #define VIRTIO_NET_F_HOST_TSO6 (1ULL << 12) 112 #define VIRTIO_NET_F_HOST_UFO (1ULL << 14) 113 114 /* 115 * HOST_ECN: 116 * Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6. 117 * This features means the device will accept packets that both require 118 * segmentation offload and have the Explicit Congestion Notification 119 * (ECN) bit set. If this feature is not present, the device must not 120 * send large segments that require ECN to be set. 121 */ 122 #define VIRTIO_NET_F_HOST_ECN (1ULL << 13) 123 124 /* 125 * GSO: 126 * The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6, 127 * and HOST_ECN. This is only useful for legacy devices; newer devices 128 * should be using the more specific bits above. 129 */ 130 #define VIRTIO_NET_F_GSO (1ULL << 6) 131 132 /* 133 * MRG_RXBUF: 134 * This feature allows the receipt of large packets without needing to 135 * allocate large buffers. The "virtio_net_hdr" will include an extra 136 * value: the number of buffers to gang together. 137 */ 138 #define VIRTIO_NET_F_MRG_RXBUF (1ULL << 15) 139 140 /* 141 * STATUS: 142 * The VIRTIO_NET_CONFIG_STATUS configuration register is available, which 143 * allows the driver to read the link state from the device. 144 */ 145 #define VIRTIO_NET_F_STATUS (1ULL << 16) 146 147 /* 148 * CTRL_VQ, CTRL_RX, CTRL_VLAN: 149 * These features signal that the device exposes the control queue 150 * (VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the 151 * control queue supports extra commands (CTRL_RX, CTRL_VLAN). 152 */ 153 #define VIRTIO_NET_F_CTRL_VQ (1ULL << 17) 154 #define VIRTIO_NET_F_CTRL_RX (1ULL << 18) 155 #define VIRTIO_NET_F_CTRL_VLAN (1ULL << 19) 156 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1ULL << 20) 157 158 /* 159 * These features are supported by the driver and we will request them from the 160 * device. Note that we do not currently request GUEST_CSUM, as the driver 161 * does not presently support receiving frames with any offload features from 162 * the device. 163 */ 164 #define VIRTIO_NET_WANTED_FEATURES (VIRTIO_NET_F_CSUM | \ 165 VIRTIO_NET_F_GSO | \ 166 VIRTIO_NET_F_HOST_TSO4 | \ 167 VIRTIO_NET_F_HOST_TSO6 | \ 168 VIRTIO_NET_F_HOST_ECN | \ 169 VIRTIO_NET_F_MAC | \ 170 VIRTIO_NET_F_MTU | \ 171 VIRTIO_NET_F_CTRL_VQ | \ 172 VIRTIO_NET_F_CTRL_RX) 173 174 /* 175 * VIRTIO NETWORK HEADER 176 * 177 * This structure appears at the start of each transmit or receive packet 178 * buffer. 179 */ 180 struct virtio_net_hdr { 181 uint8_t vnh_flags; 182 uint8_t vnh_gso_type; 183 uint16_t vnh_hdr_len; 184 uint16_t vnh_gso_size; 185 uint16_t vnh_csum_start; 186 uint16_t vnh_csum_offset; 187 } __packed; 188 189 /* 190 * VIRTIO NETWORK HEADER: FLAGS (vnh_flags) 191 */ 192 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 0x01 193 194 /* 195 * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type) 196 * 197 * Each of these is an offload type, except for the ECN value which is 198 * logically OR-ed with one of the other types. 199 */ 200 #define VIRTIO_NET_HDR_GSO_NONE 0 201 #define VIRTIO_NET_HDR_GSO_TCPV4 1 202 #define VIRTIO_NET_HDR_GSO_UDP 3 203 #define VIRTIO_NET_HDR_GSO_TCPV6 4 204 #define VIRTIO_NET_HDR_GSO_ECN 0x80 205 206 /* 207 * VIRTIO CONTROL VIRTQUEUE HEADER 208 * 209 * This structure appears at the start of each control virtqueue request. 210 */ 211 struct virtio_net_ctrlq_hdr { 212 uint8_t vnch_class; 213 uint8_t vnch_command; 214 } __packed; 215 216 /* 217 * Contol Queue Classes 218 */ 219 #define VIRTIO_NET_CTRL_RX 0 220 221 /* 222 * CTRL_RX commands 223 */ 224 #define VIRTIO_NET_CTRL_RX_PROMISC 0 225 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 226 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 227 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 228 #define VIRTIO_NET_CTRL_RX_NOUNI 4 229 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 230 231 /* 232 * Control queue ack values 233 */ 234 #define VIRTIO_NET_CQ_OK 0 235 #define VIRTIO_NET_CQ_ERR 1 236 237 238 /* 239 * DRIVER PARAMETERS 240 */ 241 242 /* 243 * At attach, we allocate a fixed pool of buffers for receipt and transmission 244 * of frames. The maximum number of buffers of each type that we will allocate 245 * is specified here. If the ring size is smaller than this number, we will 246 * use the ring size instead. 247 */ 248 #define VIRTIO_NET_TX_BUFS 256 249 #define VIRTIO_NET_RX_BUFS 256 250 251 /* 252 * Initially, only use a single buf for control queue requests (when 253 * present). If this becomes a bottleneck, we can simply increase this 254 * value as necessary. 255 */ 256 #define VIRTIO_NET_CTRL_BUFS 1 257 258 /* 259 * The virtio net header and the first buffer segment share the same DMA 260 * allocation. We round up the virtio header size to a multiple of 4 and add 2 261 * bytes so that the IP header, which starts immediately after the 14 or 18 262 * byte Ethernet header, is then correctly aligned: 263 * 264 * 0 10 16 18 32/36 265 * | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ... 266 * 267 * Note that for this to work correctly, the DMA allocation must also be 4 byte 268 * aligned. 269 */ 270 #define VIOIF_HEADER_ALIGN 4 271 #define VIOIF_HEADER_SKIP (P2ROUNDUP( \ 272 sizeof (struct virtio_net_hdr), \ 273 VIOIF_HEADER_ALIGN) + 2) 274 275 /* 276 * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says 277 * we must be able to accept a 1514 byte packet, or if any segmentation offload 278 * features have been negotiated a 65550 byte packet. To keep things simple, 279 * we'll assume segmentation offload is possible in most cases. In addition to 280 * the packet payload, we need to account for the Ethernet header and the 281 * virtio_net_hdr. 282 */ 283 #define VIOIF_RX_DATA_SIZE 65550 284 #define VIOIF_RX_BUF_SIZE (VIOIF_RX_DATA_SIZE + \ 285 sizeof (struct ether_header) + \ 286 VIOIF_HEADER_SKIP) 287 288 /* 289 * If we assume that a large allocation will probably have mostly 4K page sized 290 * cookies, 64 segments allows us 256KB for a single frame. We're in control 291 * of the allocation we use for receive buffers, so this value only has an 292 * impact on the length of chain we're able to create for external transmit 293 * buffer mappings. 294 */ 295 #define VIOIF_MAX_SEGS 64 296 297 /* 298 * We pre-allocate a reasonably large buffer to copy small packets 299 * there. Bigger packets are mapped, packets with multiple 300 * cookies are mapped as indirect buffers. 301 */ 302 #define VIOIF_TX_INLINE_SIZE (2 * 1024) 303 304 /* 305 * Control queue messages are very small. This is a rather arbitrary small 306 * bufer size that should be sufficiently large for any control queue 307 * messages we will send. 308 */ 309 #define VIOIF_CTRL_SIZE 256 310 311 /* 312 * TYPE DEFINITIONS 313 */ 314 315 typedef struct vioif vioif_t; 316 317 /* 318 * Receive buffers are allocated in advance as a combination of DMA memory and 319 * a descriptor chain. Receive buffers can be loaned to the networking stack 320 * to avoid copying, and this object contains the free routine to pass to 321 * desballoc(). 322 * 323 * When receive buffers are not in use, they are linked into the per-instance 324 * free list, "vif_rxbufs" via "rb_link". Under normal conditions, we expect 325 * the free list to be empty much of the time; most buffers will be in the ring 326 * or on loan. 327 */ 328 typedef struct vioif_rxbuf { 329 vioif_t *rb_vioif; 330 frtn_t rb_frtn; 331 332 virtio_dma_t *rb_dma; 333 virtio_chain_t *rb_chain; 334 335 list_node_t rb_link; 336 } vioif_rxbuf_t; 337 338 typedef struct vioif_ctrlbuf { 339 vioif_t *cb_vioif; 340 341 virtio_dma_t *cb_dma; 342 virtio_chain_t *cb_chain; 343 344 list_node_t cb_link; 345 } vioif_ctrlbuf_t; 346 347 /* 348 * Transmit buffers are also allocated in advance. DMA memory is allocated for 349 * the virtio net header, and to hold small packets. Larger packets are mapped 350 * from storage loaned to the driver by the network stack. 351 * 352 * When transmit buffers are not in use, they are linked into the per-instance 353 * free list, "vif_txbufs" via "tb_link". 354 */ 355 typedef struct vioif_txbuf { 356 mblk_t *tb_mp; 357 358 /* 359 * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio 360 * net header, and to hold copied (rather than mapped) packet data. 361 */ 362 virtio_dma_t *tb_dma; 363 virtio_chain_t *tb_chain; 364 365 /* 366 * External buffer mapping. The capacity is fixed at allocation time, 367 * and "tb_ndmaext" tracks the current number of mappings. 368 */ 369 virtio_dma_t **tb_dmaext; 370 uint_t tb_dmaext_capacity; 371 uint_t tb_ndmaext; 372 373 list_node_t tb_link; 374 } vioif_txbuf_t; 375 376 typedef enum vioif_runstate { 377 VIOIF_RUNSTATE_STOPPED = 1, 378 VIOIF_RUNSTATE_STOPPING, 379 VIOIF_RUNSTATE_RUNNING 380 } vioif_runstate_t; 381 382 /* 383 * Per-instance driver object. 384 */ 385 struct vioif { 386 dev_info_t *vif_dip; 387 virtio_t *vif_virtio; 388 389 kmutex_t vif_mutex; 390 391 /* 392 * The NIC is considered RUNNING between the mc_start(9E) and 393 * mc_stop(9E) calls. Otherwise it is STOPPING (while draining 394 * resources) then STOPPED. When not RUNNING, we will drop incoming 395 * frames and refuse to insert more receive buffers into the receive 396 * queue. 397 */ 398 vioif_runstate_t vif_runstate; 399 400 mac_handle_t vif_mac_handle; 401 402 virtio_queue_t *vif_rx_vq; 403 virtio_queue_t *vif_tx_vq; 404 virtio_queue_t *vif_ctrl_vq; 405 406 /* TX virtqueue management resources */ 407 boolean_t vif_tx_corked; 408 boolean_t vif_tx_drain; 409 timeout_id_t vif_tx_reclaim_tid; 410 411 /* 412 * Configured offload features: 413 */ 414 unsigned int vif_tx_csum:1; 415 unsigned int vif_tx_tso4:1; 416 unsigned int vif_tx_tso6:1; 417 418 /* 419 * For debugging, it is useful to know whether the MAC address we 420 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or 421 * was otherwise generated or set from within the guest. 422 */ 423 unsigned int vif_mac_from_host:1; 424 425 unsigned int vif_has_ctrlq:1; 426 unsigned int vif_has_ctrlq_rx:1; 427 428 uint_t vif_mtu; 429 uint_t vif_mtu_max; 430 uint8_t vif_mac[ETHERADDRL]; 431 432 /* 433 * Receive buffer free list and accounting: 434 */ 435 list_t vif_rxbufs; 436 uint_t vif_nrxbufs_alloc; 437 uint_t vif_nrxbufs_onloan; 438 uint_t vif_nrxbufs_onloan_max; 439 uint_t vif_rxbufs_capacity; 440 vioif_rxbuf_t *vif_rxbufs_mem; 441 442 /* 443 * Transmit buffer free list and accounting: 444 */ 445 list_t vif_txbufs; 446 uint_t vif_ntxbufs_alloc; 447 uint_t vif_txbufs_capacity; 448 vioif_txbuf_t *vif_txbufs_mem; 449 450 /* 451 * These copy size thresholds are exposed as private MAC properties so 452 * that they can be tuned without rebooting. 453 */ 454 uint_t vif_rxcopy_thresh; 455 uint_t vif_txcopy_thresh; 456 457 list_t vif_ctrlbufs; 458 uint_t vif_nctrlbufs_alloc; 459 uint_t vif_ctrlbufs_capacity; 460 vioif_ctrlbuf_t *vif_ctrlbufs_mem; 461 462 /* 463 * Statistics visible through mac: 464 */ 465 uint64_t vif_ipackets; 466 uint64_t vif_opackets; 467 uint64_t vif_rbytes; 468 uint64_t vif_obytes; 469 uint64_t vif_brdcstxmt; 470 uint64_t vif_brdcstrcv; 471 uint64_t vif_multixmt; 472 uint64_t vif_multircv; 473 uint64_t vif_norecvbuf; 474 uint64_t vif_notxbuf; 475 uint64_t vif_ierrors; 476 uint64_t vif_oerrors; 477 478 /* 479 * Internal debugging statistics: 480 */ 481 uint64_t vif_rxfail_dma_handle; 482 uint64_t vif_rxfail_dma_buffer; 483 uint64_t vif_rxfail_dma_bind; 484 uint64_t vif_rxfail_chain_undersize; 485 uint64_t vif_rxfail_no_descriptors; 486 uint64_t vif_txfail_dma_handle; 487 uint64_t vif_txfail_dma_bind; 488 uint64_t vif_txfail_indirect_limit; 489 490 uint64_t vif_stat_tx_reclaim; 491 492 uint64_t vif_noctrlbuf; 493 uint64_t vif_ctrlbuf_toosmall; 494 }; 495 496 #ifdef __cplusplus 497 } 498 #endif 499 500 #endif /* _VIOIF_H */ 501