/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * VIRTIO NETWORK DRIVER
 */

#ifndef _VIOIF_H
#define	_VIOIF_H

#include "virtio.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * VIRTIO NETWORK CONFIGURATION REGISTERS
 *
 * These are offsets into the device-specific configuration space available
 * through the virtio_dev_*() family of functions.
 */
#define	VIRTIO_NET_CONFIG_MAC		0x00	/* 48 R/W */
#define	VIRTIO_NET_CONFIG_STATUS	0x06	/* 16 R */
#define	VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	0x08	/* 16 R */
#define	VIRTIO_NET_CONFIG_MTU		0x0A	/* 16 R */

/*
 * VIRTIO NETWORK VIRTQUEUES
 *
 * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is
 * negotiated with the device.
 */
#define	VIRTIO_NET_VIRTQ_RX		0
#define	VIRTIO_NET_VIRTQ_TX		1
#define	VIRTIO_NET_VIRTQ_CONTROL	2

/*
 * VIRTIO NETWORK FEATURE BITS
 */

/*
 * CSUM, GUEST_CSUM:
 *	Partial checksum support.  These features signal that the device will
 *	accept packets with partial checksums (CSUM), and that the driver will
 *	accept packets with partial checksums (GUEST_CSUM).  These features
 *	combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the
 *	"csum_start" and "csum_offset" fields, in the virtio net header.
 */
#define	VIRTIO_NET_F_CSUM		(1ULL << 0)
#define	VIRTIO_NET_F_GUEST_CSUM		(1ULL << 1)

/*
 * MTU:
 *	The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU.  If
 *	this is not negotiated, we allow the largest possible MTU that our
 *	buffer allocations support in case jumbo frames are tacitly supported
 *	by the device.  The default MTU is always 1500.
 */
#define	VIRTIO_NET_F_MTU		(1ULL << 3)

/*
 * MAC:
 *	The device has an assigned primary MAC address.  If this feature bit is
 *	not set, the driver must provide a locally assigned MAC address.  See
 *	IEEE 802, "48-bit universal LAN MAC addresses" for more details on
 *	assignment.
 */
#define	VIRTIO_NET_F_MAC		(1ULL << 5)

/*
 * GUEST_TSO4, GUEST_TSO6, GUEST_UFO:
 *	Inbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large
 *	combined TCP (v4 or v6) packets, or reassembled UDP fragments.
 */
#define	VIRTIO_NET_F_GUEST_TSO4		(1ULL << 7)
#define	VIRTIO_NET_F_GUEST_TSO6		(1ULL << 8)
#define	VIRTIO_NET_F_GUEST_UFO		(1ULL << 10)

/*
 * GUEST_ECN:
 *	Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6.
 *	This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN
 *	bit in the "gso_type" of the virtio net header.  This bit tells the
 *	driver that the Explicit Congestion Notification (ECN) bit was set in
 *	the original TCP packets.
 */
#define	VIRTIO_NET_F_GUEST_ECN		(1ULL << 9)

/*
 * HOST_TSO4, HOST_TSO6, HOST_UFO:
 *	Outbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_CSUM and signal that the device will accept large combined
 *	TCP (v4 or v6) packets that require segmentation offload, or large
 *	combined UDP packets that require fragmentation offload.
 */
#define	VIRTIO_NET_F_HOST_TSO4		(1ULL << 11)
#define	VIRTIO_NET_F_HOST_TSO6		(1ULL << 12)
#define	VIRTIO_NET_F_HOST_UFO		(1ULL << 14)

/*
 * HOST_ECN:
 *	Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6.
 *	This feature means the device will accept packets that both require
 *	segmentation offload and have the Explicit Congestion Notification
 *	(ECN) bit set.  If this feature is not present, the device must not
 *	send large segments that require ECN to be set.
 */
#define	VIRTIO_NET_F_HOST_ECN		(1ULL << 13)

/*
 * GSO:
 *	The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6,
 *	and HOST_ECN.  This is only useful for legacy devices; newer devices
 *	should be using the more specific bits above.
 */
#define	VIRTIO_NET_F_GSO		(1ULL << 6)

/*
 * MRG_RXBUF:
 *	This feature allows the receipt of large packets without needing to
 *	allocate large buffers.  The "virtio_net_hdr" will include an extra
 *	value: the number of buffers to gang together.
 */
#define	VIRTIO_NET_F_MRG_RXBUF		(1ULL << 15)

/*
 * STATUS:
 *	The VIRTIO_NET_CONFIG_STATUS configuration register is available, which
 *	allows the driver to read the link state from the device.
 */
#define	VIRTIO_NET_F_STATUS		(1ULL << 16)

/*
 * CTRL_VQ, CTRL_RX, CTRL_VLAN:
 *	These features signal that the device exposes the control queue
 *	(VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the
 *	control queue supports extra commands (CTRL_RX, CTRL_VLAN).
 */
#define	VIRTIO_NET_F_CTRL_VQ		(1ULL << 17)
#define	VIRTIO_NET_F_CTRL_RX		(1ULL << 18)
#define	VIRTIO_NET_F_CTRL_VLAN		(1ULL << 19)
#define	VIRTIO_NET_F_CTRL_RX_EXTRA	(1ULL << 20)

/*
 * These features are supported by the driver and we will request them from the
 * device.  Note that we do not currently request GUEST_CSUM, as the driver
 * does not presently support receiving frames with any offload features from
 * the device.
 */
#define	VIRTIO_NET_WANTED_FEATURES	(VIRTIO_NET_F_CSUM |		\
					VIRTIO_NET_F_GSO |		\
					VIRTIO_NET_F_HOST_TSO4 |	\
					VIRTIO_NET_F_HOST_ECN |		\
					VIRTIO_NET_F_MAC |		\
					VIRTIO_NET_F_MTU)

/*
 * VIRTIO NETWORK HEADER
 *
 * This structure appears at the start of each transmit or receive packet
 * buffer.
 */
struct virtio_net_hdr {
	uint8_t				vnh_flags;
	uint8_t				vnh_gso_type;
	uint16_t			vnh_hdr_len;
	uint16_t			vnh_gso_size;
	uint16_t			vnh_csum_start;
	uint16_t			vnh_csum_offset;
} __packed;

/*
 * VIRTIO NETWORK HEADER: FLAGS (vnh_flags)
 */
#define	VIRTIO_NET_HDR_F_NEEDS_CSUM	0x01

/*
 * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type)
 *
 * Each of these is an offload type, except for the ECN value which is
 * logically OR-ed with one of the other types.
 */
#define	VIRTIO_NET_HDR_GSO_NONE		0
#define	VIRTIO_NET_HDR_GSO_TCPV4	1
#define	VIRTIO_NET_HDR_GSO_UDP		3
#define	VIRTIO_NET_HDR_GSO_TCPV6	4
#define	VIRTIO_NET_HDR_GSO_ECN		0x80


/*
 * DRIVER PARAMETERS
 */

/*
 * At attach, we allocate a fixed pool of buffers for receipt and transmission
 * of frames.  The maximum number of buffers of each type that we will allocate
 * is specified here.  If the ring size is smaller than this number, we will
 * use the ring size instead.
 */
#define	VIRTIO_NET_TX_BUFS		256
#define	VIRTIO_NET_RX_BUFS		256

/*
 * The virtio net header and the first buffer segment share the same DMA
 * allocation.  We round up the virtio header size to a multiple of 4 and add 2
 * bytes so that the IP header, which starts immediately after the 14 or 18
 * byte Ethernet header, is then correctly aligned:
 *
 *   0                10  16  18                      32/36
 *   | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ...
 *
 * Note that for this to work correctly, the DMA allocation must also be 4 byte
 * aligned.
 */
#define	VIOIF_HEADER_ALIGN		4
#define	VIOIF_HEADER_SKIP		(P2ROUNDUP(	\
					    sizeof (struct virtio_net_hdr), \
					    VIOIF_HEADER_ALIGN) + 2)

/*
 * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says
 * we must be able to accept a 1514 byte packet, or if any segmentation offload
 * features have been negotiated a 65550 byte packet.  To keep things simple,
 * we'll assume segmentation offload is possible in most cases.  In addition to
 * the packet payload, we need to account for the Ethernet header and the
 * virtio_net_hdr.
 */
#define	VIOIF_RX_DATA_SIZE		65550
#define	VIOIF_RX_BUF_SIZE		(VIOIF_RX_DATA_SIZE + \
					    sizeof (struct ether_header) + \
					    VIOIF_HEADER_SKIP)

/*
 * If we assume that a large allocation will probably have mostly 4K page sized
 * cookies, 64 segments allows us 256KB for a single frame.  We're in control
 * of the allocation we use for receive buffers, so this value only has an
 * impact on the length of chain we're able to create for external transmit
 * buffer mappings.
 */
#define	VIOIF_MAX_SEGS			64

/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * there.  Bigger packets are mapped, packets with multiple
 * cookies are mapped as indirect buffers.
 */
#define	VIOIF_TX_INLINE_SIZE		(2 * 1024)


/*
 * TYPE DEFINITIONS
 */

typedef struct vioif vioif_t;

/*
 * Receive buffers are allocated in advance as a combination of DMA memory and
 * a descriptor chain.  Receive buffers can be loaned to the networking stack
 * to avoid copying, and this object contains the free routine to pass to
 * desballoc().
 *
 * When receive buffers are not in use, they are linked into the per-instance
 * free list, "vif_rxbufs" via "rb_link".  Under normal conditions, we expect
 * the free list to be empty much of the time; most buffers will be in the ring
 * or on loan.
 */
typedef struct vioif_rxbuf {
	vioif_t				*rb_vioif;
	frtn_t				rb_frtn;

	virtio_dma_t			*rb_dma;
	virtio_chain_t			*rb_chain;

	list_node_t			rb_link;
} vioif_rxbuf_t;

/*
 * Transmit buffers are also allocated in advance.  DMA memory is allocated for
 * the virtio net header, and to hold small packets.  Larger packets are mapped
 * from storage loaned to the driver by the network stack.
 *
 * When transmit buffers are not in use, they are linked into the per-instance
 * free list, "vif_txbufs" via "tb_link".
 */
typedef struct vioif_txbuf {
	mblk_t				*tb_mp;

	/*
	 * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio
	 * net header, and to hold copied (rather than mapped) packet data.
	 */
	virtio_dma_t			*tb_dma;
	virtio_chain_t			*tb_chain;

	/*
	 * External buffer mapping.  The capacity is fixed at allocation time,
	 * and "tb_ndmaext" tracks the current number of mappings.
	 */
	virtio_dma_t			**tb_dmaext;
	uint_t				tb_dmaext_capacity;
	uint_t				tb_ndmaext;

	list_node_t			tb_link;
} vioif_txbuf_t;

typedef enum vioif_runstate {
	VIOIF_RUNSTATE_STOPPED = 1,
	VIOIF_RUNSTATE_STOPPING,
	VIOIF_RUNSTATE_RUNNING
} vioif_runstate_t;

/*
 * Per-instance driver object.
 */
struct vioif {
	dev_info_t			*vif_dip;
	virtio_t			*vif_virtio;

	kmutex_t			vif_mutex;

	/*
	 * The NIC is considered RUNNING between the mc_start(9E) and
	 * mc_stop(9E) calls.  Otherwise it is STOPPING (while draining
	 * resources) then STOPPED.  When not RUNNING, we will drop incoming
	 * frames and refuse to insert more receive buffers into the receive
	 * queue.
	 */
	vioif_runstate_t		vif_runstate;

	mac_handle_t			vif_mac_handle;

	virtio_queue_t			*vif_rx_vq;
	virtio_queue_t			*vif_tx_vq;

	/* TX virtqueue management resources */
	boolean_t			vif_tx_corked;
	boolean_t			vif_tx_drain;
	timeout_id_t			vif_tx_reclaim_tid;

	/*
	 * Configured offload features:
	 */
	unsigned int			vif_tx_csum:1;
	unsigned int			vif_tx_tso4:1;

	/*
	 * For debugging, it is useful to know whether the MAC address we
	 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
	 * was otherwise generated or set from within the guest.
	 */
	unsigned int			vif_mac_from_host:1;

	uint_t				vif_mtu;
	uint_t				vif_mtu_max;
	uint8_t				vif_mac[ETHERADDRL];

	/*
	 * Receive buffer free list and accounting:
	 */
	list_t				vif_rxbufs;
	uint_t				vif_nrxbufs_alloc;
	uint_t				vif_nrxbufs_onloan;
	uint_t				vif_nrxbufs_onloan_max;
	uint_t				vif_rxbufs_capacity;
	vioif_rxbuf_t			*vif_rxbufs_mem;

	/*
	 * Transmit buffer free list and accounting:
	 */
	list_t				vif_txbufs;
	uint_t				vif_ntxbufs_alloc;
	uint_t				vif_txbufs_capacity;
	vioif_txbuf_t			*vif_txbufs_mem;

	/*
	 * These copy size thresholds are exposed as private MAC properties so
	 * that they can be tuned without rebooting.
	 */
	uint_t				vif_rxcopy_thresh;
	uint_t				vif_txcopy_thresh;

	/*
	 * Statistics visible through mac:
	 */
	uint64_t			vif_ipackets;
	uint64_t			vif_opackets;
	uint64_t			vif_rbytes;
	uint64_t			vif_obytes;
	uint64_t			vif_brdcstxmt;
	uint64_t			vif_brdcstrcv;
	uint64_t			vif_multixmt;
	uint64_t			vif_multircv;
	uint64_t			vif_norecvbuf;
	uint64_t			vif_notxbuf;
	uint64_t			vif_ierrors;
	uint64_t			vif_oerrors;

	/*
	 * Internal debugging statistics:
	 */
	uint64_t			vif_rxfail_dma_handle;
	uint64_t			vif_rxfail_dma_buffer;
	uint64_t			vif_rxfail_dma_bind;
	uint64_t			vif_rxfail_chain_undersize;
	uint64_t			vif_rxfail_no_descriptors;
	uint64_t			vif_txfail_dma_handle;
	uint64_t			vif_txfail_dma_bind;
	uint64_t			vif_txfail_indirect_limit;

	uint64_t			vif_stat_tx_reclaim;
};

#ifdef __cplusplus
}
#endif

#endif /* _VIOIF_H */