1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * This header is BSD licensed so anyone can use the definitions to implement 5 * compatible drivers/servers. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of IBM nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #ifndef _VIRTIO_NET_H 34 #define _VIRTIO_NET_H 35 36 /* The feature bitmap for virtio net */ 37 #define VIRTIO_NET_F_CSUM 0x00001 /* Host handles pkts w/ partial csum */ 38 #define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/ 39 #define VIRTIO_NET_F_MAC 0x00020 /* Host has given MAC address. */ 40 #define VIRTIO_NET_F_GSO 0x00040 /* Host handles pkts w/ any GSO type */ 41 #define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */ 42 #define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */ 43 #define VIRTIO_NET_F_GUEST_ECN 0x00200 /* Guest can handle TSO[6] w/ ECN in.*/ 44 #define VIRTIO_NET_F_GUEST_UFO 0x00400 /* Guest can handle UFO in. */ 45 #define VIRTIO_NET_F_HOST_TSO4 0x00800 /* Host can handle TSOv4 in. */ 46 #define VIRTIO_NET_F_HOST_TSO6 0x01000 /* Host can handle TSOv6 in. */ 47 #define VIRTIO_NET_F_HOST_ECN 0x02000 /* Host can handle TSO[6] w/ ECN in. */ 48 #define VIRTIO_NET_F_HOST_UFO 0x04000 /* Host can handle UFO in. */ 49 #define VIRTIO_NET_F_MRG_RXBUF 0x08000 /* Host can merge receive buffers. */ 50 #define VIRTIO_NET_F_STATUS 0x10000 /* virtio_net_config.status available*/ 51 #define VIRTIO_NET_F_CTRL_VQ 0x20000 /* Control channel available */ 52 #define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */ 53 #define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */ 54 #define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ 55 #define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ 56 #define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */ 57 #define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ 58 59 #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ 60 61 struct virtio_net_config { 62 /* The config defining mac address (if VIRTIO_NET_F_MAC) */ 63 uint8_t mac[ETHER_ADDR_LEN]; 64 /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ 65 uint16_t status; 66 /* Maximum number of each of transmit and receive queues; 67 * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. 68 * Legal values are between 1 and 0x8000. 69 */ 70 uint16_t max_virtqueue_pairs; 71 } __packed; 72 73 /* 74 * This is the first element of the scatter-gather list. If you don't 75 * specify GSO or CSUM features, you can simply ignore the header. 76 */ 77 struct virtio_net_hdr { 78 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/ 79 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ 80 uint8_t flags; 81 #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ 82 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ 83 #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ 84 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ 85 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ 86 uint8_t gso_type; 87 uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ 88 uint16_t gso_size; /* Bytes to append to hdr_len per frame */ 89 uint16_t csum_start; /* Position to start checksumming from */ 90 uint16_t csum_offset; /* Offset after that to place checksum */ 91 }; 92 93 /* 94 * This is the version of the header to use when the MRG_RXBUF 95 * feature has been negotiated. 96 */ 97 struct virtio_net_hdr_mrg_rxbuf { 98 struct virtio_net_hdr hdr; 99 uint16_t num_buffers; /* Number of merged rx buffers */ 100 }; 101 102 /* 103 * Control virtqueue data structures 104 * 105 * The control virtqueue expects a header in the first sg entry 106 * and an ack/status response in the last entry. Data for the 107 * command goes in between. 108 */ 109 struct virtio_net_ctrl_hdr { 110 uint8_t class; 111 uint8_t cmd; 112 } __packed; 113 114 #define VIRTIO_NET_OK 0 115 #define VIRTIO_NET_ERR 1 116 117 /* 118 * Control the RX mode, ie. promiscuous, allmulti, etc... 119 * All commands require an "out" sg entry containing a 1 byte 120 * state value, zero = disable, non-zero = enable. Commands 121 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. 122 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. 123 */ 124 #define VIRTIO_NET_CTRL_RX 0 125 #define VIRTIO_NET_CTRL_RX_PROMISC 0 126 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 127 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 128 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 129 #define VIRTIO_NET_CTRL_RX_NOUNI 4 130 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 131 132 /* 133 * Control the MAC filter table. 134 * 135 * The MAC filter table is managed by the hypervisor, the guest should 136 * assume the size is infinite. Filtering should be considered 137 * non-perfect, ie. based on hypervisor resources, the guest may 138 * received packets from sources not specified in the filter list. 139 * 140 * In addition to the class/cmd header, the TABLE_SET command requires 141 * two out scatterlists. Each contains a 4 byte count of entries followed 142 * by a concatenated byte stream of the ETH_ALEN MAC addresses. The 143 * first sg list contains unicast addresses, the second is for multicast. 144 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature 145 * is available. 146 * 147 * The ADDR_SET command requests one out scatterlist, it contains a 148 * 6 bytes MAC address. This functionality is present if the 149 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. 150 */ 151 struct virtio_net_ctrl_mac { 152 uint32_t entries; 153 uint8_t macs[][ETHER_ADDR_LEN]; 154 } __packed; 155 156 #define VIRTIO_NET_CTRL_MAC 1 157 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 158 #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 159 160 /* 161 * Control VLAN filtering 162 * 163 * The VLAN filter table is controlled via a simple ADD/DEL interface. 164 * VLAN IDs not added may be filtered by the hypervisor. Del is the 165 * opposite of add. Both commands expect an out entry containing a 2 166 * byte VLAN ID. VLAN filtering is available with the 167 * VIRTIO_NET_F_CTRL_VLAN feature bit. 168 */ 169 #define VIRTIO_NET_CTRL_VLAN 2 170 #define VIRTIO_NET_CTRL_VLAN_ADD 0 171 #define VIRTIO_NET_CTRL_VLAN_DEL 1 172 173 /* 174 * Control link announce acknowledgement 175 * 176 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that 177 * driver has recevied the notification; device would clear the 178 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives 179 * this command. 180 */ 181 #define VIRTIO_NET_CTRL_ANNOUNCE 3 182 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 183 184 /* 185 * Control Receive Flow Steering 186 * 187 * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET enables Receive Flow 188 * Steering, specifying the number of the transmit and receive queues 189 * that will be used. After the command is consumed and acked by the 190 * device, the device will not steer new packets on receive virtqueues 191 * other than specified nor read from transmit virtqueues other than 192 * specified. Accordingly, driver should not transmit new packets on 193 * virtqueues other than specified. 194 */ 195 struct virtio_net_ctrl_mq { 196 uint16_t virtqueue_pairs; 197 } __packed; 198 199 #define VIRTIO_NET_CTRL_MQ 4 200 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 201 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 202 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 203 204 /* 205 * Use the checksum offset in the VirtIO header to set the 206 * correct CSUM_* flags. 207 */ 208 static inline int 209 virtio_net_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start, 210 struct virtio_net_hdr *hdr) 211 { 212 #if defined(INET) || defined(INET6) 213 int offset = hdr->csum_start + hdr->csum_offset; 214 #endif 215 216 /* Only do a basic sanity check on the offset. */ 217 switch (eth_type) { 218 #if defined(INET) 219 case ETHERTYPE_IP: 220 if (__predict_false(offset < ip_start + sizeof(struct ip))) 221 return (1); 222 break; 223 #endif 224 #if defined(INET6) 225 case ETHERTYPE_IPV6: 226 if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) 227 return (1); 228 break; 229 #endif 230 default: 231 /* Here we should increment the rx_csum_bad_ethtype counter. */ 232 return (1); 233 } 234 235 /* 236 * Use the offset to determine the appropriate CSUM_* flags. This is 237 * a bit dirty, but we can get by with it since the checksum offsets 238 * happen to be different. We assume the host host does not do IPv4 239 * header checksum offloading. 240 */ 241 switch (hdr->csum_offset) { 242 case offsetof(struct udphdr, uh_sum): 243 case offsetof(struct tcphdr, th_sum): 244 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 245 m->m_pkthdr.csum_data = 0xFFFF; 246 break; 247 default: 248 /* Here we should increment the rx_csum_bad_offset counter. */ 249 return (1); 250 } 251 252 return (0); 253 } 254 255 static inline int 256 virtio_net_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start, 257 struct virtio_net_hdr *hdr) 258 { 259 int offset, proto; 260 261 switch (eth_type) { 262 #if defined(INET) 263 case ETHERTYPE_IP: { 264 struct ip *ip; 265 if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) 266 return (1); 267 ip = (struct ip *)(m->m_data + ip_start); 268 proto = ip->ip_p; 269 offset = ip_start + (ip->ip_hl << 2); 270 break; 271 } 272 #endif 273 #if defined(INET6) 274 case ETHERTYPE_IPV6: 275 if (__predict_false(m->m_len < ip_start + 276 sizeof(struct ip6_hdr))) 277 return (1); 278 offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); 279 if (__predict_false(offset < 0)) 280 return (1); 281 break; 282 #endif 283 default: 284 /* Here we should increment the rx_csum_bad_ethtype counter. */ 285 return (1); 286 } 287 288 switch (proto) { 289 case IPPROTO_TCP: 290 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) 291 return (1); 292 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 293 m->m_pkthdr.csum_data = 0xFFFF; 294 break; 295 case IPPROTO_UDP: 296 if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) 297 return (1); 298 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 299 m->m_pkthdr.csum_data = 0xFFFF; 300 break; 301 default: 302 /* 303 * For the remaining protocols, FreeBSD does not support 304 * checksum offloading, so the checksum will be recomputed. 305 */ 306 #if 0 307 if_printf(ifp, "cksum offload of unsupported " 308 "protocol eth_type=%#x proto=%d csum_start=%d " 309 "csum_offset=%d\n", __func__, eth_type, proto, 310 hdr->csum_start, hdr->csum_offset); 311 #endif 312 break; 313 } 314 315 return (0); 316 } 317 318 /* 319 * Set the appropriate CSUM_* flags. Unfortunately, the information 320 * provided is not directly useful to us. The VirtIO header gives the 321 * offset of the checksum, which is all Linux needs, but this is not 322 * how FreeBSD does things. We are forced to peek inside the packet 323 * a bit. 324 * 325 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD 326 * could accept the offsets and let the stack figure it out. 327 */ 328 static inline int 329 virtio_net_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr) 330 { 331 struct ether_header *eh; 332 struct ether_vlan_header *evh; 333 uint16_t eth_type; 334 int offset, error; 335 336 if ((hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | 337 VIRTIO_NET_HDR_F_DATA_VALID)) == 0) { 338 return (0); 339 } 340 341 eh = mtod(m, struct ether_header *); 342 eth_type = ntohs(eh->ether_type); 343 if (eth_type == ETHERTYPE_VLAN) { 344 /* BMV: We should handle nested VLAN tags too. */ 345 evh = mtod(m, struct ether_vlan_header *); 346 eth_type = ntohs(evh->evl_proto); 347 offset = sizeof(struct ether_vlan_header); 348 } else 349 offset = sizeof(struct ether_header); 350 351 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 352 error = virtio_net_rx_csum_by_offset(m, eth_type, offset, hdr); 353 else 354 error = virtio_net_rx_csum_by_parse(m, eth_type, offset, hdr); 355 356 return (error); 357 } 358 359 static inline int 360 virtio_net_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start) 361 { 362 struct ether_vlan_header *evh; 363 int offset; 364 365 evh = mtod(m, struct ether_vlan_header *); 366 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 367 /* BMV: We should handle nested VLAN tags too. */ 368 *etype = ntohs(evh->evl_proto); 369 offset = sizeof(struct ether_vlan_header); 370 } else { 371 *etype = ntohs(evh->evl_encap_proto); 372 offset = sizeof(struct ether_header); 373 } 374 375 switch (*etype) { 376 #if defined(INET) 377 case ETHERTYPE_IP: { 378 struct ip *ip, iphdr; 379 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 380 m_copydata(m, offset, sizeof(struct ip), 381 (caddr_t) &iphdr); 382 ip = &iphdr; 383 } else 384 ip = (struct ip *)(m->m_data + offset); 385 *proto = ip->ip_p; 386 *start = offset + (ip->ip_hl << 2); 387 break; 388 } 389 #endif 390 #if defined(INET6) 391 case ETHERTYPE_IPV6: 392 *proto = -1; 393 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 394 /* Assert the network stack sent us a valid packet. */ 395 KASSERT(*start > offset, 396 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 397 *start, offset, *proto)); 398 break; 399 #endif 400 default: 401 /* Here we should increment the tx_csum_bad_ethtype counter. */ 402 return (EINVAL); 403 } 404 405 return (0); 406 } 407 408 static inline int 409 virtio_net_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type, 410 int offset, bool allow_ecn, struct virtio_net_hdr *hdr) 411 { 412 static struct timeval lastecn; 413 static int curecn; 414 struct tcphdr *tcp, tcphdr; 415 416 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 417 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 418 tcp = &tcphdr; 419 } else 420 tcp = (struct tcphdr *)(m->m_data + offset); 421 422 hdr->hdr_len = offset + (tcp->th_off << 2); 423 hdr->gso_size = m->m_pkthdr.tso_segsz; 424 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 425 VIRTIO_NET_HDR_GSO_TCPV6; 426 427 if (tcp->th_flags & TH_CWR) { 428 /* 429 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, 430 * ECN support is not on a per-interface basis, but globally via 431 * the net.inet.tcp.ecn.enable sysctl knob. The default is off. 432 */ 433 if (!allow_ecn) { 434 if (ppsratecheck(&lastecn, &curecn, 1)) 435 if_printf(ifp, 436 "TSO with ECN not negotiated with host\n"); 437 return (ENOTSUP); 438 } 439 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 440 } 441 442 /* Here we should increment tx_tso counter. */ 443 444 return (0); 445 } 446 447 static inline struct mbuf * 448 virtio_net_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn, 449 struct virtio_net_hdr *hdr) 450 { 451 int flags, etype, csum_start, proto, error; 452 453 flags = m->m_pkthdr.csum_flags; 454 455 error = virtio_net_tx_offload_ctx(m, &etype, &proto, &csum_start); 456 if (error) 457 goto drop; 458 459 if ((etype == ETHERTYPE_IP && (flags & (CSUM_TCP | CSUM_UDP))) || 460 (etype == ETHERTYPE_IPV6 && 461 (flags & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)))) { 462 /* 463 * We could compare the IP protocol vs the CSUM_ flag too, 464 * but that really should not be necessary. 465 */ 466 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 467 hdr->csum_start = csum_start; 468 hdr->csum_offset = m->m_pkthdr.csum_data; 469 /* Here we should increment the tx_csum counter. */ 470 } 471 472 if (flags & CSUM_TSO) { 473 if (__predict_false(proto != IPPROTO_TCP)) { 474 /* Likely failed to correctly parse the mbuf. 475 * Here we should increment the tx_tso_not_tcp 476 * counter. */ 477 goto drop; 478 } 479 480 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, 481 ("%s: mbuf %p TSO without checksum offload %#x", 482 __func__, m, flags)); 483 484 error = virtio_net_tx_offload_tso(ifp, m, etype, csum_start, 485 allow_ecn, hdr); 486 if (error) 487 goto drop; 488 } 489 490 return (m); 491 492 drop: 493 m_freem(m); 494 return (NULL); 495 } 496 497 #endif /* _VIRTIO_NET_H */ 498