1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2014-2015 Vincenzo Maffione 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* $FreeBSD$ */ 30 31 #if defined(__FreeBSD__) 32 #include <sys/cdefs.h> /* prerequisite */ 33 34 #include <sys/types.h> 35 #include <sys/errno.h> 36 #include <sys/param.h> /* defines used in kernel.h */ 37 #include <sys/kernel.h> /* types used in module initialization */ 38 #include <sys/sockio.h> 39 #include <sys/malloc.h> 40 #include <sys/socketvar.h> /* struct socket */ 41 #include <sys/socket.h> /* sockaddrs */ 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <machine/bus.h> /* bus_dmamap_* */ 45 #include <sys/endian.h> 46 47 #elif defined(linux) 48 49 #include "bsd_glue.h" 50 51 #elif defined(__APPLE__) 52 53 #warning OSX support is only partial 54 #include "osx_glue.h" 55 56 #else 57 58 #error Unsupported platform 59 60 #endif /* unsupported */ 61 62 #include <net/netmap.h> 63 #include <dev/netmap/netmap_kern.h> 64 65 66 67 /* This routine is called by bdg_mismatch_datapath() when it finishes 68 * accumulating bytes for a segment, in order to fix some fields in the 69 * segment headers (which still contain the same content as the header 70 * of the original GSO packet). 'pkt' points to the beginning of the IP 71 * header of the segment, while 'len' is the length of the IP packet. 72 */ 73 static void 74 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp, 75 u_int idx, u_int segmented_bytes, u_int last_segment) 76 { 77 struct nm_iphdr *iph = (struct nm_iphdr *)(pkt); 78 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt); 79 uint16_t *check = NULL; 80 uint8_t *check_data = NULL; 81 82 if (ipv4) { 83 /* Set the IPv4 "Total Length" field. */ 84 iph->tot_len = htobe16(len); 85 nm_prdis("ip total length %u", be16toh(ip->tot_len)); 86 87 /* Set the IPv4 "Identification" field. */ 88 iph->id = htobe16(be16toh(iph->id) + idx); 89 nm_prdis("ip identification %u", be16toh(iph->id)); 90 91 /* Compute and insert the IPv4 header checksum. */ 92 iph->check = 0; 93 iph->check = nm_os_csum_ipv4(iph); 94 nm_prdis("IP csum %x", be16toh(iph->check)); 95 } else { 96 /* Set the IPv6 "Payload Len" field. */ 97 ip6h->payload_len = htobe16(len-iphlen); 98 } 99 100 if (tcp) { 101 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen); 102 103 /* Set the TCP sequence number. */ 104 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); 105 nm_prdis("tcp seq %u", be32toh(tcph->seq)); 106 107 /* Zero the PSH and FIN TCP flags if this is not the last 108 segment. */ 109 if (!last_segment) 110 tcph->flags &= ~(0x8 | 0x1); 111 nm_prdis("last_segment %u", last_segment); 112 113 check = &tcph->check; 114 check_data = (uint8_t *)tcph; 115 } else { /* UDP */ 116 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen); 117 118 /* Set the UDP 'Length' field. */ 119 udph->len = htobe16(len-iphlen); 120 121 check = &udph->check; 122 check_data = (uint8_t *)udph; 123 } 124 125 /* Compute and insert TCP/UDP checksum. */ 126 *check = 0; 127 if (ipv4) 128 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check); 129 else 130 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check); 131 132 nm_prdis("TCP/UDP csum %x", be16toh(*check)); 133 } 134 135 static inline int 136 vnet_hdr_is_bad(struct nm_vnet_hdr *vh) 137 { 138 uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; 139 140 return ( 141 (gso_type != VIRTIO_NET_HDR_GSO_NONE && 142 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 && 143 gso_type != VIRTIO_NET_HDR_GSO_UDP && 144 gso_type != VIRTIO_NET_HDR_GSO_TCPV6) 145 || 146 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM 147 | VIRTIO_NET_HDR_F_DATA_VALID)) 148 ); 149 } 150 151 /* The VALE mismatch datapath implementation. */ 152 void 153 bdg_mismatch_datapath(struct netmap_vp_adapter *na, 154 struct netmap_vp_adapter *dst_na, 155 const struct nm_bdg_fwd *ft_p, 156 struct netmap_ring *dst_ring, 157 u_int *j, u_int lim, u_int *howmany) 158 { 159 struct netmap_slot *dst_slot = NULL; 160 struct nm_vnet_hdr *vh = NULL; 161 const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags; 162 163 /* Source and destination pointers. */ 164 uint8_t *dst, *src; 165 size_t src_len, dst_len; 166 167 /* Indices and counters for the destination ring. */ 168 u_int j_start = *j; 169 u_int j_cur = j_start; 170 u_int dst_slots = 0; 171 172 if (unlikely(ft_p == ft_end)) { 173 nm_prlim(1, "No source slots to process"); 174 return; 175 } 176 177 /* Init source and dest pointers. */ 178 src = ft_p->ft_buf; 179 src_len = ft_p->ft_len; 180 dst_slot = &dst_ring->slot[j_cur]; 181 dst = NMB(&dst_na->up, dst_slot); 182 dst_len = src_len; 183 184 /* If the source port uses the offloadings, while destination doesn't, 185 * we grab the source virtio-net header and do the offloadings here. 186 */ 187 if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) { 188 vh = (struct nm_vnet_hdr *)src; 189 /* Initial sanity check on the source virtio-net header. If 190 * something seems wrong, just drop the packet. */ 191 if (src_len < na->up.virt_hdr_len) { 192 nm_prlim(1, "Short src vnet header, dropping"); 193 return; 194 } 195 if (unlikely(vnet_hdr_is_bad(vh))) { 196 nm_prlim(1, "Bad src vnet header, dropping"); 197 return; 198 } 199 } 200 201 /* We are processing the first input slot and there is a mismatch 202 * between source and destination virt_hdr_len (SHL and DHL). 203 * When the a client is using virtio-net headers, the header length 204 * can be: 205 * - 10: the header corresponds to the struct nm_vnet_hdr 206 * - 12: the first 10 bytes correspond to the struct 207 * virtio_net_hdr, and the last 2 bytes store the 208 * "mergeable buffers" info, which is an optional 209 * hint that can be zeroed for compatibility 210 * 211 * The destination header is therefore built according to the 212 * following table: 213 * 214 * SHL | DHL | destination header 215 * ----------------------------- 216 * 0 | 10 | zero 217 * 0 | 12 | zero 218 * 10 | 0 | doesn't exist 219 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero 220 * 12 | 0 | doesn't exist 221 * 12 | 10 | copied from the first 10 bytes of source header 222 */ 223 bzero(dst, dst_na->up.virt_hdr_len); 224 if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len) 225 memcpy(dst, src, sizeof(struct nm_vnet_hdr)); 226 /* Skip the virtio-net headers. */ 227 src += na->up.virt_hdr_len; 228 src_len -= na->up.virt_hdr_len; 229 dst += dst_na->up.virt_hdr_len; 230 dst_len = dst_na->up.virt_hdr_len + src_len; 231 232 /* Here it could be dst_len == 0 (which implies src_len == 0), 233 * so we avoid passing a zero length fragment. 234 */ 235 if (dst_len == 0) { 236 ft_p++; 237 src = ft_p->ft_buf; 238 src_len = ft_p->ft_len; 239 dst_len = src_len; 240 } 241 242 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 243 u_int gso_bytes = 0; 244 /* Length of the GSO packet header. */ 245 u_int gso_hdr_len = 0; 246 /* Pointer to the GSO packet header. Assume it is in a single fragment. */ 247 uint8_t *gso_hdr = NULL; 248 /* Index of the current segment. */ 249 u_int gso_idx = 0; 250 /* Payload data bytes segmented so far (e.g. TCP data bytes). */ 251 u_int segmented_bytes = 0; 252 /* Is this an IPv4 or IPv6 GSO packet? */ 253 u_int ipv4 = 0; 254 /* Length of the IP header (20 if IPv4, 40 if IPv6). */ 255 u_int iphlen = 0; 256 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */ 257 u_int ethhlen = 14; 258 /* Is this a TCP or an UDP GSO packet? */ 259 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) 260 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; 261 262 /* Segment the GSO packet contained into the input slots (frags). */ 263 for (;;) { 264 size_t copy; 265 266 if (dst_slots >= *howmany) { 267 /* We still have work to do, but we've run out of 268 * dst slots, so we have to drop the packet. */ 269 nm_prdis(1, "Not enough slots, dropping GSO packet"); 270 return; 271 } 272 273 /* Grab the GSO header if we don't have it. */ 274 if (!gso_hdr) { 275 uint16_t ethertype; 276 277 gso_hdr = src; 278 279 /* Look at the 'Ethertype' field to see if this packet 280 * is IPv4 or IPv6, taking into account VLAN 281 * encapsulation. */ 282 for (;;) { 283 if (src_len < ethhlen) { 284 nm_prlim(1, "Short GSO fragment [eth], dropping"); 285 return; 286 } 287 ethertype = be16toh(*((uint16_t *) 288 (gso_hdr + ethhlen - 2))); 289 if (ethertype != 0x8100) /* not 802.1q */ 290 break; 291 ethhlen += 4; 292 } 293 switch (ethertype) { 294 case 0x0800: /* IPv4 */ 295 { 296 struct nm_iphdr *iph = (struct nm_iphdr *) 297 (gso_hdr + ethhlen); 298 299 if (src_len < ethhlen + 20) { 300 nm_prlim(1, "Short GSO fragment " 301 "[IPv4], dropping"); 302 return; 303 } 304 ipv4 = 1; 305 iphlen = 4 * (iph->version_ihl & 0x0F); 306 break; 307 } 308 case 0x86DD: /* IPv6 */ 309 ipv4 = 0; 310 iphlen = 40; 311 break; 312 default: 313 nm_prlim(1, "Unsupported ethertype, " 314 "dropping GSO packet"); 315 return; 316 } 317 nm_prdis(3, "type=%04x", ethertype); 318 319 if (src_len < ethhlen + iphlen) { 320 nm_prlim(1, "Short GSO fragment [IP], dropping"); 321 return; 322 } 323 324 /* Compute gso_hdr_len. For TCP we need to read the 325 * content of the 'Data Offset' field. 326 */ 327 if (tcp) { 328 struct nm_tcphdr *tcph = (struct nm_tcphdr *) 329 (gso_hdr + ethhlen + iphlen); 330 331 if (src_len < ethhlen + iphlen + 20) { 332 nm_prlim(1, "Short GSO fragment " 333 "[TCP], dropping"); 334 return; 335 } 336 gso_hdr_len = ethhlen + iphlen + 337 4 * (tcph->doff >> 4); 338 } else { 339 gso_hdr_len = ethhlen + iphlen + 8; /* UDP */ 340 } 341 342 if (src_len < gso_hdr_len) { 343 nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping"); 344 return; 345 } 346 347 nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, 348 dst_na->mfs); 349 350 /* Advance source pointers. */ 351 src += gso_hdr_len; 352 src_len -= gso_hdr_len; 353 if (src_len == 0) { 354 ft_p++; 355 if (ft_p == ft_end) 356 break; 357 src = ft_p->ft_buf; 358 src_len = ft_p->ft_len; 359 } 360 } 361 362 /* Fill in the header of the current segment. */ 363 if (gso_bytes == 0) { 364 memcpy(dst, gso_hdr, gso_hdr_len); 365 gso_bytes = gso_hdr_len; 366 } 367 368 /* Fill in data and update source and dest pointers. */ 369 copy = src_len; 370 if (gso_bytes + copy > dst_na->mfs) 371 copy = dst_na->mfs - gso_bytes; 372 memcpy(dst + gso_bytes, src, copy); 373 gso_bytes += copy; 374 src += copy; 375 src_len -= copy; 376 377 /* A segment is complete or we have processed all the 378 the GSO payload bytes. */ 379 if (gso_bytes >= dst_na->mfs || 380 (src_len == 0 && ft_p + 1 == ft_end)) { 381 /* After raw segmentation, we must fix some header 382 * fields and compute checksums, in a protocol dependent 383 * way. */ 384 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen, 385 ipv4, iphlen, tcp, 386 gso_idx, segmented_bytes, 387 src_len == 0 && ft_p + 1 == ft_end); 388 389 nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); 390 dst_slot->len = gso_bytes; 391 dst_slot->flags = 0; 392 dst_slots++; 393 segmented_bytes += gso_bytes - gso_hdr_len; 394 395 gso_bytes = 0; 396 gso_idx++; 397 398 /* Next destination slot. */ 399 j_cur = nm_next(j_cur, lim); 400 dst_slot = &dst_ring->slot[j_cur]; 401 dst = NMB(&dst_na->up, dst_slot); 402 } 403 404 /* Next input slot. */ 405 if (src_len == 0) { 406 ft_p++; 407 if (ft_p == ft_end) 408 break; 409 src = ft_p->ft_buf; 410 src_len = ft_p->ft_len; 411 } 412 } 413 nm_prdis(3, "%d bytes segmented", segmented_bytes); 414 415 } else { 416 /* Address of a checksum field into a destination slot. */ 417 uint16_t *check = NULL; 418 /* Accumulator for an unfolded checksum. */ 419 rawsum_t csum = 0; 420 421 /* Process a non-GSO packet. */ 422 423 /* Init 'check' if necessary. */ 424 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 425 if (unlikely(vh->csum_offset + vh->csum_start > src_len)) 426 nm_prerr("invalid checksum request"); 427 else 428 check = (uint16_t *)(dst + vh->csum_start + 429 vh->csum_offset); 430 } 431 432 while (ft_p != ft_end) { 433 /* Init/update the packet checksum if needed. */ 434 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 435 if (!dst_slots) 436 csum = nm_os_csum_raw(src + vh->csum_start, 437 src_len - vh->csum_start, 0); 438 else 439 csum = nm_os_csum_raw(src, src_len, csum); 440 } 441 442 /* Round to a multiple of 64 */ 443 src_len = (src_len + 63) & ~63; 444 445 if (ft_p->ft_flags & NS_INDIRECT) { 446 if (copyin(src, dst, src_len)) { 447 /* Invalid user pointer, pretend len is 0. */ 448 dst_len = 0; 449 } 450 } else { 451 memcpy(dst, src, (int)src_len); 452 } 453 dst_slot->len = dst_len; 454 dst_slots++; 455 456 /* Next destination slot. */ 457 j_cur = nm_next(j_cur, lim); 458 dst_slot = &dst_ring->slot[j_cur]; 459 dst = NMB(&dst_na->up, dst_slot); 460 461 /* Next source slot. */ 462 ft_p++; 463 src = ft_p->ft_buf; 464 dst_len = src_len = ft_p->ft_len; 465 } 466 467 /* Finalize (fold) the checksum if needed. */ 468 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 469 *check = nm_os_csum_fold(csum); 470 } 471 nm_prdis(3, "using %u dst_slots", dst_slots); 472 473 /* A second pass on the destination slots to set the slot flags, 474 * using the right number of destination slots. 475 */ 476 while (j_start != j_cur) { 477 dst_slot = &dst_ring->slot[j_start]; 478 dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG; 479 j_start = nm_next(j_start, lim); 480 } 481 /* Clear NS_MOREFRAG flag on last entry. */ 482 dst_slot->flags = (dst_slots << 8); 483 } 484 485 /* Update howmany and j. This is to commit the use of 486 * those slots in the destination ring. */ 487 if (unlikely(dst_slots > *howmany)) { 488 nm_prerr("bug: slot allocation error"); 489 } 490 *j = j_cur; 491 *howmany -= dst_slots; 492 } 493