1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2014-2015 Vincenzo Maffione 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 30 #if defined(__FreeBSD__) 31 #include <sys/cdefs.h> /* prerequisite */ 32 33 #include <sys/types.h> 34 #include <sys/errno.h> 35 #include <sys/param.h> /* defines used in kernel.h */ 36 #include <sys/kernel.h> /* types used in module initialization */ 37 #include <sys/sockio.h> 38 #include <sys/malloc.h> 39 #include <sys/socketvar.h> /* struct socket */ 40 #include <sys/socket.h> /* sockaddrs */ 41 #include <net/if.h> 42 #include <net/if_var.h> 43 #include <machine/bus.h> /* bus_dmamap_* */ 44 #include <sys/endian.h> 45 46 #elif defined(linux) 47 48 #include "bsd_glue.h" 49 50 #elif defined(__APPLE__) 51 52 #warning OSX support is only partial 53 #include "osx_glue.h" 54 55 #else 56 57 #error Unsupported platform 58 59 #endif /* unsupported */ 60 61 #include <net/netmap.h> 62 #include <dev/netmap/netmap_kern.h> 63 64 65 66 /* This routine is called by bdg_mismatch_datapath() when it finishes 67 * accumulating bytes for a segment, in order to fix some fields in the 68 * segment headers (which still contain the same content as the header 69 * of the original GSO packet). 'pkt' points to the beginning of the IP 70 * header of the segment, while 'len' is the length of the IP packet. 71 */ 72 static void 73 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp, 74 u_int idx, u_int segmented_bytes, u_int last_segment) 75 { 76 struct nm_iphdr *iph = (struct nm_iphdr *)(pkt); 77 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt); 78 uint16_t *check = NULL; 79 uint8_t *check_data = NULL; 80 81 if (ipv4) { 82 /* Set the IPv4 "Total Length" field. */ 83 iph->tot_len = htobe16(len); 84 nm_prdis("ip total length %u", be16toh(ip->tot_len)); 85 86 /* Set the IPv4 "Identification" field. */ 87 iph->id = htobe16(be16toh(iph->id) + idx); 88 nm_prdis("ip identification %u", be16toh(iph->id)); 89 90 /* Compute and insert the IPv4 header checksum. */ 91 iph->check = 0; 92 iph->check = nm_os_csum_ipv4(iph); 93 nm_prdis("IP csum %x", be16toh(iph->check)); 94 } else { 95 /* Set the IPv6 "Payload Len" field. */ 96 ip6h->payload_len = htobe16(len-iphlen); 97 } 98 99 if (tcp) { 100 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen); 101 102 /* Set the TCP sequence number. */ 103 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); 104 nm_prdis("tcp seq %u", be32toh(tcph->seq)); 105 106 /* Zero the PSH and FIN TCP flags if this is not the last 107 segment. */ 108 if (!last_segment) 109 tcph->flags &= ~(0x8 | 0x1); 110 nm_prdis("last_segment %u", last_segment); 111 112 check = &tcph->check; 113 check_data = (uint8_t *)tcph; 114 } else { /* UDP */ 115 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen); 116 117 /* Set the UDP 'Length' field. */ 118 udph->len = htobe16(len-iphlen); 119 120 check = &udph->check; 121 check_data = (uint8_t *)udph; 122 } 123 124 /* Compute and insert TCP/UDP checksum. */ 125 *check = 0; 126 if (ipv4) 127 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check); 128 else 129 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check); 130 131 nm_prdis("TCP/UDP csum %x", be16toh(*check)); 132 } 133 134 static inline int 135 vnet_hdr_is_bad(struct nm_vnet_hdr *vh) 136 { 137 uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; 138 139 return ( 140 (gso_type != VIRTIO_NET_HDR_GSO_NONE && 141 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 && 142 gso_type != VIRTIO_NET_HDR_GSO_UDP && 143 gso_type != VIRTIO_NET_HDR_GSO_TCPV6) 144 || 145 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM 146 | VIRTIO_NET_HDR_F_DATA_VALID)) 147 ); 148 } 149 150 /* The VALE mismatch datapath implementation. */ 151 void 152 bdg_mismatch_datapath(struct netmap_vp_adapter *na, 153 struct netmap_vp_adapter *dst_na, 154 const struct nm_bdg_fwd *ft_p, 155 struct netmap_ring *dst_ring, 156 u_int *j, u_int lim, u_int *howmany) 157 { 158 struct netmap_slot *dst_slot = NULL; 159 struct nm_vnet_hdr *vh = NULL; 160 const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags; 161 162 /* Source and destination pointers. */ 163 uint8_t *dst, *src; 164 size_t src_len, dst_len; 165 166 /* Indices and counters for the destination ring. */ 167 u_int j_start = *j; 168 u_int j_cur = j_start; 169 u_int dst_slots = 0; 170 171 if (unlikely(ft_p == ft_end)) { 172 nm_prlim(1, "No source slots to process"); 173 return; 174 } 175 176 /* Init source and dest pointers. */ 177 src = ft_p->ft_buf; 178 src_len = ft_p->ft_len; 179 dst_slot = &dst_ring->slot[j_cur]; 180 dst = NMB(&dst_na->up, dst_slot); 181 dst_len = src_len; 182 183 /* If the source port uses the offloadings, while destination doesn't, 184 * we grab the source virtio-net header and do the offloadings here. 185 */ 186 if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) { 187 vh = (struct nm_vnet_hdr *)src; 188 /* Initial sanity check on the source virtio-net header. If 189 * something seems wrong, just drop the packet. */ 190 if (src_len < na->up.virt_hdr_len) { 191 nm_prlim(1, "Short src vnet header, dropping"); 192 return; 193 } 194 if (unlikely(vnet_hdr_is_bad(vh))) { 195 nm_prlim(1, "Bad src vnet header, dropping"); 196 return; 197 } 198 } 199 200 /* We are processing the first input slot and there is a mismatch 201 * between source and destination virt_hdr_len (SHL and DHL). 202 * When the a client is using virtio-net headers, the header length 203 * can be: 204 * - 10: the header corresponds to the struct nm_vnet_hdr 205 * - 12: the first 10 bytes correspond to the struct 206 * virtio_net_hdr, and the last 2 bytes store the 207 * "mergeable buffers" info, which is an optional 208 * hint that can be zeroed for compatibility 209 * 210 * The destination header is therefore built according to the 211 * following table: 212 * 213 * SHL | DHL | destination header 214 * ----------------------------- 215 * 0 | 10 | zero 216 * 0 | 12 | zero 217 * 10 | 0 | doesn't exist 218 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero 219 * 12 | 0 | doesn't exist 220 * 12 | 10 | copied from the first 10 bytes of source header 221 */ 222 bzero(dst, dst_na->up.virt_hdr_len); 223 if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len) 224 memcpy(dst, src, sizeof(struct nm_vnet_hdr)); 225 /* Skip the virtio-net headers. */ 226 src += na->up.virt_hdr_len; 227 src_len -= na->up.virt_hdr_len; 228 dst += dst_na->up.virt_hdr_len; 229 dst_len = dst_na->up.virt_hdr_len + src_len; 230 231 /* Here it could be dst_len == 0 (which implies src_len == 0), 232 * so we avoid passing a zero length fragment. 233 */ 234 if (dst_len == 0) { 235 ft_p++; 236 src = ft_p->ft_buf; 237 src_len = ft_p->ft_len; 238 dst_len = src_len; 239 } 240 241 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 242 u_int gso_bytes = 0; 243 /* Length of the GSO packet header. */ 244 u_int gso_hdr_len = 0; 245 /* Pointer to the GSO packet header. Assume it is in a single fragment. */ 246 uint8_t *gso_hdr = NULL; 247 /* Index of the current segment. */ 248 u_int gso_idx = 0; 249 /* Payload data bytes segmented so far (e.g. TCP data bytes). */ 250 u_int segmented_bytes = 0; 251 /* Is this an IPv4 or IPv6 GSO packet? */ 252 u_int ipv4 = 0; 253 /* Length of the IP header (20 if IPv4, 40 if IPv6). */ 254 u_int iphlen = 0; 255 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */ 256 u_int ethhlen = 14; 257 /* Is this a TCP or an UDP GSO packet? */ 258 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) 259 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; 260 261 /* Segment the GSO packet contained into the input slots (frags). */ 262 for (;;) { 263 size_t copy; 264 265 if (dst_slots >= *howmany) { 266 /* We still have work to do, but we've run out of 267 * dst slots, so we have to drop the packet. */ 268 nm_prdis(1, "Not enough slots, dropping GSO packet"); 269 return; 270 } 271 272 /* Grab the GSO header if we don't have it. */ 273 if (!gso_hdr) { 274 uint16_t ethertype; 275 276 gso_hdr = src; 277 278 /* Look at the 'Ethertype' field to see if this packet 279 * is IPv4 or IPv6, taking into account VLAN 280 * encapsulation. */ 281 for (;;) { 282 if (src_len < ethhlen) { 283 nm_prlim(1, "Short GSO fragment [eth], dropping"); 284 return; 285 } 286 ethertype = be16toh(*((uint16_t *) 287 (gso_hdr + ethhlen - 2))); 288 if (ethertype != 0x8100) /* not 802.1q */ 289 break; 290 ethhlen += 4; 291 } 292 switch (ethertype) { 293 case 0x0800: /* IPv4 */ 294 { 295 struct nm_iphdr *iph = (struct nm_iphdr *) 296 (gso_hdr + ethhlen); 297 298 if (src_len < ethhlen + 20) { 299 nm_prlim(1, "Short GSO fragment " 300 "[IPv4], dropping"); 301 return; 302 } 303 ipv4 = 1; 304 iphlen = 4 * (iph->version_ihl & 0x0F); 305 break; 306 } 307 case 0x86DD: /* IPv6 */ 308 ipv4 = 0; 309 iphlen = 40; 310 break; 311 default: 312 nm_prlim(1, "Unsupported ethertype, " 313 "dropping GSO packet"); 314 return; 315 } 316 nm_prdis(3, "type=%04x", ethertype); 317 318 if (src_len < ethhlen + iphlen) { 319 nm_prlim(1, "Short GSO fragment [IP], dropping"); 320 return; 321 } 322 323 /* Compute gso_hdr_len. For TCP we need to read the 324 * content of the 'Data Offset' field. 325 */ 326 if (tcp) { 327 struct nm_tcphdr *tcph = (struct nm_tcphdr *) 328 (gso_hdr + ethhlen + iphlen); 329 330 if (src_len < ethhlen + iphlen + 20) { 331 nm_prlim(1, "Short GSO fragment " 332 "[TCP], dropping"); 333 return; 334 } 335 gso_hdr_len = ethhlen + iphlen + 336 4 * (tcph->doff >> 4); 337 } else { 338 gso_hdr_len = ethhlen + iphlen + 8; /* UDP */ 339 } 340 341 if (src_len < gso_hdr_len) { 342 nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping"); 343 return; 344 } 345 346 nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, 347 dst_na->mfs); 348 349 /* Advance source pointers. */ 350 src += gso_hdr_len; 351 src_len -= gso_hdr_len; 352 if (src_len == 0) { 353 ft_p++; 354 if (ft_p == ft_end) 355 break; 356 src = ft_p->ft_buf; 357 src_len = ft_p->ft_len; 358 } 359 } 360 361 /* Fill in the header of the current segment. */ 362 if (gso_bytes == 0) { 363 memcpy(dst, gso_hdr, gso_hdr_len); 364 gso_bytes = gso_hdr_len; 365 } 366 367 /* Fill in data and update source and dest pointers. */ 368 copy = src_len; 369 if (gso_bytes + copy > dst_na->mfs) 370 copy = dst_na->mfs - gso_bytes; 371 memcpy(dst + gso_bytes, src, copy); 372 gso_bytes += copy; 373 src += copy; 374 src_len -= copy; 375 376 /* A segment is complete or we have processed all the 377 the GSO payload bytes. */ 378 if (gso_bytes >= dst_na->mfs || 379 (src_len == 0 && ft_p + 1 == ft_end)) { 380 /* After raw segmentation, we must fix some header 381 * fields and compute checksums, in a protocol dependent 382 * way. */ 383 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen, 384 ipv4, iphlen, tcp, 385 gso_idx, segmented_bytes, 386 src_len == 0 && ft_p + 1 == ft_end); 387 388 nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); 389 dst_slot->len = gso_bytes; 390 dst_slot->flags = 0; 391 dst_slots++; 392 segmented_bytes += gso_bytes - gso_hdr_len; 393 394 gso_bytes = 0; 395 gso_idx++; 396 397 /* Next destination slot. */ 398 j_cur = nm_next(j_cur, lim); 399 dst_slot = &dst_ring->slot[j_cur]; 400 dst = NMB(&dst_na->up, dst_slot); 401 } 402 403 /* Next input slot. */ 404 if (src_len == 0) { 405 ft_p++; 406 if (ft_p == ft_end) 407 break; 408 src = ft_p->ft_buf; 409 src_len = ft_p->ft_len; 410 } 411 } 412 nm_prdis(3, "%d bytes segmented", segmented_bytes); 413 414 } else { 415 /* Address of a checksum field into a destination slot. */ 416 uint16_t *check = NULL; 417 /* Accumulator for an unfolded checksum. */ 418 rawsum_t csum = 0; 419 420 /* Process a non-GSO packet. */ 421 422 /* Init 'check' if necessary. */ 423 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 424 if (unlikely(vh->csum_offset + vh->csum_start > src_len)) 425 nm_prerr("invalid checksum request"); 426 else 427 check = (uint16_t *)(dst + vh->csum_start + 428 vh->csum_offset); 429 } 430 431 while (ft_p != ft_end) { 432 /* Init/update the packet checksum if needed. */ 433 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 434 if (!dst_slots) 435 csum = nm_os_csum_raw(src + vh->csum_start, 436 src_len - vh->csum_start, 0); 437 else 438 csum = nm_os_csum_raw(src, src_len, csum); 439 } 440 441 /* Round to a multiple of 64 */ 442 src_len = (src_len + 63) & ~63; 443 444 if (ft_p->ft_flags & NS_INDIRECT) { 445 if (copyin(src, dst, src_len)) { 446 /* Invalid user pointer, pretend len is 0. */ 447 dst_len = 0; 448 } 449 } else { 450 memcpy(dst, src, (int)src_len); 451 } 452 dst_slot->len = dst_len; 453 dst_slots++; 454 455 /* Next destination slot. */ 456 j_cur = nm_next(j_cur, lim); 457 dst_slot = &dst_ring->slot[j_cur]; 458 dst = NMB(&dst_na->up, dst_slot); 459 460 /* Next source slot. */ 461 ft_p++; 462 src = ft_p->ft_buf; 463 dst_len = src_len = ft_p->ft_len; 464 } 465 466 /* Finalize (fold) the checksum if needed. */ 467 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 468 *check = nm_os_csum_fold(csum); 469 } 470 nm_prdis(3, "using %u dst_slots", dst_slots); 471 472 /* A second pass on the destination slots to set the slot flags, 473 * using the right number of destination slots. 474 */ 475 while (j_start != j_cur) { 476 dst_slot = &dst_ring->slot[j_start]; 477 dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG; 478 j_start = nm_next(j_start, lim); 479 } 480 /* Clear NS_MOREFRAG flag on last entry. */ 481 dst_slot->flags = (dst_slots << 8); 482 } 483 484 /* Update howmany and j. This is to commit the use of 485 * those slots in the destination ring. */ 486 if (unlikely(dst_slots > *howmany)) { 487 nm_prerr("bug: slot allocation error"); 488 } 489 *j = j_cur; 490 *howmany -= dst_slots; 491 } 492