1 /* 2 * Copyright (C) 2014-2015 Vincenzo Maffione 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* $FreeBSD$ */ 28 29 #if defined(__FreeBSD__) 30 #include <sys/cdefs.h> /* prerequisite */ 31 32 #include <sys/types.h> 33 #include <sys/errno.h> 34 #include <sys/param.h> /* defines used in kernel.h */ 35 #include <sys/kernel.h> /* types used in module initialization */ 36 #include <sys/sockio.h> 37 #include <sys/malloc.h> 38 #include <sys/socketvar.h> /* struct socket */ 39 #include <sys/socket.h> /* sockaddrs */ 40 #include <net/if.h> 41 #include <net/if_var.h> 42 #include <machine/bus.h> /* bus_dmamap_* */ 43 #include <sys/endian.h> 44 45 #elif defined(linux) 46 47 #include "bsd_glue.h" 48 49 #elif defined(__APPLE__) 50 51 #warning OSX support is only partial 52 #include "osx_glue.h" 53 54 #else 55 56 #error Unsupported platform 57 58 #endif /* unsupported */ 59 60 #include <net/netmap.h> 61 #include <dev/netmap/netmap_kern.h> 62 63 64 65 /* This routine is called by bdg_mismatch_datapath() when it finishes 66 * accumulating bytes for a segment, in order to fix some fields in the 67 * segment headers (which still contain the same content as the header 68 * of the original GSO packet). 'pkt' points to the beginning of the IP 69 * header of the segment, while 'len' is the length of the IP packet. 70 */ 71 static void 72 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp, 73 u_int idx, u_int segmented_bytes, u_int last_segment) 74 { 75 struct nm_iphdr *iph = (struct nm_iphdr *)(pkt); 76 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt); 77 uint16_t *check = NULL; 78 uint8_t *check_data = NULL; 79 80 if (ipv4) { 81 /* Set the IPv4 "Total Length" field. */ 82 iph->tot_len = htobe16(len); 83 ND("ip total length %u", be16toh(ip->tot_len)); 84 85 /* Set the IPv4 "Identification" field. */ 86 iph->id = htobe16(be16toh(iph->id) + idx); 87 ND("ip identification %u", be16toh(iph->id)); 88 89 /* Compute and insert the IPv4 header checksum. */ 90 iph->check = 0; 91 iph->check = nm_os_csum_ipv4(iph); 92 ND("IP csum %x", be16toh(iph->check)); 93 } else { 94 /* Set the IPv6 "Payload Len" field. */ 95 ip6h->payload_len = htobe16(len-iphlen); 96 } 97 98 if (tcp) { 99 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen); 100 101 /* Set the TCP sequence number. */ 102 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); 103 ND("tcp seq %u", be32toh(tcph->seq)); 104 105 /* Zero the PSH and FIN TCP flags if this is not the last 106 segment. */ 107 if (!last_segment) 108 tcph->flags &= ~(0x8 | 0x1); 109 ND("last_segment %u", last_segment); 110 111 check = &tcph->check; 112 check_data = (uint8_t *)tcph; 113 } else { /* UDP */ 114 struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen); 115 116 /* Set the UDP 'Length' field. */ 117 udph->len = htobe16(len-iphlen); 118 119 check = &udph->check; 120 check_data = (uint8_t *)udph; 121 } 122 123 /* Compute and insert TCP/UDP checksum. */ 124 *check = 0; 125 if (ipv4) 126 nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check); 127 else 128 nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check); 129 130 ND("TCP/UDP csum %x", be16toh(*check)); 131 } 132 133 static int 134 vnet_hdr_is_bad(struct nm_vnet_hdr *vh) 135 { 136 uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; 137 138 return ( 139 (gso_type != VIRTIO_NET_HDR_GSO_NONE && 140 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 && 141 gso_type != VIRTIO_NET_HDR_GSO_UDP && 142 gso_type != VIRTIO_NET_HDR_GSO_TCPV6) 143 || 144 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM 145 | VIRTIO_NET_HDR_F_DATA_VALID)) 146 ); 147 } 148 149 /* The VALE mismatch datapath implementation. */ 150 void 151 bdg_mismatch_datapath(struct netmap_vp_adapter *na, 152 struct netmap_vp_adapter *dst_na, 153 const struct nm_bdg_fwd *ft_p, 154 struct netmap_ring *dst_ring, 155 u_int *j, u_int lim, u_int *howmany) 156 { 157 struct netmap_slot *dst_slot = NULL; 158 struct nm_vnet_hdr *vh = NULL; 159 const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags; 160 161 /* Source and destination pointers. */ 162 uint8_t *dst, *src; 163 size_t src_len, dst_len; 164 165 /* Indices and counters for the destination ring. */ 166 u_int j_start = *j; 167 u_int j_cur = j_start; 168 u_int dst_slots = 0; 169 170 if (unlikely(ft_p == ft_end)) { 171 RD(3, "No source slots to process"); 172 return; 173 } 174 175 /* Init source and dest pointers. */ 176 src = ft_p->ft_buf; 177 src_len = ft_p->ft_len; 178 dst_slot = &dst_ring->slot[j_cur]; 179 dst = NMB(&dst_na->up, dst_slot); 180 dst_len = src_len; 181 182 /* If the source port uses the offloadings, while destination doesn't, 183 * we grab the source virtio-net header and do the offloadings here. 184 */ 185 if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) { 186 vh = (struct nm_vnet_hdr *)src; 187 /* Initial sanity check on the source virtio-net header. If 188 * something seems wrong, just drop the packet. */ 189 if (src_len < na->up.virt_hdr_len) { 190 RD(3, "Short src vnet header, dropping"); 191 return; 192 } 193 if (vnet_hdr_is_bad(vh)) { 194 RD(3, "Bad src vnet header, dropping"); 195 return; 196 } 197 } 198 199 /* We are processing the first input slot and there is a mismatch 200 * between source and destination virt_hdr_len (SHL and DHL). 201 * When the a client is using virtio-net headers, the header length 202 * can be: 203 * - 10: the header corresponds to the struct nm_vnet_hdr 204 * - 12: the first 10 bytes correspond to the struct 205 * virtio_net_hdr, and the last 2 bytes store the 206 * "mergeable buffers" info, which is an optional 207 * hint that can be zeroed for compatibility 208 * 209 * The destination header is therefore built according to the 210 * following table: 211 * 212 * SHL | DHL | destination header 213 * ----------------------------- 214 * 0 | 10 | zero 215 * 0 | 12 | zero 216 * 10 | 0 | doesn't exist 217 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero 218 * 12 | 0 | doesn't exist 219 * 12 | 10 | copied from the first 10 bytes of source header 220 */ 221 bzero(dst, dst_na->up.virt_hdr_len); 222 if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len) 223 memcpy(dst, src, sizeof(struct nm_vnet_hdr)); 224 /* Skip the virtio-net headers. */ 225 src += na->up.virt_hdr_len; 226 src_len -= na->up.virt_hdr_len; 227 dst += dst_na->up.virt_hdr_len; 228 dst_len = dst_na->up.virt_hdr_len + src_len; 229 230 /* Here it could be dst_len == 0 (which implies src_len == 0), 231 * so we avoid passing a zero length fragment. 232 */ 233 if (dst_len == 0) { 234 ft_p++; 235 src = ft_p->ft_buf; 236 src_len = ft_p->ft_len; 237 dst_len = src_len; 238 } 239 240 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 241 u_int gso_bytes = 0; 242 /* Length of the GSO packet header. */ 243 u_int gso_hdr_len = 0; 244 /* Pointer to the GSO packet header. Assume it is in a single fragment. */ 245 uint8_t *gso_hdr = NULL; 246 /* Index of the current segment. */ 247 u_int gso_idx = 0; 248 /* Payload data bytes segmented so far (e.g. TCP data bytes). */ 249 u_int segmented_bytes = 0; 250 /* Is this an IPv4 or IPv6 GSO packet? */ 251 u_int ipv4 = 0; 252 /* Length of the IP header (20 if IPv4, 40 if IPv6). */ 253 u_int iphlen = 0; 254 /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */ 255 u_int ethhlen = 14; 256 /* Is this a TCP or an UDP GSO packet? */ 257 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) 258 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; 259 260 /* Segment the GSO packet contained into the input slots (frags). */ 261 for (;;) { 262 size_t copy; 263 264 if (dst_slots >= *howmany) { 265 /* We still have work to do, but we've run out of 266 * dst slots, so we have to drop the packet. */ 267 RD(3, "Not enough slots, dropping GSO packet"); 268 return; 269 } 270 271 /* Grab the GSO header if we don't have it. */ 272 if (!gso_hdr) { 273 uint16_t ethertype; 274 275 gso_hdr = src; 276 277 /* Look at the 'Ethertype' field to see if this packet 278 * is IPv4 or IPv6, taking into account VLAN 279 * encapsulation. */ 280 for (;;) { 281 if (src_len < ethhlen) { 282 RD(3, "Short GSO fragment [eth], dropping"); 283 return; 284 } 285 ethertype = be16toh(*((uint16_t *) 286 (gso_hdr + ethhlen - 2))); 287 if (ethertype != 0x8100) /* not 802.1q */ 288 break; 289 ethhlen += 4; 290 } 291 switch (ethertype) { 292 case 0x0800: /* IPv4 */ 293 { 294 struct nm_iphdr *iph = (struct nm_iphdr *) 295 (gso_hdr + ethhlen); 296 297 if (src_len < ethhlen + 20) { 298 RD(3, "Short GSO fragment " 299 "[IPv4], dropping"); 300 return; 301 } 302 ipv4 = 1; 303 iphlen = 4 * (iph->version_ihl & 0x0F); 304 break; 305 } 306 case 0x86DD: /* IPv6 */ 307 ipv4 = 0; 308 iphlen = 40; 309 break; 310 default: 311 RD(3, "Unsupported ethertype, " 312 "dropping GSO packet"); 313 return; 314 } 315 ND(3, "type=%04x", ethertype); 316 317 if (src_len < ethhlen + iphlen) { 318 RD(3, "Short GSO fragment [IP], dropping"); 319 return; 320 } 321 322 /* Compute gso_hdr_len. For TCP we need to read the 323 * content of the 'Data Offset' field. 324 */ 325 if (tcp) { 326 struct nm_tcphdr *tcph = (struct nm_tcphdr *) 327 (gso_hdr + ethhlen + iphlen); 328 329 if (src_len < ethhlen + iphlen + 20) { 330 RD(3, "Short GSO fragment " 331 "[TCP], dropping"); 332 return; 333 } 334 gso_hdr_len = ethhlen + iphlen + 335 4 * (tcph->doff >> 4); 336 } else { 337 gso_hdr_len = ethhlen + iphlen + 8; /* UDP */ 338 } 339 340 if (src_len < gso_hdr_len) { 341 RD(3, "Short GSO fragment [TCP/UDP], dropping"); 342 return; 343 } 344 345 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, 346 dst_na->mfs); 347 348 /* Advance source pointers. */ 349 src += gso_hdr_len; 350 src_len -= gso_hdr_len; 351 if (src_len == 0) { 352 ft_p++; 353 if (ft_p == ft_end) 354 break; 355 src = ft_p->ft_buf; 356 src_len = ft_p->ft_len; 357 } 358 } 359 360 /* Fill in the header of the current segment. */ 361 if (gso_bytes == 0) { 362 memcpy(dst, gso_hdr, gso_hdr_len); 363 gso_bytes = gso_hdr_len; 364 } 365 366 /* Fill in data and update source and dest pointers. */ 367 copy = src_len; 368 if (gso_bytes + copy > dst_na->mfs) 369 copy = dst_na->mfs - gso_bytes; 370 memcpy(dst + gso_bytes, src, copy); 371 gso_bytes += copy; 372 src += copy; 373 src_len -= copy; 374 375 /* A segment is complete or we have processed all the 376 the GSO payload bytes. */ 377 if (gso_bytes >= dst_na->mfs || 378 (src_len == 0 && ft_p + 1 == ft_end)) { 379 /* After raw segmentation, we must fix some header 380 * fields and compute checksums, in a protocol dependent 381 * way. */ 382 gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen, 383 ipv4, iphlen, tcp, 384 gso_idx, segmented_bytes, 385 src_len == 0 && ft_p + 1 == ft_end); 386 387 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); 388 dst_slot->len = gso_bytes; 389 dst_slot->flags = 0; 390 dst_slots++; 391 segmented_bytes += gso_bytes - gso_hdr_len; 392 393 gso_bytes = 0; 394 gso_idx++; 395 396 /* Next destination slot. */ 397 j_cur = nm_next(j_cur, lim); 398 dst_slot = &dst_ring->slot[j_cur]; 399 dst = NMB(&dst_na->up, dst_slot); 400 } 401 402 /* Next input slot. */ 403 if (src_len == 0) { 404 ft_p++; 405 if (ft_p == ft_end) 406 break; 407 src = ft_p->ft_buf; 408 src_len = ft_p->ft_len; 409 } 410 } 411 ND(3, "%d bytes segmented", segmented_bytes); 412 413 } else { 414 /* Address of a checksum field into a destination slot. */ 415 uint16_t *check = NULL; 416 /* Accumulator for an unfolded checksum. */ 417 rawsum_t csum = 0; 418 419 /* Process a non-GSO packet. */ 420 421 /* Init 'check' if necessary. */ 422 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 423 if (unlikely(vh->csum_offset + vh->csum_start > src_len)) 424 D("invalid checksum request"); 425 else 426 check = (uint16_t *)(dst + vh->csum_start + 427 vh->csum_offset); 428 } 429 430 while (ft_p != ft_end) { 431 /* Init/update the packet checksum if needed. */ 432 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 433 if (!dst_slots) 434 csum = nm_os_csum_raw(src + vh->csum_start, 435 src_len - vh->csum_start, 0); 436 else 437 csum = nm_os_csum_raw(src, src_len, csum); 438 } 439 440 /* Round to a multiple of 64 */ 441 src_len = (src_len + 63) & ~63; 442 443 if (ft_p->ft_flags & NS_INDIRECT) { 444 if (copyin(src, dst, src_len)) { 445 /* Invalid user pointer, pretend len is 0. */ 446 dst_len = 0; 447 } 448 } else { 449 memcpy(dst, src, (int)src_len); 450 } 451 dst_slot->len = dst_len; 452 dst_slots++; 453 454 /* Next destination slot. */ 455 j_cur = nm_next(j_cur, lim); 456 dst_slot = &dst_ring->slot[j_cur]; 457 dst = NMB(&dst_na->up, dst_slot); 458 459 /* Next source slot. */ 460 ft_p++; 461 src = ft_p->ft_buf; 462 dst_len = src_len = ft_p->ft_len; 463 } 464 465 /* Finalize (fold) the checksum if needed. */ 466 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 467 *check = nm_os_csum_fold(csum); 468 } 469 ND(3, "using %u dst_slots", dst_slots); 470 471 /* A second pass on the destination slots to set the slot flags, 472 * using the right number of destination slots. 473 */ 474 while (j_start != j_cur) { 475 dst_slot = &dst_ring->slot[j_start]; 476 dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG; 477 j_start = nm_next(j_start, lim); 478 } 479 /* Clear NS_MOREFRAG flag on last entry. */ 480 dst_slot->flags = (dst_slots << 8); 481 } 482 483 /* Update howmany and j. This is to commit the use of 484 * those slots in the destination ring. */ 485 if (unlikely(dst_slots > *howmany)) { 486 D("Slot allocation error: This is a bug"); 487 } 488 *j = j_cur; 489 *howmany -= dst_slots; 490 } 491