1 /* 2 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 /* $FreeBSD$ */ 27 28 #if defined(__FreeBSD__) 29 #include <sys/cdefs.h> /* prerequisite */ 30 31 #include <sys/types.h> 32 #include <sys/errno.h> 33 #include <sys/param.h> /* defines used in kernel.h */ 34 #include <sys/malloc.h> /* types used in module initialization */ 35 #include <sys/kernel.h> /* types used in module initialization */ 36 #include <sys/sockio.h> 37 #include <sys/socketvar.h> /* struct socket */ 38 #include <sys/socket.h> /* sockaddrs */ 39 #include <net/if.h> 40 #include <net/if_var.h> 41 #include <machine/bus.h> /* bus_dmamap_* */ 42 #include <sys/endian.h> 43 44 #elif defined(linux) 45 46 #include "bsd_glue.h" 47 48 #elif defined(__APPLE__) 49 50 #warning OSX support is only partial 51 #include "osx_glue.h" 52 53 #else 54 55 #error Unsupported platform 56 57 #endif /* unsupported */ 58 59 #include <net/netmap.h> 60 #include <dev/netmap/netmap_kern.h> 61 62 63 64 /* This routine is called by bdg_mismatch_datapath() when it finishes 65 * accumulating bytes for a segment, in order to fix some fields in the 66 * segment headers (which still contain the same content as the header 67 * of the original GSO packet). 'buf' points to the beginning (e.g. 68 * the ethernet header) of the segment, and 'len' is its length. 69 */ 70 static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx, 71 u_int segmented_bytes, u_int last_segment, 72 u_int tcp, u_int iphlen) 73 { 74 struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14); 75 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14); 76 uint16_t *check = NULL; 77 uint8_t *check_data = NULL; 78 79 if (iphlen == 20) { 80 /* Set the IPv4 "Total Length" field. */ 81 iph->tot_len = htobe16(len-14); 82 ND("ip total length %u", be16toh(ip->tot_len)); 83 84 /* Set the IPv4 "Identification" field. */ 85 iph->id = htobe16(be16toh(iph->id) + idx); 86 ND("ip identification %u", be16toh(iph->id)); 87 88 /* Compute and insert the IPv4 header checksum. */ 89 iph->check = 0; 90 iph->check = nm_csum_ipv4(iph); 91 ND("IP csum %x", be16toh(iph->check)); 92 } else {/* if (iphlen == 40) */ 93 /* Set the IPv6 "Payload Len" field. */ 94 ip6h->payload_len = htobe16(len-14-iphlen); 95 } 96 97 if (tcp) { 98 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen); 99 100 /* Set the TCP sequence number. */ 101 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); 102 ND("tcp seq %u", be32toh(tcph->seq)); 103 104 /* Zero the PSH and FIN TCP flags if this is not the last 105 segment. */ 106 if (!last_segment) 107 tcph->flags &= ~(0x8 | 0x1); 108 ND("last_segment %u", last_segment); 109 110 check = &tcph->check; 111 check_data = (uint8_t *)tcph; 112 } else { /* UDP */ 113 struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen); 114 115 /* Set the UDP 'Length' field. */ 116 udph->len = htobe16(len-14-iphlen); 117 118 check = &udph->check; 119 check_data = (uint8_t *)udph; 120 } 121 122 /* Compute and insert TCP/UDP checksum. */ 123 *check = 0; 124 if (iphlen == 20) 125 nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check); 126 else 127 nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check); 128 129 ND("TCP/UDP csum %x", be16toh(*check)); 130 } 131 132 133 /* The VALE mismatch datapath implementation. */ 134 void bdg_mismatch_datapath(struct netmap_vp_adapter *na, 135 struct netmap_vp_adapter *dst_na, 136 struct nm_bdg_fwd *ft_p, struct netmap_ring *ring, 137 u_int *j, u_int lim, u_int *howmany) 138 { 139 struct netmap_slot *slot = NULL; 140 struct nm_vnet_hdr *vh = NULL; 141 /* Number of source slots to process. */ 142 u_int frags = ft_p->ft_frags; 143 struct nm_bdg_fwd *ft_end = ft_p + frags; 144 145 /* Source and destination pointers. */ 146 uint8_t *dst, *src; 147 size_t src_len, dst_len; 148 149 u_int j_start = *j; 150 u_int dst_slots = 0; 151 152 /* If the source port uses the offloadings, while destination doesn't, 153 * we grab the source virtio-net header and do the offloadings here. 154 */ 155 if (na->virt_hdr_len && !dst_na->virt_hdr_len) { 156 vh = (struct nm_vnet_hdr *)ft_p->ft_buf; 157 } 158 159 /* Init source and dest pointers. */ 160 src = ft_p->ft_buf; 161 src_len = ft_p->ft_len; 162 slot = &ring->slot[*j]; 163 dst = NMB(&dst_na->up, slot); 164 dst_len = src_len; 165 166 /* We are processing the first input slot and there is a mismatch 167 * between source and destination virt_hdr_len (SHL and DHL). 168 * When the a client is using virtio-net headers, the header length 169 * can be: 170 * - 10: the header corresponds to the struct nm_vnet_hdr 171 * - 12: the first 10 bytes correspond to the struct 172 * virtio_net_hdr, and the last 2 bytes store the 173 * "mergeable buffers" info, which is an optional 174 * hint that can be zeroed for compability 175 * 176 * The destination header is therefore built according to the 177 * following table: 178 * 179 * SHL | DHL | destination header 180 * ----------------------------- 181 * 0 | 10 | zero 182 * 0 | 12 | zero 183 * 10 | 0 | doesn't exist 184 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero 185 * 12 | 0 | doesn't exist 186 * 12 | 10 | copied from the first 10 bytes of source header 187 */ 188 bzero(dst, dst_na->virt_hdr_len); 189 if (na->virt_hdr_len && dst_na->virt_hdr_len) 190 memcpy(dst, src, sizeof(struct nm_vnet_hdr)); 191 /* Skip the virtio-net headers. */ 192 src += na->virt_hdr_len; 193 src_len -= na->virt_hdr_len; 194 dst += dst_na->virt_hdr_len; 195 dst_len = dst_na->virt_hdr_len + src_len; 196 197 /* Here it could be dst_len == 0 (which implies src_len == 0), 198 * so we avoid passing a zero length fragment. 199 */ 200 if (dst_len == 0) { 201 ft_p++; 202 src = ft_p->ft_buf; 203 src_len = ft_p->ft_len; 204 dst_len = src_len; 205 } 206 207 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 208 u_int gso_bytes = 0; 209 /* Length of the GSO packet header. */ 210 u_int gso_hdr_len = 0; 211 /* Pointer to the GSO packet header. Assume it is in a single fragment. */ 212 uint8_t *gso_hdr = NULL; 213 /* Index of the current segment. */ 214 u_int gso_idx = 0; 215 /* Payload data bytes segmented so far (e.g. TCP data bytes). */ 216 u_int segmented_bytes = 0; 217 /* Length of the IP header (20 if IPv4, 40 if IPv6). */ 218 u_int iphlen = 0; 219 /* Is this a TCP or an UDP GSO packet? */ 220 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) 221 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; 222 223 /* Segment the GSO packet contained into the input slots (frags). */ 224 while (ft_p != ft_end) { 225 size_t copy; 226 227 /* Grab the GSO header if we don't have it. */ 228 if (!gso_hdr) { 229 uint16_t ethertype; 230 231 gso_hdr = src; 232 233 /* Look at the 'Ethertype' field to see if this packet 234 * is IPv4 or IPv6. 235 */ 236 ethertype = be16toh(*((uint16_t *)(gso_hdr + 12))); 237 if (ethertype == 0x0800) 238 iphlen = 20; 239 else /* if (ethertype == 0x86DD) */ 240 iphlen = 40; 241 ND(3, "type=%04x", ethertype); 242 243 /* Compute gso_hdr_len. For TCP we need to read the 244 * content of the 'Data Offset' field. 245 */ 246 if (tcp) { 247 struct nm_tcphdr *tcph = 248 (struct nm_tcphdr *)&gso_hdr[14+iphlen]; 249 250 gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4); 251 } else 252 gso_hdr_len = 14 + iphlen + 8; /* UDP */ 253 254 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, 255 dst_na->mfs); 256 257 /* Advance source pointers. */ 258 src += gso_hdr_len; 259 src_len -= gso_hdr_len; 260 if (src_len == 0) { 261 ft_p++; 262 if (ft_p == ft_end) 263 break; 264 src = ft_p->ft_buf; 265 src_len = ft_p->ft_len; 266 continue; 267 } 268 } 269 270 /* Fill in the header of the current segment. */ 271 if (gso_bytes == 0) { 272 memcpy(dst, gso_hdr, gso_hdr_len); 273 gso_bytes = gso_hdr_len; 274 } 275 276 /* Fill in data and update source and dest pointers. */ 277 copy = src_len; 278 if (gso_bytes + copy > dst_na->mfs) 279 copy = dst_na->mfs - gso_bytes; 280 memcpy(dst + gso_bytes, src, copy); 281 gso_bytes += copy; 282 src += copy; 283 src_len -= copy; 284 285 /* A segment is complete or we have processed all the 286 the GSO payload bytes. */ 287 if (gso_bytes >= dst_na->mfs || 288 (src_len == 0 && ft_p + 1 == ft_end)) { 289 /* After raw segmentation, we must fix some header 290 * fields and compute checksums, in a protocol dependent 291 * way. */ 292 gso_fix_segment(dst, gso_bytes, gso_idx, 293 segmented_bytes, 294 src_len == 0 && ft_p + 1 == ft_end, 295 tcp, iphlen); 296 297 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); 298 slot->len = gso_bytes; 299 slot->flags = 0; 300 segmented_bytes += gso_bytes - gso_hdr_len; 301 302 dst_slots++; 303 304 /* Next destination slot. */ 305 *j = nm_next(*j, lim); 306 slot = &ring->slot[*j]; 307 dst = NMB(&dst_na->up, slot); 308 309 gso_bytes = 0; 310 gso_idx++; 311 } 312 313 /* Next input slot. */ 314 if (src_len == 0) { 315 ft_p++; 316 if (ft_p == ft_end) 317 break; 318 src = ft_p->ft_buf; 319 src_len = ft_p->ft_len; 320 } 321 } 322 ND(3, "%d bytes segmented", segmented_bytes); 323 324 } else { 325 /* Address of a checksum field into a destination slot. */ 326 uint16_t *check = NULL; 327 /* Accumulator for an unfolded checksum. */ 328 rawsum_t csum = 0; 329 330 /* Process a non-GSO packet. */ 331 332 /* Init 'check' if necessary. */ 333 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 334 if (unlikely(vh->csum_offset + vh->csum_start > src_len)) 335 D("invalid checksum request"); 336 else 337 check = (uint16_t *)(dst + vh->csum_start + 338 vh->csum_offset); 339 } 340 341 while (ft_p != ft_end) { 342 /* Init/update the packet checksum if needed. */ 343 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 344 if (!dst_slots) 345 csum = nm_csum_raw(src + vh->csum_start, 346 src_len - vh->csum_start, 0); 347 else 348 csum = nm_csum_raw(src, src_len, csum); 349 } 350 351 /* Round to a multiple of 64 */ 352 src_len = (src_len + 63) & ~63; 353 354 if (ft_p->ft_flags & NS_INDIRECT) { 355 if (copyin(src, dst, src_len)) { 356 /* Invalid user pointer, pretend len is 0. */ 357 dst_len = 0; 358 } 359 } else { 360 memcpy(dst, src, (int)src_len); 361 } 362 slot->len = dst_len; 363 364 dst_slots++; 365 366 /* Next destination slot. */ 367 *j = nm_next(*j, lim); 368 slot = &ring->slot[*j]; 369 dst = NMB(&dst_na->up, slot); 370 371 /* Next source slot. */ 372 ft_p++; 373 src = ft_p->ft_buf; 374 dst_len = src_len = ft_p->ft_len; 375 376 } 377 378 /* Finalize (fold) the checksum if needed. */ 379 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 380 *check = nm_csum_fold(csum); 381 } 382 ND(3, "using %u dst_slots", dst_slots); 383 384 /* A second pass on the desitations slots to set the slot flags, 385 * using the right number of destination slots. 386 */ 387 while (j_start != *j) { 388 slot = &ring->slot[j_start]; 389 slot->flags = (dst_slots << 8)| NS_MOREFRAG; 390 j_start = nm_next(j_start, lim); 391 } 392 /* Clear NS_MOREFRAG flag on last entry. */ 393 slot->flags = (dst_slots << 8); 394 } 395 396 /* Update howmany. */ 397 if (unlikely(dst_slots > *howmany)) { 398 dst_slots = *howmany; 399 D("Slot allocation error: Should never happen"); 400 } 401 *howmany -= dst_slots; 402 } 403