xref: /freebsd/sys/dev/netmap/netmap_offloadings.c (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
1 /*
2  * Copyright (C) 2014-2015 Vincenzo Maffione
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /* $FreeBSD$ */
28 
29 #if defined(__FreeBSD__)
30 #include <sys/cdefs.h> /* prerequisite */
31 
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>	/* defines used in kernel.h */
35 #include <sys/kernel.h>	/* types used in module initialization */
36 #include <sys/sockio.h>
37 #include <sys/malloc.h>
38 #include <sys/socketvar.h>	/* struct socket */
39 #include <sys/socket.h> /* sockaddrs */
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>	/* bus_dmamap_* */
43 #include <sys/endian.h>
44 
45 #elif defined(linux)
46 
47 #include "bsd_glue.h"
48 
49 #elif defined(__APPLE__)
50 
51 #warning OSX support is only partial
52 #include "osx_glue.h"
53 
54 #else
55 
56 #error	Unsupported platform
57 
58 #endif /* unsupported */
59 
60 #include <net/netmap.h>
61 #include <dev/netmap/netmap_kern.h>
62 
63 
64 
65 /* This routine is called by bdg_mismatch_datapath() when it finishes
66  * accumulating bytes for a segment, in order to fix some fields in the
67  * segment headers (which still contain the same content as the header
68  * of the original GSO packet). 'pkt' points to the beginning of the IP
69  * header of the segment, while 'len' is the length of the IP packet.
70  */
71 static void
72 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
73 		u_int idx, u_int segmented_bytes, u_int last_segment)
74 {
75 	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
76 	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
77 	uint16_t *check = NULL;
78 	uint8_t *check_data = NULL;
79 
80 	if (ipv4) {
81 		/* Set the IPv4 "Total Length" field. */
82 		iph->tot_len = htobe16(len);
83 		ND("ip total length %u", be16toh(ip->tot_len));
84 
85 		/* Set the IPv4 "Identification" field. */
86 		iph->id = htobe16(be16toh(iph->id) + idx);
87 		ND("ip identification %u", be16toh(iph->id));
88 
89 		/* Compute and insert the IPv4 header checksum. */
90 		iph->check = 0;
91 		iph->check = nm_os_csum_ipv4(iph);
92 		ND("IP csum %x", be16toh(iph->check));
93 	} else {
94 		/* Set the IPv6 "Payload Len" field. */
95 		ip6h->payload_len = htobe16(len-iphlen);
96 	}
97 
98 	if (tcp) {
99 		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
100 
101 		/* Set the TCP sequence number. */
102 		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
103 		ND("tcp seq %u", be32toh(tcph->seq));
104 
105 		/* Zero the PSH and FIN TCP flags if this is not the last
106 		   segment. */
107 		if (!last_segment)
108 			tcph->flags &= ~(0x8 | 0x1);
109 		ND("last_segment %u", last_segment);
110 
111 		check = &tcph->check;
112 		check_data = (uint8_t *)tcph;
113 	} else { /* UDP */
114 		struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
115 
116 		/* Set the UDP 'Length' field. */
117 		udph->len = htobe16(len-iphlen);
118 
119 		check = &udph->check;
120 		check_data = (uint8_t *)udph;
121 	}
122 
123 	/* Compute and insert TCP/UDP checksum. */
124 	*check = 0;
125 	if (ipv4)
126 		nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
127 	else
128 		nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
129 
130 	ND("TCP/UDP csum %x", be16toh(*check));
131 }
132 
133 static int
134 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
135 {
136 	uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
137 
138 	return (
139 		(gso_type != VIRTIO_NET_HDR_GSO_NONE &&
140 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
141 		 gso_type != VIRTIO_NET_HDR_GSO_UDP &&
142 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
143 		||
144 		 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
145 			       | VIRTIO_NET_HDR_F_DATA_VALID))
146 	       );
147 }
148 
149 /* The VALE mismatch datapath implementation. */
150 void
151 bdg_mismatch_datapath(struct netmap_vp_adapter *na,
152 		      struct netmap_vp_adapter *dst_na,
153 		      const struct nm_bdg_fwd *ft_p,
154 		      struct netmap_ring *dst_ring,
155 		      u_int *j, u_int lim, u_int *howmany)
156 {
157 	struct netmap_slot *dst_slot = NULL;
158 	struct nm_vnet_hdr *vh = NULL;
159 	const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
160 
161 	/* Source and destination pointers. */
162 	uint8_t *dst, *src;
163 	size_t src_len, dst_len;
164 
165 	/* Indices and counters for the destination ring. */
166 	u_int j_start = *j;
167 	u_int j_cur = j_start;
168 	u_int dst_slots = 0;
169 
170 	if (unlikely(ft_p == ft_end)) {
171 		RD(3, "No source slots to process");
172 		return;
173 	}
174 
175 	/* Init source and dest pointers. */
176 	src = ft_p->ft_buf;
177 	src_len = ft_p->ft_len;
178 	dst_slot = &dst_ring->slot[j_cur];
179 	dst = NMB(&dst_na->up, dst_slot);
180 	dst_len = src_len;
181 
182 	/* If the source port uses the offloadings, while destination doesn't,
183 	 * we grab the source virtio-net header and do the offloadings here.
184 	 */
185 	if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
186 		vh = (struct nm_vnet_hdr *)src;
187 		/* Initial sanity check on the source virtio-net header. If
188 		 * something seems wrong, just drop the packet. */
189 		if (src_len < na->up.virt_hdr_len) {
190 			RD(3, "Short src vnet header, dropping");
191 			return;
192 		}
193 		if (vnet_hdr_is_bad(vh)) {
194 			RD(3, "Bad src vnet header, dropping");
195 			return;
196 		}
197 	}
198 
199 	/* We are processing the first input slot and there is a mismatch
200 	 * between source and destination virt_hdr_len (SHL and DHL).
201 	 * When the a client is using virtio-net headers, the header length
202 	 * can be:
203 	 *    - 10: the header corresponds to the struct nm_vnet_hdr
204 	 *    - 12: the first 10 bytes correspond to the struct
205 	 *          virtio_net_hdr, and the last 2 bytes store the
206 	 *          "mergeable buffers" info, which is an optional
207 	 *	    hint that can be zeroed for compatibility
208 	 *
209 	 * The destination header is therefore built according to the
210 	 * following table:
211 	 *
212 	 * SHL | DHL | destination header
213 	 * -----------------------------
214 	 *   0 |  10 | zero
215 	 *   0 |  12 | zero
216 	 *  10 |   0 | doesn't exist
217 	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
218 	 *  12 |   0 | doesn't exist
219 	 *  12 |  10 | copied from the first 10 bytes of source header
220 	 */
221 	bzero(dst, dst_na->up.virt_hdr_len);
222 	if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
223 		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
224 	/* Skip the virtio-net headers. */
225 	src += na->up.virt_hdr_len;
226 	src_len -= na->up.virt_hdr_len;
227 	dst += dst_na->up.virt_hdr_len;
228 	dst_len = dst_na->up.virt_hdr_len + src_len;
229 
230 	/* Here it could be dst_len == 0 (which implies src_len == 0),
231 	 * so we avoid passing a zero length fragment.
232 	 */
233 	if (dst_len == 0) {
234 		ft_p++;
235 		src = ft_p->ft_buf;
236 		src_len = ft_p->ft_len;
237 		dst_len = src_len;
238 	}
239 
240 	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
241 		u_int gso_bytes = 0;
242 		/* Length of the GSO packet header. */
243 		u_int gso_hdr_len = 0;
244 		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
245 		uint8_t *gso_hdr = NULL;
246 		/* Index of the current segment. */
247 		u_int gso_idx = 0;
248 		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
249 		u_int segmented_bytes = 0;
250 		/* Is this an IPv4 or IPv6 GSO packet? */
251 		u_int ipv4 = 0;
252 		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
253 		u_int iphlen = 0;
254 		/* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
255 		u_int ethhlen = 14;
256 		/* Is this a TCP or an UDP GSO packet? */
257 		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
258 				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
259 
260 		/* Segment the GSO packet contained into the input slots (frags). */
261 		for (;;) {
262 			size_t copy;
263 
264 			if (dst_slots >= *howmany) {
265 				/* We still have work to do, but we've run out of
266 				 * dst slots, so we have to drop the packet. */
267 				RD(3, "Not enough slots, dropping GSO packet");
268 				return;
269 			}
270 
271 			/* Grab the GSO header if we don't have it. */
272 			if (!gso_hdr) {
273 				uint16_t ethertype;
274 
275 				gso_hdr = src;
276 
277 				/* Look at the 'Ethertype' field to see if this packet
278 				 * is IPv4 or IPv6, taking into account VLAN
279 				 * encapsulation. */
280 				for (;;) {
281 					if (src_len < ethhlen) {
282 						RD(3, "Short GSO fragment [eth], dropping");
283 						return;
284 					}
285 					ethertype = be16toh(*((uint16_t *)
286 							    (gso_hdr + ethhlen - 2)));
287 					if (ethertype != 0x8100) /* not 802.1q */
288 						break;
289 					ethhlen += 4;
290 				}
291 				switch (ethertype) {
292 					case 0x0800:  /* IPv4 */
293 					{
294 						struct nm_iphdr *iph = (struct nm_iphdr *)
295 									(gso_hdr + ethhlen);
296 
297 						if (src_len < ethhlen + 20) {
298 							RD(3, "Short GSO fragment "
299 							      "[IPv4], dropping");
300 							return;
301 						}
302 						ipv4 = 1;
303 						iphlen = 4 * (iph->version_ihl & 0x0F);
304 						break;
305 					}
306 					case 0x86DD:  /* IPv6 */
307 						ipv4 = 0;
308 						iphlen = 40;
309 						break;
310 					default:
311 						RD(3, "Unsupported ethertype, "
312 						      "dropping GSO packet");
313 						return;
314 				}
315 				ND(3, "type=%04x", ethertype);
316 
317 				if (src_len < ethhlen + iphlen) {
318 					RD(3, "Short GSO fragment [IP], dropping");
319 					return;
320 				}
321 
322 				/* Compute gso_hdr_len. For TCP we need to read the
323 				 * content of the 'Data Offset' field.
324 				 */
325 				if (tcp) {
326 					struct nm_tcphdr *tcph = (struct nm_tcphdr *)
327 								(gso_hdr + ethhlen + iphlen);
328 
329 					if (src_len < ethhlen + iphlen + 20) {
330 						RD(3, "Short GSO fragment "
331 								"[TCP], dropping");
332 						return;
333 					}
334 					gso_hdr_len = ethhlen + iphlen +
335 						      4 * (tcph->doff >> 4);
336 				} else {
337 					gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
338 				}
339 
340 				if (src_len < gso_hdr_len) {
341 					RD(3, "Short GSO fragment [TCP/UDP], dropping");
342 					return;
343 				}
344 
345 				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
346 								   dst_na->mfs);
347 
348 				/* Advance source pointers. */
349 				src += gso_hdr_len;
350 				src_len -= gso_hdr_len;
351 				if (src_len == 0) {
352 					ft_p++;
353 					if (ft_p == ft_end)
354 						break;
355 					src = ft_p->ft_buf;
356 					src_len = ft_p->ft_len;
357 				}
358 			}
359 
360 			/* Fill in the header of the current segment. */
361 			if (gso_bytes == 0) {
362 				memcpy(dst, gso_hdr, gso_hdr_len);
363 				gso_bytes = gso_hdr_len;
364 			}
365 
366 			/* Fill in data and update source and dest pointers. */
367 			copy = src_len;
368 			if (gso_bytes + copy > dst_na->mfs)
369 				copy = dst_na->mfs - gso_bytes;
370 			memcpy(dst + gso_bytes, src, copy);
371 			gso_bytes += copy;
372 			src += copy;
373 			src_len -= copy;
374 
375 			/* A segment is complete or we have processed all the
376 			   the GSO payload bytes. */
377 			if (gso_bytes >= dst_na->mfs ||
378 				(src_len == 0 && ft_p + 1 == ft_end)) {
379 				/* After raw segmentation, we must fix some header
380 				 * fields and compute checksums, in a protocol dependent
381 				 * way. */
382 				gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
383 						ipv4, iphlen, tcp,
384 						gso_idx, segmented_bytes,
385 						src_len == 0 && ft_p + 1 == ft_end);
386 
387 				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
388 				dst_slot->len = gso_bytes;
389 				dst_slot->flags = 0;
390 				dst_slots++;
391 				segmented_bytes += gso_bytes - gso_hdr_len;
392 
393 				gso_bytes = 0;
394 				gso_idx++;
395 
396 				/* Next destination slot. */
397 				j_cur = nm_next(j_cur, lim);
398 				dst_slot = &dst_ring->slot[j_cur];
399 				dst = NMB(&dst_na->up, dst_slot);
400 			}
401 
402 			/* Next input slot. */
403 			if (src_len == 0) {
404 				ft_p++;
405 				if (ft_p == ft_end)
406 					break;
407 				src = ft_p->ft_buf;
408 				src_len = ft_p->ft_len;
409 			}
410 		}
411 		ND(3, "%d bytes segmented", segmented_bytes);
412 
413 	} else {
414 		/* Address of a checksum field into a destination slot. */
415 		uint16_t *check = NULL;
416 		/* Accumulator for an unfolded checksum. */
417 		rawsum_t csum = 0;
418 
419 		/* Process a non-GSO packet. */
420 
421 		/* Init 'check' if necessary. */
422 		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
423 			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
424 				D("invalid checksum request");
425 			else
426 				check = (uint16_t *)(dst + vh->csum_start +
427 						vh->csum_offset);
428 		}
429 
430 		while (ft_p != ft_end) {
431 			/* Init/update the packet checksum if needed. */
432 			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
433 				if (!dst_slots)
434 					csum = nm_os_csum_raw(src + vh->csum_start,
435 								src_len - vh->csum_start, 0);
436 				else
437 					csum = nm_os_csum_raw(src, src_len, csum);
438 			}
439 
440 			/* Round to a multiple of 64 */
441 			src_len = (src_len + 63) & ~63;
442 
443 			if (ft_p->ft_flags & NS_INDIRECT) {
444 				if (copyin(src, dst, src_len)) {
445 					/* Invalid user pointer, pretend len is 0. */
446 					dst_len = 0;
447 				}
448 			} else {
449 				memcpy(dst, src, (int)src_len);
450 			}
451 			dst_slot->len = dst_len;
452 			dst_slots++;
453 
454 			/* Next destination slot. */
455 			j_cur = nm_next(j_cur, lim);
456 			dst_slot = &dst_ring->slot[j_cur];
457 			dst = NMB(&dst_na->up, dst_slot);
458 
459 			/* Next source slot. */
460 			ft_p++;
461 			src = ft_p->ft_buf;
462 			dst_len = src_len = ft_p->ft_len;
463 		}
464 
465 		/* Finalize (fold) the checksum if needed. */
466 		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
467 			*check = nm_os_csum_fold(csum);
468 		}
469 		ND(3, "using %u dst_slots", dst_slots);
470 
471 		/* A second pass on the destination slots to set the slot flags,
472 		 * using the right number of destination slots.
473 		 */
474 		while (j_start != j_cur) {
475 			dst_slot = &dst_ring->slot[j_start];
476 			dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
477 			j_start = nm_next(j_start, lim);
478 		}
479 		/* Clear NS_MOREFRAG flag on last entry. */
480 		dst_slot->flags = (dst_slots << 8);
481 	}
482 
483 	/* Update howmany and j. This is to commit the use of
484 	 * those slots in the destination ring. */
485 	if (unlikely(dst_slots > *howmany)) {
486 		D("Slot allocation error: This is a bug");
487 	}
488 	*j = j_cur;
489 	*howmany -= dst_slots;
490 }
491