xref: /illumos-gate/usr/src/uts/common/os/ip_cksum.c (revision 2c65701281156d8db8fa6f6f9c5faa6ca021c621)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2021 Joyent, Inc.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/inttypes.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/strsun.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/vtrace.h>
36 #include <inet/sctp_crc32.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 
40 #include <sys/multidata.h>
41 #include <sys/multidata_impl.h>
42 
43 extern unsigned int ip_ocsum(ushort_t *, int, unsigned int);
44 
45 /*
46  * Checksum routine for Internet Protocol family headers.
47  * This routine is very heavily used in the network
48  * code and should be modified for each CPU to be as fast as possible.
49  */
50 
/* Number of bytes between an mblk's read and write pointers. */
#define	mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr)

/*
 * Parity tests on the least significant bit of a value.  They are mostly
 * applied to pointers (alignment checks) but work on integers as well,
 * because the argument is first converted to uintptr_t, which is wide
 * enough to hold either.  Both macros evaluate to 0 or 1.
 */
#define	is_even(p) (((uintptr_t)(p) & 0x1) == 0)
#define	is_odd(p) (!is_even(p))
60 
61 
62 #ifdef ZC_TEST
63 /*
64  * Disable the TCP s/w cksum.
65  * XXX - This is just a hack for testing purpose. Don't use it for
66  * anything else!
67  */
68 int noswcksum = 0;
69 #endif
70 /*
71  * Note: this does not ones-complement the result since it is used
72  * when computing partial checksums.
73  * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
74  * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
75  *
76  * Note: for STRUIO_IP special mblks some data may have been previously
77  *	 checksumed, this routine will handle additional data prefixed within
78  *	 an mblk or b_cont (chained) mblk(s). This routine will also handle
79  *	 suffixed b_cont mblk(s) and data suffixed within an mblk.
80  */
81 unsigned int
82 ip_cksum(mblk_t *mp, int offset, uint_t sum)
83 {
84 	ushort_t *w;
85 	ssize_t	mlen;
86 	int pmlen;
87 	mblk_t *pmp;
88 	dblk_t *dp = mp->b_datap;
89 	ushort_t psum = 0;
90 
91 #ifdef ZC_TEST
92 	if (noswcksum)
93 		return (0xffff);
94 #endif
95 	ASSERT(dp);
96 
97 	if (mp->b_cont == NULL) {
98 		/*
99 		 * May be fast-path, only one mblk.
100 		 */
101 		w = (ushort_t *)(mp->b_rptr + offset);
102 		if (dp->db_struioflag & STRUIO_IP) {
103 			/*
104 			 * Checksum any data not already done by
105 			 * the caller and add in any partial checksum.
106 			 */
107 			if ((offset > dp->db_cksumstart) ||
108 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
109 			    dp->db_cksumend)) {
110 				/*
111 				 * Mblk data pointers aren't inclusive
112 				 * of uio data, so disregard checksum.
113 				 *
114 				 * not using all of data in dblk make sure
115 				 * not use to use the precalculated checksum
116 				 * in this case.
117 				 */
118 				dp->db_struioflag &= ~STRUIO_IP;
119 				goto norm;
120 			}
121 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
122 			psum = *(ushort_t *)dp->db_struioun.data;
123 			if ((mlen = dp->db_cksumstart - offset) < 0)
124 				mlen = 0;
125 			if (is_odd(mlen))
126 				goto slow;
127 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
128 			    dp->db_cksumend != dp->db_cksumstuff) {
129 				/*
130 				 * There is prefix data to do and some uio
131 				 * data has already been checksumed and there
132 				 * is more uio data to do, so do the prefix
133 				 * data first, then do the remainder of the
134 				 * uio data.
135 				 */
136 				sum = ip_ocsum(w, mlen >> 1, sum);
137 				w = (ushort_t *)(mp->b_rptr +
138 				    dp->db_cksumstuff);
139 				if (is_odd(w)) {
140 					pmp = mp;
141 					goto slow1;
142 				}
143 				mlen = dp->db_cksumend - dp->db_cksumstuff;
144 			} else if (dp->db_cksumend != dp->db_cksumstuff) {
145 				/*
146 				 * There may be uio data to do, if there is
147 				 * prefix data to do then add in all of the
148 				 * uio data (if any) to do, else just do any
149 				 * uio data.
150 				 */
151 				if (mlen)
152 					mlen += dp->db_cksumend
153 					    - dp->db_cksumstuff;
154 				else {
155 					w = (ushort_t *)(mp->b_rptr +
156 					    dp->db_cksumstuff);
157 					if (is_odd(w))
158 						goto slow;
159 					mlen = dp->db_cksumend
160 					    - dp->db_cksumstuff;
161 				}
162 			} else if (mlen == 0)
163 				return (psum);
164 
165 			if (is_odd(mlen))
166 				goto slow;
167 			sum += psum;
168 		} else {
169 			/*
170 			 * Checksum all data not already done by the caller.
171 			 */
172 		norm:
173 			mlen = mp->b_wptr - (uchar_t *)w;
174 			if (is_odd(mlen))
175 				goto slow;
176 		}
177 		ASSERT(is_even(w));
178 		ASSERT(is_even(mlen));
179 		return (ip_ocsum(w, mlen >> 1, sum));
180 	}
181 	if (dp->db_struioflag & STRUIO_IP)
182 		psum = *(ushort_t *)dp->db_struioun.data;
183 slow:
184 	pmp = 0;
185 slow1:
186 	mlen = 0;
187 	pmlen = 0;
188 	for (; ; ) {
189 		/*
190 		 * Each trip around loop adds in word(s) from one mbuf segment
191 		 * (except for when pmp == mp, then its two partial trips).
192 		 */
193 		w = (ushort_t *)(mp->b_rptr + offset);
194 		if (pmp) {
195 			/*
196 			 * This is the second trip around for this mblk.
197 			 */
198 			pmp = 0;
199 			mlen = 0;
200 			goto douio;
201 		} else if (dp->db_struioflag & STRUIO_IP) {
202 			/*
203 			 * Checksum any data not already done by the
204 			 * caller and add in any partial checksum.
205 			 */
206 			if ((offset > dp->db_cksumstart) ||
207 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
208 			    dp->db_cksumend)) {
209 				/*
210 				 * Mblk data pointers aren't inclusive
211 				 * of uio data, so disregard checksum.
212 				 *
213 				 * not using all of data in dblk make sure
214 				 * not use to use the precalculated checksum
215 				 * in this case.
216 				 */
217 				dp->db_struioflag &= ~STRUIO_IP;
218 				goto snorm;
219 			}
220 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
221 			if ((mlen = dp->db_cksumstart - offset) < 0)
222 				mlen = 0;
223 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
224 				/*
225 				 * There is prefix data too do and some
226 				 * uio data has already been checksumed,
227 				 * so do the prefix data only this trip.
228 				 */
229 				pmp = mp;
230 			} else {
231 				/*
232 				 * Add in any partial cksum (if any) and
233 				 * do the remainder of the uio data.
234 				 */
235 				int odd;
236 			douio:
237 				odd = is_odd(dp->db_cksumstuff -
238 				    dp->db_cksumstart);
239 				if (pmlen == -1) {
240 					/*
241 					 * Previous mlen was odd, so swap
242 					 * the partial checksum bytes.
243 					 */
244 					sum += ((psum << 8) & 0xffff)
245 					    | (psum >> 8);
246 					if (odd)
247 						pmlen = 0;
248 				} else {
249 					sum += psum;
250 					if (odd)
251 						pmlen = -1;
252 				}
253 				if (dp->db_cksumend != dp->db_cksumstuff) {
254 					/*
255 					 * If prefix data to do and then all
256 					 * the uio data nees to be checksumed,
257 					 * else just do any uio data.
258 					 */
259 					if (mlen)
260 						mlen += dp->db_cksumend
261 						    - dp->db_cksumstuff;
262 					else {
263 						w = (ushort_t *)(mp->b_rptr +
264 						    dp->db_cksumstuff);
265 						mlen = dp->db_cksumend -
266 						    dp->db_cksumstuff;
267 					}
268 				}
269 			}
270 		} else {
271 			/*
272 			 * Checksum all of the mblk data.
273 			 */
274 		snorm:
275 			mlen = mp->b_wptr - (uchar_t *)w;
276 		}
277 
278 		mp = mp->b_cont;
279 		if (mlen > 0 && pmlen == -1) {
280 			/*
281 			 * There is a byte left from the last
282 			 * segment; add it into the checksum.
283 			 * Don't have to worry about a carry-
284 			 * out here because we make sure that
285 			 * high part of (32 bit) sum is small
286 			 * below.
287 			 */
288 #ifdef _LITTLE_ENDIAN
289 			sum += *(uchar_t *)w << 8;
290 #else
291 			sum += *(uchar_t *)w;
292 #endif
293 			w = (ushort_t *)((char *)w + 1);
294 			mlen--;
295 			pmlen = 0;
296 		}
297 		if (mlen > 0) {
298 			if (is_even(w)) {
299 				sum = ip_ocsum(w, mlen>>1, sum);
300 				w += mlen>>1;
301 				/*
302 				 * If we had an odd number of bytes,
303 				 * then the last byte goes in the high
304 				 * part of the sum, and we take the
305 				 * first byte to the low part of the sum
306 				 * the next time around the loop.
307 				 */
308 				if (is_odd(mlen)) {
309 #ifdef _LITTLE_ENDIAN
310 					sum += *(uchar_t *)w;
311 #else
312 					sum += *(uchar_t *)w << 8;
313 #endif
314 					pmlen = -1;
315 				}
316 			} else {
317 				ushort_t swsum;
318 #ifdef _LITTLE_ENDIAN
319 				sum += *(uchar_t *)w;
320 #else
321 				sum += *(uchar_t *)w << 8;
322 #endif
323 				mlen--;
324 				w = (ushort_t *)(1 + (uintptr_t)w);
325 
326 				/* Do a separate checksum and copy operation */
327 				swsum = ip_ocsum(w, mlen>>1, 0);
328 				sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
329 				w += mlen>>1;
330 				/*
331 				 * If we had an even number of bytes,
332 				 * then the last byte goes in the low
333 				 * part of the sum.  Otherwise we had an
334 				 * odd number of bytes and we take the first
335 				 * byte to the low part of the sum the
336 				 * next time around the loop.
337 				 */
338 				if (is_odd(mlen)) {
339 #ifdef _LITTLE_ENDIAN
340 					sum += *(uchar_t *)w << 8;
341 #else
342 					sum += *(uchar_t *)w;
343 #endif
344 				}
345 				else
346 					pmlen = -1;
347 			}
348 		}
349 		/*
350 		 * Locate the next block with some data.
351 		 * If there is a word split across a boundary we
352 		 * will wrap to the top with mlen == -1 and
353 		 * then add it in shifted appropriately.
354 		 */
355 		offset = 0;
356 		if (! pmp) {
357 			for (; ; ) {
358 				if (mp == 0) {
359 					goto done;
360 				}
361 				if (mp_len(mp))
362 					break;
363 				mp = mp->b_cont;
364 			}
365 			dp = mp->b_datap;
366 			if (dp->db_struioflag & STRUIO_IP)
367 				psum = *(ushort_t *)dp->db_struioun.data;
368 		} else
369 			mp = pmp;
370 	}
371 done:
372 	/*
373 	 * Add together high and low parts of sum
374 	 * and carry to get cksum.
375 	 * Have to be careful to not drop the last
376 	 * carry here.
377 	 */
378 	sum = (sum & 0xFFFF) + (sum >> 16);
379 	sum = (sum & 0xFFFF) + (sum >> 16);
380 	TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
381 	    "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
382 	return (sum);
383 }
384 
385 uint32_t
386 sctp_cksum(mblk_t *mp, int offset)
387 {
388 	uint32_t crc32;
389 	uchar_t *p = NULL;
390 
391 	crc32 = 0xFFFFFFFF;
392 	p = mp->b_rptr + offset;
393 	crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
394 	for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
395 		crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
396 	}
397 
398 	/* Complement the result */
399 	crc32 = ~crc32;
400 
401 	return (crc32);
402 }
403 
404 /*
405  * Routine to compute Internet checksum (16-bit 1's complement) of a given
406  * Multidata packet descriptor.  As in the non-Multidata routine, this doesn't
407  * 1's complement the result, such that it may be used to compute partial
408  * checksums.  Since it works on buffer spans rather than mblks, this routine
409  * does not handle existing partial checksum value as in the STRUIO_IP special
410  * mblk case (supporting this is rather trivial, but is perhaps of no use at
411  * the moment unless synchronous streams and delayed checksum calculation are
412  * revived.)
413  *
414  * Note also here that the given Multidata packet descriptor must refer to
415  * a header buffer, i.e. it must have a header fragment.  In addition, the
416  * offset must lie within the boundary of the header fragment.  For the
417  * outbound tcp (MDT) case, this will not be an issue because the stack
418  * ensures that such conditions are met, and that there is no need whatsoever
419  * to compute partial checksums on an arbitrary offset that is not part of
420  * the header fragment.  We may need to revisit this routine to handle all
421  * cases of the inbound (MDR) case, especially when we need to perform partial
422  * checksum calculation due to padded bytes (non-zeroes) in the frame.
423  */
424 uint_t
425 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum)
426 {
427 	pdescinfo_t	*pdi = &pd->pd_pdi;
428 	uchar_t		*reg_start, *reg_end;
429 	ssize_t		mlen, i;
430 	ushort_t	*w;
431 	boolean_t	byteleft = B_FALSE;
432 
433 	ASSERT((pdi->flags & PDESC_HAS_REF) != 0);
434 	ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL);
435 	ASSERT(offset <= PDESC_HDRL(pdi));
436 
437 	for (i = 0; i < pdi->pld_cnt + 1; i++) {
438 		if (i == 0) {
439 			reg_start = pdi->hdr_rptr;
440 			reg_end = pdi->hdr_wptr;
441 		} else {
442 			reg_start = pdi->pld_ary[i - 1].pld_rptr;
443 			reg_end = pdi->pld_ary[i - 1].pld_wptr;
444 			offset = 0;
445 		}
446 
447 		w = (ushort_t *)(reg_start + offset);
448 		mlen = reg_end - (uchar_t *)w;
449 
450 		if (mlen > 0 && byteleft) {
451 			/*
452 			 * There is a byte left from the last
453 			 * segment; add it into the checksum.
454 			 * Don't have to worry about a carry-
455 			 * out here because we make sure that
456 			 * high part of (32 bit) sum is small
457 			 * below.
458 			 */
459 #ifdef _LITTLE_ENDIAN
460 			sum += *(uchar_t *)w << 8;
461 #else
462 			sum += *(uchar_t *)w;
463 #endif
464 			w = (ushort_t *)((char *)w + 1);
465 			mlen--;
466 			byteleft = B_FALSE;
467 		}
468 
469 		if (mlen == 0)
470 			continue;
471 
472 		if (is_even(w)) {
473 			sum = ip_ocsum(w, mlen >> 1, sum);
474 			w += mlen >> 1;
475 			/*
476 			 * If we had an odd number of bytes,
477 			 * then the last byte goes in the high
478 			 * part of the sum, and we take the
479 			 * first byte to the low part of the sum
480 			 * the next time around the loop.
481 			 */
482 			if (is_odd(mlen)) {
483 #ifdef _LITTLE_ENDIAN
484 				sum += *(uchar_t *)w;
485 #else
486 				sum += *(uchar_t *)w << 8;
487 #endif
488 				byteleft = B_TRUE;
489 			}
490 		} else {
491 			ushort_t swsum;
492 #ifdef _LITTLE_ENDIAN
493 			sum += *(uchar_t *)w;
494 #else
495 			sum += *(uchar_t *)w << 8;
496 #endif
497 			mlen--;
498 			w = (ushort_t *)(1 + (uintptr_t)w);
499 
500 			/* Do a separate checksum and copy operation */
501 			swsum = ip_ocsum(w, mlen >> 1, 0);
502 			sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
503 			w += mlen >> 1;
504 			/*
505 			 * If we had an even number of bytes,
506 			 * then the last byte goes in the low
507 			 * part of the sum.  Otherwise we had an
508 			 * odd number of bytes and we take the first
509 			 * byte to the low part of the sum the
510 			 * next time around the loop.
511 			 */
512 			if (is_odd(mlen)) {
513 #ifdef _LITTLE_ENDIAN
514 				sum += *(uchar_t *)w << 8;
515 #else
516 				sum += *(uchar_t *)w;
517 #endif
518 			} else {
519 				byteleft = B_TRUE;
520 			}
521 		}
522 	}
523 
524 	/*
525 	 * Add together high and low parts of sum and carry to get cksum.
526 	 * Have to be careful to not drop the last carry here.
527 	 */
528 	sum = (sum & 0xffff) + (sum >> 16);
529 	sum = (sum & 0xffff) + (sum >> 16);
530 
531 	return (sum);
532 }
533 
534 /* Return the IP checksum for the IP header at "iph". */
535 uint16_t
536 ip_csum_hdr(ipha_t *ipha)
537 {
538 	uint16_t	*uph;
539 	uint32_t	sum;
540 	int		opt_len;
541 
542 	opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
543 	    IP_SIMPLE_HDR_LENGTH_IN_WORDS;
544 	uph = (uint16_t *)ipha;
545 	sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
546 	    uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
547 	if (opt_len > 0) {
548 		do {
549 			sum += uph[10];
550 			sum += uph[11];
551 			uph += 2;
552 		} while (--opt_len);
553 	}
554 	sum = (sum & 0xFFFF) + (sum >> 16);
555 	sum = ~(sum + (sum >> 16)) & 0xFFFF;
556 	if (sum == 0xffff)
557 		sum = 0;
558 	return ((uint16_t)sum);
559 }
560 
561 /*
562  * This function takes an mblk and IPv6 header as input and returns
563  * three pieces of information.
564  *
565  * 'hdr_length_ptr': The IPv6 header length including extension headers.
566  *
567  * 'nethdrpp': A pointer to the "next hedader" value, aka the
568  *             transport header. This argument may be set to NULL if
569  *             only the length is desired.
570  *
571  * return: Whether or not the header was malformed.
572  *
573  * This function assumes the IPv6 header along with all extensions are
574  * contained solely in this mblk: i.e., there is no b_cont walking.
575  */
576 boolean_t
577 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
578     uint8_t **nexthdrpp)
579 {
580 	uint16_t length;
581 	uint_t	ehdrlen;
582 	uint8_t	*nexthdrp;
583 	uint8_t *whereptr;
584 	uint8_t *endptr;
585 	ip6_dest_t *desthdr;
586 	ip6_rthdr_t *rthdr;
587 	ip6_frag_t *fraghdr;
588 
589 	if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION)
590 		return (B_FALSE);
591 	length = IPV6_HDR_LEN;
592 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
593 	endptr = mp->b_wptr;
594 
595 	nexthdrp = &ip6h->ip6_nxt;
596 	while (whereptr < endptr) {
597 		/* Is there enough left for len + nexthdr? */
598 		if (whereptr + MIN_EHDR_LEN > endptr)
599 			break;
600 
601 		switch (*nexthdrp) {
602 		case IPPROTO_HOPOPTS:
603 		case IPPROTO_DSTOPTS:
604 			/* Assumes the headers are identical for hbh and dst */
605 			desthdr = (ip6_dest_t *)whereptr;
606 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
607 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
608 				return (B_FALSE);
609 			nexthdrp = &desthdr->ip6d_nxt;
610 			break;
611 		case IPPROTO_ROUTING:
612 			rthdr = (ip6_rthdr_t *)whereptr;
613 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
614 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
615 				return (B_FALSE);
616 			nexthdrp = &rthdr->ip6r_nxt;
617 			break;
618 		case IPPROTO_FRAGMENT:
619 			fraghdr = (ip6_frag_t *)whereptr;
620 			ehdrlen = sizeof (ip6_frag_t);
621 			if ((uchar_t *)&fraghdr[1] > endptr)
622 				return (B_FALSE);
623 			nexthdrp = &fraghdr->ip6f_nxt;
624 			break;
625 		case IPPROTO_NONE:
626 			/* No next header means we're finished */
627 		default:
628 			*hdr_length_ptr = length;
629 
630 			if (nexthdrpp != NULL)
631 				*nexthdrpp = nexthdrp;
632 
633 			return (B_TRUE);
634 		}
635 		length += ehdrlen;
636 		whereptr += ehdrlen;
637 		*hdr_length_ptr = length;
638 
639 		if (nexthdrpp != NULL)
640 			*nexthdrpp = nexthdrp;
641 	}
642 	switch (*nexthdrp) {
643 	case IPPROTO_HOPOPTS:
644 	case IPPROTO_DSTOPTS:
645 	case IPPROTO_ROUTING:
646 	case IPPROTO_FRAGMENT:
647 		/*
648 		 * If any know extension headers are still to be processed,
649 		 * the packet's malformed (or at least all the IP header(s) are
650 		 * not in the same mblk - and that should never happen.
651 		 */
652 		return (B_FALSE);
653 
654 	default:
655 		/*
656 		 * If we get here, we know that all of the IP headers were in
657 		 * the same mblk, even if the ULP header is in the next mblk.
658 		 */
659 		*hdr_length_ptr = length;
660 
661 		if (nexthdrpp != NULL)
662 			*nexthdrpp = nexthdrp;
663 
664 		return (B_TRUE);
665 	}
666 }
667