xref: /illumos-gate/usr/src/uts/common/os/ip_cksum.c (revision 7b34a9a5df26271af0da06974fc361c468cd48d3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2019 Joyent, Inc.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/inttypes.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/strsun.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/vtrace.h>
36 #include <inet/sctp_crc32.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 
40 #include <sys/multidata.h>
41 #include <sys/multidata_impl.h>
42 
/*
 * Low-level checksum primitive, defined outside this file.  The callers
 * below hand it a pointer they have checked to be 16-bit aligned, a count
 * of 16-bit halfwords (byte length >> 1) and the running sum, and use the
 * return value as the updated sum.
 */
extern unsigned int 	ip_ocsum(ushort_t *address, int halfword_count,
    unsigned int sum);
45 
/*
 * Checksum routine for Internet Protocol family headers.
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */

/*
 * Number of bytes between the read and write pointers of mblk 'mp'.
 * Note that the argument is evaluated twice.
 */
#define	mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr)
53 
/*
 * Even/odd checks.  These are usually applied to pointers (to test 16-bit
 * alignment) but may be used on integers (e.g. byte counts) as well;
 * uintptr_t is wide enough to hold both an integer and a pointer.
 * Only the least significant bit of the argument is examined.
 */
#define	is_odd(p) (((uintptr_t)(p) & 0x1) != 0)
#define	is_even(p) (!is_odd(p))
61 
62 
#ifdef ZC_TEST
/*
 * Disable the TCP s/w cksum.  When this is nonzero, ip_cksum() returns
 * 0xffff immediately without looking at the data.
 * XXX - This is just a hack for testing purposes. Don't use it for
 * anything else!
 */
int noswcksum = 0;
#endif
/*
 * Add the 16-bit Internet checksum sum of the data in an mblk chain to a
 * running sum.
 *
 *	mp	first mblk of the chain; b_cont links are followed.
 *	offset	byte offset from mp->b_rptr at which to start in the first
 *		mblk; every following mblk is summed from its b_rptr.
 *	sum	running sum to which the data is added.
 *
 * Note: this does not ones-complement the result since it is used
 * when computing partial checksums.
 * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
 * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
 *
 * Note: for STRUIO_IP special mblks some data may have been previously
 *	 checksummed, this routine will handle additional data prefixed within
 *	 an mblk or b_cont (chained) mblk(s). This routine will also handle
 *	 suffixed b_cont mblk(s) and data suffixed within an mblk.
 *
 * Structure: a single mblk whose start and length are both even is handled
 * by one call to ip_ocsum() (the fast path).  Everything else (chains, odd
 * lengths, odd alignment) goes through the loop at the "slow"/"slow1"
 * labels, which tracks a byte left over between segments and folds the
 * 32-bit sum down to 16 bits plus carry before returning.
 *
 * Side effect: if a STRUIO_IP mblk's data pointers do not match the range
 * described by db_cksumstart/db_cksumend, the STRUIO_IP flag is cleared in
 * the dblk and the mblk is summed as ordinary data.
 *
 * When built with ZC_TEST and noswcksum is nonzero, 0xffff is returned
 * without examining the data.
 */
unsigned int
ip_cksum(mblk_t *mp, int offset, uint_t sum)
{
	/* Current position in the data, viewed as 16-bit words. */
	ushort_t *w;
	/* Number of bytes still to be summed in the current segment. */
	ssize_t	mlen;
	/* Set to -1 when a segment left an odd byte pending, else 0. */
	int pmlen;
	/* Non-NULL when the current mblk needs a second trip (uio data). */
	mblk_t *pmp;
	dblk_t *dp = mp->b_datap;
	/* Partial checksum stored in the dblk of a STRUIO_IP mblk. */
	ushort_t psum = 0;

#ifdef ZC_TEST
	if (noswcksum)
		return (0xffff);
#endif
	ASSERT(dp);

	if (mp->b_cont == NULL) {
		/*
		 * May be fast-path, only one mblk.
		 */
		w = (ushort_t *)(mp->b_rptr + offset);
		if (dp->db_struioflag & STRUIO_IP) {
			/*
			 * Checksum any data not already done by
			 * the caller and add in any partial checksum.
			 */
			if ((offset > dp->db_cksumstart) ||
			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
			    dp->db_cksumend)) {
				/*
				 * Mblk data pointers aren't inclusive
				 * of uio data, so disregard checksum.
				 *
				 * Not all of the data in the dblk is being
				 * used, so make sure the precalculated
				 * checksum is not used in this case.
				 */
				dp->db_struioflag &= ~STRUIO_IP;
				goto norm;
			}
			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
			psum = *(ushort_t *)dp->db_struioun.data;
			/* mlen is the length of the prefix data, if any. */
			if ((mlen = dp->db_cksumstart - offset) < 0)
				mlen = 0;
			if (is_odd(mlen))
				goto slow;
			if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
			    dp->db_cksumend != dp->db_cksumstuff) {
				/*
				 * There is prefix data to do and some uio
				 * data has already been checksummed and there
				 * is more uio data to do, so do the prefix
				 * data first, then do the remainder of the
				 * uio data.
				 */
				sum = ip_ocsum(w, mlen >> 1, sum);
				w = (ushort_t *)(mp->b_rptr +
				    dp->db_cksumstuff);
				if (is_odd(w)) {
					/*
					 * Remaining uio data is not aligned;
					 * enter the loop as the "second trip"
					 * for this mblk.
					 */
					pmp = mp;
					goto slow1;
				}
				mlen = dp->db_cksumend - dp->db_cksumstuff;
			} else if (dp->db_cksumend != dp->db_cksumstuff) {
				/*
				 * There may be uio data to do, if there is
				 * prefix data to do then add in all of the
				 * uio data (if any) to do, else just do any
				 * uio data.
				 */
				if (mlen)
					mlen += dp->db_cksumend
					    - dp->db_cksumstuff;
				else {
					w = (ushort_t *)(mp->b_rptr +
					    dp->db_cksumstuff);
					if (is_odd(w))
						goto slow;
					mlen = dp->db_cksumend
					    - dp->db_cksumstuff;
				}
			} else if (mlen == 0)
				/*
				 * Nothing left to sum in this mblk.
				 * NOTE(review): this returns the stored
				 * partial checksum alone; the caller-supplied
				 * 'sum' is not added on this path, whereas the
				 * slow path adds psum to sum.  Confirm that
				 * this is intended.
				 */
				return (psum);

			if (is_odd(mlen))
				goto slow;
			sum += psum;
		} else {
			/*
			 * Checksum all data not already done by the caller.
			 */
		norm:
			mlen = mp->b_wptr - (uchar_t *)w;
			if (is_odd(mlen))
				goto slow;
		}
		ASSERT(is_even(w));
		ASSERT(is_even(mlen));
		return (ip_ocsum(w, mlen >> 1, sum));
	}
	if (dp->db_struioflag & STRUIO_IP)
		psum = *(ushort_t *)dp->db_struioun.data;
slow:
	pmp = 0;
slow1:
	mlen = 0;
	pmlen = 0;
	for (; ; ) {
		/*
		 * Each trip around loop adds in word(s) from one mbuf segment
		 * (except for when pmp == mp, then it's two partial trips).
		 */
		w = (ushort_t *)(mp->b_rptr + offset);
		if (pmp) {
			/*
			 * This is the second trip around for this mblk.
			 */
			pmp = 0;
			mlen = 0;
			goto douio;
		} else if (dp->db_struioflag & STRUIO_IP) {
			/*
			 * Checksum any data not already done by the
			 * caller and add in any partial checksum.
			 */
			if ((offset > dp->db_cksumstart) ||
			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
			    dp->db_cksumend)) {
				/*
				 * Mblk data pointers aren't inclusive
				 * of uio data, so disregard checksum.
				 *
				 * Not all of the data in the dblk is being
				 * used, so make sure the precalculated
				 * checksum is not used in this case.
				 */
				dp->db_struioflag &= ~STRUIO_IP;
				goto snorm;
			}
			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
			if ((mlen = dp->db_cksumstart - offset) < 0)
				mlen = 0;
			if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
				/*
				 * There is prefix data to do and some
				 * uio data has already been checksummed,
				 * so do the prefix data only this trip.
				 */
				pmp = mp;
			} else {
				/*
				 * Add in any partial cksum (if any) and
				 * do the remainder of the uio data.
				 */
				int odd;
			douio:
				/*
				 * 'odd' is true when the already-checksummed
				 * range (db_cksumstart up to db_cksumstuff)
				 * has an odd number of bytes.
				 */
				odd = is_odd(dp->db_cksumstuff -
				    dp->db_cksumstart);
				if (pmlen == -1) {
					/*
					 * Previous mlen was odd, so swap
					 * the partial checksum bytes.
					 */
					sum += ((psum << 8) & 0xffff)
					    | (psum >> 8);
					if (odd)
						pmlen = 0;
				} else {
					sum += psum;
					if (odd)
						pmlen = -1;
				}
				if (dp->db_cksumend != dp->db_cksumstuff) {
					/*
					 * If there is prefix data to do then
					 * all the uio data needs to be
					 * checksummed, else just do any uio
					 * data.
					 */
					if (mlen)
						mlen += dp->db_cksumend
						    - dp->db_cksumstuff;
					else {
						w = (ushort_t *)(mp->b_rptr +
						    dp->db_cksumstuff);
						mlen = dp->db_cksumend -
						    dp->db_cksumstuff;
					}
				}
			}
		} else {
			/*
			 * Checksum all of the mblk data.
			 */
		snorm:
			mlen = mp->b_wptr - (uchar_t *)w;
		}

		/*
		 * w/mlen now describe the bytes to sum for this trip, so
		 * mp can already be advanced to the next mblk.
		 */
		mp = mp->b_cont;
		if (mlen > 0 && pmlen == -1) {
			/*
			 * There is a byte left from the last
			 * segment; add it into the checksum.
			 * Don't have to worry about a carry-
			 * out here because we make sure that
			 * high part of (32 bit) sum is small
			 * below.
			 */
#ifdef _LITTLE_ENDIAN
			sum += *(uchar_t *)w << 8;
#else
			sum += *(uchar_t *)w;
#endif
			w = (ushort_t *)((char *)w + 1);
			mlen--;
			pmlen = 0;
		}
		if (mlen > 0) {
			if (is_even(w)) {
				sum = ip_ocsum(w, mlen>>1, sum);
				w += mlen>>1;
				/*
				 * If we had an odd number of bytes,
				 * then the last byte goes in the high
				 * part of the sum, and we take the
				 * first byte to the low part of the sum
				 * the next time around the loop.
				 */
				if (is_odd(mlen)) {
#ifdef _LITTLE_ENDIAN
					sum += *(uchar_t *)w;
#else
					sum += *(uchar_t *)w << 8;
#endif
					pmlen = -1;
				}
			} else {
				/*
				 * w is at an odd address: add the first byte
				 * by itself, sum the now-aligned remainder
				 * separately, and add that result with its
				 * two bytes swapped.
				 */
				ushort_t swsum;
#ifdef _LITTLE_ENDIAN
				sum += *(uchar_t *)w;
#else
				sum += *(uchar_t *)w << 8;
#endif
				mlen--;
				w = (ushort_t *)(1 + (uintptr_t)w);

				/* Do a separate checksum and copy operation */
				swsum = ip_ocsum(w, mlen>>1, 0);
				sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
				w += mlen>>1;
				/*
				 * If we had an even number of bytes,
				 * then the last byte goes in the low
				 * part of the sum.  Otherwise we had an
				 * odd number of bytes and we take the first
				 * byte to the low part of the sum the
				 * next time around the loop.
				 */
				if (is_odd(mlen)) {
#ifdef _LITTLE_ENDIAN
					sum += *(uchar_t *)w << 8;
#else
					sum += *(uchar_t *)w;
#endif
				}
				else
					pmlen = -1;
			}
		}
		/*
		 * Locate the next block with some data.
		 * If there is a word split across a boundary we
		 * will wrap to the top with pmlen == -1 and
		 * then add it in shifted appropriately.
		 */
		offset = 0;
		if (! pmp) {
			for (; ; ) {
				if (mp == 0) {
					goto done;
				}
				if (mp_len(mp))
					break;
				mp = mp->b_cont;
			}
			dp = mp->b_datap;
			if (dp->db_struioflag & STRUIO_IP)
				psum = *(ushort_t *)dp->db_struioun.data;
		} else
			/* Go around again for the uio data of the same mblk. */
			mp = pmp;
	}
done:
	/*
	 * Add together high and low parts of sum
	 * and carry to get cksum.
	 * Have to be careful to not drop the last
	 * carry here.
	 */
	sum = (sum & 0xFFFF) + (sum >> 16);
	sum = (sum & 0xFFFF) + (sum >> 16);
	TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
	    "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
	return (sum);
}
385 
386 uint32_t
387 sctp_cksum(mblk_t *mp, int offset)
388 {
389 	uint32_t crc32;
390 	uchar_t *p = NULL;
391 
392 	crc32 = 0xFFFFFFFF;
393 	p = mp->b_rptr + offset;
394 	crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
395 	for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
396 		crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
397 	}
398 
399 	/* Complement the result */
400 	crc32 = ~crc32;
401 
402 	return (crc32);
403 }
404 
405 /*
406  * Routine to compute Internet checksum (16-bit 1's complement) of a given
407  * Multidata packet descriptor.  As in the non-Multidata routine, this doesn't
408  * 1's complement the result, such that it may be used to compute partial
409  * checksums.  Since it works on buffer spans rather than mblks, this routine
410  * does not handle existing partial checksum value as in the STRUIO_IP special
411  * mblk case (supporting this is rather trivial, but is perhaps of no use at
412  * the moment unless synchronous streams and delayed checksum calculation are
413  * revived.)
414  *
415  * Note also here that the given Multidata packet descriptor must refer to
416  * a header buffer, i.e. it must have a header fragment.  In addition, the
417  * offset must lie within the boundary of the header fragment.  For the
418  * outbound tcp (MDT) case, this will not be an issue because the stack
419  * ensures that such conditions are met, and that there is no need whatsoever
420  * to compute partial checksums on an arbitrary offset that is not part of
421  * the header fragment.  We may need to revisit this routine to handle all
422  * cases of the inbound (MDR) case, especially when we need to perform partial
423  * checksum calculation due to padded bytes (non-zeroes) in the frame.
424  */
425 uint_t
426 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum)
427 {
428 	pdescinfo_t	*pdi = &pd->pd_pdi;
429 	uchar_t		*reg_start, *reg_end;
430 	ssize_t		mlen, i;
431 	ushort_t	*w;
432 	boolean_t	byteleft = B_FALSE;
433 
434 	ASSERT((pdi->flags & PDESC_HAS_REF) != 0);
435 	ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL);
436 	ASSERT(offset <= PDESC_HDRL(pdi));
437 
438 	for (i = 0; i < pdi->pld_cnt + 1; i++) {
439 		if (i == 0) {
440 			reg_start = pdi->hdr_rptr;
441 			reg_end = pdi->hdr_wptr;
442 		} else {
443 			reg_start = pdi->pld_ary[i - 1].pld_rptr;
444 			reg_end = pdi->pld_ary[i - 1].pld_wptr;
445 			offset = 0;
446 		}
447 
448 		w = (ushort_t *)(reg_start + offset);
449 		mlen = reg_end - (uchar_t *)w;
450 
451 		if (mlen > 0 && byteleft) {
452 			/*
453 			 * There is a byte left from the last
454 			 * segment; add it into the checksum.
455 			 * Don't have to worry about a carry-
456 			 * out here because we make sure that
457 			 * high part of (32 bit) sum is small
458 			 * below.
459 			 */
460 #ifdef _LITTLE_ENDIAN
461 			sum += *(uchar_t *)w << 8;
462 #else
463 			sum += *(uchar_t *)w;
464 #endif
465 			w = (ushort_t *)((char *)w + 1);
466 			mlen--;
467 			byteleft = B_FALSE;
468 		}
469 
470 		if (mlen == 0)
471 			continue;
472 
473 		if (is_even(w)) {
474 			sum = ip_ocsum(w, mlen >> 1, sum);
475 			w += mlen >> 1;
476 			/*
477 			 * If we had an odd number of bytes,
478 			 * then the last byte goes in the high
479 			 * part of the sum, and we take the
480 			 * first byte to the low part of the sum
481 			 * the next time around the loop.
482 			 */
483 			if (is_odd(mlen)) {
484 #ifdef _LITTLE_ENDIAN
485 				sum += *(uchar_t *)w;
486 #else
487 				sum += *(uchar_t *)w << 8;
488 #endif
489 				byteleft = B_TRUE;
490 			}
491 		} else {
492 			ushort_t swsum;
493 #ifdef _LITTLE_ENDIAN
494 			sum += *(uchar_t *)w;
495 #else
496 			sum += *(uchar_t *)w << 8;
497 #endif
498 			mlen--;
499 			w = (ushort_t *)(1 + (uintptr_t)w);
500 
501 			/* Do a separate checksum and copy operation */
502 			swsum = ip_ocsum(w, mlen >> 1, 0);
503 			sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
504 			w += mlen >> 1;
505 			/*
506 			 * If we had an even number of bytes,
507 			 * then the last byte goes in the low
508 			 * part of the sum.  Otherwise we had an
509 			 * odd number of bytes and we take the first
510 			 * byte to the low part of the sum the
511 			 * next time around the loop.
512 			 */
513 			if (is_odd(mlen)) {
514 #ifdef _LITTLE_ENDIAN
515 				sum += *(uchar_t *)w << 8;
516 #else
517 				sum += *(uchar_t *)w;
518 #endif
519 			} else {
520 				byteleft = B_TRUE;
521 			}
522 		}
523 	}
524 
525 	/*
526 	 * Add together high and low parts of sum and carry to get cksum.
527 	 * Have to be careful to not drop the last carry here.
528 	 */
529 	sum = (sum & 0xffff) + (sum >> 16);
530 	sum = (sum & 0xffff) + (sum >> 16);
531 
532 	return (sum);
533 }
534 
535 /* Return the IP checksum for the IP header at "iph". */
536 uint16_t
537 ip_csum_hdr(ipha_t *ipha)
538 {
539 	uint16_t	*uph;
540 	uint32_t	sum;
541 	int		opt_len;
542 
543 	opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
544 	    IP_SIMPLE_HDR_LENGTH_IN_WORDS;
545 	uph = (uint16_t *)ipha;
546 	sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
547 	    uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
548 	if (opt_len > 0) {
549 		do {
550 			sum += uph[10];
551 			sum += uph[11];
552 			uph += 2;
553 		} while (--opt_len);
554 	}
555 	sum = (sum & 0xFFFF) + (sum >> 16);
556 	sum = ~(sum + (sum >> 16)) & 0xFFFF;
557 	if (sum == 0xffff)
558 		sum = 0;
559 	return ((uint16_t)sum);
560 }
561 
562 /*
563  * This function takes an mblk and IPv6 header as input and returns
564  * three pieces of information.
565  *
566  * 'hdr_length_ptr': The IPv6 header length including extension headers.
567  *
568  * 'nethdrpp': A pointer to the "next hedader" value, aka the
569  *             transport header. This argument may be set to NULL if
570  *             only the length is desired.
571  *
572  * return: Whether or not the header was malformed.
573  *
574  * This function assumes the IPv6 header along with all extensions are
575  * contained solely in this mblk: i.e., there is no b_cont walking.
576  */
577 boolean_t
578 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
579     uint8_t **nexthdrpp)
580 {
581 	uint16_t length;
582 	uint_t	ehdrlen;
583 	uint8_t	*nexthdrp;
584 	uint8_t *whereptr;
585 	uint8_t *endptr;
586 	ip6_dest_t *desthdr;
587 	ip6_rthdr_t *rthdr;
588 	ip6_frag_t *fraghdr;
589 
590 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
591 	length = IPV6_HDR_LEN;
592 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
593 	endptr = mp->b_wptr;
594 
595 	nexthdrp = &ip6h->ip6_nxt;
596 	while (whereptr < endptr) {
597 		/* Is there enough left for len + nexthdr? */
598 		if (whereptr + MIN_EHDR_LEN > endptr)
599 			break;
600 
601 		switch (*nexthdrp) {
602 		case IPPROTO_HOPOPTS:
603 		case IPPROTO_DSTOPTS:
604 			/* Assumes the headers are identical for hbh and dst */
605 			desthdr = (ip6_dest_t *)whereptr;
606 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
607 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
608 				return (B_FALSE);
609 			nexthdrp = &desthdr->ip6d_nxt;
610 			break;
611 		case IPPROTO_ROUTING:
612 			rthdr = (ip6_rthdr_t *)whereptr;
613 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
614 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
615 				return (B_FALSE);
616 			nexthdrp = &rthdr->ip6r_nxt;
617 			break;
618 		case IPPROTO_FRAGMENT:
619 			fraghdr = (ip6_frag_t *)whereptr;
620 			ehdrlen = sizeof (ip6_frag_t);
621 			if ((uchar_t *)&fraghdr[1] > endptr)
622 				return (B_FALSE);
623 			nexthdrp = &fraghdr->ip6f_nxt;
624 			break;
625 		case IPPROTO_NONE:
626 			/* No next header means we're finished */
627 		default:
628 			*hdr_length_ptr = length;
629 
630 			if (nexthdrpp != NULL)
631 				*nexthdrpp = nexthdrp;
632 
633 			return (B_TRUE);
634 		}
635 		length += ehdrlen;
636 		whereptr += ehdrlen;
637 		*hdr_length_ptr = length;
638 
639 		if (nexthdrpp != NULL)
640 			*nexthdrpp = nexthdrp;
641 	}
642 	switch (*nexthdrp) {
643 	case IPPROTO_HOPOPTS:
644 	case IPPROTO_DSTOPTS:
645 	case IPPROTO_ROUTING:
646 	case IPPROTO_FRAGMENT:
647 		/*
648 		 * If any know extension headers are still to be processed,
649 		 * the packet's malformed (or at least all the IP header(s) are
650 		 * not in the same mblk - and that should never happen.
651 		 */
652 		return (B_FALSE);
653 
654 	default:
655 		/*
656 		 * If we get here, we know that all of the IP headers were in
657 		 * the same mblk, even if the ULP header is in the next mblk.
658 		 */
659 		*hdr_length_ptr = length;
660 
661 		if (nexthdrpp != NULL)
662 			*nexthdrpp = nexthdrp;
663 
664 		return (B_TRUE);
665 	}
666 }
667