xref: /illumos-gate/usr/src/uts/common/os/ip_cksum.c (revision 18d738ddd2d0f4a4b4d5b1939e627aacd420b59d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2021 Joyent, Inc.
25  * Copyright 2022 Garrett D'Amore
26  */
27 /* Copyright (c) 1990 Mentat Inc. */
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/systm.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/debug.h>
35 #include <sys/ddi.h>
36 #include <sys/vtrace.h>
37 #include <inet/sctp_crc32.h>
38 #include <inet/ip.h>
39 #include <inet/ip6.h>
40 
41 extern unsigned int ip_ocsum(ushort_t *, int, unsigned int);
42 
43 /*
44  * Checksum routine for Internet Protocol family headers.
45  * This routine is very heavily used in the network
46  * code and should be modified for each CPU to be as fast as possible.
47  */
48 
/* Byte distance between an mblk's write pointer and its read pointer. */
#define	mp_len(blk)	((blk)->b_wptr - (blk)->b_rptr)

/*
 * Parity predicates.  The argument is converted to uintptr_t, so either
 * a pointer (alignment test) or an integer (length test) may be passed;
 * only the low-order bit is examined.
 */
#define	is_odd(v)	(((uintptr_t)(v) % 2) == 1)
#define	is_even(v)	(!is_odd(v))


#if defined(ZC_TEST)
/*
 * Testing hack only: when set non-zero, ip_cksum() skips the software
 * checksum entirely.  XXX - Do not use this for anything else!
 */
int noswcksum = 0;
#endif
68 /*
69  * Note: this does not ones-complement the result since it is used
70  * when computing partial checksums.
71  * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
72  * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
73  *
74  * Note: for STRUIO_IP special mblks some data may have been previously
75  *	 checksumed, this routine will handle additional data prefixed within
76  *	 an mblk or b_cont (chained) mblk(s). This routine will also handle
77  *	 suffixed b_cont mblk(s) and data suffixed within an mblk.
78  */
79 unsigned int
80 ip_cksum(mblk_t *mp, int offset, uint_t sum)
81 {
82 	ushort_t *w;
83 	ssize_t	mlen;
84 	int pmlen;
85 	mblk_t *pmp;
86 	dblk_t *dp = mp->b_datap;
87 	ushort_t psum = 0;
88 
89 #ifdef ZC_TEST
90 	if (noswcksum)
91 		return (0xffff);
92 #endif
93 	ASSERT(dp);
94 
95 	if (mp->b_cont == NULL) {
96 		/*
97 		 * May be fast-path, only one mblk.
98 		 */
99 		w = (ushort_t *)(mp->b_rptr + offset);
100 		if (dp->db_struioflag & STRUIO_IP) {
101 			/*
102 			 * Checksum any data not already done by
103 			 * the caller and add in any partial checksum.
104 			 */
105 			if ((offset > dp->db_cksumstart) ||
106 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
107 			    dp->db_cksumend)) {
108 				/*
109 				 * Mblk data pointers aren't inclusive
110 				 * of uio data, so disregard checksum.
111 				 *
112 				 * not using all of data in dblk make sure
113 				 * not use to use the precalculated checksum
114 				 * in this case.
115 				 */
116 				dp->db_struioflag &= ~STRUIO_IP;
117 				goto norm;
118 			}
119 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
120 			psum = *(ushort_t *)dp->db_struioun.data;
121 			if ((mlen = dp->db_cksumstart - offset) < 0)
122 				mlen = 0;
123 			if (is_odd(mlen))
124 				goto slow;
125 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
126 			    dp->db_cksumend != dp->db_cksumstuff) {
127 				/*
128 				 * There is prefix data to do and some uio
129 				 * data has already been checksumed and there
130 				 * is more uio data to do, so do the prefix
131 				 * data first, then do the remainder of the
132 				 * uio data.
133 				 */
134 				sum = ip_ocsum(w, mlen >> 1, sum);
135 				w = (ushort_t *)(mp->b_rptr +
136 				    dp->db_cksumstuff);
137 				if (is_odd(w)) {
138 					pmp = mp;
139 					goto slow1;
140 				}
141 				mlen = dp->db_cksumend - dp->db_cksumstuff;
142 			} else if (dp->db_cksumend != dp->db_cksumstuff) {
143 				/*
144 				 * There may be uio data to do, if there is
145 				 * prefix data to do then add in all of the
146 				 * uio data (if any) to do, else just do any
147 				 * uio data.
148 				 */
149 				if (mlen)
150 					mlen += dp->db_cksumend
151 					    - dp->db_cksumstuff;
152 				else {
153 					w = (ushort_t *)(mp->b_rptr +
154 					    dp->db_cksumstuff);
155 					if (is_odd(w))
156 						goto slow;
157 					mlen = dp->db_cksumend
158 					    - dp->db_cksumstuff;
159 				}
160 			} else if (mlen == 0)
161 				return (psum);
162 
163 			if (is_odd(mlen))
164 				goto slow;
165 			sum += psum;
166 		} else {
167 			/*
168 			 * Checksum all data not already done by the caller.
169 			 */
170 		norm:
171 			mlen = mp->b_wptr - (uchar_t *)w;
172 			if (is_odd(mlen))
173 				goto slow;
174 		}
175 		ASSERT(is_even(w));
176 		ASSERT(is_even(mlen));
177 		return (ip_ocsum(w, mlen >> 1, sum));
178 	}
179 	if (dp->db_struioflag & STRUIO_IP)
180 		psum = *(ushort_t *)dp->db_struioun.data;
181 slow:
182 	pmp = 0;
183 slow1:
184 	mlen = 0;
185 	pmlen = 0;
186 	for (; ; ) {
187 		/*
188 		 * Each trip around loop adds in word(s) from one mbuf segment
189 		 * (except for when pmp == mp, then its two partial trips).
190 		 */
191 		w = (ushort_t *)(mp->b_rptr + offset);
192 		if (pmp) {
193 			/*
194 			 * This is the second trip around for this mblk.
195 			 */
196 			pmp = 0;
197 			mlen = 0;
198 			goto douio;
199 		} else if (dp->db_struioflag & STRUIO_IP) {
200 			/*
201 			 * Checksum any data not already done by the
202 			 * caller and add in any partial checksum.
203 			 */
204 			if ((offset > dp->db_cksumstart) ||
205 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
206 			    dp->db_cksumend)) {
207 				/*
208 				 * Mblk data pointers aren't inclusive
209 				 * of uio data, so disregard checksum.
210 				 *
211 				 * not using all of data in dblk make sure
212 				 * not use to use the precalculated checksum
213 				 * in this case.
214 				 */
215 				dp->db_struioflag &= ~STRUIO_IP;
216 				goto snorm;
217 			}
218 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
219 			if ((mlen = dp->db_cksumstart - offset) < 0)
220 				mlen = 0;
221 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
222 				/*
223 				 * There is prefix data too do and some
224 				 * uio data has already been checksumed,
225 				 * so do the prefix data only this trip.
226 				 */
227 				pmp = mp;
228 			} else {
229 				/*
230 				 * Add in any partial cksum (if any) and
231 				 * do the remainder of the uio data.
232 				 */
233 				int odd;
234 			douio:
235 				odd = is_odd(dp->db_cksumstuff -
236 				    dp->db_cksumstart);
237 				if (pmlen == -1) {
238 					/*
239 					 * Previous mlen was odd, so swap
240 					 * the partial checksum bytes.
241 					 */
242 					sum += ((psum << 8) & 0xffff)
243 					    | (psum >> 8);
244 					if (odd)
245 						pmlen = 0;
246 				} else {
247 					sum += psum;
248 					if (odd)
249 						pmlen = -1;
250 				}
251 				if (dp->db_cksumend != dp->db_cksumstuff) {
252 					/*
253 					 * If prefix data to do and then all
254 					 * the uio data nees to be checksumed,
255 					 * else just do any uio data.
256 					 */
257 					if (mlen)
258 						mlen += dp->db_cksumend
259 						    - dp->db_cksumstuff;
260 					else {
261 						w = (ushort_t *)(mp->b_rptr +
262 						    dp->db_cksumstuff);
263 						mlen = dp->db_cksumend -
264 						    dp->db_cksumstuff;
265 					}
266 				}
267 			}
268 		} else {
269 			/*
270 			 * Checksum all of the mblk data.
271 			 */
272 		snorm:
273 			mlen = mp->b_wptr - (uchar_t *)w;
274 		}
275 
276 		mp = mp->b_cont;
277 		if (mlen > 0 && pmlen == -1) {
278 			/*
279 			 * There is a byte left from the last
280 			 * segment; add it into the checksum.
281 			 * Don't have to worry about a carry-
282 			 * out here because we make sure that
283 			 * high part of (32 bit) sum is small
284 			 * below.
285 			 */
286 #ifdef _LITTLE_ENDIAN
287 			sum += *(uchar_t *)w << 8;
288 #else
289 			sum += *(uchar_t *)w;
290 #endif
291 			w = (ushort_t *)((char *)w + 1);
292 			mlen--;
293 			pmlen = 0;
294 		}
295 		if (mlen > 0) {
296 			if (is_even(w)) {
297 				sum = ip_ocsum(w, mlen>>1, sum);
298 				w += mlen>>1;
299 				/*
300 				 * If we had an odd number of bytes,
301 				 * then the last byte goes in the high
302 				 * part of the sum, and we take the
303 				 * first byte to the low part of the sum
304 				 * the next time around the loop.
305 				 */
306 				if (is_odd(mlen)) {
307 #ifdef _LITTLE_ENDIAN
308 					sum += *(uchar_t *)w;
309 #else
310 					sum += *(uchar_t *)w << 8;
311 #endif
312 					pmlen = -1;
313 				}
314 			} else {
315 				ushort_t swsum;
316 #ifdef _LITTLE_ENDIAN
317 				sum += *(uchar_t *)w;
318 #else
319 				sum += *(uchar_t *)w << 8;
320 #endif
321 				mlen--;
322 				w = (ushort_t *)(1 + (uintptr_t)w);
323 
324 				/* Do a separate checksum and copy operation */
325 				swsum = ip_ocsum(w, mlen>>1, 0);
326 				sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
327 				w += mlen>>1;
328 				/*
329 				 * If we had an even number of bytes,
330 				 * then the last byte goes in the low
331 				 * part of the sum.  Otherwise we had an
332 				 * odd number of bytes and we take the first
333 				 * byte to the low part of the sum the
334 				 * next time around the loop.
335 				 */
336 				if (is_odd(mlen)) {
337 #ifdef _LITTLE_ENDIAN
338 					sum += *(uchar_t *)w << 8;
339 #else
340 					sum += *(uchar_t *)w;
341 #endif
342 				}
343 				else
344 					pmlen = -1;
345 			}
346 		}
347 		/*
348 		 * Locate the next block with some data.
349 		 * If there is a word split across a boundary we
350 		 * will wrap to the top with mlen == -1 and
351 		 * then add it in shifted appropriately.
352 		 */
353 		offset = 0;
354 		if (! pmp) {
355 			for (; ; ) {
356 				if (mp == 0) {
357 					goto done;
358 				}
359 				if (mp_len(mp))
360 					break;
361 				mp = mp->b_cont;
362 			}
363 			dp = mp->b_datap;
364 			if (dp->db_struioflag & STRUIO_IP)
365 				psum = *(ushort_t *)dp->db_struioun.data;
366 		} else
367 			mp = pmp;
368 	}
369 done:
370 	/*
371 	 * Add together high and low parts of sum
372 	 * and carry to get cksum.
373 	 * Have to be careful to not drop the last
374 	 * carry here.
375 	 */
376 	sum = (sum & 0xFFFF) + (sum >> 16);
377 	sum = (sum & 0xFFFF) + (sum >> 16);
378 	TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
379 	    "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
380 	return (sum);
381 }
382 
383 uint32_t
384 sctp_cksum(mblk_t *mp, int offset)
385 {
386 	uint32_t crc32;
387 	uchar_t *p = NULL;
388 
389 	crc32 = 0xFFFFFFFF;
390 	p = mp->b_rptr + offset;
391 	crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
392 	for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
393 		crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
394 	}
395 
396 	/* Complement the result */
397 	crc32 = ~crc32;
398 
399 	return (crc32);
400 }
401 
402 /* Return the IP checksum for the IP header at "iph". */
403 uint16_t
404 ip_csum_hdr(ipha_t *ipha)
405 {
406 	uint16_t	*uph;
407 	uint32_t	sum;
408 	int		opt_len;
409 
410 	opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
411 	    IP_SIMPLE_HDR_LENGTH_IN_WORDS;
412 	uph = (uint16_t *)ipha;
413 	sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
414 	    uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
415 	if (opt_len > 0) {
416 		do {
417 			sum += uph[10];
418 			sum += uph[11];
419 			uph += 2;
420 		} while (--opt_len);
421 	}
422 	sum = (sum & 0xFFFF) + (sum >> 16);
423 	sum = ~(sum + (sum >> 16)) & 0xFFFF;
424 	if (sum == 0xffff)
425 		sum = 0;
426 	return ((uint16_t)sum);
427 }
428 
429 /*
430  * This function takes an mblk and IPv6 header as input and returns
431  * three pieces of information.
432  *
433  * 'hdr_length_ptr': The IPv6 header length including extension headers.
434  *
435  * 'nethdrpp': A pointer to the "next hedader" value, aka the
436  *             transport header. This argument may be set to NULL if
437  *             only the length is desired.
438  *
439  * return: Whether or not the header was malformed.
440  *
441  * This function assumes the IPv6 header along with all extensions are
442  * contained solely in this mblk: i.e., there is no b_cont walking.
443  */
444 boolean_t
445 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
446     uint8_t **nexthdrpp)
447 {
448 	uint16_t length;
449 	uint_t	ehdrlen;
450 	uint8_t	*nexthdrp;
451 	uint8_t *whereptr;
452 	uint8_t *endptr;
453 	ip6_dest_t *desthdr;
454 	ip6_rthdr_t *rthdr;
455 	ip6_frag_t *fraghdr;
456 
457 	if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION)
458 		return (B_FALSE);
459 	length = IPV6_HDR_LEN;
460 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
461 	endptr = mp->b_wptr;
462 
463 	nexthdrp = &ip6h->ip6_nxt;
464 	while (whereptr < endptr) {
465 		/* Is there enough left for len + nexthdr? */
466 		if (whereptr + MIN_EHDR_LEN > endptr)
467 			break;
468 
469 		switch (*nexthdrp) {
470 		case IPPROTO_HOPOPTS:
471 		case IPPROTO_DSTOPTS:
472 			/* Assumes the headers are identical for hbh and dst */
473 			desthdr = (ip6_dest_t *)whereptr;
474 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
475 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
476 				return (B_FALSE);
477 			nexthdrp = &desthdr->ip6d_nxt;
478 			break;
479 		case IPPROTO_ROUTING:
480 			rthdr = (ip6_rthdr_t *)whereptr;
481 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
482 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
483 				return (B_FALSE);
484 			nexthdrp = &rthdr->ip6r_nxt;
485 			break;
486 		case IPPROTO_FRAGMENT:
487 			fraghdr = (ip6_frag_t *)whereptr;
488 			ehdrlen = sizeof (ip6_frag_t);
489 			if ((uchar_t *)&fraghdr[1] > endptr)
490 				return (B_FALSE);
491 			nexthdrp = &fraghdr->ip6f_nxt;
492 			break;
493 		case IPPROTO_NONE:
494 			/* No next header means we're finished */
495 		default:
496 			*hdr_length_ptr = length;
497 
498 			if (nexthdrpp != NULL)
499 				*nexthdrpp = nexthdrp;
500 
501 			return (B_TRUE);
502 		}
503 		length += ehdrlen;
504 		whereptr += ehdrlen;
505 		*hdr_length_ptr = length;
506 
507 		if (nexthdrpp != NULL)
508 			*nexthdrpp = nexthdrp;
509 	}
510 	switch (*nexthdrp) {
511 	case IPPROTO_HOPOPTS:
512 	case IPPROTO_DSTOPTS:
513 	case IPPROTO_ROUTING:
514 	case IPPROTO_FRAGMENT:
515 		/*
516 		 * If any know extension headers are still to be processed,
517 		 * the packet's malformed (or at least all the IP header(s) are
518 		 * not in the same mblk - and that should never happen.
519 		 */
520 		return (B_FALSE);
521 
522 	default:
523 		/*
524 		 * If we get here, we know that all of the IP headers were in
525 		 * the same mblk, even if the ULP header is in the next mblk.
526 		 */
527 		*hdr_length_ptr = length;
528 
529 		if (nexthdrpp != NULL)
530 			*nexthdrpp = nexthdrp;
531 
532 		return (B_TRUE);
533 	}
534 }
535