xref: /illumos-gate/usr/src/uts/common/os/ip_cksum.c (revision b424305435881ac456a9343be2898f1f86440f31)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/inttypes.h>
29 #include <sys/systm.h>
30 #include <sys/stream.h>
31 #include <sys/strsun.h>
32 #include <sys/debug.h>
33 #include <sys/ddi.h>
34 #include <sys/vtrace.h>
35 #include <inet/sctp_crc32.h>
36 #include <inet/ip.h>
37 
38 #include <sys/multidata.h>
39 #include <sys/multidata_impl.h>
40 
/*
 * Low-level checksum primitive, defined outside this file.  As used by the
 * routines below, it is handed a 16-bit aligned pointer, a count of 16-bit
 * halfwords to add and a running sum, and its return value is taken as the
 * updated sum.
 */
extern unsigned int 	ip_ocsum(ushort_t *address, int halfword_count,
    unsigned int sum);
43 
44 /*
45  * Checksum routine for Internet Protocol family headers.
46  * This routine is very heavily used in the network
47  * code and should be modified for each CPU to be as fast as possible.
48  */
49 
/* Number of bytes between an mblk's read and write pointers. */
#define	mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr)
51 
/*
 * Parity tests on the least significant bit of a value.  They are mostly
 * applied to pointers (to detect 16-bit misalignment) but work equally well
 * on integers such as byte counts; the operand is converted to uintptr_t,
 * which is wide enough to hold either.  Both macros evaluate to 0 or 1.
 */
#define	is_odd(p) ((((uintptr_t)(p)) & (uintptr_t)1) == 1)
#define	is_even(p) ((((uintptr_t)(p)) & (uintptr_t)1) == 0)
59 
60 
#ifdef ZC_TEST
/*
 * Test-only switch, present only when ZC_TEST is defined: while it is
 * non-zero, ip_cksum() skips the software checksum and returns 0xffff
 * immediately.
 * XXX - This is just a hack for testing purposes. Don't use it for
 * anything else!
 */
int noswcksum = 0;
#endif
69 /*
70  * Note: this does not ones-complement the result since it is used
71  * when computing partial checksums.
72  * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
73  * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
74  *
75  * Note: for STRUIO_IP special mblks some data may have been previously
76  *	 checksumed, this routine will handle additional data prefixed within
77  *	 an mblk or b_cont (chained) mblk(s). This routine will also handle
78  *	 suffixed b_cont mblk(s) and data suffixed within an mblk.
79  */
80 unsigned int
81 ip_cksum(mblk_t *mp, int offset, uint_t sum)
82 {
83 	ushort_t *w;
84 	ssize_t	mlen;
85 	int pmlen;
86 	mblk_t *pmp;
87 	dblk_t *dp = mp->b_datap;
88 	ushort_t psum = 0;
89 
90 #ifdef ZC_TEST
91 	if (noswcksum)
92 		return (0xffff);
93 #endif
94 	ASSERT(dp);
95 
96 	TRACE_2(TR_FAC_IP, TR_IP_CKSUM_START,
97 	    "ip_cksum_start:%p (%X)", mp, sum);
98 
99 	if (mp->b_cont == NULL) {
100 		/*
101 		 * May be fast-path, only one mblk.
102 		 */
103 		w = (ushort_t *)(mp->b_rptr + offset);
104 		if (dp->db_struioflag & STRUIO_IP) {
105 			/*
106 			 * Checksum any data not already done by
107 			 * the caller and add in any partial checksum.
108 			 */
109 			if ((offset > dp->db_cksumstart) ||
110 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
111 			    dp->db_cksumend)) {
112 				/*
113 				 * Mblk data pointers aren't inclusive
114 				 * of uio data, so disregard checksum.
115 				 *
116 				 * not using all of data in dblk make sure
117 				 * not use to use the precalculated checksum
118 				 * in this case.
119 				 */
120 				dp->db_struioflag &= ~STRUIO_IP;
121 				goto norm;
122 			}
123 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
124 			psum = *(ushort_t *)dp->db_struioun.data;
125 			if ((mlen = dp->db_cksumstart - offset) < 0)
126 				mlen = 0;
127 			if (is_odd(mlen))
128 				goto slow;
129 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
130 			    dp->db_cksumend != dp->db_cksumstuff) {
131 				/*
132 				 * There is prefix data to do and some uio
133 				 * data has already been checksumed and there
134 				 * is more uio data to do, so do the prefix
135 				 * data first, then do the remainder of the
136 				 * uio data.
137 				 */
138 				sum = ip_ocsum(w, mlen >> 1, sum);
139 				w = (ushort_t *)(mp->b_rptr +
140 				    dp->db_cksumstuff);
141 				if (is_odd(w)) {
142 					pmp = mp;
143 					goto slow1;
144 				}
145 				mlen = dp->db_cksumend - dp->db_cksumstuff;
146 			} else if (dp->db_cksumend != dp->db_cksumstuff) {
147 				/*
148 				 * There may be uio data to do, if there is
149 				 * prefix data to do then add in all of the
150 				 * uio data (if any) to do, else just do any
151 				 * uio data.
152 				 */
153 				if (mlen)
154 					mlen += dp->db_cksumend
155 					    - dp->db_cksumstuff;
156 				else {
157 					w = (ushort_t *)(mp->b_rptr +
158 					    dp->db_cksumstuff);
159 					if (is_odd(w))
160 						goto slow;
161 					mlen = dp->db_cksumend
162 					    - dp->db_cksumstuff;
163 				}
164 			} else if (mlen == 0)
165 				return (psum);
166 
167 			if (is_odd(mlen))
168 				goto slow;
169 			sum += psum;
170 		} else {
171 			/*
172 			 * Checksum all data not already done by the caller.
173 			 */
174 		norm:
175 			mlen = mp->b_wptr - (uchar_t *)w;
176 			if (is_odd(mlen))
177 				goto slow;
178 		}
179 		ASSERT(is_even(w));
180 		ASSERT(is_even(mlen));
181 		return (ip_ocsum(w, mlen >> 1, sum));
182 	}
183 	if (dp->db_struioflag & STRUIO_IP)
184 		psum = *(ushort_t *)dp->db_struioun.data;
185 slow:
186 	pmp = 0;
187 slow1:
188 	mlen = 0;
189 	pmlen = 0;
190 	for (; ; ) {
191 		/*
192 		 * Each trip around loop adds in word(s) from one mbuf segment
193 		 * (except for when pmp == mp, then its two partial trips).
194 		 */
195 		w = (ushort_t *)(mp->b_rptr + offset);
196 		if (pmp) {
197 			/*
198 			 * This is the second trip around for this mblk.
199 			 */
200 			pmp = 0;
201 			mlen = 0;
202 			goto douio;
203 		} else if (dp->db_struioflag & STRUIO_IP) {
204 			/*
205 			 * Checksum any data not already done by the
206 			 * caller and add in any partial checksum.
207 			 */
208 			if ((offset > dp->db_cksumstart) ||
209 			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
210 			    dp->db_cksumend)) {
211 				/*
212 				 * Mblk data pointers aren't inclusive
213 				 * of uio data, so disregard checksum.
214 				 *
215 				 * not using all of data in dblk make sure
216 				 * not use to use the precalculated checksum
217 				 * in this case.
218 				 */
219 				dp->db_struioflag &= ~STRUIO_IP;
220 				goto snorm;
221 			}
222 			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
223 			if ((mlen = dp->db_cksumstart - offset) < 0)
224 				mlen = 0;
225 			if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
226 				/*
227 				 * There is prefix data too do and some
228 				 * uio data has already been checksumed,
229 				 * so do the prefix data only this trip.
230 				 */
231 				pmp = mp;
232 			} else {
233 				/*
234 				 * Add in any partial cksum (if any) and
235 				 * do the remainder of the uio data.
236 				 */
237 				int odd;
238 			douio:
239 				odd = is_odd(dp->db_cksumstuff -
240 				    dp->db_cksumstart);
241 				if (pmlen == -1) {
242 					/*
243 					 * Previous mlen was odd, so swap
244 					 * the partial checksum bytes.
245 					 */
246 					sum += ((psum << 8) & 0xffff)
247 					    | (psum >> 8);
248 					if (odd)
249 						pmlen = 0;
250 				} else {
251 					sum += psum;
252 					if (odd)
253 						pmlen = -1;
254 				}
255 				if (dp->db_cksumend != dp->db_cksumstuff) {
256 					/*
257 					 * If prefix data to do and then all
258 					 * the uio data nees to be checksumed,
259 					 * else just do any uio data.
260 					 */
261 					if (mlen)
262 						mlen += dp->db_cksumend
263 						    - dp->db_cksumstuff;
264 					else {
265 						w = (ushort_t *)(mp->b_rptr +
266 						    dp->db_cksumstuff);
267 						mlen = dp->db_cksumend -
268 						    dp->db_cksumstuff;
269 					}
270 				}
271 			}
272 		} else {
273 			/*
274 			 * Checksum all of the mblk data.
275 			 */
276 		snorm:
277 			mlen = mp->b_wptr - (uchar_t *)w;
278 		}
279 
280 		TRACE_2(TR_FAC_IP, TR_IP_CKSUM_START,
281 		    "ip_cksum_start:%p (%X)", mp, sum)
282 
283 		mp = mp->b_cont;
284 		if (mlen > 0 && pmlen == -1) {
285 			/*
286 			 * There is a byte left from the last
287 			 * segment; add it into the checksum.
288 			 * Don't have to worry about a carry-
289 			 * out here because we make sure that
290 			 * high part of (32 bit) sum is small
291 			 * below.
292 			 */
293 #ifdef _LITTLE_ENDIAN
294 			sum += *(uchar_t *)w << 8;
295 #else
296 			sum += *(uchar_t *)w;
297 #endif
298 			w = (ushort_t *)((char *)w + 1);
299 			mlen--;
300 			pmlen = 0;
301 		}
302 		if (mlen > 0) {
303 			if (is_even(w)) {
304 				sum = ip_ocsum(w, mlen>>1, sum);
305 				w += mlen>>1;
306 				/*
307 				 * If we had an odd number of bytes,
308 				 * then the last byte goes in the high
309 				 * part of the sum, and we take the
310 				 * first byte to the low part of the sum
311 				 * the next time around the loop.
312 				 */
313 				if (is_odd(mlen)) {
314 #ifdef _LITTLE_ENDIAN
315 					sum += *(uchar_t *)w;
316 #else
317 					sum += *(uchar_t *)w << 8;
318 #endif
319 					pmlen = -1;
320 				}
321 			} else {
322 				ushort_t swsum;
323 #ifdef _LITTLE_ENDIAN
324 				sum += *(uchar_t *)w;
325 #else
326 				sum += *(uchar_t *)w << 8;
327 #endif
328 				mlen--;
329 				w = (ushort_t *)(1 + (uintptr_t)w);
330 
331 				/* Do a separate checksum and copy operation */
332 				swsum = ip_ocsum(w, mlen>>1, 0);
333 				sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
334 				w += mlen>>1;
335 				/*
336 				 * If we had an even number of bytes,
337 				 * then the last byte goes in the low
338 				 * part of the sum.  Otherwise we had an
339 				 * odd number of bytes and we take the first
340 				 * byte to the low part of the sum the
341 				 * next time around the loop.
342 				 */
343 				if (is_odd(mlen)) {
344 #ifdef _LITTLE_ENDIAN
345 					sum += *(uchar_t *)w << 8;
346 #else
347 					sum += *(uchar_t *)w;
348 #endif
349 				}
350 				else
351 					pmlen = -1;
352 			}
353 		}
354 		/*
355 		 * Locate the next block with some data.
356 		 * If there is a word split across a boundary we
357 		 * will wrap to the top with mlen == -1 and
358 		 * then add it in shifted appropriately.
359 		 */
360 		offset = 0;
361 		if (! pmp) {
362 			for (; ; ) {
363 				if (mp == 0) {
364 					goto done;
365 				}
366 				if (mp_len(mp))
367 					break;
368 				mp = mp->b_cont;
369 			}
370 			dp = mp->b_datap;
371 			if (dp->db_struioflag & STRUIO_IP)
372 				psum = *(ushort_t *)dp->db_struioun.data;
373 		} else
374 			mp = pmp;
375 	}
376 done:
377 	/*
378 	 * Add together high and low parts of sum
379 	 * and carry to get cksum.
380 	 * Have to be careful to not drop the last
381 	 * carry here.
382 	 */
383 	sum = (sum & 0xFFFF) + (sum >> 16);
384 	sum = (sum & 0xFFFF) + (sum >> 16);
385 	TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
386 	    "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
387 	return (sum);
388 }
389 
390 uint32_t
391 sctp_cksum(mblk_t *mp, int offset)
392 {
393 	uint32_t crc32;
394 	uchar_t *p = NULL;
395 
396 	crc32 = 0xFFFFFFFF;
397 	p = mp->b_rptr + offset;
398 	crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
399 	for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
400 		crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
401 	}
402 
403 	/* Complement the result */
404 	crc32 = ~crc32;
405 
406 	return (crc32);
407 }
408 
409 /*
410  * Routine to compute Internet checksum (16-bit 1's complement) of a given
411  * Multidata packet descriptor.  As in the non-Multidata routine, this doesn't
412  * 1's complement the result, such that it may be used to compute partial
413  * checksums.  Since it works on buffer spans rather than mblks, this routine
414  * does not handle existing partial checksum value as in the STRUIO_IP special
415  * mblk case (supporting this is rather trivial, but is perhaps of no use at
416  * the moment unless synchronous streams and delayed checksum calculation are
417  * revived.)
418  *
419  * Note also here that the given Multidata packet descriptor must refer to
420  * a header buffer, i.e. it must have a header fragment.  In addition, the
421  * offset must lie within the boundary of the header fragment.  For the
422  * outbound tcp (MDT) case, this will not be an issue because the stack
423  * ensures that such conditions are met, and that there is no need whatsoever
424  * to compute partial checksums on an arbitrary offset that is not part of
425  * the header fragment.  We may need to revisit this routine to handle all
426  * cases of the inbound (MDR) case, especially when we need to perform partial
427  * checksum calculation due to padded bytes (non-zeroes) in the frame.
428  */
429 uint_t
430 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum)
431 {
432 	pdescinfo_t	*pdi = &pd->pd_pdi;
433 	uchar_t		*reg_start, *reg_end;
434 	ssize_t		mlen, i;
435 	ushort_t	*w;
436 	boolean_t	byteleft = B_FALSE;
437 
438 	ASSERT((pdi->flags & PDESC_HAS_REF) != 0);
439 	ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL);
440 	ASSERT(offset <= PDESC_HDRL(pdi));
441 
442 	for (i = 0; i < pdi->pld_cnt + 1; i++) {
443 		if (i == 0) {
444 			reg_start = pdi->hdr_rptr;
445 			reg_end = pdi->hdr_wptr;
446 		} else {
447 			reg_start = pdi->pld_ary[i - 1].pld_rptr;
448 			reg_end = pdi->pld_ary[i - 1].pld_wptr;
449 			offset = 0;
450 		}
451 
452 		w = (ushort_t *)(reg_start + offset);
453 		mlen = reg_end - (uchar_t *)w;
454 
455 		if (mlen > 0 && byteleft) {
456 			/*
457 			 * There is a byte left from the last
458 			 * segment; add it into the checksum.
459 			 * Don't have to worry about a carry-
460 			 * out here because we make sure that
461 			 * high part of (32 bit) sum is small
462 			 * below.
463 			 */
464 #ifdef _LITTLE_ENDIAN
465 			sum += *(uchar_t *)w << 8;
466 #else
467 			sum += *(uchar_t *)w;
468 #endif
469 			w = (ushort_t *)((char *)w + 1);
470 			mlen--;
471 			byteleft = B_FALSE;
472 		}
473 
474 		if (mlen == 0)
475 			continue;
476 
477 		if (is_even(w)) {
478 			sum = ip_ocsum(w, mlen >> 1, sum);
479 			w += mlen >> 1;
480 			/*
481 			 * If we had an odd number of bytes,
482 			 * then the last byte goes in the high
483 			 * part of the sum, and we take the
484 			 * first byte to the low part of the sum
485 			 * the next time around the loop.
486 			 */
487 			if (is_odd(mlen)) {
488 #ifdef _LITTLE_ENDIAN
489 				sum += *(uchar_t *)w;
490 #else
491 				sum += *(uchar_t *)w << 8;
492 #endif
493 				byteleft = B_TRUE;
494 			}
495 		} else {
496 			ushort_t swsum;
497 #ifdef _LITTLE_ENDIAN
498 			sum += *(uchar_t *)w;
499 #else
500 			sum += *(uchar_t *)w << 8;
501 #endif
502 			mlen--;
503 			w = (ushort_t *)(1 + (uintptr_t)w);
504 
505 			/* Do a separate checksum and copy operation */
506 			swsum = ip_ocsum(w, mlen >> 1, 0);
507 			sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
508 			w += mlen >> 1;
509 			/*
510 			 * If we had an even number of bytes,
511 			 * then the last byte goes in the low
512 			 * part of the sum.  Otherwise we had an
513 			 * odd number of bytes and we take the first
514 			 * byte to the low part of the sum the
515 			 * next time around the loop.
516 			 */
517 			if (is_odd(mlen)) {
518 #ifdef _LITTLE_ENDIAN
519 				sum += *(uchar_t *)w << 8;
520 #else
521 				sum += *(uchar_t *)w;
522 #endif
523 			} else {
524 				byteleft = B_TRUE;
525 			}
526 		}
527 	}
528 
529 	/*
530 	 * Add together high and low parts of sum and carry to get cksum.
531 	 * Have to be careful to not drop the last carry here.
532 	 */
533 	sum = (sum & 0xffff) + (sum >> 16);
534 	sum = (sum & 0xffff) + (sum >> 16);
535 
536 	return (sum);
537 }
538 
539 /* Return the IP checksum for the IP header at "iph". */
540 uint16_t
541 ip_csum_hdr(ipha_t *ipha)
542 {
543 	uint16_t	*uph;
544 	uint32_t	sum;
545 	int		opt_len;
546 
547 	opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
548 	    IP_SIMPLE_HDR_LENGTH_IN_WORDS;
549 	uph = (uint16_t *)ipha;
550 	sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
551 	    uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
552 	if (opt_len > 0) {
553 		do {
554 			sum += uph[10];
555 			sum += uph[11];
556 			uph += 2;
557 		} while (--opt_len);
558 	}
559 	sum = (sum & 0xFFFF) + (sum >> 16);
560 	sum = ~(sum + (sum >> 16)) & 0xFFFF;
561 	if (sum == 0xffff)
562 		sum = 0;
563 	return ((uint16_t)sum);
564 }
565