1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2021 Joyent, Inc.
25 * Copyright 2022 Garrett D'Amore
26 * Copyright 2025 Oxide Computer Company
27 */
28 /* Copyright (c) 1990 Mentat Inc. */
29
30 #include <sys/types.h>
31 #include <sys/inttypes.h>
32 #include <sys/systm.h>
33 #include <sys/stream.h>
34 #include <sys/strsun.h>
35 #include <sys/debug.h>
36 #include <sys/ddi.h>
37 #include <sys/vtrace.h>
38 #include <inet/sctp_crc32.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41
42 extern unsigned int ip_ocsum(ushort_t *, int, unsigned int);
43
44 /*
45 * Checksum routine for Internet Protocol family headers.
46 * This routine is very heavily used in the network
47 * code and should be modified for each CPU to be as fast as possible.
48 */
49
/* Number of data bytes in an mblk: the distance from b_rptr to b_wptr. */
#define	mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr)

/*
 * Even/Odd checks. Usually performed on pointers, but may be used on
 * integers as well: the argument is converted to uintptr_t, which is wide
 * enough to hold either, and only its lowest bit is examined.
 */
#define	is_odd(p) (((uintptr_t)(p) & 0x1) != 0)
#define	is_even(p) (!is_odd(p))


#ifdef ZC_TEST
/*
 * Disable the TCP s/w cksum: when this is non-zero, ip_cksum() returns
 * 0xffff without looking at the data.
 * XXX - This is just a hack for testing purposes. Don't use it for
 * anything else!
 */
int noswcksum = 0;
#endif
69 /*
70 * Note: this does not ones-complement the result since it is used
71 * when computing partial checksums.
72 * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
73 * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
74 *
75 * Note: for STRUIO_IP special mblks some data may have been previously
76 * checksumed, this routine will handle additional data prefixed within
77 * an mblk or b_cont (chained) mblk(s). This routine will also handle
78 * suffixed b_cont mblk(s) and data suffixed within an mblk.
79 */
80 unsigned int
ip_cksum(mblk_t * mp,int offset,uint_t sum)81 ip_cksum(mblk_t *mp, int offset, uint_t sum)
82 {
83 ushort_t *w;
84 ssize_t mlen;
85 int pmlen;
86 mblk_t *pmp;
87 dblk_t *dp = mp->b_datap;
88 ushort_t psum = 0;
89
90 #ifdef ZC_TEST
91 if (noswcksum)
92 return (0xffff);
93 #endif
94 ASSERT(dp);
95
96 if (mp->b_cont == NULL) {
97 /*
98 * May be fast-path, only one mblk.
99 */
100 w = (ushort_t *)(mp->b_rptr + offset);
101 if (dp->db_struioflag & STRUIO_IP) {
102 /*
103 * Checksum any data not already done by
104 * the caller and add in any partial checksum.
105 */
106 if ((offset > dp->db_cksumstart) ||
107 mp->b_wptr != (uchar_t *)(mp->b_rptr +
108 dp->db_cksumend)) {
109 /*
110 * Mblk data pointers aren't inclusive
111 * of uio data, so disregard checksum.
112 *
113 * not using all of data in dblk make sure
114 * not use to use the precalculated checksum
115 * in this case.
116 */
117 dp->db_struioflag &= ~STRUIO_IP;
118 goto norm;
119 }
120 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
121 psum = *(ushort_t *)dp->db_struioun.data;
122 if ((mlen = dp->db_cksumstart - offset) < 0)
123 mlen = 0;
124 if (is_odd(mlen))
125 goto slow;
126 if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
127 dp->db_cksumend != dp->db_cksumstuff) {
128 /*
129 * There is prefix data to do and some uio
130 * data has already been checksumed and there
131 * is more uio data to do, so do the prefix
132 * data first, then do the remainder of the
133 * uio data.
134 */
135 sum = ip_ocsum(w, mlen >> 1, sum);
136 w = (ushort_t *)(mp->b_rptr +
137 dp->db_cksumstuff);
138 if (is_odd(w)) {
139 pmp = mp;
140 goto slow1;
141 }
142 mlen = dp->db_cksumend - dp->db_cksumstuff;
143 } else if (dp->db_cksumend != dp->db_cksumstuff) {
144 /*
145 * There may be uio data to do, if there is
146 * prefix data to do then add in all of the
147 * uio data (if any) to do, else just do any
148 * uio data.
149 */
150 if (mlen)
151 mlen += dp->db_cksumend
152 - dp->db_cksumstuff;
153 else {
154 w = (ushort_t *)(mp->b_rptr +
155 dp->db_cksumstuff);
156 if (is_odd(w))
157 goto slow;
158 mlen = dp->db_cksumend
159 - dp->db_cksumstuff;
160 }
161 } else if (mlen == 0)
162 return (psum);
163
164 if (is_odd(mlen))
165 goto slow;
166 sum += psum;
167 } else {
168 /*
169 * Checksum all data not already done by the caller.
170 */
171 norm:
172 mlen = mp->b_wptr - (uchar_t *)w;
173 if (is_odd(mlen))
174 goto slow;
175 }
176 ASSERT(is_even(w));
177 ASSERT(is_even(mlen));
178 return (ip_ocsum(w, mlen >> 1, sum));
179 }
180 if (dp->db_struioflag & STRUIO_IP)
181 psum = *(ushort_t *)dp->db_struioun.data;
182 slow:
183 DTRACE_PROBE(ip_cksum_slow);
184 pmp = 0;
185 slow1:
186 mlen = 0;
187 pmlen = 0;
188 for (; ; ) {
189 /*
190 * Each trip around loop adds in word(s) from one mbuf segment
191 * (except for when pmp == mp, then its two partial trips).
192 */
193 w = (ushort_t *)(mp->b_rptr + offset);
194 if (pmp) {
195 /*
196 * This is the second trip around for this mblk.
197 */
198 pmp = 0;
199 mlen = 0;
200 goto douio;
201 } else if (dp->db_struioflag & STRUIO_IP) {
202 /*
203 * Checksum any data not already done by the
204 * caller and add in any partial checksum.
205 */
206 if ((offset > dp->db_cksumstart) ||
207 mp->b_wptr != (uchar_t *)(mp->b_rptr +
208 dp->db_cksumend)) {
209 /*
210 * Mblk data pointers aren't inclusive
211 * of uio data, so disregard checksum.
212 *
213 * not using all of data in dblk make sure
214 * not use to use the precalculated checksum
215 * in this case.
216 */
217 dp->db_struioflag &= ~STRUIO_IP;
218 goto snorm;
219 }
220 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
221 if ((mlen = dp->db_cksumstart - offset) < 0)
222 mlen = 0;
223 if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
224 /*
225 * There is prefix data too do and some
226 * uio data has already been checksumed,
227 * so do the prefix data only this trip.
228 */
229 pmp = mp;
230 } else {
231 /*
232 * Add in any partial cksum (if any) and
233 * do the remainder of the uio data.
234 */
235 int odd;
236 douio:
237 odd = is_odd(dp->db_cksumstuff -
238 dp->db_cksumstart);
239 if (pmlen == -1) {
240 /*
241 * Previous mlen was odd, so swap
242 * the partial checksum bytes.
243 */
244 sum += ((psum << 8) & 0xffff)
245 | (psum >> 8);
246 if (odd)
247 pmlen = 0;
248 } else {
249 sum += psum;
250 if (odd)
251 pmlen = -1;
252 }
253 if (dp->db_cksumend != dp->db_cksumstuff) {
254 /*
255 * If prefix data to do and then all
256 * the uio data nees to be checksumed,
257 * else just do any uio data.
258 */
259 if (mlen)
260 mlen += dp->db_cksumend
261 - dp->db_cksumstuff;
262 else {
263 w = (ushort_t *)(mp->b_rptr +
264 dp->db_cksumstuff);
265 mlen = dp->db_cksumend -
266 dp->db_cksumstuff;
267 }
268 }
269 }
270 } else {
271 /*
272 * Checksum all of the mblk data.
273 */
274 snorm:
275 mlen = mp->b_wptr - (uchar_t *)w;
276 }
277
278 mp = mp->b_cont;
279 if (mlen > 0 && pmlen == -1) {
280 /*
281 * There is a byte left from the last
282 * segment; add it into the checksum.
283 * Don't have to worry about a carry-
284 * out here because we make sure that
285 * high part of (32 bit) sum is small
286 * below.
287 */
288 #ifdef _LITTLE_ENDIAN
289 sum += *(uchar_t *)w << 8;
290 #else
291 sum += *(uchar_t *)w;
292 #endif
293 w = (ushort_t *)((char *)w + 1);
294 mlen--;
295 pmlen = 0;
296 }
297 if (mlen > 0) {
298 if (is_even(w)) {
299 sum = ip_ocsum(w, mlen>>1, sum);
300 w += mlen>>1;
301 /*
302 * If we had an odd number of bytes,
303 * then the last byte goes in the high
304 * part of the sum, and we take the
305 * first byte to the low part of the sum
306 * the next time around the loop.
307 */
308 if (is_odd(mlen)) {
309 #ifdef _LITTLE_ENDIAN
310 sum += *(uchar_t *)w;
311 #else
312 sum += *(uchar_t *)w << 8;
313 #endif
314 pmlen = -1;
315 }
316 } else {
317 ushort_t swsum;
318 #ifdef _LITTLE_ENDIAN
319 sum += *(uchar_t *)w;
320 #else
321 sum += *(uchar_t *)w << 8;
322 #endif
323 mlen--;
324 w = (ushort_t *)(1 + (uintptr_t)w);
325
326 /* Do a separate checksum and copy operation */
327 swsum = ip_ocsum(w, mlen>>1, 0);
328 sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
329 w += mlen>>1;
330 /*
331 * If we had an even number of bytes,
332 * then the last byte goes in the low
333 * part of the sum. Otherwise we had an
334 * odd number of bytes and we take the first
335 * byte to the low part of the sum the
336 * next time around the loop.
337 */
338 if (is_odd(mlen)) {
339 #ifdef _LITTLE_ENDIAN
340 sum += *(uchar_t *)w << 8;
341 #else
342 sum += *(uchar_t *)w;
343 #endif
344 }
345 else
346 pmlen = -1;
347 }
348 }
349 /*
350 * Locate the next block with some data.
351 * If there is a word split across a boundary we
352 * will wrap to the top with mlen == -1 and
353 * then add it in shifted appropriately.
354 */
355 offset = 0;
356 if (! pmp) {
357 for (; ; ) {
358 if (mp == 0) {
359 goto done;
360 }
361 if (mp_len(mp))
362 break;
363 mp = mp->b_cont;
364 }
365 dp = mp->b_datap;
366 if (dp->db_struioflag & STRUIO_IP)
367 psum = *(ushort_t *)dp->db_struioun.data;
368 } else
369 mp = pmp;
370 }
371 done:
372 /*
373 * Add together high and low parts of sum
374 * and carry to get cksum.
375 * Have to be careful to not drop the last
376 * carry here.
377 */
378 sum = (sum & 0xFFFF) + (sum >> 16);
379 sum = (sum & 0xFFFF) + (sum >> 16);
380 TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
381 "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
382 return (sum);
383 }
384
385 uint32_t
sctp_cksum(mblk_t * mp,int offset)386 sctp_cksum(mblk_t *mp, int offset)
387 {
388 uint32_t crc32;
389 uchar_t *p = NULL;
390
391 crc32 = 0xFFFFFFFF;
392 p = mp->b_rptr + offset;
393 crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
394 for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
395 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
396 }
397
398 /* Complement the result */
399 crc32 = ~crc32;
400
401 return (crc32);
402 }
403
404 /* Return the IP checksum for the IP header at "iph". */
405 uint16_t
ip_csum_hdr(ipha_t * ipha)406 ip_csum_hdr(ipha_t *ipha)
407 {
408 uint16_t *uph;
409 uint32_t sum;
410 int opt_len;
411
412 opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
413 IP_SIMPLE_HDR_LENGTH_IN_WORDS;
414 uph = (uint16_t *)ipha;
415 sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
416 uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
417 if (opt_len > 0) {
418 do {
419 sum += uph[10];
420 sum += uph[11];
421 uph += 2;
422 } while (--opt_len);
423 }
424 sum = (sum & 0xFFFF) + (sum >> 16);
425 sum = ~(sum + (sum >> 16)) & 0xFFFF;
426 if (sum == 0xffff)
427 sum = 0;
428 return ((uint16_t)sum);
429 }
430
431 /*
432 * This function takes an mblk and IPv6 header as input and returns
433 * three pieces of information.
434 *
435 * 'hdr_length_ptr': The IPv6 header length including extension headers.
436 *
437 * 'nethdrpp': A pointer to the "next hedader" value, aka the
438 * transport header. This argument may be set to NULL if
439 * only the length is desired.
440 *
441 * return: Whether or not the header is well formed.
442 *
443 * This function assumes the IPv6 header along with all extensions are
444 * contained solely in this mblk: i.e., there is no b_cont walking.
445 */
446 boolean_t
ip_hdr_length_nexthdr_v6(mblk_t * mp,ip6_t * ip6h,uint16_t * hdr_length_ptr,uint8_t ** nexthdrpp)447 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
448 uint8_t **nexthdrpp)
449 {
450 uint16_t length;
451 uint_t ehdrlen;
452 uint8_t *nexthdrp;
453 uint8_t *whereptr;
454 uint8_t *endptr;
455 ip6_dest_t *desthdr;
456 ip6_rthdr_t *rthdr;
457 ip6_frag_t *fraghdr;
458
459 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION)
460 return (B_FALSE);
461 length = IPV6_HDR_LEN;
462 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
463 endptr = mp->b_wptr;
464
465 nexthdrp = &ip6h->ip6_nxt;
466 while (whereptr < endptr) {
467 /* Is there enough left for len + nexthdr? */
468 if (whereptr + MIN_EHDR_LEN > endptr)
469 break;
470
471 switch (*nexthdrp) {
472 case IPPROTO_HOPOPTS:
473 case IPPROTO_DSTOPTS:
474 /* Assumes the headers are identical for hbh and dst */
475 desthdr = (ip6_dest_t *)whereptr;
476 ehdrlen = 8 * (desthdr->ip6d_len + 1);
477 if ((uchar_t *)desthdr + ehdrlen > endptr)
478 return (B_FALSE);
479 nexthdrp = &desthdr->ip6d_nxt;
480 break;
481 case IPPROTO_ROUTING:
482 rthdr = (ip6_rthdr_t *)whereptr;
483 ehdrlen = 8 * (rthdr->ip6r_len + 1);
484 if ((uchar_t *)rthdr + ehdrlen > endptr)
485 return (B_FALSE);
486 nexthdrp = &rthdr->ip6r_nxt;
487 break;
488 case IPPROTO_FRAGMENT:
489 fraghdr = (ip6_frag_t *)whereptr;
490 ehdrlen = sizeof (ip6_frag_t);
491 if ((uchar_t *)&fraghdr[1] > endptr)
492 return (B_FALSE);
493 nexthdrp = &fraghdr->ip6f_nxt;
494 break;
495 case IPPROTO_NONE:
496 /* No next header means we're finished */
497 default:
498 *hdr_length_ptr = length;
499
500 if (nexthdrpp != NULL)
501 *nexthdrpp = nexthdrp;
502
503 return (B_TRUE);
504 }
505 length += ehdrlen;
506 whereptr += ehdrlen;
507 *hdr_length_ptr = length;
508
509 if (nexthdrpp != NULL)
510 *nexthdrpp = nexthdrp;
511 }
512 switch (*nexthdrp) {
513 case IPPROTO_HOPOPTS:
514 case IPPROTO_DSTOPTS:
515 case IPPROTO_ROUTING:
516 case IPPROTO_FRAGMENT:
517 /*
518 * If any know extension headers are still to be processed,
519 * the packet's malformed (or at least all the IP header(s) are
520 * not in the same mblk - and that should never happen.
521 */
522 return (B_FALSE);
523
524 default:
525 /*
526 * If we get here, we know that all of the IP headers were in
527 * the same mblk, even if the ULP header is in the next mblk.
528 */
529 *hdr_length_ptr = length;
530
531 if (nexthdrpp != NULL)
532 *nexthdrpp = nexthdrp;
533
534 return (B_TRUE);
535 }
536 }
537