/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2022 Garrett D'Amore
 * Copyright 2025 Oxide Computer Company
 */
/* Copyright (c) 1990 Mentat Inc. */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/debug.h>
#include <sys/ddi.h>
#include <sys/vtrace.h>
#include <inet/sctp_crc32.h>
#include <inet/ip.h>
#include <inet/ip6.h>

/*
 * Defined elsewhere. As used in this file it is handed a 16-bit aligned
 * pointer, a count of 16-bit words (byte length >> 1) and a running sum,
 * and its return value is used as the updated running sum.
 */
extern unsigned int ip_ocsum(ushort_t *, int, unsigned int);

/*
 * Checksum routine for Internet Protocol family headers.
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */

/* Number of bytes between an mblk's read and write pointers. */
#define	mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr)

/*
 * Even/Odd checks. Usually they are performed on pointers but may be
 * used on integers as well. uintptr_t is long enough to hold both
 * integer and pointer.
 */
#define	is_odd(p) (((uintptr_t)(p) & 0x1) != 0)
#define	is_even(p) (!is_odd(p))


#ifdef ZC_TEST
/*
 * Disable the TCP s/w cksum.
 * XXX - This is just a hack for testing purposes. Don't use it for
 * anything else!
 */
int noswcksum = 0;
#endif
/*
 * Compute a partial Internet checksum over the mblk chain 'mp'.
 *
 * mp:     first mblk of the chain to sum.
 * offset: byte offset from mp->b_rptr at which summing starts in the
 *         first mblk; continuation mblks are summed from their b_rptr.
 * sum:    running sum to which the data is added.
 *
 * Note: this does not ones-complement the result since it is used
 * when computing partial checksums.
 * For non-STRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned.
 * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned.
 *
 * Note: for STRUIO_IP special mblks some data may have been previously
 * checksummed; this routine will handle additional data prefixed within
 * an mblk or b_cont (chained) mblk(s). This routine will also handle
 * suffixed b_cont mblk(s) and data suffixed within an mblk.
 *
 * As used below, a STRUIO_IP dblk carries a stored 16-bit partial checksum
 * in db_struioun.data; db_cksumstart, db_cksumstuff and db_cksumend are
 * byte offsets from b_rptr. Judging from the code and its comments, the
 * range [db_cksumstart, db_cksumstuff) is what the stored checksum already
 * covers and [db_cksumstuff, db_cksumend) is uio data still to be summed
 * (TODO confirm against the dblk definition).
 *
 * Side effect: if an mblk's pointers do not match the recorded checksum
 * range, STRUIO_IP is cleared in its dblk and the mblk is summed in full.
 *
 * Return value: on the single-mblk fast path, whatever ip_ocsum() returns
 * (or the stored partial checksum alone when nothing remains to be summed);
 * on the slow path, the accumulator folded down to 16 bits. When built with
 * ZC_TEST and noswcksum is non-zero, 0xffff is returned unconditionally.
 */
unsigned int
ip_cksum(mblk_t *mp, int offset, uint_t sum)
{
	ushort_t *w;		/* cursor into the data being summed */
	ssize_t	mlen;		/* bytes left to sum in the current segment */
	int pmlen;		/* -1: an odd byte is pending; else 0 */
	mblk_t *pmp;		/* non-NULL: revisit this mblk for uio data */
	dblk_t *dp = mp->b_datap;
	ushort_t psum = 0;	/* stored partial checksum of current dblk */

#ifdef ZC_TEST
	if (noswcksum)
		return (0xffff);
#endif
	ASSERT(dp);

	if (mp->b_cont == NULL) {
		/*
		 * May be fast-path, only one mblk.
		 */
		w = (ushort_t *)(mp->b_rptr + offset);
		if (dp->db_struioflag & STRUIO_IP) {
			/*
			 * Checksum any data not already done by
			 * the caller and add in any partial checksum.
			 */
			if ((offset > dp->db_cksumstart) ||
			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
			    dp->db_cksumend)) {
				/*
				 * Mblk data pointers aren't inclusive
				 * of uio data, so disregard checksum.
				 *
				 * Not all of the data in the dblk is being
				 * used, so make sure the precalculated
				 * checksum is not used in this case.
				 */
				dp->db_struioflag &= ~STRUIO_IP;
				goto norm;
			}
			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
			psum = *(ushort_t *)dp->db_struioun.data;
			/* mlen is the length of any prefix data to sum. */
			if ((mlen = dp->db_cksumstart - offset) < 0)
				mlen = 0;
			if (is_odd(mlen))
				goto slow;
			if (mlen && dp->db_cksumstart != dp->db_cksumstuff &&
			    dp->db_cksumend != dp->db_cksumstuff) {
				/*
				 * There is prefix data to do and some uio
				 * data has already been checksummed and there
				 * is more uio data to do, so do the prefix
				 * data first, then do the remainder of the
				 * uio data.
				 */
				sum = ip_ocsum(w, mlen >> 1, sum);
				w = (ushort_t *)(mp->b_rptr +
				    dp->db_cksumstuff);
				if (is_odd(w)) {
					/*
					 * The prefix is already in sum; let
					 * the slow loop do only the uio part
					 * of this mblk.
					 */
					pmp = mp;
					goto slow1;
				}
				mlen = dp->db_cksumend - dp->db_cksumstuff;
			} else if (dp->db_cksumend != dp->db_cksumstuff) {
				/*
				 * There may be uio data to do, if there is
				 * prefix data to do then add in all of the
				 * uio data (if any) to do, else just do any
				 * uio data.
				 */
				if (mlen)
					mlen += dp->db_cksumend
					    - dp->db_cksumstuff;
				else {
					w = (ushort_t *)(mp->b_rptr +
					    dp->db_cksumstuff);
					if (is_odd(w))
						goto slow;
					mlen = dp->db_cksumend
					    - dp->db_cksumstuff;
				}
			} else if (mlen == 0)
				/*
				 * Nothing left to sum: the stored partial
				 * checksum is the answer.
				 * NOTE(review): the caller-supplied 'sum' is
				 * not added in on this path - confirm that
				 * is intended.
				 */
				return (psum);

			if (is_odd(mlen))
				goto slow;
			sum += psum;
		} else {
			/*
			 * Checksum all data not already done by the caller.
			 */
		norm:
			mlen = mp->b_wptr - (uchar_t *)w;
			if (is_odd(mlen))
				goto slow;
		}
		ASSERT(is_even(w));
		ASSERT(is_even(mlen));
		return (ip_ocsum(w, mlen >> 1, sum));
	}
	if (dp->db_struioflag & STRUIO_IP)
		psum = *(ushort_t *)dp->db_struioun.data;
slow:
	DTRACE_PROBE(ip_cksum_slow);
	pmp = 0;
slow1:
	mlen = 0;
	pmlen = 0;
	for (; ; ) {
		/*
		 * Each trip around loop adds in word(s) from one mbuf segment
		 * (except for when pmp == mp, then it's two partial trips).
		 */
		w = (ushort_t *)(mp->b_rptr + offset);
		if (pmp) {
			/*
			 * This is the second trip around for this mblk.
			 */
			pmp = 0;
			mlen = 0;
			goto douio;
		} else if (dp->db_struioflag & STRUIO_IP) {
			/*
			 * Checksum any data not already done by the
			 * caller and add in any partial checksum.
			 */
			if ((offset > dp->db_cksumstart) ||
			    mp->b_wptr != (uchar_t *)(mp->b_rptr +
			    dp->db_cksumend)) {
				/*
				 * Mblk data pointers aren't inclusive
				 * of uio data, so disregard checksum.
				 *
				 * Not all of the data in the dblk is being
				 * used, so make sure the precalculated
				 * checksum is not used in this case.
				 */
				dp->db_struioflag &= ~STRUIO_IP;
				goto snorm;
			}
			ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend));
			if ((mlen = dp->db_cksumstart - offset) < 0)
				mlen = 0;
			if (mlen && dp->db_cksumstart != dp->db_cksumstuff) {
				/*
				 * There is prefix data to do and some
				 * uio data has already been checksummed,
				 * so do the prefix data only this trip.
				 */
				pmp = mp;
			} else {
				/*
				 * Add in any partial cksum (if any) and
				 * do the remainder of the uio data.
				 */
				int odd;
			douio:
				/*
				 * 'odd' is set when the span from
				 * db_cksumstart to db_cksumstuff has odd
				 * length, which flips the byte-lane state
				 * tracked in pmlen.
				 */
				odd = is_odd(dp->db_cksumstuff -
				    dp->db_cksumstart);
				if (pmlen == -1) {
					/*
					 * Previous mlen was odd, so swap
					 * the partial checksum bytes.
					 */
					sum += ((psum << 8) & 0xffff)
					    | (psum >> 8);
					if (odd)
						pmlen = 0;
				} else {
					sum += psum;
					if (odd)
						pmlen = -1;
				}
				if (dp->db_cksumend != dp->db_cksumstuff) {
					/*
					 * If there is prefix data to do then
					 * all the uio data needs to be
					 * checksummed, else just do any uio
					 * data.
					 */
					if (mlen)
						mlen += dp->db_cksumend
						    - dp->db_cksumstuff;
					else {
						w = (ushort_t *)(mp->b_rptr +
						    dp->db_cksumstuff);
						mlen = dp->db_cksumend -
						    dp->db_cksumstuff;
					}
				}
			}
		} else {
			/*
			 * Checksum all of the mblk data.
			 */
		snorm:
			mlen = mp->b_wptr - (uchar_t *)w;
		}

		/* Advance now; w and mlen still describe the old segment. */
		mp = mp->b_cont;
		if (mlen > 0 && pmlen == -1) {
			/*
			 * There is a byte left from the last
			 * segment; add it into the checksum.
			 * Don't have to worry about a carry-
			 * out here because we make sure that
			 * high part of (32 bit) sum is small
			 * below.
			 */
#ifdef _LITTLE_ENDIAN
			sum += *(uchar_t *)w << 8;
#else
			sum += *(uchar_t *)w;
#endif
			w = (ushort_t *)((char *)w + 1);
			mlen--;
			pmlen = 0;
		}
		if (mlen > 0) {
			if (is_even(w)) {
				sum = ip_ocsum(w, mlen>>1, sum);
				w += mlen>>1;
				/*
				 * If we had an odd number of bytes,
				 * then the last byte goes in the high
				 * part of the sum, and we take the
				 * first byte to the low part of the sum
				 * the next time around the loop.
				 */
				if (is_odd(mlen)) {
#ifdef _LITTLE_ENDIAN
					sum += *(uchar_t *)w;
#else
					sum += *(uchar_t *)w << 8;
#endif
					pmlen = -1;
				}
			} else {
				/*
				 * w is odd-aligned: add the leading byte by
				 * hand, sum the now-aligned remainder
				 * separately and add it in byte-swapped.
				 */
				ushort_t swsum;
#ifdef _LITTLE_ENDIAN
				sum += *(uchar_t *)w;
#else
				sum += *(uchar_t *)w << 8;
#endif
				mlen--;
				w = (ushort_t *)(1 + (uintptr_t)w);

				/* Do a separate checksum and copy operation */
				swsum = ip_ocsum(w, mlen>>1, 0);
				sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
				w += mlen>>1;
				/*
				 * If we had an even number of bytes
				 * (counting the leading byte consumed above),
				 * then the last byte goes in the low
				 * part of the sum. Otherwise we had an
				 * odd number of bytes and we take the first
				 * byte to the low part of the sum the
				 * next time around the loop.
				 */
				if (is_odd(mlen)) {
#ifdef _LITTLE_ENDIAN
					sum += *(uchar_t *)w << 8;
#else
					sum += *(uchar_t *)w;
#endif
				}
				else
					pmlen = -1;
			}
		}
		/*
		 * Locate the next block with some data.
		 * If there is a word split across a boundary we
		 * will wrap to the top with pmlen == -1 and
		 * then add it in shifted appropriately.
		 */
		offset = 0;
		if (! pmp) {
			for (; ; ) {
				if (mp == 0) {
					goto done;
				}
				if (mp_len(mp))
					break;
				mp = mp->b_cont;
			}
			dp = mp->b_datap;
			if (dp->db_struioflag & STRUIO_IP)
				psum = *(ushort_t *)dp->db_struioun.data;
		} else
			/* Go around again on the same mblk for its uio data. */
			mp = pmp;
	}
done:
	/*
	 * Add together high and low parts of sum
	 * and carry to get cksum.
	 * Have to be careful to not drop the last
	 * carry here.
	 */
	sum = (sum & 0xFFFF) + (sum >> 16);
	sum = (sum & 0xFFFF) + (sum >> 16);
	TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
	    "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
	return (sum);
}

/*
 * Compute the SCTP checksum over an mblk chain using sctp_crc32().
 *
 * The CRC is seeded with 0xFFFFFFFF. The first mblk contributes the bytes
 * from b_rptr + offset up to b_wptr; every b_cont mblk contributes all
 * MBLKL(mp) bytes starting at its b_rptr. The bitwise complement of the
 * final CRC is returned.
 */
uint32_t
sctp_cksum(mblk_t *mp, int offset)
{
	uint32_t crc32;
	uchar_t *p = NULL;

	crc32 = 0xFFFFFFFF;
	p = mp->b_rptr + offset;
	crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
	for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
		crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
	}

	/* Complement the result */
	crc32 = ~crc32;

	return (crc32);
}

/*
 * Return the IP checksum for the IP header at "ipha".
 *
 * The header is read as 16-bit words in memory order: the ten words of the
 * fixed header, plus two words for each 32-bit word by which the header
 * length (low four bits of ipha_version_and_hdr_length) exceeds
 * IP_SIMPLE_HDR_LENGTH_IN_WORDS. Every word is added in, including whatever
 * the header's checksum field currently holds (presumably callers zero it
 * first when generating a checksum - TODO confirm). The sum is folded to
 * 16 bits and complemented; a result of 0xffff is returned as 0.
 */
uint16_t
ip_csum_hdr(ipha_t *ipha)
{
	uint16_t *uph;
	uint32_t sum;
	int opt_len;	/* option length in 32-bit words */

	opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
	    IP_SIMPLE_HDR_LENGTH_IN_WORDS;
	uph = (uint16_t *)ipha;
	sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
	    uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
	if (opt_len > 0) {
		/* Each pass adds one 32-bit word of options. */
		do {
			sum += uph[10];
			sum += uph[11];
			uph += 2;
		} while (--opt_len);
	}
	/* Fold the carries back in, then take the ones-complement. */
	sum = (sum & 0xFFFF) + (sum >> 16);
	sum = ~(sum + (sum >> 16)) & 0xFFFF;
	if (sum == 0xffff)
		sum = 0;
	return ((uint16_t)sum);
}

/*
 * This function takes an mblk and IPv6 header as input and returns
 * three pieces of information.
 *
 * 'hdr_length_ptr': The IPv6 header length including extension headers.
 *		     It is written unconditionally once the version check
 *		     passes, so it must not be NULL.
 *
 * 'nexthdrpp':      A pointer to the "next header" field of the last
 *		     header walked, i.e. the byte holding the protocol
 *		     number of whatever follows the IPv6 header and its
 *		     extensions. This argument may be set to NULL if
 *		     only the length is desired.
 *
 * return:	     Whether or not the header is well formed.
 *
 * When B_FALSE is returned after one or more extension headers have been
 * walked, the output arguments hold the values for the headers walked so
 * far; if the version check fails they are not written at all.
 *
 * This function assumes the IPv6 header along with all extensions are
 * contained solely in this mblk: i.e., there is no b_cont walking.
 */
boolean_t
ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
    uint8_t **nexthdrpp)
{
	uint16_t length;
	uint_t	ehdrlen;
	uint8_t	*nexthdrp;
	uint8_t *whereptr;
	uint8_t *endptr;
	ip6_dest_t *desthdr;
	ip6_rthdr_t *rthdr;
	ip6_frag_t *fraghdr;

	if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION)
		return (B_FALSE);
	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdrp = &ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			break;

		switch (*nexthdrp) {
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			/* Assumes the headers are identical for hbh and dst */
			desthdr = (ip6_dest_t *)whereptr;
			/* Length field counts 8-byte units past the first. */
			ehdrlen = 8 * (desthdr->ip6d_len + 1);
			if ((uchar_t *)desthdr + ehdrlen > endptr)
				return (B_FALSE);
			nexthdrp = &desthdr->ip6d_nxt;
			break;
		case IPPROTO_ROUTING:
			rthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen =  8 * (rthdr->ip6r_len + 1);
			if ((uchar_t *)rthdr + ehdrlen > endptr)
				return (B_FALSE);
			nexthdrp = &rthdr->ip6r_nxt;
			break;
		case IPPROTO_FRAGMENT:
			fraghdr = (ip6_frag_t *)whereptr;
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)&fraghdr[1] > endptr)
				return (B_FALSE);
			nexthdrp = &fraghdr->ip6f_nxt;
			break;
		case IPPROTO_NONE:
			/* No next header means we're finished */
		default:
			/* Anything else is not walked here; stop. */
			*hdr_length_ptr = length;

			if (nexthdrpp != NULL)
				*nexthdrpp = nexthdrp;

			return (B_TRUE);
		}
		length += ehdrlen;
		whereptr += ehdrlen;
		*hdr_length_ptr = length;

		if (nexthdrpp != NULL)
			*nexthdrpp = nexthdrp;
	}
	/*
	 * The data in this mblk ran out (or fewer than MIN_EHDR_LEN bytes
	 * remain); decide based on what the last header said comes next.
	 */
	switch (*nexthdrp) {
	case IPPROTO_HOPOPTS:
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_FRAGMENT:
		/*
		 * If any known extension headers are still to be processed,
		 * the packet's malformed (or at least all the IP header(s) are
		 * not in the same mblk - and that should never happen).
		 */
		return (B_FALSE);

	default:
		/*
		 * If we get here, we know that all of the IP headers were in
		 * the same mblk, even if the ULP header is in the next mblk.
		 */
		*hdr_length_ptr = length;

		if (nexthdrpp != NULL)
			*nexthdrpp = nexthdrp;

		return (B_TRUE);
	}
}