1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2021 Joyent, Inc. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #include <sys/types.h> 29 #include <sys/inttypes.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/strsun.h> 33 #include <sys/debug.h> 34 #include <sys/ddi.h> 35 #include <sys/vtrace.h> 36 #include <inet/sctp_crc32.h> 37 #include <inet/ip.h> 38 #include <inet/ip6.h> 39 40 #include <sys/multidata.h> 41 #include <sys/multidata_impl.h> 42 43 extern unsigned int ip_ocsum(ushort_t *, int, unsigned int); 44 45 /* 46 * Checksum routine for Internet Protocol family headers. 47 * This routine is very heavily used in the network 48 * code and should be modified for each CPU to be as fast as possible. 49 */ 50 51 #define mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr) 52 53 /* 54 * Even/Odd checks. Usually it is performed on pointers but may be 55 * used on integers as well. uintptr_t is long enough to hold both 56 * integer and pointer. 57 */ 58 #define is_odd(p) (((uintptr_t)(p) & 0x1) != 0) 59 #define is_even(p) (!is_odd(p)) 60 61 62 #ifdef ZC_TEST 63 /* 64 * Disable the TCP s/w cksum. 65 * XXX - This is just a hack for testing purpose. Don't use it for 66 * anything else! 67 */ 68 int noswcksum = 0; 69 #endif 70 /* 71 * Note: this does not ones-complement the result since it is used 72 * when computing partial checksums. 73 * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned. 74 * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned. 75 * 76 * Note: for STRUIO_IP special mblks some data may have been previously 77 * checksumed, this routine will handle additional data prefixed within 78 * an mblk or b_cont (chained) mblk(s). This routine will also handle 79 * suffixed b_cont mblk(s) and data suffixed within an mblk. 80 */ 81 unsigned int 82 ip_cksum(mblk_t *mp, int offset, uint_t sum) 83 { 84 ushort_t *w; 85 ssize_t mlen; 86 int pmlen; 87 mblk_t *pmp; 88 dblk_t *dp = mp->b_datap; 89 ushort_t psum = 0; 90 91 #ifdef ZC_TEST 92 if (noswcksum) 93 return (0xffff); 94 #endif 95 ASSERT(dp); 96 97 if (mp->b_cont == NULL) { 98 /* 99 * May be fast-path, only one mblk. 100 */ 101 w = (ushort_t *)(mp->b_rptr + offset); 102 if (dp->db_struioflag & STRUIO_IP) { 103 /* 104 * Checksum any data not already done by 105 * the caller and add in any partial checksum. 106 */ 107 if ((offset > dp->db_cksumstart) || 108 mp->b_wptr != (uchar_t *)(mp->b_rptr + 109 dp->db_cksumend)) { 110 /* 111 * Mblk data pointers aren't inclusive 112 * of uio data, so disregard checksum. 113 * 114 * not using all of data in dblk make sure 115 * not use to use the precalculated checksum 116 * in this case. 117 */ 118 dp->db_struioflag &= ~STRUIO_IP; 119 goto norm; 120 } 121 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend)); 122 psum = *(ushort_t *)dp->db_struioun.data; 123 if ((mlen = dp->db_cksumstart - offset) < 0) 124 mlen = 0; 125 if (is_odd(mlen)) 126 goto slow; 127 if (mlen && dp->db_cksumstart != dp->db_cksumstuff && 128 dp->db_cksumend != dp->db_cksumstuff) { 129 /* 130 * There is prefix data to do and some uio 131 * data has already been checksumed and there 132 * is more uio data to do, so do the prefix 133 * data first, then do the remainder of the 134 * uio data. 135 */ 136 sum = ip_ocsum(w, mlen >> 1, sum); 137 w = (ushort_t *)(mp->b_rptr + 138 dp->db_cksumstuff); 139 if (is_odd(w)) { 140 pmp = mp; 141 goto slow1; 142 } 143 mlen = dp->db_cksumend - dp->db_cksumstuff; 144 } else if (dp->db_cksumend != dp->db_cksumstuff) { 145 /* 146 * There may be uio data to do, if there is 147 * prefix data to do then add in all of the 148 * uio data (if any) to do, else just do any 149 * uio data. 150 */ 151 if (mlen) 152 mlen += dp->db_cksumend 153 - dp->db_cksumstuff; 154 else { 155 w = (ushort_t *)(mp->b_rptr + 156 dp->db_cksumstuff); 157 if (is_odd(w)) 158 goto slow; 159 mlen = dp->db_cksumend 160 - dp->db_cksumstuff; 161 } 162 } else if (mlen == 0) 163 return (psum); 164 165 if (is_odd(mlen)) 166 goto slow; 167 sum += psum; 168 } else { 169 /* 170 * Checksum all data not already done by the caller. 171 */ 172 norm: 173 mlen = mp->b_wptr - (uchar_t *)w; 174 if (is_odd(mlen)) 175 goto slow; 176 } 177 ASSERT(is_even(w)); 178 ASSERT(is_even(mlen)); 179 return (ip_ocsum(w, mlen >> 1, sum)); 180 } 181 if (dp->db_struioflag & STRUIO_IP) 182 psum = *(ushort_t *)dp->db_struioun.data; 183 slow: 184 pmp = 0; 185 slow1: 186 mlen = 0; 187 pmlen = 0; 188 for (; ; ) { 189 /* 190 * Each trip around loop adds in word(s) from one mbuf segment 191 * (except for when pmp == mp, then its two partial trips). 192 */ 193 w = (ushort_t *)(mp->b_rptr + offset); 194 if (pmp) { 195 /* 196 * This is the second trip around for this mblk. 197 */ 198 pmp = 0; 199 mlen = 0; 200 goto douio; 201 } else if (dp->db_struioflag & STRUIO_IP) { 202 /* 203 * Checksum any data not already done by the 204 * caller and add in any partial checksum. 205 */ 206 if ((offset > dp->db_cksumstart) || 207 mp->b_wptr != (uchar_t *)(mp->b_rptr + 208 dp->db_cksumend)) { 209 /* 210 * Mblk data pointers aren't inclusive 211 * of uio data, so disregard checksum. 212 * 213 * not using all of data in dblk make sure 214 * not use to use the precalculated checksum 215 * in this case. 216 */ 217 dp->db_struioflag &= ~STRUIO_IP; 218 goto snorm; 219 } 220 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend)); 221 if ((mlen = dp->db_cksumstart - offset) < 0) 222 mlen = 0; 223 if (mlen && dp->db_cksumstart != dp->db_cksumstuff) { 224 /* 225 * There is prefix data too do and some 226 * uio data has already been checksumed, 227 * so do the prefix data only this trip. 228 */ 229 pmp = mp; 230 } else { 231 /* 232 * Add in any partial cksum (if any) and 233 * do the remainder of the uio data. 234 */ 235 int odd; 236 douio: 237 odd = is_odd(dp->db_cksumstuff - 238 dp->db_cksumstart); 239 if (pmlen == -1) { 240 /* 241 * Previous mlen was odd, so swap 242 * the partial checksum bytes. 243 */ 244 sum += ((psum << 8) & 0xffff) 245 | (psum >> 8); 246 if (odd) 247 pmlen = 0; 248 } else { 249 sum += psum; 250 if (odd) 251 pmlen = -1; 252 } 253 if (dp->db_cksumend != dp->db_cksumstuff) { 254 /* 255 * If prefix data to do and then all 256 * the uio data nees to be checksumed, 257 * else just do any uio data. 258 */ 259 if (mlen) 260 mlen += dp->db_cksumend 261 - dp->db_cksumstuff; 262 else { 263 w = (ushort_t *)(mp->b_rptr + 264 dp->db_cksumstuff); 265 mlen = dp->db_cksumend - 266 dp->db_cksumstuff; 267 } 268 } 269 } 270 } else { 271 /* 272 * Checksum all of the mblk data. 273 */ 274 snorm: 275 mlen = mp->b_wptr - (uchar_t *)w; 276 } 277 278 mp = mp->b_cont; 279 if (mlen > 0 && pmlen == -1) { 280 /* 281 * There is a byte left from the last 282 * segment; add it into the checksum. 283 * Don't have to worry about a carry- 284 * out here because we make sure that 285 * high part of (32 bit) sum is small 286 * below. 287 */ 288 #ifdef _LITTLE_ENDIAN 289 sum += *(uchar_t *)w << 8; 290 #else 291 sum += *(uchar_t *)w; 292 #endif 293 w = (ushort_t *)((char *)w + 1); 294 mlen--; 295 pmlen = 0; 296 } 297 if (mlen > 0) { 298 if (is_even(w)) { 299 sum = ip_ocsum(w, mlen>>1, sum); 300 w += mlen>>1; 301 /* 302 * If we had an odd number of bytes, 303 * then the last byte goes in the high 304 * part of the sum, and we take the 305 * first byte to the low part of the sum 306 * the next time around the loop. 307 */ 308 if (is_odd(mlen)) { 309 #ifdef _LITTLE_ENDIAN 310 sum += *(uchar_t *)w; 311 #else 312 sum += *(uchar_t *)w << 8; 313 #endif 314 pmlen = -1; 315 } 316 } else { 317 ushort_t swsum; 318 #ifdef _LITTLE_ENDIAN 319 sum += *(uchar_t *)w; 320 #else 321 sum += *(uchar_t *)w << 8; 322 #endif 323 mlen--; 324 w = (ushort_t *)(1 + (uintptr_t)w); 325 326 /* Do a separate checksum and copy operation */ 327 swsum = ip_ocsum(w, mlen>>1, 0); 328 sum += ((swsum << 8) & 0xffff) | (swsum >> 8); 329 w += mlen>>1; 330 /* 331 * If we had an even number of bytes, 332 * then the last byte goes in the low 333 * part of the sum. Otherwise we had an 334 * odd number of bytes and we take the first 335 * byte to the low part of the sum the 336 * next time around the loop. 337 */ 338 if (is_odd(mlen)) { 339 #ifdef _LITTLE_ENDIAN 340 sum += *(uchar_t *)w << 8; 341 #else 342 sum += *(uchar_t *)w; 343 #endif 344 } 345 else 346 pmlen = -1; 347 } 348 } 349 /* 350 * Locate the next block with some data. 351 * If there is a word split across a boundary we 352 * will wrap to the top with mlen == -1 and 353 * then add it in shifted appropriately. 354 */ 355 offset = 0; 356 if (! pmp) { 357 for (; ; ) { 358 if (mp == 0) { 359 goto done; 360 } 361 if (mp_len(mp)) 362 break; 363 mp = mp->b_cont; 364 } 365 dp = mp->b_datap; 366 if (dp->db_struioflag & STRUIO_IP) 367 psum = *(ushort_t *)dp->db_struioun.data; 368 } else 369 mp = pmp; 370 } 371 done: 372 /* 373 * Add together high and low parts of sum 374 * and carry to get cksum. 375 * Have to be careful to not drop the last 376 * carry here. 377 */ 378 sum = (sum & 0xFFFF) + (sum >> 16); 379 sum = (sum & 0xFFFF) + (sum >> 16); 380 TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END, 381 "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum); 382 return (sum); 383 } 384 385 uint32_t 386 sctp_cksum(mblk_t *mp, int offset) 387 { 388 uint32_t crc32; 389 uchar_t *p = NULL; 390 391 crc32 = 0xFFFFFFFF; 392 p = mp->b_rptr + offset; 393 crc32 = sctp_crc32(crc32, p, mp->b_wptr - p); 394 for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) { 395 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp)); 396 } 397 398 /* Complement the result */ 399 crc32 = ~crc32; 400 401 return (crc32); 402 } 403 404 /* 405 * Routine to compute Internet checksum (16-bit 1's complement) of a given 406 * Multidata packet descriptor. As in the non-Multidata routine, this doesn't 407 * 1's complement the result, such that it may be used to compute partial 408 * checksums. Since it works on buffer spans rather than mblks, this routine 409 * does not handle existing partial checksum value as in the STRUIO_IP special 410 * mblk case (supporting this is rather trivial, but is perhaps of no use at 411 * the moment unless synchronous streams and delayed checksum calculation are 412 * revived.) 413 * 414 * Note also here that the given Multidata packet descriptor must refer to 415 * a header buffer, i.e. it must have a header fragment. In addition, the 416 * offset must lie within the boundary of the header fragment. For the 417 * outbound tcp (MDT) case, this will not be an issue because the stack 418 * ensures that such conditions are met, and that there is no need whatsoever 419 * to compute partial checksums on an arbitrary offset that is not part of 420 * the header fragment. We may need to revisit this routine to handle all 421 * cases of the inbound (MDR) case, especially when we need to perform partial 422 * checksum calculation due to padded bytes (non-zeroes) in the frame. 423 */ 424 uint_t 425 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum) 426 { 427 pdescinfo_t *pdi = &pd->pd_pdi; 428 uchar_t *reg_start, *reg_end; 429 ssize_t mlen, i; 430 ushort_t *w; 431 boolean_t byteleft = B_FALSE; 432 433 ASSERT((pdi->flags & PDESC_HAS_REF) != 0); 434 ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL); 435 ASSERT(offset <= PDESC_HDRL(pdi)); 436 437 for (i = 0; i < pdi->pld_cnt + 1; i++) { 438 if (i == 0) { 439 reg_start = pdi->hdr_rptr; 440 reg_end = pdi->hdr_wptr; 441 } else { 442 reg_start = pdi->pld_ary[i - 1].pld_rptr; 443 reg_end = pdi->pld_ary[i - 1].pld_wptr; 444 offset = 0; 445 } 446 447 w = (ushort_t *)(reg_start + offset); 448 mlen = reg_end - (uchar_t *)w; 449 450 if (mlen > 0 && byteleft) { 451 /* 452 * There is a byte left from the last 453 * segment; add it into the checksum. 454 * Don't have to worry about a carry- 455 * out here because we make sure that 456 * high part of (32 bit) sum is small 457 * below. 458 */ 459 #ifdef _LITTLE_ENDIAN 460 sum += *(uchar_t *)w << 8; 461 #else 462 sum += *(uchar_t *)w; 463 #endif 464 w = (ushort_t *)((char *)w + 1); 465 mlen--; 466 byteleft = B_FALSE; 467 } 468 469 if (mlen == 0) 470 continue; 471 472 if (is_even(w)) { 473 sum = ip_ocsum(w, mlen >> 1, sum); 474 w += mlen >> 1; 475 /* 476 * If we had an odd number of bytes, 477 * then the last byte goes in the high 478 * part of the sum, and we take the 479 * first byte to the low part of the sum 480 * the next time around the loop. 481 */ 482 if (is_odd(mlen)) { 483 #ifdef _LITTLE_ENDIAN 484 sum += *(uchar_t *)w; 485 #else 486 sum += *(uchar_t *)w << 8; 487 #endif 488 byteleft = B_TRUE; 489 } 490 } else { 491 ushort_t swsum; 492 #ifdef _LITTLE_ENDIAN 493 sum += *(uchar_t *)w; 494 #else 495 sum += *(uchar_t *)w << 8; 496 #endif 497 mlen--; 498 w = (ushort_t *)(1 + (uintptr_t)w); 499 500 /* Do a separate checksum and copy operation */ 501 swsum = ip_ocsum(w, mlen >> 1, 0); 502 sum += ((swsum << 8) & 0xffff) | (swsum >> 8); 503 w += mlen >> 1; 504 /* 505 * If we had an even number of bytes, 506 * then the last byte goes in the low 507 * part of the sum. Otherwise we had an 508 * odd number of bytes and we take the first 509 * byte to the low part of the sum the 510 * next time around the loop. 511 */ 512 if (is_odd(mlen)) { 513 #ifdef _LITTLE_ENDIAN 514 sum += *(uchar_t *)w << 8; 515 #else 516 sum += *(uchar_t *)w; 517 #endif 518 } else { 519 byteleft = B_TRUE; 520 } 521 } 522 } 523 524 /* 525 * Add together high and low parts of sum and carry to get cksum. 526 * Have to be careful to not drop the last carry here. 527 */ 528 sum = (sum & 0xffff) + (sum >> 16); 529 sum = (sum & 0xffff) + (sum >> 16); 530 531 return (sum); 532 } 533 534 /* Return the IP checksum for the IP header at "iph". */ 535 uint16_t 536 ip_csum_hdr(ipha_t *ipha) 537 { 538 uint16_t *uph; 539 uint32_t sum; 540 int opt_len; 541 542 opt_len = (ipha->ipha_version_and_hdr_length & 0xF) - 543 IP_SIMPLE_HDR_LENGTH_IN_WORDS; 544 uph = (uint16_t *)ipha; 545 sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + 546 uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; 547 if (opt_len > 0) { 548 do { 549 sum += uph[10]; 550 sum += uph[11]; 551 uph += 2; 552 } while (--opt_len); 553 } 554 sum = (sum & 0xFFFF) + (sum >> 16); 555 sum = ~(sum + (sum >> 16)) & 0xFFFF; 556 if (sum == 0xffff) 557 sum = 0; 558 return ((uint16_t)sum); 559 } 560 561 /* 562 * This function takes an mblk and IPv6 header as input and returns 563 * three pieces of information. 564 * 565 * 'hdr_length_ptr': The IPv6 header length including extension headers. 566 * 567 * 'nethdrpp': A pointer to the "next hedader" value, aka the 568 * transport header. This argument may be set to NULL if 569 * only the length is desired. 570 * 571 * return: Whether or not the header was malformed. 572 * 573 * This function assumes the IPv6 header along with all extensions are 574 * contained solely in this mblk: i.e., there is no b_cont walking. 575 */ 576 boolean_t 577 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 578 uint8_t **nexthdrpp) 579 { 580 uint16_t length; 581 uint_t ehdrlen; 582 uint8_t *nexthdrp; 583 uint8_t *whereptr; 584 uint8_t *endptr; 585 ip6_dest_t *desthdr; 586 ip6_rthdr_t *rthdr; 587 ip6_frag_t *fraghdr; 588 589 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) 590 return (B_FALSE); 591 length = IPV6_HDR_LEN; 592 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 593 endptr = mp->b_wptr; 594 595 nexthdrp = &ip6h->ip6_nxt; 596 while (whereptr < endptr) { 597 /* Is there enough left for len + nexthdr? */ 598 if (whereptr + MIN_EHDR_LEN > endptr) 599 break; 600 601 switch (*nexthdrp) { 602 case IPPROTO_HOPOPTS: 603 case IPPROTO_DSTOPTS: 604 /* Assumes the headers are identical for hbh and dst */ 605 desthdr = (ip6_dest_t *)whereptr; 606 ehdrlen = 8 * (desthdr->ip6d_len + 1); 607 if ((uchar_t *)desthdr + ehdrlen > endptr) 608 return (B_FALSE); 609 nexthdrp = &desthdr->ip6d_nxt; 610 break; 611 case IPPROTO_ROUTING: 612 rthdr = (ip6_rthdr_t *)whereptr; 613 ehdrlen = 8 * (rthdr->ip6r_len + 1); 614 if ((uchar_t *)rthdr + ehdrlen > endptr) 615 return (B_FALSE); 616 nexthdrp = &rthdr->ip6r_nxt; 617 break; 618 case IPPROTO_FRAGMENT: 619 fraghdr = (ip6_frag_t *)whereptr; 620 ehdrlen = sizeof (ip6_frag_t); 621 if ((uchar_t *)&fraghdr[1] > endptr) 622 return (B_FALSE); 623 nexthdrp = &fraghdr->ip6f_nxt; 624 break; 625 case IPPROTO_NONE: 626 /* No next header means we're finished */ 627 default: 628 *hdr_length_ptr = length; 629 630 if (nexthdrpp != NULL) 631 *nexthdrpp = nexthdrp; 632 633 return (B_TRUE); 634 } 635 length += ehdrlen; 636 whereptr += ehdrlen; 637 *hdr_length_ptr = length; 638 639 if (nexthdrpp != NULL) 640 *nexthdrpp = nexthdrp; 641 } 642 switch (*nexthdrp) { 643 case IPPROTO_HOPOPTS: 644 case IPPROTO_DSTOPTS: 645 case IPPROTO_ROUTING: 646 case IPPROTO_FRAGMENT: 647 /* 648 * If any know extension headers are still to be processed, 649 * the packet's malformed (or at least all the IP header(s) are 650 * not in the same mblk - and that should never happen. 651 */ 652 return (B_FALSE); 653 654 default: 655 /* 656 * If we get here, we know that all of the IP headers were in 657 * the same mblk, even if the ULP header is in the next mblk. 658 */ 659 *hdr_length_ptr = length; 660 661 if (nexthdrpp != NULL) 662 *nexthdrpp = nexthdrp; 663 664 return (B_TRUE); 665 } 666 } 667