1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2019 Joyent, Inc. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #include <sys/types.h> 29 #include <sys/inttypes.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/strsun.h> 33 #include <sys/debug.h> 34 #include <sys/ddi.h> 35 #include <sys/vtrace.h> 36 #include <inet/sctp_crc32.h> 37 #include <inet/ip.h> 38 #include <inet/ip6.h> 39 40 #include <sys/multidata.h> 41 #include <sys/multidata_impl.h> 42 43 extern unsigned int ip_ocsum(ushort_t *address, int halfword_count, 44 unsigned int sum); 45 46 /* 47 * Checksum routine for Internet Protocol family headers. 48 * This routine is very heavily used in the network 49 * code and should be modified for each CPU to be as fast as possible. 50 */ 51 52 #define mp_len(mp) ((mp)->b_wptr - (mp)->b_rptr) 53 54 /* 55 * Even/Odd checks. Usually it is performed on pointers but may be 56 * used on integers as well. uintptr_t is long enough to hold both 57 * integer and pointer. 58 */ 59 #define is_odd(p) (((uintptr_t)(p) & 0x1) != 0) 60 #define is_even(p) (!is_odd(p)) 61 62 63 #ifdef ZC_TEST 64 /* 65 * Disable the TCP s/w cksum. 66 * XXX - This is just a hack for testing purpose. Don't use it for 67 * anything else! 68 */ 69 int noswcksum = 0; 70 #endif 71 /* 72 * Note: this does not ones-complement the result since it is used 73 * when computing partial checksums. 74 * For nonSTRUIO_IP mblks, assumes mp->b_rptr+offset is 16 bit aligned. 75 * For STRUIO_IP mblks, assumes mp->b_datap->db_struiobase is 16 bit aligned. 76 * 77 * Note: for STRUIO_IP special mblks some data may have been previously 78 * checksumed, this routine will handle additional data prefixed within 79 * an mblk or b_cont (chained) mblk(s). This routine will also handle 80 * suffixed b_cont mblk(s) and data suffixed within an mblk. 81 */ 82 unsigned int 83 ip_cksum(mblk_t *mp, int offset, uint_t sum) 84 { 85 ushort_t *w; 86 ssize_t mlen; 87 int pmlen; 88 mblk_t *pmp; 89 dblk_t *dp = mp->b_datap; 90 ushort_t psum = 0; 91 92 #ifdef ZC_TEST 93 if (noswcksum) 94 return (0xffff); 95 #endif 96 ASSERT(dp); 97 98 if (mp->b_cont == NULL) { 99 /* 100 * May be fast-path, only one mblk. 101 */ 102 w = (ushort_t *)(mp->b_rptr + offset); 103 if (dp->db_struioflag & STRUIO_IP) { 104 /* 105 * Checksum any data not already done by 106 * the caller and add in any partial checksum. 107 */ 108 if ((offset > dp->db_cksumstart) || 109 mp->b_wptr != (uchar_t *)(mp->b_rptr + 110 dp->db_cksumend)) { 111 /* 112 * Mblk data pointers aren't inclusive 113 * of uio data, so disregard checksum. 114 * 115 * not using all of data in dblk make sure 116 * not use to use the precalculated checksum 117 * in this case. 118 */ 119 dp->db_struioflag &= ~STRUIO_IP; 120 goto norm; 121 } 122 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend)); 123 psum = *(ushort_t *)dp->db_struioun.data; 124 if ((mlen = dp->db_cksumstart - offset) < 0) 125 mlen = 0; 126 if (is_odd(mlen)) 127 goto slow; 128 if (mlen && dp->db_cksumstart != dp->db_cksumstuff && 129 dp->db_cksumend != dp->db_cksumstuff) { 130 /* 131 * There is prefix data to do and some uio 132 * data has already been checksumed and there 133 * is more uio data to do, so do the prefix 134 * data first, then do the remainder of the 135 * uio data. 136 */ 137 sum = ip_ocsum(w, mlen >> 1, sum); 138 w = (ushort_t *)(mp->b_rptr + 139 dp->db_cksumstuff); 140 if (is_odd(w)) { 141 pmp = mp; 142 goto slow1; 143 } 144 mlen = dp->db_cksumend - dp->db_cksumstuff; 145 } else if (dp->db_cksumend != dp->db_cksumstuff) { 146 /* 147 * There may be uio data to do, if there is 148 * prefix data to do then add in all of the 149 * uio data (if any) to do, else just do any 150 * uio data. 151 */ 152 if (mlen) 153 mlen += dp->db_cksumend 154 - dp->db_cksumstuff; 155 else { 156 w = (ushort_t *)(mp->b_rptr + 157 dp->db_cksumstuff); 158 if (is_odd(w)) 159 goto slow; 160 mlen = dp->db_cksumend 161 - dp->db_cksumstuff; 162 } 163 } else if (mlen == 0) 164 return (psum); 165 166 if (is_odd(mlen)) 167 goto slow; 168 sum += psum; 169 } else { 170 /* 171 * Checksum all data not already done by the caller. 172 */ 173 norm: 174 mlen = mp->b_wptr - (uchar_t *)w; 175 if (is_odd(mlen)) 176 goto slow; 177 } 178 ASSERT(is_even(w)); 179 ASSERT(is_even(mlen)); 180 return (ip_ocsum(w, mlen >> 1, sum)); 181 } 182 if (dp->db_struioflag & STRUIO_IP) 183 psum = *(ushort_t *)dp->db_struioun.data; 184 slow: 185 pmp = 0; 186 slow1: 187 mlen = 0; 188 pmlen = 0; 189 for (; ; ) { 190 /* 191 * Each trip around loop adds in word(s) from one mbuf segment 192 * (except for when pmp == mp, then its two partial trips). 193 */ 194 w = (ushort_t *)(mp->b_rptr + offset); 195 if (pmp) { 196 /* 197 * This is the second trip around for this mblk. 198 */ 199 pmp = 0; 200 mlen = 0; 201 goto douio; 202 } else if (dp->db_struioflag & STRUIO_IP) { 203 /* 204 * Checksum any data not already done by the 205 * caller and add in any partial checksum. 206 */ 207 if ((offset > dp->db_cksumstart) || 208 mp->b_wptr != (uchar_t *)(mp->b_rptr + 209 dp->db_cksumend)) { 210 /* 211 * Mblk data pointers aren't inclusive 212 * of uio data, so disregard checksum. 213 * 214 * not using all of data in dblk make sure 215 * not use to use the precalculated checksum 216 * in this case. 217 */ 218 dp->db_struioflag &= ~STRUIO_IP; 219 goto snorm; 220 } 221 ASSERT(mp->b_wptr == (mp->b_rptr + dp->db_cksumend)); 222 if ((mlen = dp->db_cksumstart - offset) < 0) 223 mlen = 0; 224 if (mlen && dp->db_cksumstart != dp->db_cksumstuff) { 225 /* 226 * There is prefix data too do and some 227 * uio data has already been checksumed, 228 * so do the prefix data only this trip. 229 */ 230 pmp = mp; 231 } else { 232 /* 233 * Add in any partial cksum (if any) and 234 * do the remainder of the uio data. 235 */ 236 int odd; 237 douio: 238 odd = is_odd(dp->db_cksumstuff - 239 dp->db_cksumstart); 240 if (pmlen == -1) { 241 /* 242 * Previous mlen was odd, so swap 243 * the partial checksum bytes. 244 */ 245 sum += ((psum << 8) & 0xffff) 246 | (psum >> 8); 247 if (odd) 248 pmlen = 0; 249 } else { 250 sum += psum; 251 if (odd) 252 pmlen = -1; 253 } 254 if (dp->db_cksumend != dp->db_cksumstuff) { 255 /* 256 * If prefix data to do and then all 257 * the uio data nees to be checksumed, 258 * else just do any uio data. 259 */ 260 if (mlen) 261 mlen += dp->db_cksumend 262 - dp->db_cksumstuff; 263 else { 264 w = (ushort_t *)(mp->b_rptr + 265 dp->db_cksumstuff); 266 mlen = dp->db_cksumend - 267 dp->db_cksumstuff; 268 } 269 } 270 } 271 } else { 272 /* 273 * Checksum all of the mblk data. 274 */ 275 snorm: 276 mlen = mp->b_wptr - (uchar_t *)w; 277 } 278 279 mp = mp->b_cont; 280 if (mlen > 0 && pmlen == -1) { 281 /* 282 * There is a byte left from the last 283 * segment; add it into the checksum. 284 * Don't have to worry about a carry- 285 * out here because we make sure that 286 * high part of (32 bit) sum is small 287 * below. 288 */ 289 #ifdef _LITTLE_ENDIAN 290 sum += *(uchar_t *)w << 8; 291 #else 292 sum += *(uchar_t *)w; 293 #endif 294 w = (ushort_t *)((char *)w + 1); 295 mlen--; 296 pmlen = 0; 297 } 298 if (mlen > 0) { 299 if (is_even(w)) { 300 sum = ip_ocsum(w, mlen>>1, sum); 301 w += mlen>>1; 302 /* 303 * If we had an odd number of bytes, 304 * then the last byte goes in the high 305 * part of the sum, and we take the 306 * first byte to the low part of the sum 307 * the next time around the loop. 308 */ 309 if (is_odd(mlen)) { 310 #ifdef _LITTLE_ENDIAN 311 sum += *(uchar_t *)w; 312 #else 313 sum += *(uchar_t *)w << 8; 314 #endif 315 pmlen = -1; 316 } 317 } else { 318 ushort_t swsum; 319 #ifdef _LITTLE_ENDIAN 320 sum += *(uchar_t *)w; 321 #else 322 sum += *(uchar_t *)w << 8; 323 #endif 324 mlen--; 325 w = (ushort_t *)(1 + (uintptr_t)w); 326 327 /* Do a separate checksum and copy operation */ 328 swsum = ip_ocsum(w, mlen>>1, 0); 329 sum += ((swsum << 8) & 0xffff) | (swsum >> 8); 330 w += mlen>>1; 331 /* 332 * If we had an even number of bytes, 333 * then the last byte goes in the low 334 * part of the sum. Otherwise we had an 335 * odd number of bytes and we take the first 336 * byte to the low part of the sum the 337 * next time around the loop. 338 */ 339 if (is_odd(mlen)) { 340 #ifdef _LITTLE_ENDIAN 341 sum += *(uchar_t *)w << 8; 342 #else 343 sum += *(uchar_t *)w; 344 #endif 345 } 346 else 347 pmlen = -1; 348 } 349 } 350 /* 351 * Locate the next block with some data. 352 * If there is a word split across a boundary we 353 * will wrap to the top with mlen == -1 and 354 * then add it in shifted appropriately. 355 */ 356 offset = 0; 357 if (! pmp) { 358 for (; ; ) { 359 if (mp == 0) { 360 goto done; 361 } 362 if (mp_len(mp)) 363 break; 364 mp = mp->b_cont; 365 } 366 dp = mp->b_datap; 367 if (dp->db_struioflag & STRUIO_IP) 368 psum = *(ushort_t *)dp->db_struioun.data; 369 } else 370 mp = pmp; 371 } 372 done: 373 /* 374 * Add together high and low parts of sum 375 * and carry to get cksum. 376 * Have to be careful to not drop the last 377 * carry here. 378 */ 379 sum = (sum & 0xFFFF) + (sum >> 16); 380 sum = (sum & 0xFFFF) + (sum >> 16); 381 TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END, 382 "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum); 383 return (sum); 384 } 385 386 uint32_t 387 sctp_cksum(mblk_t *mp, int offset) 388 { 389 uint32_t crc32; 390 uchar_t *p = NULL; 391 392 crc32 = 0xFFFFFFFF; 393 p = mp->b_rptr + offset; 394 crc32 = sctp_crc32(crc32, p, mp->b_wptr - p); 395 for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) { 396 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp)); 397 } 398 399 /* Complement the result */ 400 crc32 = ~crc32; 401 402 return (crc32); 403 } 404 405 /* 406 * Routine to compute Internet checksum (16-bit 1's complement) of a given 407 * Multidata packet descriptor. As in the non-Multidata routine, this doesn't 408 * 1's complement the result, such that it may be used to compute partial 409 * checksums. Since it works on buffer spans rather than mblks, this routine 410 * does not handle existing partial checksum value as in the STRUIO_IP special 411 * mblk case (supporting this is rather trivial, but is perhaps of no use at 412 * the moment unless synchronous streams and delayed checksum calculation are 413 * revived.) 414 * 415 * Note also here that the given Multidata packet descriptor must refer to 416 * a header buffer, i.e. it must have a header fragment. In addition, the 417 * offset must lie within the boundary of the header fragment. For the 418 * outbound tcp (MDT) case, this will not be an issue because the stack 419 * ensures that such conditions are met, and that there is no need whatsoever 420 * to compute partial checksums on an arbitrary offset that is not part of 421 * the header fragment. We may need to revisit this routine to handle all 422 * cases of the inbound (MDR) case, especially when we need to perform partial 423 * checksum calculation due to padded bytes (non-zeroes) in the frame. 424 */ 425 uint_t 426 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum) 427 { 428 pdescinfo_t *pdi = &pd->pd_pdi; 429 uchar_t *reg_start, *reg_end; 430 ssize_t mlen, i; 431 ushort_t *w; 432 boolean_t byteleft = B_FALSE; 433 434 ASSERT((pdi->flags & PDESC_HAS_REF) != 0); 435 ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL); 436 ASSERT(offset <= PDESC_HDRL(pdi)); 437 438 for (i = 0; i < pdi->pld_cnt + 1; i++) { 439 if (i == 0) { 440 reg_start = pdi->hdr_rptr; 441 reg_end = pdi->hdr_wptr; 442 } else { 443 reg_start = pdi->pld_ary[i - 1].pld_rptr; 444 reg_end = pdi->pld_ary[i - 1].pld_wptr; 445 offset = 0; 446 } 447 448 w = (ushort_t *)(reg_start + offset); 449 mlen = reg_end - (uchar_t *)w; 450 451 if (mlen > 0 && byteleft) { 452 /* 453 * There is a byte left from the last 454 * segment; add it into the checksum. 455 * Don't have to worry about a carry- 456 * out here because we make sure that 457 * high part of (32 bit) sum is small 458 * below. 459 */ 460 #ifdef _LITTLE_ENDIAN 461 sum += *(uchar_t *)w << 8; 462 #else 463 sum += *(uchar_t *)w; 464 #endif 465 w = (ushort_t *)((char *)w + 1); 466 mlen--; 467 byteleft = B_FALSE; 468 } 469 470 if (mlen == 0) 471 continue; 472 473 if (is_even(w)) { 474 sum = ip_ocsum(w, mlen >> 1, sum); 475 w += mlen >> 1; 476 /* 477 * If we had an odd number of bytes, 478 * then the last byte goes in the high 479 * part of the sum, and we take the 480 * first byte to the low part of the sum 481 * the next time around the loop. 482 */ 483 if (is_odd(mlen)) { 484 #ifdef _LITTLE_ENDIAN 485 sum += *(uchar_t *)w; 486 #else 487 sum += *(uchar_t *)w << 8; 488 #endif 489 byteleft = B_TRUE; 490 } 491 } else { 492 ushort_t swsum; 493 #ifdef _LITTLE_ENDIAN 494 sum += *(uchar_t *)w; 495 #else 496 sum += *(uchar_t *)w << 8; 497 #endif 498 mlen--; 499 w = (ushort_t *)(1 + (uintptr_t)w); 500 501 /* Do a separate checksum and copy operation */ 502 swsum = ip_ocsum(w, mlen >> 1, 0); 503 sum += ((swsum << 8) & 0xffff) | (swsum >> 8); 504 w += mlen >> 1; 505 /* 506 * If we had an even number of bytes, 507 * then the last byte goes in the low 508 * part of the sum. Otherwise we had an 509 * odd number of bytes and we take the first 510 * byte to the low part of the sum the 511 * next time around the loop. 512 */ 513 if (is_odd(mlen)) { 514 #ifdef _LITTLE_ENDIAN 515 sum += *(uchar_t *)w << 8; 516 #else 517 sum += *(uchar_t *)w; 518 #endif 519 } else { 520 byteleft = B_TRUE; 521 } 522 } 523 } 524 525 /* 526 * Add together high and low parts of sum and carry to get cksum. 527 * Have to be careful to not drop the last carry here. 528 */ 529 sum = (sum & 0xffff) + (sum >> 16); 530 sum = (sum & 0xffff) + (sum >> 16); 531 532 return (sum); 533 } 534 535 /* Return the IP checksum for the IP header at "iph". */ 536 uint16_t 537 ip_csum_hdr(ipha_t *ipha) 538 { 539 uint16_t *uph; 540 uint32_t sum; 541 int opt_len; 542 543 opt_len = (ipha->ipha_version_and_hdr_length & 0xF) - 544 IP_SIMPLE_HDR_LENGTH_IN_WORDS; 545 uph = (uint16_t *)ipha; 546 sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + 547 uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; 548 if (opt_len > 0) { 549 do { 550 sum += uph[10]; 551 sum += uph[11]; 552 uph += 2; 553 } while (--opt_len); 554 } 555 sum = (sum & 0xFFFF) + (sum >> 16); 556 sum = ~(sum + (sum >> 16)) & 0xFFFF; 557 if (sum == 0xffff) 558 sum = 0; 559 return ((uint16_t)sum); 560 } 561 562 /* 563 * This function takes an mblk and IPv6 header as input and returns 564 * three pieces of information. 565 * 566 * 'hdr_length_ptr': The IPv6 header length including extension headers. 567 * 568 * 'nethdrpp': A pointer to the "next hedader" value, aka the 569 * transport header. This argument may be set to NULL if 570 * only the length is desired. 571 * 572 * return: Whether or not the header was malformed. 573 * 574 * This function assumes the IPv6 header along with all extensions are 575 * contained solely in this mblk: i.e., there is no b_cont walking. 576 */ 577 boolean_t 578 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 579 uint8_t **nexthdrpp) 580 { 581 uint16_t length; 582 uint_t ehdrlen; 583 uint8_t *nexthdrp; 584 uint8_t *whereptr; 585 uint8_t *endptr; 586 ip6_dest_t *desthdr; 587 ip6_rthdr_t *rthdr; 588 ip6_frag_t *fraghdr; 589 590 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 591 length = IPV6_HDR_LEN; 592 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 593 endptr = mp->b_wptr; 594 595 nexthdrp = &ip6h->ip6_nxt; 596 while (whereptr < endptr) { 597 /* Is there enough left for len + nexthdr? */ 598 if (whereptr + MIN_EHDR_LEN > endptr) 599 break; 600 601 switch (*nexthdrp) { 602 case IPPROTO_HOPOPTS: 603 case IPPROTO_DSTOPTS: 604 /* Assumes the headers are identical for hbh and dst */ 605 desthdr = (ip6_dest_t *)whereptr; 606 ehdrlen = 8 * (desthdr->ip6d_len + 1); 607 if ((uchar_t *)desthdr + ehdrlen > endptr) 608 return (B_FALSE); 609 nexthdrp = &desthdr->ip6d_nxt; 610 break; 611 case IPPROTO_ROUTING: 612 rthdr = (ip6_rthdr_t *)whereptr; 613 ehdrlen = 8 * (rthdr->ip6r_len + 1); 614 if ((uchar_t *)rthdr + ehdrlen > endptr) 615 return (B_FALSE); 616 nexthdrp = &rthdr->ip6r_nxt; 617 break; 618 case IPPROTO_FRAGMENT: 619 fraghdr = (ip6_frag_t *)whereptr; 620 ehdrlen = sizeof (ip6_frag_t); 621 if ((uchar_t *)&fraghdr[1] > endptr) 622 return (B_FALSE); 623 nexthdrp = &fraghdr->ip6f_nxt; 624 break; 625 case IPPROTO_NONE: 626 /* No next header means we're finished */ 627 default: 628 *hdr_length_ptr = length; 629 630 if (nexthdrpp != NULL) 631 *nexthdrpp = nexthdrp; 632 633 return (B_TRUE); 634 } 635 length += ehdrlen; 636 whereptr += ehdrlen; 637 *hdr_length_ptr = length; 638 639 if (nexthdrpp != NULL) 640 *nexthdrpp = nexthdrp; 641 } 642 switch (*nexthdrp) { 643 case IPPROTO_HOPOPTS: 644 case IPPROTO_DSTOPTS: 645 case IPPROTO_ROUTING: 646 case IPPROTO_FRAGMENT: 647 /* 648 * If any know extension headers are still to be processed, 649 * the packet's malformed (or at least all the IP header(s) are 650 * not in the same mblk - and that should never happen. 651 */ 652 return (B_FALSE); 653 654 default: 655 /* 656 * If we get here, we know that all of the IP headers were in 657 * the same mblk, even if the ULP header is in the next mblk. 658 */ 659 *hdr_length_ptr = length; 660 661 if (nexthdrpp != NULL) 662 *nexthdrpp = nexthdrp; 663 664 return (B_TRUE); 665 } 666 } 667