1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * MAC Services Module - misc utilities 28 */ 29 30 #include <sys/types.h> 31 #include <sys/mac.h> 32 #include <sys/mac_impl.h> 33 #include <sys/mac_client_priv.h> 34 #include <sys/mac_client_impl.h> 35 #include <sys/mac_soft_ring.h> 36 #include <sys/strsubr.h> 37 #include <sys/strsun.h> 38 #include <sys/vlan.h> 39 #include <sys/pattr.h> 40 #include <sys/pci_tools.h> 41 #include <inet/ip.h> 42 #include <inet/ip_impl.h> 43 #include <inet/ip6.h> 44 #include <sys/vtrace.h> 45 #include <sys/dlpi.h> 46 #include <sys/sunndi.h> 47 #include <inet/ipsec_impl.h> 48 #include <inet/sadb.h> 49 #include <inet/ipsecesp.h> 50 #include <inet/ipsecah.h> 51 52 /* 53 * Copy an mblk, preserving its hardware checksum flags. 54 */ 55 static mblk_t * 56 mac_copymsg_cksum(mblk_t *mp) 57 { 58 mblk_t *mp1; 59 uint32_t start, stuff, end, value, flags; 60 61 mp1 = copymsg(mp); 62 if (mp1 == NULL) 63 return (NULL); 64 65 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 66 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 67 flags, KM_NOSLEEP); 68 69 return (mp1); 70 } 71 72 /* 73 * Copy an mblk chain, presenting the hardware checksum flags of the 74 * individual mblks. 75 */ 76 mblk_t * 77 mac_copymsgchain_cksum(mblk_t *mp) 78 { 79 mblk_t *nmp = NULL; 80 mblk_t **nmpp = &nmp; 81 82 for (; mp != NULL; mp = mp->b_next) { 83 if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { 84 freemsgchain(nmp); 85 return (NULL); 86 } 87 88 nmpp = &((*nmpp)->b_next); 89 } 90 91 return (nmp); 92 } 93 94 /* 95 * Process the specified mblk chain for proper handling of hardware 96 * checksum offload. This routine is invoked for loopback traffic 97 * between MAC clients. 98 * The function handles a NULL mblk chain passed as argument. 99 */ 100 mblk_t * 101 mac_fix_cksum(mblk_t *mp_chain) 102 { 103 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 104 uint32_t flags, start, stuff, end, value; 105 106 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 107 uint16_t len; 108 uint32_t offset; 109 struct ether_header *ehp; 110 uint16_t sap; 111 112 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 113 &flags); 114 if (flags == 0) 115 continue; 116 117 /* 118 * Since the processing of checksum offload for loopback 119 * traffic requires modification of the packet contents, 120 * ensure sure that we are always modifying our own copy. 121 */ 122 if (DB_REF(mp) > 1) { 123 mp1 = copymsg(mp); 124 if (mp1 == NULL) 125 continue; 126 mp1->b_next = mp->b_next; 127 mp->b_next = NULL; 128 freemsg(mp); 129 if (prev != NULL) 130 prev->b_next = mp1; 131 else 132 new_chain = mp1; 133 mp = mp1; 134 } 135 136 /* 137 * Ethernet, and optionally VLAN header. 138 */ 139 /* LINTED: improper alignment cast */ 140 ehp = (struct ether_header *)mp->b_rptr; 141 if (ntohs(ehp->ether_type) == VLAN_TPID) { 142 struct ether_vlan_header *evhp; 143 144 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 145 /* LINTED: improper alignment cast */ 146 evhp = (struct ether_vlan_header *)mp->b_rptr; 147 sap = ntohs(evhp->ether_type); 148 offset = sizeof (struct ether_vlan_header); 149 } else { 150 sap = ntohs(ehp->ether_type); 151 offset = sizeof (struct ether_header); 152 } 153 154 if (MBLKL(mp) <= offset) { 155 offset -= MBLKL(mp); 156 if (mp->b_cont == NULL) { 157 /* corrupted packet, skip it */ 158 if (prev != NULL) 159 prev->b_next = mp->b_next; 160 else 161 new_chain = mp->b_next; 162 mp1 = mp->b_next; 163 mp->b_next = NULL; 164 freemsg(mp); 165 mp = mp1; 166 continue; 167 } 168 mp = mp->b_cont; 169 } 170 171 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 172 ipha_t *ipha = NULL; 173 174 /* 175 * In order to compute the full and header 176 * checksums, we need to find and parse 177 * the IP and/or ULP headers. 178 */ 179 180 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 181 182 /* 183 * IP header. 184 */ 185 if (sap != ETHERTYPE_IP) 186 continue; 187 188 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 189 /* LINTED: improper alignment cast */ 190 ipha = (ipha_t *)(mp->b_rptr + offset); 191 192 if (flags & HCK_FULLCKSUM) { 193 ipaddr_t src, dst; 194 uint32_t cksum; 195 uint16_t *up; 196 uint8_t proto; 197 198 /* 199 * Pointer to checksum field in ULP header. 200 */ 201 proto = ipha->ipha_protocol; 202 ASSERT(ipha->ipha_version_and_hdr_length == 203 IP_SIMPLE_HDR_VERSION); 204 205 switch (proto) { 206 case IPPROTO_TCP: 207 /* LINTED: improper alignment cast */ 208 up = IPH_TCPH_CHECKSUMP(ipha, 209 IP_SIMPLE_HDR_LENGTH); 210 break; 211 212 case IPPROTO_UDP: 213 /* LINTED: improper alignment cast */ 214 up = IPH_UDPH_CHECKSUMP(ipha, 215 IP_SIMPLE_HDR_LENGTH); 216 break; 217 218 default: 219 cmn_err(CE_WARN, "mac_fix_cksum: " 220 "unexpected protocol: %d", proto); 221 continue; 222 } 223 224 /* 225 * Pseudo-header checksum. 226 */ 227 src = ipha->ipha_src; 228 dst = ipha->ipha_dst; 229 len = ntohs(ipha->ipha_length) - 230 IP_SIMPLE_HDR_LENGTH; 231 232 cksum = (dst >> 16) + (dst & 0xFFFF) + 233 (src >> 16) + (src & 0xFFFF); 234 cksum += htons(len); 235 236 /* 237 * The checksum value stored in the packet needs 238 * to be correct. Compute it here. 239 */ 240 *up = 0; 241 cksum += (((proto) == IPPROTO_UDP) ? 242 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 243 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 244 offset, cksum); 245 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 246 247 flags |= HCK_FULLCKSUM_OK; 248 value = 0xffff; 249 } 250 251 if (flags & HCK_IPV4_HDRCKSUM) { 252 ASSERT(ipha != NULL); 253 ipha->ipha_hdr_checksum = 254 (uint16_t)ip_csum_hdr(ipha); 255 } 256 } 257 258 if (flags & HCK_PARTIALCKSUM) { 259 uint16_t *up, partial, cksum; 260 uchar_t *ipp; /* ptr to beginning of IP header */ 261 262 if (mp->b_cont != NULL) { 263 mblk_t *mp1; 264 265 mp1 = msgpullup(mp, offset + end); 266 if (mp1 == NULL) 267 continue; 268 mp1->b_next = mp->b_next; 269 mp->b_next = NULL; 270 freemsg(mp); 271 if (prev != NULL) 272 prev->b_next = mp1; 273 else 274 new_chain = mp1; 275 mp = mp1; 276 } 277 278 ipp = mp->b_rptr + offset; 279 /* LINTED: cast may result in improper alignment */ 280 up = (uint16_t *)((uchar_t *)ipp + stuff); 281 partial = *up; 282 *up = 0; 283 284 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 285 end - start, partial); 286 cksum = ~cksum; 287 *up = cksum ? cksum : ~cksum; 288 289 /* 290 * Since we already computed the whole checksum, 291 * indicate to the stack that it has already 292 * been verified by the hardware. 293 */ 294 flags &= ~HCK_PARTIALCKSUM; 295 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 296 value = 0xffff; 297 } 298 299 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 300 value, flags, KM_NOSLEEP); 301 } 302 303 return (new_chain); 304 } 305 306 /* 307 * Add VLAN tag to the specified mblk. 308 */ 309 mblk_t * 310 mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) 311 { 312 mblk_t *hmp; 313 struct ether_vlan_header *evhp; 314 struct ether_header *ehp; 315 uint32_t start, stuff, end, value, flags; 316 317 ASSERT(pri != 0 || vid != 0); 318 319 /* 320 * Allocate an mblk for the new tagged ethernet header, 321 * and copy the MAC addresses and ethertype from the 322 * original header. 323 */ 324 325 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 326 if (hmp == NULL) { 327 freemsg(mp); 328 return (NULL); 329 } 330 331 evhp = (struct ether_vlan_header *)hmp->b_rptr; 332 ehp = (struct ether_header *)mp->b_rptr; 333 334 bcopy(ehp, evhp, (ETHERADDRL * 2)); 335 evhp->ether_type = ehp->ether_type; 336 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 337 338 hmp->b_wptr += sizeof (struct ether_vlan_header); 339 mp->b_rptr += sizeof (struct ether_header); 340 341 /* 342 * Free the original message if it's now empty. Link the 343 * rest of messages to the header message. 344 */ 345 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 346 (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, 347 KM_NOSLEEP); 348 if (MBLKL(mp) == 0) { 349 hmp->b_cont = mp->b_cont; 350 freeb(mp); 351 } else { 352 hmp->b_cont = mp; 353 } 354 ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); 355 356 /* 357 * Initialize the new TCI (Tag Control Information). 358 */ 359 evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); 360 361 return (hmp); 362 } 363 364 /* 365 * Adds a VLAN tag with the specified VID and priority to each mblk of 366 * the specified chain. 367 */ 368 mblk_t * 369 mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) 370 { 371 mblk_t *next_mp, **prev, *mp; 372 373 mp = mp_chain; 374 prev = &mp_chain; 375 376 while (mp != NULL) { 377 next_mp = mp->b_next; 378 mp->b_next = NULL; 379 if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { 380 freemsgchain(next_mp); 381 break; 382 } 383 *prev = mp; 384 prev = &mp->b_next; 385 mp = mp->b_next = next_mp; 386 } 387 388 return (mp_chain); 389 } 390 391 /* 392 * Strip VLAN tag 393 */ 394 mblk_t * 395 mac_strip_vlan_tag(mblk_t *mp) 396 { 397 mblk_t *newmp; 398 struct ether_vlan_header *evhp; 399 400 evhp = (struct ether_vlan_header *)mp->b_rptr; 401 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 402 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 403 404 if (DB_REF(mp) > 1) { 405 newmp = copymsg(mp); 406 if (newmp == NULL) 407 return (NULL); 408 freemsg(mp); 409 mp = newmp; 410 } 411 412 evhp = (struct ether_vlan_header *)mp->b_rptr; 413 414 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 415 mp->b_rptr += VLAN_TAGSZ; 416 } 417 return (mp); 418 } 419 420 /* 421 * Strip VLAN tag from each mblk of the chain. 422 */ 423 mblk_t * 424 mac_strip_vlan_tag_chain(mblk_t *mp_chain) 425 { 426 mblk_t *mp, *next_mp, **prev; 427 428 mp = mp_chain; 429 prev = &mp_chain; 430 431 while (mp != NULL) { 432 next_mp = mp->b_next; 433 mp->b_next = NULL; 434 if ((mp = mac_strip_vlan_tag(mp)) == NULL) { 435 freemsgchain(next_mp); 436 break; 437 } 438 *prev = mp; 439 prev = &mp->b_next; 440 mp = mp->b_next = next_mp; 441 } 442 443 return (mp_chain); 444 } 445 446 /* 447 * Default callback function. Used when the datapath is not yet initialized. 448 */ 449 /* ARGSUSED */ 450 void 451 mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, 452 boolean_t loopback) 453 { 454 mblk_t *mp1 = mp; 455 456 while (mp1 != NULL) { 457 mp1->b_prev = NULL; 458 mp1->b_queue = NULL; 459 mp1 = mp1->b_next; 460 } 461 freemsgchain(mp); 462 } 463 464 /* 465 * Determines the IPv6 header length accounting for all the optional IPv6 466 * headers (hop-by-hop, destination, routing and fragment). The header length 467 * and next header value (a transport header) is captured. 468 * 469 * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 470 * returns B_TRUE. 471 */ 472 boolean_t 473 mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, 474 uint8_t *next_hdr, boolean_t *ip_fragmented, uint32_t *ip_frag_ident) 475 { 476 uint16_t length; 477 uint_t ehdrlen; 478 uint8_t *whereptr; 479 uint8_t *endptr; 480 uint8_t *nexthdrp; 481 ip6_dest_t *desthdr; 482 ip6_rthdr_t *rthdr; 483 ip6_frag_t *fraghdr; 484 485 endptr = mp->b_wptr; 486 if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 487 return (B_FALSE); 488 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 489 length = IPV6_HDR_LEN; 490 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 491 492 if (ip_fragmented != NULL) 493 *ip_fragmented = B_FALSE; 494 495 nexthdrp = &ip6h->ip6_nxt; 496 while (whereptr < endptr) { 497 /* Is there enough left for len + nexthdr? */ 498 if (whereptr + MIN_EHDR_LEN > endptr) 499 break; 500 501 switch (*nexthdrp) { 502 case IPPROTO_HOPOPTS: 503 case IPPROTO_DSTOPTS: 504 /* Assumes the headers are identical for hbh and dst */ 505 desthdr = (ip6_dest_t *)whereptr; 506 ehdrlen = 8 * (desthdr->ip6d_len + 1); 507 if ((uchar_t *)desthdr + ehdrlen > endptr) 508 return (B_FALSE); 509 nexthdrp = &desthdr->ip6d_nxt; 510 break; 511 case IPPROTO_ROUTING: 512 rthdr = (ip6_rthdr_t *)whereptr; 513 ehdrlen = 8 * (rthdr->ip6r_len + 1); 514 if ((uchar_t *)rthdr + ehdrlen > endptr) 515 return (B_FALSE); 516 nexthdrp = &rthdr->ip6r_nxt; 517 break; 518 case IPPROTO_FRAGMENT: 519 fraghdr = (ip6_frag_t *)whereptr; 520 ehdrlen = sizeof (ip6_frag_t); 521 if ((uchar_t *)&fraghdr[1] > endptr) 522 return (B_FALSE); 523 nexthdrp = &fraghdr->ip6f_nxt; 524 if (ip_fragmented != NULL) 525 *ip_fragmented = B_TRUE; 526 if (ip_frag_ident != NULL) 527 *ip_frag_ident = fraghdr->ip6f_ident; 528 break; 529 case IPPROTO_NONE: 530 /* No next header means we're finished */ 531 default: 532 *hdr_length = length; 533 *next_hdr = *nexthdrp; 534 return (B_TRUE); 535 } 536 length += ehdrlen; 537 whereptr += ehdrlen; 538 *hdr_length = length; 539 *next_hdr = *nexthdrp; 540 } 541 switch (*nexthdrp) { 542 case IPPROTO_HOPOPTS: 543 case IPPROTO_DSTOPTS: 544 case IPPROTO_ROUTING: 545 case IPPROTO_FRAGMENT: 546 /* 547 * If any know extension headers are still to be processed, 548 * the packet's malformed (or at least all the IP header(s) are 549 * not in the same mblk - and that should never happen. 550 */ 551 return (B_FALSE); 552 553 default: 554 /* 555 * If we get here, we know that all of the IP headers were in 556 * the same mblk, even if the ULP header is in the next mblk. 557 */ 558 *hdr_length = length; 559 *next_hdr = *nexthdrp; 560 return (B_TRUE); 561 } 562 } 563 564 typedef struct mac_dladm_intr { 565 int ino; 566 int cpu_id; 567 char driver_path[MAXPATHLEN]; 568 char nexus_path[MAXPATHLEN]; 569 } mac_dladm_intr_t; 570 571 /* Bind the interrupt to cpu_num */ 572 static int 573 mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int ino) 574 { 575 pcitool_intr_set_t iset; 576 int err; 577 578 iset.ino = ino; 579 iset.cpu_id = cpu_num; 580 iset.user_version = PCITOOL_VERSION; 581 err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, 582 kcred, NULL); 583 584 return (err); 585 } 586 587 /* 588 * Search interrupt information. iget is filled in with the info to search 589 */ 590 static boolean_t 591 mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) 592 { 593 int i; 594 char driver_path[2 * MAXPATHLEN]; 595 596 for (i = 0; i < iget_p->num_devs; i++) { 597 (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); 598 (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, 599 ":%s%d", iget_p->dev[i].driver_name, 600 iget_p->dev[i].dev_inst); 601 /* Match the device path for the device path */ 602 if (strcmp(driver_path, dln->driver_path) == 0) { 603 dln->ino = iget_p->ino; 604 dln->cpu_id = iget_p->cpu_id; 605 return (B_TRUE); 606 } 607 } 608 return (B_FALSE); 609 } 610 611 /* 612 * Get information about ino, i.e. if this is the interrupt for our 613 * device and where it is bound etc. 614 */ 615 static boolean_t 616 mac_get_single_intr(ldi_handle_t lh, int ino, mac_dladm_intr_t *dln) 617 { 618 pcitool_intr_get_t *iget_p; 619 int ipsz; 620 int nipsz; 621 int err; 622 uint8_t inum; 623 624 /* 625 * Check if SLEEP is OK, i.e if could come here in response to 626 * changing the fanout due to some callback from the driver, say 627 * link speed changes. 628 */ 629 ipsz = PCITOOL_IGET_SIZE(0); 630 iget_p = kmem_zalloc(ipsz, KM_SLEEP); 631 632 iget_p->num_devs_ret = 0; 633 iget_p->user_version = PCITOOL_VERSION; 634 iget_p->ino = ino; 635 636 err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 637 FKIOCTL, kcred, NULL); 638 if (err != 0) { 639 kmem_free(iget_p, ipsz); 640 return (B_FALSE); 641 } 642 if (iget_p->num_devs == 0) { 643 kmem_free(iget_p, ipsz); 644 return (B_FALSE); 645 } 646 inum = iget_p->num_devs; 647 if (iget_p->num_devs_ret < iget_p->num_devs) { 648 /* Reallocate */ 649 nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); 650 651 kmem_free(iget_p, ipsz); 652 ipsz = nipsz; 653 iget_p = kmem_zalloc(ipsz, KM_SLEEP); 654 655 iget_p->num_devs_ret = inum; 656 iget_p->ino = ino; 657 iget_p->user_version = PCITOOL_VERSION; 658 err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 659 FKIOCTL, kcred, NULL); 660 if (err != 0) { 661 kmem_free(iget_p, ipsz); 662 return (B_FALSE); 663 } 664 /* defensive */ 665 if (iget_p->num_devs != iget_p->num_devs_ret) { 666 kmem_free(iget_p, ipsz); 667 return (B_FALSE); 668 } 669 } 670 671 if (mac_search_intrinfo(iget_p, dln)) { 672 kmem_free(iget_p, ipsz); 673 return (B_TRUE); 674 } 675 kmem_free(iget_p, ipsz); 676 return (B_FALSE); 677 } 678 679 /* 680 * Get the interrupts and check each one to see if it is for our device. 681 */ 682 static int 683 mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) 684 { 685 pcitool_intr_info_t intr_info; 686 int err; 687 int ino; 688 689 err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, 690 FKIOCTL, kcred, NULL); 691 if (err != 0) 692 return (-1); 693 694 for (ino = 0; ino < intr_info.num_intr; ino++) { 695 if (mac_get_single_intr(lh, ino, dln)) { 696 if (dln->cpu_id == cpuid) 697 return (0); 698 return (1); 699 } 700 } 701 return (-1); 702 } 703 704 /* 705 * Obtain the nexus parent node info. for mdip. 706 */ 707 static dev_info_t * 708 mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) 709 { 710 struct dev_info *tdip = (struct dev_info *)mdip; 711 struct ddi_minor_data *minordata; 712 int circ; 713 dev_info_t *pdip; 714 char pathname[MAXPATHLEN]; 715 716 while (tdip != NULL) { 717 /* 718 * The netboot code could call this function while walking the 719 * device tree so we need to use ndi_devi_tryenter() here to 720 * avoid deadlock. 721 */ 722 if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0) 723 break; 724 725 for (minordata = tdip->devi_minor; minordata != NULL; 726 minordata = minordata->next) { 727 if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, 728 strlen(DDI_NT_INTRCTL)) == 0) { 729 pdip = minordata->dip; 730 (void) ddi_pathname(pdip, pathname); 731 (void) snprintf(dln->nexus_path, MAXPATHLEN, 732 "/devices%s:intr", pathname); 733 (void) ddi_pathname_minor(minordata, pathname); 734 ndi_devi_exit((dev_info_t *)tdip, circ); 735 return (pdip); 736 } 737 } 738 ndi_devi_exit((dev_info_t *)tdip, circ); 739 tdip = tdip->devi_parent; 740 } 741 return (NULL); 742 } 743 744 /* 745 * For a primary MAC client, if the user has set a list or CPUs or 746 * we have obtained it implicitly, we try to retarget the interrupt 747 * for that device on one of the CPUs in the list. 748 * We assign the interrupt to the same CPU as the poll thread. 749 */ 750 static boolean_t 751 mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) 752 { 753 ldi_handle_t lh = NULL; 754 ldi_ident_t li = NULL; 755 int err; 756 int ret; 757 mac_dladm_intr_t dln; 758 dev_info_t *dip; 759 struct ddi_minor_data *minordata; 760 761 dln.nexus_path[0] = '\0'; 762 dln.driver_path[0] = '\0'; 763 764 minordata = ((struct dev_info *)mdip)->devi_minor; 765 while (minordata != NULL) { 766 if (minordata->type == DDM_MINOR) 767 break; 768 minordata = minordata->next; 769 } 770 if (minordata == NULL) 771 return (B_FALSE); 772 773 (void) ddi_pathname_minor(minordata, dln.driver_path); 774 775 dip = mac_get_nexus_node(mdip, &dln); 776 /* defensive */ 777 if (dip == NULL) 778 return (B_FALSE); 779 780 err = ldi_ident_from_major(ddi_driver_major(dip), &li); 781 if (err != 0) 782 return (B_FALSE); 783 784 err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); 785 if (err != 0) 786 return (B_FALSE); 787 788 ret = mac_validate_intr(lh, &dln, cpuid); 789 if (ret < 0) { 790 (void) ldi_close(lh, FREAD|FWRITE, kcred); 791 return (B_FALSE); 792 } 793 /* cmn_note? */ 794 if (ret != 0) 795 if ((err = (mac_set_intr(lh, cpuid, dln.ino))) != 0) { 796 (void) ldi_close(lh, FREAD|FWRITE, kcred); 797 return (B_FALSE); 798 } 799 (void) ldi_close(lh, FREAD|FWRITE, kcred); 800 return (B_TRUE); 801 } 802 803 void 804 mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) 805 { 806 dev_info_t *mdip = (dev_info_t *)arg; 807 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 808 mac_resource_props_t *mrp; 809 mac_perim_handle_t mph; 810 811 if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) 812 return; 813 814 mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); 815 mrp = MCIP_RESOURCE_PROPS(mcip); 816 mrp->mrp_intr_cpu = cpuid; 817 mac_perim_exit(mph); 818 } 819 820 int32_t 821 mac_client_intr_cpu(mac_client_handle_t mch) 822 { 823 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 824 mac_cpus_t *srs_cpu; 825 mac_soft_ring_set_t *rx_srs; 826 flow_entry_t *flent = mcip->mci_flent; 827 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 828 829 /* 830 * Check if we need to retarget the interrupt. We do this only 831 * for the primary MAC client. We do this if we have the only 832 * exclusive ring in the group. 833 */ 834 if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { 835 rx_srs = flent->fe_rx_srs[1]; 836 srs_cpu = &rx_srs->srs_cpu; 837 if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) 838 return (-1); 839 return (srs_cpu->mc_pollid); 840 } 841 return (-1); 842 } 843 844 void * 845 mac_get_devinfo(mac_handle_t mh) 846 { 847 mac_impl_t *mip = (mac_impl_t *)mh; 848 849 return ((void *)mip->mi_dip); 850 } 851 852 #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) 853 #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 854 #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 855 856 uint64_t 857 mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) 858 { 859 struct ether_header *ehp; 860 uint64_t hash = 0; 861 uint16_t sap; 862 uint_t skip_len; 863 uint8_t proto; 864 boolean_t ip_fragmented; 865 866 /* 867 * We may want to have one of these per MAC type plugin in the 868 * future. For now supports only ethernet. 869 */ 870 if (media != DL_ETHER) 871 return (0L); 872 873 /* for now we support only outbound packets */ 874 ASSERT(is_outbound); 875 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 876 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 877 878 /* compute L2 hash */ 879 880 ehp = (struct ether_header *)mp->b_rptr; 881 882 if ((policy & MAC_PKT_HASH_L2) != 0) { 883 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 884 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 885 hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); 886 policy &= ~MAC_PKT_HASH_L2; 887 } 888 889 if (policy == 0) 890 goto done; 891 892 /* skip ethernet header */ 893 894 sap = ntohs(ehp->ether_type); 895 if (sap == ETHERTYPE_VLAN) { 896 struct ether_vlan_header *evhp; 897 mblk_t *newmp = NULL; 898 899 skip_len = sizeof (struct ether_vlan_header); 900 if (MBLKL(mp) < skip_len) { 901 /* the vlan tag is the payload, pull up first */ 902 newmp = msgpullup(mp, -1); 903 if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 904 goto done; 905 } 906 evhp = (struct ether_vlan_header *)newmp->b_rptr; 907 } else { 908 evhp = (struct ether_vlan_header *)mp->b_rptr; 909 } 910 911 sap = ntohs(evhp->ether_type); 912 freemsg(newmp); 913 } else { 914 skip_len = sizeof (struct ether_header); 915 } 916 917 /* if ethernet header is in its own mblk, skip it */ 918 if (MBLKL(mp) <= skip_len) { 919 skip_len -= MBLKL(mp); 920 mp = mp->b_cont; 921 if (mp == NULL) 922 goto done; 923 } 924 925 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 926 927 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 928 929 switch (sap) { 930 case ETHERTYPE_IP: { 931 ipha_t *iphp; 932 933 /* 934 * If the header is not aligned or the header doesn't fit 935 * in the mblk, bail now. Note that this may cause packets 936 * reordering. 937 */ 938 iphp = (ipha_t *)(mp->b_rptr + skip_len); 939 if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || 940 !OK_32PTR((char *)iphp)) 941 goto done; 942 943 proto = iphp->ipha_protocol; 944 skip_len += IPH_HDR_LENGTH(iphp); 945 946 /* Check if the packet is fragmented. */ 947 ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & 948 IPH_OFFSET; 949 950 /* 951 * For fragmented packets, use addresses in addition to 952 * the frag_id to generate the hash inorder to get 953 * better distribution. 954 */ 955 if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { 956 uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 957 uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 958 959 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 960 PKT_HASH_4BYTES(ip_dst)); 961 policy &= ~MAC_PKT_HASH_L3; 962 } 963 964 if (ip_fragmented) { 965 uint8_t *identp = (uint8_t *)&iphp->ipha_ident; 966 hash ^= PKT_HASH_2BYTES(identp); 967 goto done; 968 } 969 break; 970 } 971 case ETHERTYPE_IPV6: { 972 ip6_t *ip6hp; 973 uint16_t hdr_length; 974 uint32_t ip_frag_ident; 975 976 /* 977 * If the header is not aligned or the header doesn't fit 978 * in the mblk, bail now. Note that this may cause packets 979 * reordering. 980 */ 981 982 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 983 if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || 984 !OK_32PTR((char *)ip6hp)) 985 goto done; 986 987 if (!mac_ip_hdr_length_v6(mp, ip6hp, &hdr_length, &proto, 988 &ip_fragmented, &ip_frag_ident)) 989 goto done; 990 skip_len += hdr_length; 991 992 /* 993 * For fragmented packets, use addresses in addition to 994 * the frag_id to generate the hash inorder to get 995 * better distribution. 996 */ 997 if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { 998 uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 999 uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 1000 1001 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 1002 PKT_HASH_4BYTES(ip_dst)); 1003 policy &= ~MAC_PKT_HASH_L3; 1004 } 1005 1006 if (ip_fragmented) { 1007 uint8_t *identp = (uint8_t *)&ip_frag_ident; 1008 hash ^= PKT_HASH_4BYTES(identp); 1009 goto done; 1010 } 1011 break; 1012 } 1013 default: 1014 goto done; 1015 } 1016 1017 if (policy == 0) 1018 goto done; 1019 1020 /* if ip header is in its own mblk, skip it */ 1021 if (MBLKL(mp) <= skip_len) { 1022 skip_len -= MBLKL(mp); 1023 mp = mp->b_cont; 1024 if (mp == NULL) 1025 goto done; 1026 } 1027 1028 /* parse ULP header */ 1029 again: 1030 switch (proto) { 1031 case IPPROTO_TCP: 1032 case IPPROTO_UDP: 1033 case IPPROTO_ESP: 1034 case IPPROTO_SCTP: 1035 /* 1036 * These Internet Protocols are intentionally designed 1037 * for hashing from the git-go. Port numbers are in the first 1038 * word for transports, SPI is first for ESP. 1039 */ 1040 if (mp->b_rptr + skip_len + 4 > mp->b_wptr) 1041 goto done; 1042 hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); 1043 break; 1044 1045 case IPPROTO_AH: { 1046 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 1047 uint_t ah_length = AH_TOTAL_LEN(ah); 1048 1049 if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) 1050 goto done; 1051 1052 proto = ah->ah_nexthdr; 1053 skip_len += ah_length; 1054 1055 /* if AH header is in its own mblk, skip it */ 1056 if (MBLKL(mp) <= skip_len) { 1057 skip_len -= MBLKL(mp); 1058 mp = mp->b_cont; 1059 if (mp == NULL) 1060 goto done; 1061 } 1062 1063 goto again; 1064 } 1065 } 1066 1067 done: 1068 return (hash); 1069 } 1070