1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * MAC Services Module - misc utilities 28 */ 29 30 #include <sys/types.h> 31 #include <sys/mac.h> 32 #include <sys/mac_impl.h> 33 #include <sys/mac_client_priv.h> 34 #include <sys/mac_client_impl.h> 35 #include <sys/mac_soft_ring.h> 36 #include <sys/strsubr.h> 37 #include <sys/strsun.h> 38 #include <sys/vlan.h> 39 #include <sys/pattr.h> 40 #include <sys/pci_tools.h> 41 #include <inet/ip.h> 42 #include <inet/ip_impl.h> 43 #include <inet/ip6.h> 44 #include <sys/vtrace.h> 45 #include <sys/dlpi.h> 46 #include <sys/sunndi.h> 47 #include <inet/ipsec_impl.h> 48 #include <inet/sadb.h> 49 #include <inet/ipsecesp.h> 50 #include <inet/ipsecah.h> 51 52 /* 53 * Copy an mblk, preserving its hardware checksum flags. 54 */ 55 static mblk_t * 56 mac_copymsg_cksum(mblk_t *mp) 57 { 58 mblk_t *mp1; 59 uint32_t start, stuff, end, value, flags; 60 61 mp1 = copymsg(mp); 62 if (mp1 == NULL) 63 return (NULL); 64 65 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 66 (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 67 flags, KM_NOSLEEP); 68 69 return (mp1); 70 } 71 72 /* 73 * Copy an mblk chain, presenting the hardware checksum flags of the 74 * individual mblks. 75 */ 76 mblk_t * 77 mac_copymsgchain_cksum(mblk_t *mp) 78 { 79 mblk_t *nmp = NULL; 80 mblk_t **nmpp = &nmp; 81 82 for (; mp != NULL; mp = mp->b_next) { 83 if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { 84 freemsgchain(nmp); 85 return (NULL); 86 } 87 88 nmpp = &((*nmpp)->b_next); 89 } 90 91 return (nmp); 92 } 93 94 /* 95 * Process the specified mblk chain for proper handling of hardware 96 * checksum offload. This routine is invoked for loopback traffic 97 * between MAC clients. 98 * The function handles a NULL mblk chain passed as argument. 99 */ 100 mblk_t * 101 mac_fix_cksum(mblk_t *mp_chain) 102 { 103 mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 104 uint32_t flags, start, stuff, end, value; 105 106 for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 107 uint16_t len; 108 uint32_t offset; 109 struct ether_header *ehp; 110 uint16_t sap; 111 112 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 113 &flags); 114 if (flags == 0) 115 continue; 116 117 /* 118 * Since the processing of checksum offload for loopback 119 * traffic requires modification of the packet contents, 120 * ensure sure that we are always modifying our own copy. 121 */ 122 if (DB_REF(mp) > 1) { 123 mp1 = copymsg(mp); 124 if (mp1 == NULL) 125 continue; 126 mp1->b_next = mp->b_next; 127 mp->b_next = NULL; 128 freemsg(mp); 129 if (prev != NULL) 130 prev->b_next = mp1; 131 else 132 new_chain = mp1; 133 mp = mp1; 134 } 135 136 /* 137 * Ethernet, and optionally VLAN header. 138 */ 139 /* LINTED: improper alignment cast */ 140 ehp = (struct ether_header *)mp->b_rptr; 141 if (ntohs(ehp->ether_type) == VLAN_TPID) { 142 struct ether_vlan_header *evhp; 143 144 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 145 /* LINTED: improper alignment cast */ 146 evhp = (struct ether_vlan_header *)mp->b_rptr; 147 sap = ntohs(evhp->ether_type); 148 offset = sizeof (struct ether_vlan_header); 149 } else { 150 sap = ntohs(ehp->ether_type); 151 offset = sizeof (struct ether_header); 152 } 153 154 if (MBLKL(mp) <= offset) { 155 offset -= MBLKL(mp); 156 if (mp->b_cont == NULL) { 157 /* corrupted packet, skip it */ 158 if (prev != NULL) 159 prev->b_next = mp->b_next; 160 else 161 new_chain = mp->b_next; 162 mp1 = mp->b_next; 163 mp->b_next = NULL; 164 freemsg(mp); 165 mp = mp1; 166 continue; 167 } 168 mp = mp->b_cont; 169 } 170 171 if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 172 ipha_t *ipha = NULL; 173 174 /* 175 * In order to compute the full and header 176 * checksums, we need to find and parse 177 * the IP and/or ULP headers. 178 */ 179 180 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 181 182 /* 183 * IP header. 184 */ 185 if (sap != ETHERTYPE_IP) 186 continue; 187 188 ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 189 /* LINTED: improper alignment cast */ 190 ipha = (ipha_t *)(mp->b_rptr + offset); 191 192 if (flags & HCK_FULLCKSUM) { 193 ipaddr_t src, dst; 194 uint32_t cksum; 195 uint16_t *up; 196 uint8_t proto; 197 198 /* 199 * Pointer to checksum field in ULP header. 200 */ 201 proto = ipha->ipha_protocol; 202 ASSERT(ipha->ipha_version_and_hdr_length == 203 IP_SIMPLE_HDR_VERSION); 204 if (proto == IPPROTO_TCP) { 205 /* LINTED: improper alignment cast */ 206 up = IPH_TCPH_CHECKSUMP(ipha, 207 IP_SIMPLE_HDR_LENGTH); 208 } else { 209 ASSERT(proto == IPPROTO_UDP); 210 /* LINTED: improper alignment cast */ 211 up = IPH_UDPH_CHECKSUMP(ipha, 212 IP_SIMPLE_HDR_LENGTH); 213 } 214 215 /* 216 * Pseudo-header checksum. 217 */ 218 src = ipha->ipha_src; 219 dst = ipha->ipha_dst; 220 len = ntohs(ipha->ipha_length) - 221 IP_SIMPLE_HDR_LENGTH; 222 223 cksum = (dst >> 16) + (dst & 0xFFFF) + 224 (src >> 16) + (src & 0xFFFF); 225 cksum += htons(len); 226 227 /* 228 * The checksum value stored in the packet needs 229 * to be correct. Compute it here. 230 */ 231 *up = 0; 232 cksum += (((proto) == IPPROTO_UDP) ? 233 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 234 cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 235 offset, cksum); 236 *(up) = (uint16_t)(cksum ? cksum : ~cksum); 237 238 flags |= HCK_FULLCKSUM_OK; 239 value = 0xffff; 240 } 241 242 if (flags & HCK_IPV4_HDRCKSUM) { 243 ASSERT(ipha != NULL); 244 ipha->ipha_hdr_checksum = 245 (uint16_t)ip_csum_hdr(ipha); 246 } 247 } 248 249 if (flags & HCK_PARTIALCKSUM) { 250 uint16_t *up, partial, cksum; 251 uchar_t *ipp; /* ptr to beginning of IP header */ 252 253 if (mp->b_cont != NULL) { 254 mblk_t *mp1; 255 256 mp1 = msgpullup(mp, offset + end); 257 if (mp1 == NULL) 258 continue; 259 mp1->b_next = mp->b_next; 260 mp->b_next = NULL; 261 freemsg(mp); 262 if (prev != NULL) 263 prev->b_next = mp1; 264 else 265 new_chain = mp1; 266 mp = mp1; 267 } 268 269 ipp = mp->b_rptr + offset; 270 /* LINTED: cast may result in improper alignment */ 271 up = (uint16_t *)((uchar_t *)ipp + stuff); 272 partial = *up; 273 *up = 0; 274 275 cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 276 end - start, partial); 277 cksum = ~cksum; 278 *up = cksum ? cksum : ~cksum; 279 280 /* 281 * Since we already computed the whole checksum, 282 * indicate to the stack that it has already 283 * been verified by the hardware. 284 */ 285 flags &= ~HCK_PARTIALCKSUM; 286 flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 287 value = 0xffff; 288 } 289 290 (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 291 value, flags, KM_NOSLEEP); 292 } 293 294 return (new_chain); 295 } 296 297 /* 298 * Add VLAN tag to the specified mblk. 299 */ 300 mblk_t * 301 mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) 302 { 303 mblk_t *hmp; 304 struct ether_vlan_header *evhp; 305 struct ether_header *ehp; 306 uint32_t start, stuff, end, value, flags; 307 308 ASSERT(pri != 0 || vid != 0); 309 310 /* 311 * Allocate an mblk for the new tagged ethernet header, 312 * and copy the MAC addresses and ethertype from the 313 * original header. 314 */ 315 316 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 317 if (hmp == NULL) { 318 freemsg(mp); 319 return (NULL); 320 } 321 322 evhp = (struct ether_vlan_header *)hmp->b_rptr; 323 ehp = (struct ether_header *)mp->b_rptr; 324 325 bcopy(ehp, evhp, (ETHERADDRL * 2)); 326 evhp->ether_type = ehp->ether_type; 327 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 328 329 hmp->b_wptr += sizeof (struct ether_vlan_header); 330 mp->b_rptr += sizeof (struct ether_header); 331 332 /* 333 * Free the original message if it's now empty. Link the 334 * rest of messages to the header message. 335 */ 336 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 337 (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, 338 KM_NOSLEEP); 339 if (MBLKL(mp) == 0) { 340 hmp->b_cont = mp->b_cont; 341 freeb(mp); 342 } else { 343 hmp->b_cont = mp; 344 } 345 ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); 346 347 /* 348 * Initialize the new TCI (Tag Control Information). 349 */ 350 evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); 351 352 return (hmp); 353 } 354 355 /* 356 * Adds a VLAN tag with the specified VID and priority to each mblk of 357 * the specified chain. 358 */ 359 mblk_t * 360 mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) 361 { 362 mblk_t *next_mp, **prev, *mp; 363 364 mp = mp_chain; 365 prev = &mp_chain; 366 367 while (mp != NULL) { 368 next_mp = mp->b_next; 369 mp->b_next = NULL; 370 if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { 371 freemsgchain(next_mp); 372 break; 373 } 374 *prev = mp; 375 prev = &mp->b_next; 376 mp = mp->b_next = next_mp; 377 } 378 379 return (mp_chain); 380 } 381 382 /* 383 * Strip VLAN tag 384 */ 385 mblk_t * 386 mac_strip_vlan_tag(mblk_t *mp) 387 { 388 mblk_t *newmp; 389 struct ether_vlan_header *evhp; 390 391 evhp = (struct ether_vlan_header *)mp->b_rptr; 392 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 393 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 394 395 if (DB_REF(mp) > 1) { 396 newmp = copymsg(mp); 397 if (newmp == NULL) 398 return (NULL); 399 freemsg(mp); 400 mp = newmp; 401 } 402 403 evhp = (struct ether_vlan_header *)mp->b_rptr; 404 405 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 406 mp->b_rptr += VLAN_TAGSZ; 407 } 408 return (mp); 409 } 410 411 /* 412 * Strip VLAN tag from each mblk of the chain. 413 */ 414 mblk_t * 415 mac_strip_vlan_tag_chain(mblk_t *mp_chain) 416 { 417 mblk_t *mp, *next_mp, **prev; 418 419 mp = mp_chain; 420 prev = &mp_chain; 421 422 while (mp != NULL) { 423 next_mp = mp->b_next; 424 mp->b_next = NULL; 425 if ((mp = mac_strip_vlan_tag(mp)) == NULL) { 426 freemsgchain(next_mp); 427 break; 428 } 429 *prev = mp; 430 prev = &mp->b_next; 431 mp = mp->b_next = next_mp; 432 } 433 434 return (mp_chain); 435 } 436 437 /* 438 * Default callback function. Used when the datapath is not yet initialized. 439 */ 440 /* ARGSUSED */ 441 void 442 mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, 443 boolean_t loopback) 444 { 445 mblk_t *mp1 = mp; 446 447 while (mp1 != NULL) { 448 mp1->b_prev = NULL; 449 mp1->b_queue = NULL; 450 mp1 = mp1->b_next; 451 } 452 freemsgchain(mp); 453 } 454 455 /* 456 * Determines the IPv6 header length accounting for all the optional IPv6 457 * headers (hop-by-hop, destination, routing and fragment). The header length 458 * and next header value (a transport header) is captured. 459 * 460 * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 461 * returns B_TRUE. 462 */ 463 boolean_t 464 mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, 465 uint8_t *next_hdr) 466 { 467 uint16_t length; 468 uint_t ehdrlen; 469 uint8_t *whereptr; 470 uint8_t *endptr; 471 uint8_t *nexthdrp; 472 ip6_dest_t *desthdr; 473 ip6_rthdr_t *rthdr; 474 ip6_frag_t *fraghdr; 475 476 endptr = mp->b_wptr; 477 if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 478 return (B_FALSE); 479 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 480 length = IPV6_HDR_LEN; 481 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 482 483 nexthdrp = &ip6h->ip6_nxt; 484 while (whereptr < endptr) { 485 /* Is there enough left for len + nexthdr? */ 486 if (whereptr + MIN_EHDR_LEN > endptr) 487 break; 488 489 switch (*nexthdrp) { 490 case IPPROTO_HOPOPTS: 491 case IPPROTO_DSTOPTS: 492 /* Assumes the headers are identical for hbh and dst */ 493 desthdr = (ip6_dest_t *)whereptr; 494 ehdrlen = 8 * (desthdr->ip6d_len + 1); 495 if ((uchar_t *)desthdr + ehdrlen > endptr) 496 return (B_FALSE); 497 nexthdrp = &desthdr->ip6d_nxt; 498 break; 499 case IPPROTO_ROUTING: 500 rthdr = (ip6_rthdr_t *)whereptr; 501 ehdrlen = 8 * (rthdr->ip6r_len + 1); 502 if ((uchar_t *)rthdr + ehdrlen > endptr) 503 return (B_FALSE); 504 nexthdrp = &rthdr->ip6r_nxt; 505 break; 506 case IPPROTO_FRAGMENT: 507 fraghdr = (ip6_frag_t *)whereptr; 508 ehdrlen = sizeof (ip6_frag_t); 509 if ((uchar_t *)&fraghdr[1] > endptr) 510 return (B_FALSE); 511 nexthdrp = &fraghdr->ip6f_nxt; 512 break; 513 case IPPROTO_NONE: 514 /* No next header means we're finished */ 515 default: 516 *hdr_length = length; 517 *next_hdr = *nexthdrp; 518 return (B_TRUE); 519 } 520 length += ehdrlen; 521 whereptr += ehdrlen; 522 *hdr_length = length; 523 *next_hdr = *nexthdrp; 524 } 525 switch (*nexthdrp) { 526 case IPPROTO_HOPOPTS: 527 case IPPROTO_DSTOPTS: 528 case IPPROTO_ROUTING: 529 case IPPROTO_FRAGMENT: 530 /* 531 * If any know extension headers are still to be processed, 532 * the packet's malformed (or at least all the IP header(s) are 533 * not in the same mblk - and that should never happen. 534 */ 535 return (B_FALSE); 536 537 default: 538 /* 539 * If we get here, we know that all of the IP headers were in 540 * the same mblk, even if the ULP header is in the next mblk. 541 */ 542 *hdr_length = length; 543 *next_hdr = *nexthdrp; 544 return (B_TRUE); 545 } 546 } 547 548 typedef struct mac_dladm_intr { 549 int ino; 550 int cpu_id; 551 char driver_path[MAXPATHLEN]; 552 char nexus_path[MAXPATHLEN]; 553 } mac_dladm_intr_t; 554 555 /* Bind the interrupt to cpu_num */ 556 static int 557 mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int ino) 558 { 559 pcitool_intr_set_t iset; 560 int err; 561 562 iset.ino = ino; 563 iset.cpu_id = cpu_num; 564 iset.user_version = PCITOOL_VERSION; 565 err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, 566 kcred, NULL); 567 568 return (err); 569 } 570 571 /* 572 * Search interrupt information. iget is filled in with the info to search 573 */ 574 static boolean_t 575 mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) 576 { 577 int i; 578 char driver_path[2 * MAXPATHLEN]; 579 580 for (i = 0; i < iget_p->num_devs; i++) { 581 (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); 582 (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, 583 ":%s%d", iget_p->dev[i].driver_name, 584 iget_p->dev[i].dev_inst); 585 /* Match the device path for the device path */ 586 if (strcmp(driver_path, dln->driver_path) == 0) { 587 dln->ino = iget_p->ino; 588 dln->cpu_id = iget_p->cpu_id; 589 return (B_TRUE); 590 } 591 } 592 return (B_FALSE); 593 } 594 595 /* 596 * Get information about ino, i.e. if this is the interrupt for our 597 * device and where it is bound etc. 598 */ 599 static boolean_t 600 mac_get_single_intr(ldi_handle_t lh, int ino, mac_dladm_intr_t *dln) 601 { 602 pcitool_intr_get_t *iget_p; 603 int ipsz; 604 int nipsz; 605 int err; 606 uint8_t inum; 607 608 /* 609 * Check if SLEEP is OK, i.e if could come here in response to 610 * changing the fanout due to some callback from the driver, say 611 * link speed changes. 612 */ 613 ipsz = PCITOOL_IGET_SIZE(0); 614 iget_p = kmem_zalloc(ipsz, KM_SLEEP); 615 616 iget_p->num_devs_ret = 0; 617 iget_p->user_version = PCITOOL_VERSION; 618 iget_p->ino = ino; 619 620 err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 621 FKIOCTL, kcred, NULL); 622 if (err != 0) { 623 kmem_free(iget_p, ipsz); 624 return (B_FALSE); 625 } 626 if (iget_p->num_devs == 0) { 627 kmem_free(iget_p, ipsz); 628 return (B_FALSE); 629 } 630 inum = iget_p->num_devs; 631 if (iget_p->num_devs_ret < iget_p->num_devs) { 632 /* Reallocate */ 633 nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); 634 635 kmem_free(iget_p, ipsz); 636 ipsz = nipsz; 637 iget_p = kmem_zalloc(ipsz, KM_SLEEP); 638 639 iget_p->num_devs_ret = inum; 640 iget_p->ino = ino; 641 iget_p->user_version = PCITOOL_VERSION; 642 err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 643 FKIOCTL, kcred, NULL); 644 if (err != 0) { 645 kmem_free(iget_p, ipsz); 646 return (B_FALSE); 647 } 648 /* defensive */ 649 if (iget_p->num_devs != iget_p->num_devs_ret) { 650 kmem_free(iget_p, ipsz); 651 return (B_FALSE); 652 } 653 } 654 655 if (mac_search_intrinfo(iget_p, dln)) { 656 kmem_free(iget_p, ipsz); 657 return (B_TRUE); 658 } 659 kmem_free(iget_p, ipsz); 660 return (B_FALSE); 661 } 662 663 /* 664 * Get the interrupts and check each one to see if it is for our device. 665 */ 666 static int 667 mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) 668 { 669 pcitool_intr_info_t intr_info; 670 int err; 671 int ino; 672 673 err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, 674 FKIOCTL, kcred, NULL); 675 if (err != 0) 676 return (-1); 677 678 for (ino = 0; ino < intr_info.num_intr; ino++) { 679 if (mac_get_single_intr(lh, ino, dln)) { 680 if (dln->cpu_id == cpuid) 681 return (0); 682 return (1); 683 } 684 } 685 return (-1); 686 } 687 688 /* 689 * Obtain the nexus parent node info. for mdip. 690 */ 691 static dev_info_t * 692 mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) 693 { 694 struct dev_info *tdip = (struct dev_info *)mdip; 695 struct ddi_minor_data *minordata; 696 int circ; 697 dev_info_t *pdip; 698 char pathname[MAXPATHLEN]; 699 700 while (tdip != NULL) { 701 /* 702 * The netboot code could call this function while walking the 703 * device tree so we need to use ndi_devi_tryenter() here to 704 * avoid deadlock. 705 */ 706 if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0) 707 break; 708 709 for (minordata = tdip->devi_minor; minordata != NULL; 710 minordata = minordata->next) { 711 if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, 712 strlen(DDI_NT_INTRCTL)) == 0) { 713 pdip = minordata->dip; 714 (void) ddi_pathname(pdip, pathname); 715 (void) snprintf(dln->nexus_path, MAXPATHLEN, 716 "/devices%s:intr", pathname); 717 (void) ddi_pathname_minor(minordata, pathname); 718 ndi_devi_exit((dev_info_t *)tdip, circ); 719 return (pdip); 720 } 721 } 722 ndi_devi_exit((dev_info_t *)tdip, circ); 723 tdip = tdip->devi_parent; 724 } 725 return (NULL); 726 } 727 728 /* 729 * For a primary MAC client, if the user has set a list or CPUs or 730 * we have obtained it implicitly, we try to retarget the interrupt 731 * for that device on one of the CPUs in the list. 732 * We assign the interrupt to the same CPU as the poll thread. 733 */ 734 static boolean_t 735 mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) 736 { 737 ldi_handle_t lh = NULL; 738 ldi_ident_t li = NULL; 739 int err; 740 int ret; 741 mac_dladm_intr_t dln; 742 dev_info_t *dip; 743 struct ddi_minor_data *minordata; 744 745 dln.nexus_path[0] = '\0'; 746 dln.driver_path[0] = '\0'; 747 748 minordata = ((struct dev_info *)mdip)->devi_minor; 749 while (minordata != NULL) { 750 if (minordata->type == DDM_MINOR) 751 break; 752 minordata = minordata->next; 753 } 754 if (minordata == NULL) 755 return (B_FALSE); 756 757 (void) ddi_pathname_minor(minordata, dln.driver_path); 758 759 dip = mac_get_nexus_node(mdip, &dln); 760 /* defensive */ 761 if (dip == NULL) 762 return (B_FALSE); 763 764 err = ldi_ident_from_major(ddi_driver_major(dip), &li); 765 if (err != 0) 766 return (B_FALSE); 767 768 err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); 769 if (err != 0) 770 return (B_FALSE); 771 772 ret = mac_validate_intr(lh, &dln, cpuid); 773 if (ret < 0) { 774 (void) ldi_close(lh, FREAD|FWRITE, kcred); 775 return (B_FALSE); 776 } 777 /* cmn_note? */ 778 if (ret != 0) 779 if ((err = (mac_set_intr(lh, cpuid, dln.ino))) != 0) { 780 (void) ldi_close(lh, FREAD|FWRITE, kcred); 781 return (B_FALSE); 782 } 783 (void) ldi_close(lh, FREAD|FWRITE, kcred); 784 return (B_TRUE); 785 } 786 787 void 788 mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) 789 { 790 dev_info_t *mdip = (dev_info_t *)arg; 791 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 792 mac_resource_props_t *mrp; 793 mac_perim_handle_t mph; 794 795 if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) 796 return; 797 798 mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); 799 mrp = MCIP_RESOURCE_PROPS(mcip); 800 mrp->mrp_intr_cpu = cpuid; 801 mac_perim_exit(mph); 802 } 803 804 int32_t 805 mac_client_intr_cpu(mac_client_handle_t mch) 806 { 807 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 808 mac_cpus_t *srs_cpu; 809 mac_soft_ring_set_t *rx_srs; 810 flow_entry_t *flent = mcip->mci_flent; 811 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 812 813 /* 814 * Check if we need to retarget the interrupt. We do this only 815 * for the primary MAC client. We do this if we have the only 816 * exclusive ring in the group. 817 */ 818 if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { 819 rx_srs = flent->fe_rx_srs[1]; 820 srs_cpu = &rx_srs->srs_cpu; 821 if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) 822 return (-1); 823 return (srs_cpu->mc_pollid); 824 } 825 return (-1); 826 } 827 828 void * 829 mac_get_devinfo(mac_handle_t mh) 830 { 831 mac_impl_t *mip = (mac_impl_t *)mh; 832 833 return ((void *)mip->mi_dip); 834 } 835 836 #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 837 #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 838 839 uint64_t 840 mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) 841 { 842 struct ether_header *ehp; 843 uint64_t hash = 0; 844 uint16_t sap; 845 uint_t skip_len; 846 uint8_t proto; 847 848 /* 849 * We may want to have one of these per MAC type plugin in the 850 * future. For now supports only ethernet. 851 */ 852 if (media != DL_ETHER) 853 return (0L); 854 855 /* for now we support only outbound packets */ 856 ASSERT(is_outbound); 857 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 858 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 859 860 /* compute L2 hash */ 861 862 ehp = (struct ether_header *)mp->b_rptr; 863 864 if ((policy & MAC_PKT_HASH_L2) != 0) { 865 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 866 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 867 hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); 868 policy &= ~MAC_PKT_HASH_L2; 869 } 870 871 if (policy == 0) 872 goto done; 873 874 /* skip ethernet header */ 875 876 sap = ntohs(ehp->ether_type); 877 if (sap == ETHERTYPE_VLAN) { 878 struct ether_vlan_header *evhp; 879 mblk_t *newmp = NULL; 880 881 skip_len = sizeof (struct ether_vlan_header); 882 if (MBLKL(mp) < skip_len) { 883 /* the vlan tag is the payload, pull up first */ 884 newmp = msgpullup(mp, -1); 885 if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 886 goto done; 887 } 888 evhp = (struct ether_vlan_header *)newmp->b_rptr; 889 } else { 890 evhp = (struct ether_vlan_header *)mp->b_rptr; 891 } 892 893 sap = ntohs(evhp->ether_type); 894 freemsg(newmp); 895 } else { 896 skip_len = sizeof (struct ether_header); 897 } 898 899 /* if ethernet header is in its own mblk, skip it */ 900 if (MBLKL(mp) <= skip_len) { 901 skip_len -= MBLKL(mp); 902 mp = mp->b_cont; 903 if (mp == NULL) 904 goto done; 905 } 906 907 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 908 909 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 910 911 switch (sap) { 912 case ETHERTYPE_IP: { 913 ipha_t *iphp; 914 915 /* 916 * If the header is not aligned or the header doesn't fit 917 * in the mblk, bail now. Note that this may cause packets 918 * reordering. 919 */ 920 iphp = (ipha_t *)(mp->b_rptr + skip_len); 921 if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || 922 !OK_32PTR((char *)iphp)) 923 goto done; 924 925 proto = iphp->ipha_protocol; 926 skip_len += IPH_HDR_LENGTH(iphp); 927 928 if ((policy & MAC_PKT_HASH_L3) != 0) { 929 uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 930 uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 931 932 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 933 PKT_HASH_4BYTES(ip_dst)); 934 policy &= ~MAC_PKT_HASH_L3; 935 } 936 break; 937 } 938 case ETHERTYPE_IPV6: { 939 ip6_t *ip6hp; 940 uint16_t hdr_length; 941 942 /* 943 * If the header is not aligned or the header doesn't fit 944 * in the mblk, bail now. Note that this may cause packets 945 * reordering. 946 */ 947 948 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 949 if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || 950 !OK_32PTR((char *)ip6hp)) 951 goto done; 952 953 if (!mac_ip_hdr_length_v6(mp, ip6hp, &hdr_length, &proto)) 954 goto done; 955 skip_len += hdr_length; 956 957 if ((policy & MAC_PKT_HASH_L3) != 0) { 958 uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 959 uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 960 961 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 962 PKT_HASH_4BYTES(ip_dst)); 963 policy &= ~MAC_PKT_HASH_L3; 964 } 965 break; 966 } 967 default: 968 goto done; 969 } 970 971 if (policy == 0) 972 goto done; 973 974 /* if ip header is in its own mblk, skip it */ 975 if (MBLKL(mp) <= skip_len) { 976 skip_len -= MBLKL(mp); 977 mp = mp->b_cont; 978 if (mp == NULL) 979 goto done; 980 } 981 982 /* parse ULP header */ 983 again: 984 switch (proto) { 985 case IPPROTO_TCP: 986 case IPPROTO_UDP: 987 case IPPROTO_ESP: 988 case IPPROTO_SCTP: 989 /* 990 * These Internet Protocols are intentionally designed 991 * for hashing from the git-go. Port numbers are in the first 992 * word for transports, SPI is first for ESP. 993 */ 994 if (mp->b_rptr + skip_len + 4 > mp->b_wptr) 995 goto done; 996 hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); 997 break; 998 999 case IPPROTO_AH: { 1000 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 1001 uint_t ah_length = AH_TOTAL_LEN(ah); 1002 1003 if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) 1004 goto done; 1005 1006 proto = ah->ah_nexthdr; 1007 skip_len += ah_length; 1008 1009 /* if AH header is in its own mblk, skip it */ 1010 if (MBLKL(mp) <= skip_len) { 1011 skip_len -= MBLKL(mp); 1012 mp = mp->b_cont; 1013 if (mp == NULL) 1014 goto done; 1015 } 1016 1017 goto again; 1018 } 1019 } 1020 1021 done: 1022 return (hash); 1023 } 1024