1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 * Copyright 2019 Joyent, Inc. 27 * Copyright 2024 Oxide Computer Company 28 */ 29 /* Copyright (c) 1990 Mentat Inc. 
*/ 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsubr.h> 37 #include <sys/strlog.h> 38 #include <sys/strsun.h> 39 #include <sys/zone.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/xti_inet.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/kobj.h> 48 #include <sys/modctl.h> 49 #include <sys/atomic.h> 50 #include <sys/policy.h> 51 #include <sys/priv.h> 52 53 #include <sys/systm.h> 54 #include <sys/param.h> 55 #include <sys/kmem.h> 56 #include <sys/sdt.h> 57 #include <sys/socket.h> 58 #include <sys/vtrace.h> 59 #include <sys/isa_defs.h> 60 #include <sys/mac.h> 61 #include <net/if.h> 62 #include <net/if_arp.h> 63 #include <net/route.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <net/if_dl.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 76 #include <netinet/igmp_var.h> 77 #include <netinet/ip6.h> 78 #include <netinet/icmp6.h> 79 #include <netinet/sctp.h> 80 81 #include <inet/ip.h> 82 #include <inet/ip_impl.h> 83 #include <inet/ip6.h> 84 #include <inet/ip6_asp.h> 85 #include <inet/optcom.h> 86 #include <inet/tcp.h> 87 #include <inet/tcp_impl.h> 88 #include <inet/ip_multi.h> 89 #include <inet/ip_if.h> 90 #include <inet/ip_ire.h> 91 #include <inet/ip_ftable.h> 92 #include <inet/ip_rts.h> 93 #include <inet/ip_ndp.h> 94 #include <inet/ip_listutils.h> 95 #include <netinet/igmp.h> 96 #include <netinet/ip_mroute.h> 97 #include <inet/ipp_common.h> 98 99 #include <net/pfkeyv2.h> 100 #include <inet/sadb.h> 101 #include <inet/ipsec_impl.h> 102 #include <inet/ipdrop.h> 103 #include <inet/ip_netinfo.h> 104 #include <inet/ilb_ip.h> 105 #include <sys/squeue_impl.h> 106 #include <sys/squeue.h> 107 108 
#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <ipp/ipp.h>
#include <ipp/ipp_impl.h>
#include <ipp/ipgpc/ipgpc.h>

#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/udp_impl.h>
#include <sys/sunddi.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <sys/clock_impl.h>	/* For LBOLT_FASTPATH{,64} */

#ifdef DEBUG
extern boolean_t skip_sctp_cksum;
#endif

static void	ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);

static void	ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
    ip_recv_attr_t *);

#pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)

/*
 * Direct read side procedure capable of dealing with chains. GLDv3 based
 * drivers call this function directly with mblk chains, while the STREAMS
 * read side procedure ip_rput() calls it for a single packet with ip_ring
 * set to NULL so that one packet is processed at a time.
 *
 * The ill will always be valid if this function is called directly from
 * the driver.
 *
 * If this chain is part of a VLAN stream, then the VLAN tag is
 * stripped from the MAC header before being delivered to this
 * function.
 *
 * If the IP header in a packet is not 32-bit aligned, every message in the
 * chain will be aligned before further operations. This is required on the
 * SPARC platform.
 *
 * This is ip_input_common_v6() without a target squeue: the three
 * "accepted packet" arguments are passed as NULL and the returned chain is
 * discarded.
 */
void
ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	squeue_t	*no_target_sqp = NULL;
	mblk_t		**no_last = NULL;
	uint_t		*no_cnt = NULL;

	(void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip,
	    no_target_sqp, no_last, no_cnt);
}

/*
 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode. The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via the ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v6 and ip_fanout_v6 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 *
 * No MAC header info is supplied on this path (mhip is passed as NULL).
 */
mblk_t *
ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	mblk_t	*accepted;

	accepted = ip_input_common_v6(ill, ip_ring, mp_chain, NULL,
	    target_sqp, last, cnt);
	return (accepted);
}

/*
 * Used by ip_input_v6 and ip_accept_tcp_v6
 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 * only used by ip_input_v6.
191 */ 192 mblk_t * 193 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 194 struct mac_header_info_s *mhip, squeue_t *target_sqp, 195 mblk_t **last, uint_t *cnt) 196 { 197 mblk_t *mp; 198 ip6_t *ip6h; 199 ip_recv_attr_t iras; /* Receive attributes */ 200 rtc_t rtc; 201 iaflags_t chain_flags = 0; /* Fixed for chain */ 202 mblk_t *ahead = NULL; /* Accepted head */ 203 mblk_t *atail = NULL; /* Accepted tail */ 204 uint_t acnt = 0; /* Accepted count */ 205 206 ASSERT(mp_chain != NULL); 207 ASSERT(ill != NULL); 208 209 /* These ones do not change as we loop over packets */ 210 iras.ira_ill = iras.ira_rill = ill; 211 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 212 iras.ira_rifindex = iras.ira_ruifindex; 213 iras.ira_sqp = NULL; 214 iras.ira_ring = ip_ring; 215 /* For ECMP and outbound transmit ring selection */ 216 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring); 217 218 iras.ira_target_sqp = target_sqp; 219 iras.ira_target_sqp_mp = NULL; 220 if (target_sqp != NULL) 221 chain_flags |= IRAF_TARGET_SQP; 222 223 /* 224 * We try to have a mhip pointer when possible, but 225 * it might be NULL in some cases. In those cases we 226 * have to assume unicast. 227 */ 228 iras.ira_mhip = mhip; 229 iras.ira_flags = 0; 230 if (mhip != NULL) { 231 switch (mhip->mhi_dsttype) { 232 case MAC_ADDRTYPE_MULTICAST : 233 chain_flags |= IRAF_L2DST_MULTICAST; 234 break; 235 case MAC_ADDRTYPE_BROADCAST : 236 chain_flags |= IRAF_L2DST_BROADCAST; 237 break; 238 } 239 } 240 241 /* 242 * Initialize the one-element route cache. 243 * 244 * We do ire caching from one iteration to 245 * another. In the event the packet chain contains 246 * all packets from the same dst, this caching saves 247 * an ire_route_recursive for each of the succeeding 248 * packets in a packet chain. 
249 */ 250 rtc.rtc_ire = NULL; 251 rtc.rtc_ip6addr = ipv6_all_zeros; 252 253 /* Loop over b_next */ 254 for (mp = mp_chain; mp != NULL; mp = mp_chain) { 255 mp_chain = mp->b_next; 256 mp->b_next = NULL; 257 258 /* 259 * if db_ref > 1 then copymsg and free original. Packet 260 * may be changed and we do not want the other entity 261 * who has a reference to this message to trip over the 262 * changes. This is a blind change because trying to 263 * catch all places that might change the packet is too 264 * difficult. 265 * 266 * This corresponds to the fast path case, where we have 267 * a chain of M_DATA mblks. We check the db_ref count 268 * of only the 1st data block in the mblk chain. There 269 * doesn't seem to be a reason why a device driver would 270 * send up data with varying db_ref counts in the mblk 271 * chain. In any case the Fast path is a private 272 * interface, and our drivers don't do such a thing. 273 * Given the above assumption, there is no need to walk 274 * down the entire mblk chain (which could have a 275 * potential performance problem) 276 * 277 * The "(DB_REF(mp) > 1)" check was moved from ip_rput() 278 * to here because of exclusive ip stacks and vnics. 279 * Packets transmitted from exclusive stack over vnic 280 * can have db_ref > 1 and when it gets looped back to 281 * another vnic in a different zone, you have ip_input() 282 * getting dblks with db_ref > 1. So if someone 283 * complains of TCP performance under this scenario, 284 * take a serious look here on the impact of copymsg(). 285 */ 286 if (DB_REF(mp) > 1) { 287 if ((mp = ip_fix_dbref(mp, &iras)) == NULL) 288 continue; 289 } 290 291 /* 292 * IP header ptr not aligned? 
293 * OR IP header not complete in first mblk 294 */ 295 ip6h = (ip6_t *)mp->b_rptr; 296 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) { 297 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras); 298 if (mp == NULL) 299 continue; 300 ip6h = (ip6_t *)mp->b_rptr; 301 } 302 303 /* Protect against a mix of Ethertypes and IP versions */ 304 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) { 305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 306 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 307 freemsg(mp); 308 /* mhip might point into 1st packet in the chain. */ 309 iras.ira_mhip = NULL; 310 continue; 311 } 312 313 /* 314 * Check for Martian addrs; we have to explicitly 315 * test for for zero dst since this is also used as 316 * an indication that the rtc is not used. 317 */ 318 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) { 319 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 320 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 321 freemsg(mp); 322 /* mhip might point into 1st packet in the chain. */ 323 iras.ira_mhip = NULL; 324 continue; 325 } 326 /* 327 * Keep L2SRC from a previous packet in chain since mhip 328 * might point into an earlier packet in the chain. 329 */ 330 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET); 331 332 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags; 333 iras.ira_free_flags = 0; 334 iras.ira_cred = NULL; 335 iras.ira_cpid = NOPID; 336 iras.ira_tsl = NULL; 337 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */ 338 339 /* 340 * We must count all incoming packets, even if they end 341 * up being dropped later on. Defer counting bytes until 342 * we have the whole IP header in first mblk. 
343 */ 344 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 345 346 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 347 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 348 iras.ira_pktlen); 349 iras.ira_ttl = ip6h->ip6_hlim; 350 351 /* 352 * Call one of: 353 * ill_input_full_v6 354 * ill_input_short_v6 355 * The former is used in the case of TX. See ill_set_inputfn(). 356 */ 357 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 358 359 /* Any references to clean up? No hold on ira_ill */ 360 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 361 ira_cleanup(&iras, B_FALSE); 362 363 if (iras.ira_target_sqp_mp != NULL) { 364 /* Better be called from ip_accept_tcp */ 365 ASSERT(target_sqp != NULL); 366 367 /* Found one packet to accept */ 368 mp = iras.ira_target_sqp_mp; 369 iras.ira_target_sqp_mp = NULL; 370 ASSERT(ip_recv_attr_is_mblk(mp)); 371 372 if (atail != NULL) 373 atail->b_next = mp; 374 else 375 ahead = mp; 376 atail = mp; 377 acnt++; 378 mp = NULL; 379 } 380 /* mhip might point into 1st packet in the chain. */ 381 iras.ira_mhip = NULL; 382 } 383 /* Any remaining references to the route cache? */ 384 if (rtc.rtc_ire != NULL) { 385 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 386 ire_refrele(rtc.rtc_ire); 387 } 388 389 if (ahead != NULL) { 390 /* Better be called from ip_accept_tcp */ 391 ASSERT(target_sqp != NULL); 392 *last = atail; 393 *cnt = acnt; 394 return (ahead); 395 } 396 397 return (NULL); 398 } 399 400 /* 401 * This input function is used when 402 * - is_system_labeled() 403 * 404 * Note that for IPv6 CGTP filtering is handled only when receiving fragment 405 * headers, and RSVP uses router alert options, thus we don't need anything 406 * extra for them. 
407 */ 408 void 409 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 410 ip_recv_attr_t *ira, rtc_t *rtc) 411 { 412 ip6_t *ip6h = (ip6_t *)iph_arg; 413 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg; 414 ill_t *ill = ira->ira_ill; 415 416 ASSERT(ira->ira_tsl == NULL); 417 418 /* 419 * Attach any necessary label information to 420 * this packet 421 */ 422 if (is_system_labeled()) { 423 ira->ira_flags |= IRAF_SYSTEM_LABELED; 424 425 /* 426 * This updates ira_cred, ira_tsl and ira_free_flags based 427 * on the label. 428 */ 429 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) { 430 if (ip6opt_ls != 0) 431 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 432 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 433 ip_drop_input("ipIfStatsInDiscards", mp, ill); 434 freemsg(mp); 435 return; 436 } 437 /* Note that ira_tsl can be NULL here. */ 438 439 /* tsol_get_pkt_label sometimes does pullupmsg */ 440 ip6h = (ip6_t *)mp->b_rptr; 441 } 442 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc); 443 } 444 445 /* 446 * Check for IPv6 addresses that should not appear on the wire 447 * as either source or destination. 448 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have 449 * to revisit the IPv4-mapped part. 450 */ 451 static boolean_t 452 ip6_bad_address(in6_addr_t *addr, boolean_t is_src) 453 { 454 if (IN6_IS_ADDR_V4MAPPED(addr)) { 455 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr")); 456 return (B_TRUE); 457 } 458 if (IN6_IS_ADDR_LOOPBACK(addr)) { 459 ip1dbg(("ip_input_v6: pkt with loopback addr")); 460 return (B_TRUE); 461 } 462 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) { 463 /* 464 * having :: in the src is ok: it's used for DAD. 465 */ 466 ip1dbg(("ip_input_v6: pkt with unspecified addr")); 467 return (B_TRUE); 468 } 469 return (B_FALSE); 470 } 471 472 /* 473 * Routing lookup for IPv6 link-locals. 474 * First we look on the inbound interface, then we check for IPMP and 475 * look on the upper interface. 
* We update ira_ruifindex if we find the IRE on the upper interface.
 */
/*
 * nexthop	link-local address to look up (asserted).
 * ill		inbound ill; the first lookup is restricted to it.
 * ira		receive attributes; ira_tsl and ira_xmit_hint are read and
 *		ira_ruifindex may be written.
 * irr_flags	passed through to ire_route_recursive_v6().
 *
 * Returns the IRE from the lookup; the result is dereferenced without a
 * NULL check, so the lookup is relied upon to always return one.
 */
static ire_t *
ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
    uint_t irr_flags, ip_stack_t *ipst)
{
	int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
	ire_t *ire;

	ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
	ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
	    match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
	/*
	 * Done unless the result is a reject/blackhole IRE and the ill is
	 * an IPMP underlying interface.
	 */
	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
	    !IS_UNDER_IPMP(ill))
		return (ire);

	/*
	 * When we are using IPMP we need to look for an IRE on both the
	 * under and upper interfaces since there are different
	 * link-local addresses for the under and upper.
	 */
	ill = ipmp_ill_hold_ipmp_ill(ill);
	if (ill == NULL)
		return (ire);

	/*
	 * Note: the index is switched to the upper ill as soon as the hold
	 * succeeds, before the outcome of the second lookup is known.
	 */
	ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;

	/* Drop the first result and retry on the upper (IPMP) ill. */
	ire_refrele(ire);
	ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
	    match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
	ill_refrele(ill);
	return (ire);
}

/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 *
 * mp		the packet; asserted to be M_DATA.
 * iph_arg	pointer to the packet's IPv6 header (ip6_t).
 * nexthop_arg	pointer to the in6_addr_t to route on; copied by value on
 *		entry, so later changes to the packet do not affect it.
 * ira		receive attributes; ira_pktlen, ira_ip_hdr_length,
 *		ira_protocol and ira_flags are updated along the way.
 * rtc		one-element route cache; consulted and refreshed here.
 *
 * The packet is never handed back to the caller: every early return
 * either frees it here or follows a helper/hook that left no message to
 * continue with.
 */
void
ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ip6_t		*ip6h = (ip6_t *)iph_arg;
	in6_addr_t	nexthop = *(in6_addr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
/* rptr always follows the current value of ip6h, including re-assignments */
#define	rptr	((uchar_t *)ip6h)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * Check for source/dest being a bad address: loopback, any, or
	 * v4mapped. All of them start with 64 bits of zero, so the full
	 * check is only made for addresses whose first two words are zero.
	 */
	if (ip6h->ip6_src.s6_addr32[0] == 0 &&
	    ip6h->ip6_src.s6_addr32[1] == 0) {
		if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
			ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}
	if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
	    ip6h->ip6_dst.s6_addr32[1] == 0) {
		if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
			ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/* Bytes in the first mblk, counted from the IPv6 header. */
	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		/* First mblk length differs from the header's length. */
		mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
		if (mp == NULL)
			return;
		ip6h = (ip6_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
	    int, 0);
	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip6__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ip6_t *, ip6h, mblk_t *, mp);

	if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		/* Remember the destination so a hook rewrite is detectable. */
		in6_addr_t orig_dst = ip6h->ip6_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS6(ipst->ips_ip6_physical_in_event,
		    ipst->ips_ipv6firewall_physical_in,
		    ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);

		/* The hook left no packet to continue with. */
		if (mp == NULL)
			return;

		/* The length could have changed */
		ip6h = (ip6_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
			nexthop = ip6h->ip6_dst;

		if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}

	}

	if (ipst->ips_ip6_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/* Version / traffic-class-and-flow word must match the default. */
	if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
	    IPV6_DEFAULT_VERS_AND_FLOW) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
		ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * For IPv6 we update ira_ip_hdr_length and ira_protocol as
	 * we parse the headers, starting with the hop-by-hop options header.
	 */
	ira->ira_ip_hdr_length = IPV6_HDR_LEN;
	if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
		ip6_hbh_t	*hbhhdr;
		uint_t		ehdrlen;
		uint8_t		*optptr;

		/* Need at least the minimal extension header to read its length. */
		if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return;
		}
		if (mp->b_cont != NULL &&
		    rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
			ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
			if (ip6h == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
		}
		hbhhdr = (ip6_hbh_t *)&ip6h[1];
		/* ip6h_len counts 8-byte units beyond the first 8 bytes. */
		ehdrlen = 8 * (hbhhdr->ip6h_len + 1);

		if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return;
		}
		if (mp->b_cont != NULL &&
		    rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
			ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
			if (ip6h == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
			/* ip6h moved; re-derive the extension header. */
			hbhhdr = (ip6_hbh_t *)&ip6h[1];
		}

		/*
		 * Update ira_ip_hdr_length to skip the hop-by-hop header
		 * once we get to ip_fanout_v6
		 */
		ira->ira_ip_hdr_length += ehdrlen;
		ira->ira_protocol = hbhhdr->ip6h_nxt;

		/* Options start right after the 2-byte nxt/len prefix. */
		optptr = (uint8_t *)&hbhhdr[1];
		switch (ip_process_options_v6(mp, ip6h, optptr,
		    ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
		case -1:
			/*
			 * Packet has been consumed and any
			 * needed ICMP messages sent.
			 */
			return;
		case 0:
			/* no action needed */
			break;
		case 1:
			/*
			 * Known router alert. Make us handle it as local
			 * by setting the nexthop to be the all-host multicast
			 * address, and skip multicast membership filter by
			 * marking as a router alert.
			 */
			ira->ira_flags |= IRAF_ROUTER_ALERT;
			nexthop = ipv6_all_hosts_mcast;
			break;
		}
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 *
	 * We don't load balance IPv6 link-locals.
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
	    !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
		in6_addr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we just pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ip6h = (ip6_t *)mp->b_rptr;
		}
		lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
		    (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
		if (lb_ret == ILB_DROPPED) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			/* Clear the checksum flags on the balanced packet. */
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
			ire = ire_multicast(ill);
		} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
			ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
			    ipst);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v6(&nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/*
		 * Update the route cache so we do the ire_refrele: the new
		 * reference is parked in rtc and released when it is next
		 * replaced or when ip_input_common_v6 finishes the chain.
		 */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ip6addr = nexthop;
	} else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
	    rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
			ire = ire_multicast(ill);
		} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
			ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
			    ipst);
		} else {
			ire = ire_route_recursive_dstonly_v6(&nexthop,
			    irr_flags, ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ip6addr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v6 - for IRE_LOCAL
	 *	ire_recv_loopback_v6 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v6 - if RTF_MULTIRT
	 *	ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v6 - for IRE_MULTICAST
	 *	ire_recv_noaccept_v6 - for ire_noaccept ones
	 *	ire_recv_forward_v6 - for the rest.
*/

	(*ire->ire_recvfn)(ire, mp, ip6h, ira);
}
#undef rptr

/*
 * ire_recvfn for IREs that need forwarding
 *
 * ire		the IRE the packet matched.
 * mp		the packet.
 * iph_arg	pointer to the packet's IPv6 header (ip6_t).
 * ira		receive attributes; ira_ill is the inbound ill.
 *
 * The packet is either dropped (counted and freed) or passed on: normally
 * to ip_forward_xmit_v6(), or, when the IRE belongs to a non-global zone,
 * to the ire_recvfn of a fresh lookup done for GLOBAL_ZONEID.
 * Once an nce reference has been taken, every return path releases it.
 */
void
ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h = (ip6_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	iaflags_t	iraflags = ira->ira_flags;
	ill_t		*dst_ill;
	nce_t		*nce;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	/* Link-layer multicast/broadcast arrivals are never forwarded. */
	if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	/* The inbound ill must have forwarding enabled. */
	if (!(ill->ill_flags & ILLF_ROUTER)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but that some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		/* ire_lock is dropped around the revalidation call. */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	/* Take our own reference while still holding ire_lock. */
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		/* Swap the condemned nce for its replacement, if any. */
		nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * Unlike IPv4 we don't allow source routed packets out the same
	 * interface when we are not a router.
	 * Note that ill_forward_set() will set the ILLF_ROUTER on
	 * all the group members when it gets an ipmp-ill or under-ill.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		/* Undo the inbound count; this IRE is not used after all. */
		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * For IPv6 any source route would have already been
		 * advanced in ip_fanout_v6
		 */
		ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
		    GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		/* Re-dispatch on the new IRE, then drop our references. */
		(*ire->ire_recvfn)(ire, mp, ip6h, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}
	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip6__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);

	if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
		int	error;

		/*
		 * NOTE(review): the IPv6 physical-in path in this file runs
		 * its hooks through FW_HOOKS6, while this IPv6 forwarding
		 * event goes through FW_HOOKS. Confirm against the macro
		 * definitions which one is intended here.
		 */
		FW_HOOKS(ipst->ips_ip6_forwarding_event,
		    ipst->ips_ipv6firewall_forwarding,
		    ill, dst_ill, ip6h, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);

		/* The hook left no packet to continue with. */
		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ip6h = (ip6_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Per RFC 3513 section 2.5.2, we must not forward packets with
	 * an unspecified source address.
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input_v6
	 * In the future one can envision adding RPF checks using number 3.
	 *
	 * Values 1 and 2 are handled identically here; any other non-zero
	 * value falls through the switch without a check.
	 */
	switch (ipst->ips_src_check) {
	case 0:
		break;
	case 1:
	case 2:
		if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
		    IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			nce_refrele(nce);
			freemsg(mp);
			return;
		}
		break;
	}

	/*
	 * Check to see if we're forwarding the packet to a
	 * different link from which it came.  If so, check the
	 * source and destination addresses since routers must not
	 * forward any packets with link-local source or
	 * destination addresses to other links.  Otherwise (if
	 * we're forwarding onto the same link), conditionally send
	 * a redirect message.
	 */
	if (!IS_ON_SAME_LAN(dst_ill, ill)) {
		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/* TBD add site-local check at site boundary? */
	} else if (ipst->ips_ipv6_send_redirects) {
		ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
	}

	added_tx_len = 0;
	if (iraflags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * ip_fragment needs to send an ICMP too big.
		 */
		mp = mp1;
		ip6h = (ip6_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
		ira->ira_ip_hdr_length = IPV6_HDR_LEN;
		ira->ira_ttl = ip6h->ip6_hlim;
		/* Only growth is recorded; shrinkage leaves added_tx_len 0. */
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;
	}

	/* Use the smaller of the outbound ill's MTU and the IRE's, if set. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
	nce_refrele(nce);
	return;

}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
 */
void
ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
    uint32_t mtu, uint32_t added_tx_len)
{
	ill_t		*dst_ill = nce->nce_ill;
	uint32_t	pkt_len;
	iaflags_t	iraflags = ira->ira_flags;
	ip_stack_t	*ipst = dst_ill->ill_ipst;

	if (ip6h->ip6_hops-- <= 1) {
		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
		icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
		    ira);
		return;
	}

	/* Initiate Write side IPPF processing before any fragmentation */
	if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
			    " during IPPF processing\n"));
			return;
		}
	}

	pkt_len = ira->ira_pktlen;

	BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);

	if (pkt_len > mtu) {
		BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
		ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
		if (iraflags & IRAF_SYSTEM_LABELED) {
			/*
			 * Remove any CIPSO option added by
			 * tsol_ip_forward, and make sure we report
			 * a
path MTU so that there 1115 * is room to add such a CIPSO option for future 1116 * packets. 1117 */ 1118 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6); 1119 } 1120 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira); 1121 return; 1122 } 1123 1124 ASSERT(pkt_len == 1125 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN); 1126 1127 if (iraflags & IRAF_LOOPBACK_COPY) { 1128 /* 1129 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg 1130 * is don't care 1131 */ 1132 (void) ip_postfrag_loopcheck(mp, nce, 1133 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL), 1134 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1135 } else { 1136 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL, 1137 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1138 } 1139 } 1140 1141 /* 1142 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1143 * which is what ire_route_recursive returns when there is no matching ire. 1144 * Send ICMP unreachable unless blackhole. 1145 */ 1146 void 1147 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1148 { 1149 ip6_t *ip6h = (ip6_t *)iph_arg; 1150 ill_t *ill = ira->ira_ill; 1151 ip_stack_t *ipst = ill->ill_ipst; 1152 1153 /* Would we have forwarded this packet if we had a route? */ 1154 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1156 ip_drop_input("l2 multicast not forwarded", mp, ill); 1157 freemsg(mp); 1158 return; 1159 } 1160 1161 if (!(ill->ill_flags & ILLF_ROUTER)) { 1162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1163 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1164 freemsg(mp); 1165 return; 1166 } 1167 /* 1168 * If we had a route this could have been forwarded. Count as such. 1169 * 1170 * ipIfStatsHCInForwDatagrams should only be increment if there 1171 * will be an attempt to forward the packet, which is why we 1172 * increment after the above condition has been checked. 
1173 */ 1174 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1175 1176 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1177 1178 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1179 ipst); 1180 1181 if (ire->ire_flags & RTF_BLACKHOLE) { 1182 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1183 freemsg(mp); 1184 } else { 1185 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1186 1187 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, 1188 ira); 1189 } 1190 } 1191 1192 /* 1193 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1194 * VRRP when in noaccept mode. 1195 * We silently drop packets except for Neighbor Solicitations and 1196 * Neighbor Advertisements. 1197 */ 1198 void 1199 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1200 ip_recv_attr_t *ira) 1201 { 1202 ip6_t *ip6h = (ip6_t *)iph_arg; 1203 ill_t *ill = ira->ira_ill; 1204 icmp6_t *icmp6; 1205 int ip_hdr_length; 1206 1207 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1208 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1209 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1210 freemsg(mp); 1211 return; 1212 } 1213 ip_hdr_length = ira->ira_ip_hdr_length; 1214 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 1215 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 1216 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 1217 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 1218 freemsg(mp); 1219 return; 1220 } 1221 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 1222 if (ip6h == NULL) { 1223 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1224 freemsg(mp); 1225 return; 1226 } 1227 } 1228 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 1229 1230 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT && 1231 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) { 1232 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1233 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1234 freemsg(mp); 
1235 return; 1236 } 1237 ire_recv_local_v6(ire, mp, ip6h, ira); 1238 } 1239 1240 /* 1241 * ire_recvfn for IRE_MULTICAST. 1242 */ 1243 void 1244 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1245 ip_recv_attr_t *ira) 1246 { 1247 ip6_t *ip6h = (ip6_t *)iph_arg; 1248 ill_t *ill = ira->ira_ill; 1249 1250 ASSERT(ire->ire_ill == ira->ira_ill); 1251 1252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1253 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1254 1255 /* Tag for higher-level protocols */ 1256 ira->ira_flags |= IRAF_MULTICAST; 1257 1258 /* 1259 * So that we don't end up with dups, only one ill an IPMP group is 1260 * nominated to receive multicast traffic. 1261 * If we have no cast_ill we are liberal and accept everything. 1262 */ 1263 if (IS_UNDER_IPMP(ill)) { 1264 ip_stack_t *ipst = ill->ill_ipst; 1265 1266 /* For an under ill_grp can change under lock */ 1267 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1268 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1269 ill->ill_grp->ig_cast_ill != NULL) { 1270 rw_exit(&ipst->ips_ill_g_lock); 1271 ip_drop_input("not on cast ill", mp, ill); 1272 freemsg(mp); 1273 return; 1274 } 1275 rw_exit(&ipst->ips_ill_g_lock); 1276 /* 1277 * We switch to the upper ill so that mrouter and hasmembers 1278 * can operate on upper here and in ip_input_multicast. 1279 */ 1280 ill = ipmp_ill_hold_ipmp_ill(ill); 1281 if (ill != NULL) { 1282 ASSERT(ill != ira->ira_ill); 1283 ASSERT(ire->ire_ill == ira->ira_ill); 1284 ira->ira_ill = ill; 1285 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1286 } else { 1287 ill = ira->ira_ill; 1288 } 1289 } 1290 1291 #ifdef notdef 1292 /* 1293 * Check if we are a multicast router - send ip_mforward a copy of 1294 * the packet. 1295 * Due to mroute_decap tunnels we consider forwarding packets even if 1296 * mrouted has not joined the allmulti group on this interface. 
1297 */ 1298 if (ipst->ips_ip_g_mrouter) { 1299 int retval; 1300 1301 /* 1302 * Clear the indication that this may have hardware 1303 * checksum as we are not using it for forwarding. 1304 */ 1305 DB_CKSUMFLAGS(mp) = 0; 1306 1307 /* 1308 * ip_mforward helps us make these distinctions: If received 1309 * on tunnel and not IGMP, then drop. 1310 * If IGMP packet, then don't check membership 1311 * If received on a phyint and IGMP or PIM, then 1312 * don't check membership 1313 */ 1314 retval = ip_mforward_v6(mp, ira); 1315 /* ip_mforward updates mib variables if needed */ 1316 1317 switch (retval) { 1318 case 0: 1319 /* 1320 * pkt is okay and arrived on phyint. 1321 */ 1322 break; 1323 case -1: 1324 /* pkt is mal-formed, toss it */ 1325 freemsg(mp); 1326 goto done; 1327 case 1: 1328 /* 1329 * pkt is okay and arrived on a tunnel 1330 * 1331 * If we are running a multicast router 1332 * we need to see all mld packets, which 1333 * are marked with router alerts. 1334 */ 1335 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1336 goto forus; 1337 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1338 freemsg(mp); 1339 goto done; 1340 } 1341 } 1342 #endif /* notdef */ 1343 1344 /* 1345 * If this was a router alert we skip the group membership check. 1346 */ 1347 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1348 goto forus; 1349 1350 /* 1351 * Check if we have members on this ill. This is not necessary for 1352 * correctness because even if the NIC/GLD had a leaky filter, we 1353 * filter before passing to each conn_t. 1354 */ 1355 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) { 1356 /* 1357 * Nobody interested 1358 * 1359 * This might just be caused by the fact that 1360 * multiple IP Multicast addresses map to the same 1361 * link layer multicast - no need to increment counter! 
1362 */ 1363 ip_drop_input("Multicast with no members", mp, ill); 1364 freemsg(mp); 1365 goto done; 1366 } 1367 forus: 1368 ip2dbg(("ire_recv_multicast_v6: multicast for us\n")); 1369 1370 /* 1371 * After reassembly and IPsec we will need to duplicate the 1372 * multicast packet for all matching zones on the ill. 1373 */ 1374 ira->ira_zoneid = ALL_ZONES; 1375 1376 /* Reassemble on the ill on which the packet arrived */ 1377 ip_input_local_v6(ire, mp, ip6h, ira); 1378 done: 1379 if (ill != ire->ire_ill) { 1380 ill_refrele(ill); 1381 ira->ira_ill = ire->ire_ill; 1382 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1383 } 1384 } 1385 1386 /* 1387 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1388 * Drop packets since we don't forward out multirt routes. 1389 */ 1390 /* ARGSUSED */ 1391 void 1392 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1393 { 1394 ill_t *ill = ira->ira_ill; 1395 1396 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1397 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1398 freemsg(mp); 1399 } 1400 1401 /* 1402 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1403 * has rewritten the packet to have a loopback destination address (We 1404 * filter out packet with a loopback destination from arriving over the wire). 1405 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1406 */ 1407 void 1408 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1409 { 1410 ip6_t *ip6h = (ip6_t *)iph_arg; 1411 ill_t *ill = ira->ira_ill; 1412 ill_t *ire_ill = ire->ire_ill; 1413 1414 ira->ira_zoneid = GLOBAL_ZONEID; 1415 1416 /* Switch to the lo0 ill for further processing */ 1417 if (ire_ill != ill) { 1418 /* 1419 * Update ira_ill to be the ILL on which the IP address 1420 * is hosted. 
1421 * No need to hold the ill since we have a hold on the ire 1422 */ 1423 ASSERT(ira->ira_ill == ira->ira_rill); 1424 ira->ira_ill = ire_ill; 1425 1426 ip_input_local_v6(ire, mp, ip6h, ira); 1427 1428 /* Restore */ 1429 ASSERT(ira->ira_ill == ire_ill); 1430 ira->ira_ill = ill; 1431 return; 1432 1433 } 1434 ip_input_local_v6(ire, mp, ip6h, ira); 1435 } 1436 1437 /* 1438 * ire_recvfn for IRE_LOCAL. 1439 */ 1440 void 1441 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1442 { 1443 ip6_t *ip6h = (ip6_t *)iph_arg; 1444 ill_t *ill = ira->ira_ill; 1445 ill_t *ire_ill = ire->ire_ill; 1446 1447 /* Make a note for DAD that this address is in use */ 1448 ire->ire_last_used_time = LBOLT_FASTPATH; 1449 1450 /* Only target the IRE_LOCAL with the right zoneid. */ 1451 ira->ira_zoneid = ire->ire_zoneid; 1452 1453 /* 1454 * If the packet arrived on the wrong ill, we check that 1455 * this is ok. 1456 * If it is, then we ensure that we do the reassembly on 1457 * the ill on which the address is hosted. We keep ira_rill as 1458 * the one on which the packet arrived, so that IP_PKTINFO and 1459 * friends can report this. 1460 */ 1461 if (ire_ill != ill) { 1462 ire_t *new_ire; 1463 1464 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill); 1465 if (new_ire == NULL) { 1466 /* Drop packet */ 1467 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1468 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1469 freemsg(mp); 1470 return; 1471 } 1472 /* 1473 * Update ira_ill to be the ILL on which the IP address 1474 * is hosted. No need to hold the ill since we have a 1475 * hold on the ire. Note that we do the switch even if 1476 * new_ire == ire (for IPMP, ire would be the one corresponding 1477 * to the IPMP ill). 
1478 */ 1479 ASSERT(ira->ira_ill == ira->ira_rill); 1480 ira->ira_ill = new_ire->ire_ill; 1481 1482 /* ira_ruifindex tracks the upper for ira_rill */ 1483 if (IS_UNDER_IPMP(ill)) 1484 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1485 1486 ip_input_local_v6(new_ire, mp, ip6h, ira); 1487 1488 /* Restore */ 1489 ASSERT(ira->ira_ill == new_ire->ire_ill); 1490 ira->ira_ill = ill; 1491 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1492 1493 if (new_ire != ire) 1494 ire_refrele(new_ire); 1495 return; 1496 } 1497 1498 ip_input_local_v6(ire, mp, ip6h, ira); 1499 } 1500 1501 /* 1502 * Common function for packets arriving for the host. Handles 1503 * checksum verification, reassembly checks, etc. 1504 */ 1505 static void 1506 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1507 { 1508 iaflags_t iraflags = ira->ira_flags; 1509 1510 /* 1511 * For multicast we need some extra work before 1512 * we call ip_fanout_v6(), since in the case of shared-IP zones 1513 * we need to pretend that a packet arrived for each zoneid. 1514 */ 1515 if (iraflags & IRAF_MULTICAST) { 1516 ip_input_multicast_v6(ire, mp, ip6h, ira); 1517 return; 1518 } 1519 ip_fanout_v6(mp, ip6h, ira); 1520 } 1521 1522 /* 1523 * Handle multiple zones which want to receive the same multicast packets 1524 * on this ill by delivering a packet to each of them. 1525 * 1526 * Note that for packets delivered to transports we could instead do this 1527 * as part of the fanout code, but since we need to handle icmp_inbound 1528 * it is simpler to have multicast work the same as IPv4 broadcast. 1529 * 1530 * The ip_fanout matching for multicast matches based on ilm independent of 1531 * zoneid since the zoneid restriction is applied when joining a multicast 1532 * group. 
1533 */ 1534 /* ARGSUSED */ 1535 static void 1536 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1537 { 1538 ill_t *ill = ira->ira_ill; 1539 iaflags_t iraflags = ira->ira_flags; 1540 ip_stack_t *ipst = ill->ill_ipst; 1541 netstack_t *ns = ipst->ips_netstack; 1542 zoneid_t zoneid; 1543 mblk_t *mp1; 1544 ip6_t *ip6h1; 1545 uint_t ira_pktlen = ira->ira_pktlen; 1546 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1547 1548 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1549 ASSERT(!IS_UNDER_IPMP(ill)); 1550 1551 /* 1552 * If we don't have more than one shared-IP zone, or if 1553 * there are no members in anything but the global zone, 1554 * then just set the zoneid and proceed. 1555 */ 1556 if (ns->netstack_numzones == 1 || 1557 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 1558 GLOBAL_ZONEID)) { 1559 ira->ira_zoneid = GLOBAL_ZONEID; 1560 1561 /* If sender didn't want this zone to receive it, drop */ 1562 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1563 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1564 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1565 freemsg(mp); 1566 return; 1567 } 1568 ip_fanout_v6(mp, ip6h, ira); 1569 return; 1570 } 1571 1572 /* 1573 * Here we loop over all zoneids that have members in the group 1574 * and deliver a packet to ip_fanout for each zoneid. 1575 * 1576 * First find any members in the lowest numeric zoneid by looking for 1577 * first zoneid larger than -1 (ALL_ZONES). 1578 * We terminate the loop when we receive -1 (ALL_ZONES). 1579 */ 1580 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 1581 for (; zoneid != ALL_ZONES; 1582 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) { 1583 /* 1584 * Avoid an extra copymsg/freemsg by skipping global zone here 1585 * and doing that at the end. 
1586 */ 1587 if (zoneid == GLOBAL_ZONEID) 1588 continue; 1589 1590 ira->ira_zoneid = zoneid; 1591 1592 /* If sender didn't want this zone to receive it, skip */ 1593 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1594 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1595 continue; 1596 1597 mp1 = copymsg(mp); 1598 if (mp1 == NULL) { 1599 /* Failed to deliver to one zone */ 1600 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1601 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1602 continue; 1603 } 1604 ip6h1 = (ip6_t *)mp1->b_rptr; 1605 ip_fanout_v6(mp1, ip6h1, ira); 1606 /* 1607 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1608 * so we restore them for a potential next iteration 1609 */ 1610 ira->ira_pktlen = ira_pktlen; 1611 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1612 } 1613 1614 /* Do the main ire */ 1615 ira->ira_zoneid = GLOBAL_ZONEID; 1616 /* If sender didn't want this zone to receive it, drop */ 1617 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1618 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1619 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1620 freemsg(mp); 1621 } else { 1622 ip_fanout_v6(mp, ip6h, ira); 1623 } 1624 } 1625 1626 1627 /* 1628 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions 1629 * is in use. Updates ira_zoneid and ira_flags as a result. 1630 */ 1631 static void 1632 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length, 1633 ip_recv_attr_t *ira) 1634 { 1635 uint16_t *up; 1636 uint16_t lport; 1637 zoneid_t zoneid; 1638 1639 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1640 1641 /* 1642 * If the packet is unlabeled we might allow read-down 1643 * for MAC_EXEMPT. Below we clear this if it is a multi-level 1644 * port (MLP). 1645 * Note that ira_tsl can be NULL here. 
1646 */ 1647 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 1648 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 1649 1650 if (ira->ira_zoneid != ALL_ZONES) 1651 return; 1652 1653 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 1654 1655 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 1656 switch (protocol) { 1657 case IPPROTO_TCP: 1658 case IPPROTO_SCTP: 1659 case IPPROTO_UDP: 1660 /* Caller ensures this */ 1661 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr); 1662 1663 /* 1664 * Only these transports support MLP. 1665 * We know their destination port numbers is in 1666 * the same place in the header. 1667 */ 1668 lport = up[1]; 1669 1670 /* 1671 * No need to handle exclusive-stack zones 1672 * since ALL_ZONES only applies to the shared IP instance. 1673 */ 1674 zoneid = tsol_mlp_findzone(protocol, lport); 1675 /* 1676 * If no shared MLP is found, tsol_mlp_findzone returns 1677 * ALL_ZONES. In that case, we assume it's SLP, and 1678 * search for the zone based on the packet label. 1679 * 1680 * If there is such a zone, we prefer to find a 1681 * connection in it. Otherwise, we look for a 1682 * MAC-exempt connection in any zone whose label 1683 * dominates the default label on the packet. 
1684 */ 1685 if (zoneid == ALL_ZONES) 1686 zoneid = tsol_attr_to_zoneid(ira); 1687 else 1688 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 1689 break; 1690 default: 1691 /* Handle shared address for other protocols */ 1692 zoneid = tsol_attr_to_zoneid(ira); 1693 break; 1694 } 1695 ira->ira_zoneid = zoneid; 1696 } 1697 1698 /* 1699 * Increment checksum failure statistics 1700 */ 1701 static void 1702 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 1703 { 1704 ip_stack_t *ipst = ill->ill_ipst; 1705 1706 switch (protocol) { 1707 case IPPROTO_TCP: 1708 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 1709 1710 if (hck_flags & HCK_FULLCKSUM) 1711 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err); 1712 else if (hck_flags & HCK_PARTIALCKSUM) 1713 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err); 1714 else 1715 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 1716 break; 1717 case IPPROTO_UDP: 1718 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1719 if (hck_flags & HCK_FULLCKSUM) 1720 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err); 1721 else if (hck_flags & HCK_PARTIALCKSUM) 1722 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err); 1723 else 1724 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 1725 break; 1726 case IPPROTO_ICMPV6: 1727 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 1728 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1729 break; 1730 default: 1731 ASSERT(0); 1732 break; 1733 } 1734 } 1735 1736 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */ 1737 uint32_t 1738 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira) 1739 { 1740 uint_t ulp_len; 1741 uint32_t cksum; 1742 uint8_t protocol = ira->ira_protocol; 1743 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1744 1745 #define iphs ((uint16_t *)ip6h) 1746 1747 switch (protocol) { 1748 case IPPROTO_TCP: 1749 ulp_len = ira->ira_pktlen - ip_hdr_length; 1750 1751 /* Protocol and length */ 1752 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 1753 /* IP addresses */ 1754 cksum += iphs[4] + iphs[5] + 
iphs[6] + iphs[7] + 1755 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1756 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1757 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1758 break; 1759 1760 case IPPROTO_UDP: { 1761 udpha_t *udpha; 1762 1763 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1764 1765 /* Protocol and length */ 1766 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 1767 /* IP addresses */ 1768 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1769 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1770 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1771 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1772 break; 1773 } 1774 case IPPROTO_ICMPV6: 1775 ulp_len = ira->ira_pktlen - ip_hdr_length; 1776 1777 /* Protocol and length */ 1778 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP; 1779 /* IP addresses */ 1780 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1781 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1782 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1783 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1784 break; 1785 default: 1786 cksum = 0; 1787 break; 1788 } 1789 #undef iphs 1790 return (cksum); 1791 } 1792 1793 1794 /* 1795 * Software verification of the ULP checksums. 1796 * Returns B_TRUE if ok. 1797 * Increments statistics of failed. 1798 */ 1799 static boolean_t 1800 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1801 { 1802 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1803 uint32_t cksum; 1804 uint8_t protocol = ira->ira_protocol; 1805 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1806 1807 IP6_STAT(ipst, ip6_in_sw_cksum); 1808 1809 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 1810 protocol == IPPROTO_ICMPV6); 1811 1812 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1813 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1814 if (cksum == 0) 1815 return (B_TRUE); 1816 1817 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill); 1818 return (B_FALSE); 1819 } 1820 1821 /* 1822 * Verify the ULP checksums. 
1823 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 1824 * algorithm. 1825 * Increments statistics if failed. 1826 */ 1827 static boolean_t 1828 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, 1829 ip_recv_attr_t *ira) 1830 { 1831 ill_t *ill = ira->ira_rill; 1832 uint16_t hck_flags; 1833 uint32_t cksum; 1834 mblk_t *mp1; 1835 uint_t len; 1836 uint8_t protocol = ira->ira_protocol; 1837 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1838 1839 1840 switch (protocol) { 1841 case IPPROTO_TCP: 1842 case IPPROTO_ICMPV6: 1843 break; 1844 1845 case IPPROTO_UDP: { 1846 udpha_t *udpha; 1847 1848 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1849 /* 1850 * Before going through the regular checksum 1851 * calculation, make sure the received checksum 1852 * is non-zero. RFC 2460 says, a 0x0000 checksum 1853 * in a UDP packet (within IPv6 packet) is invalid 1854 * and should be replaced by 0xffff. This makes 1855 * sense as regular checksum calculation will 1856 * pass for both the cases i.e. 0x0000 and 0xffff. 1857 * Removing one of the case makes error detection 1858 * stronger. 1859 */ 1860 if (udpha->uha_checksum == 0) { 1861 /* 0x0000 checksum is invalid */ 1862 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1863 return (B_FALSE); 1864 } 1865 break; 1866 } 1867 case IPPROTO_SCTP: { 1868 sctp_hdr_t *sctph; 1869 uint32_t pktsum; 1870 1871 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length); 1872 #ifdef DEBUG 1873 if (skip_sctp_cksum) 1874 return (B_TRUE); 1875 #endif 1876 pktsum = sctph->sh_chksum; 1877 sctph->sh_chksum = 0; 1878 cksum = sctp_cksum(mp, ip_hdr_length); 1879 sctph->sh_chksum = pktsum; 1880 if (cksum == pktsum) 1881 return (B_TRUE); 1882 1883 /* 1884 * Defer until later whether a bad checksum is ok 1885 * in order to allow RAW sockets to use Adler checksum 1886 * with SCTP. 
1887 */ 1888 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 1889 return (B_TRUE); 1890 } 1891 1892 default: 1893 /* No ULP checksum to verify. */ 1894 return (B_TRUE); 1895 } 1896 1897 /* 1898 * Revert to software checksum calculation if the interface 1899 * isn't capable of checksum offload. 1900 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 1901 * Note: IRAF_NO_HW_CKSUM is not currently used. 1902 */ 1903 ASSERT(!IS_IPMP(ill)); 1904 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1905 !dohwcksum) { 1906 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1907 } 1908 1909 hck_flags = DB_CKSUMFLAGS(mp); 1910 1911 /* 1912 * We apply this for all ULP protocols. Does the HW know to 1913 * not set the flags for SCTP and other protocols. 1914 */ 1915 if (hck_flags & HCK_FULLCKSUM_OK) { 1916 /* 1917 * Hardware has already verified the checksum. 1918 */ 1919 return (B_TRUE); 1920 } 1921 1922 if (hck_flags & HCK_FULLCKSUM) { 1923 /* 1924 * Full checksum has been computed by the hardware 1925 * and has been attached. If the driver wants us to 1926 * verify the correctness of the attached value, in 1927 * order to protect against faulty hardware, compare 1928 * it against -0 (0xFFFF) to see if it's valid. 
1929 */ 1930 cksum = DB_CKSUM16(mp); 1931 if (cksum == 0xFFFF) 1932 return (B_TRUE); 1933 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1934 return (B_FALSE); 1935 } 1936 1937 mp1 = mp->b_cont; 1938 if ((hck_flags & HCK_PARTIALCKSUM) && 1939 (mp1 == NULL || mp1->b_cont == NULL) && 1940 ip_hdr_length >= DB_CKSUMSTART(mp) && 1941 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 1942 uint32_t adj; 1943 uchar_t *cksum_start; 1944 1945 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1946 1947 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp)); 1948 1949 /* 1950 * Partial checksum has been calculated by hardware 1951 * and attached to the packet; in addition, any 1952 * prepended extraneous data is even byte aligned, 1953 * and there are at most two mblks associated with 1954 * the packet. If any such data exists, we adjust 1955 * the checksum; also take care any postpended data. 1956 */ 1957 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 1958 /* 1959 * One's complement subtract extraneous checksum 1960 */ 1961 cksum += DB_CKSUM16(mp); 1962 if (adj >= cksum) 1963 cksum = ~(adj - cksum) & 0xFFFF; 1964 else 1965 cksum -= adj; 1966 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1967 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1968 if (!(~cksum & 0xFFFF)) 1969 return (B_TRUE); 1970 1971 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1972 return (B_FALSE); 1973 } 1974 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1975 } 1976 1977 1978 /* 1979 * Handle fanout of received packets. 1980 * Unicast packets that are looped back (from ire_send_local_v6) and packets 1981 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 1982 * 1983 * IPQoS Notes 1984 * Before sending it to the client, invoke IPPF processing. Policy processing 1985 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 
1986 */ 1987 void 1988 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1989 { 1990 ill_t *ill = ira->ira_ill; 1991 iaflags_t iraflags = ira->ira_flags; 1992 ip_stack_t *ipst = ill->ill_ipst; 1993 uint8_t protocol; 1994 conn_t *connp; 1995 #define rptr ((uchar_t *)ip6h) 1996 uint_t ip_hdr_length; 1997 uint_t min_ulp_header_length; 1998 int offset; 1999 ssize_t len; 2000 netstack_t *ns = ipst->ips_netstack; 2001 ipsec_stack_t *ipss = ns->netstack_ipsec; 2002 ill_t *rill = ira->ira_rill; 2003 2004 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 2005 2006 /* 2007 * We repeat this as we parse over destination options header and 2008 * fragment headers (earlier we've handled any hop-by-hop options 2009 * header.) 2010 * We update ira_protocol and ira_ip_hdr_length as we skip past 2011 * the intermediate headers; they already point past any 2012 * hop-by-hop header. 2013 */ 2014 repeat: 2015 protocol = ira->ira_protocol; 2016 ip_hdr_length = ira->ira_ip_hdr_length; 2017 2018 /* 2019 * Time for IPP once we've done reassembly and IPsec. 2020 * We skip this for loopback packets since we don't do IPQoS 2021 * on loopback. 2022 */ 2023 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2024 !(iraflags & IRAF_LOOPBACK) && 2025 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH && 2026 protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING && 2027 protocol != IPPROTO_FRAGMENT)) { 2028 /* 2029 * Use the interface on which the packet arrived - not where 2030 * the IP address is hosted. 2031 */ 2032 /* ip_process translates an IS_UNDER_IPMP */ 2033 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2034 if (mp == NULL) { 2035 /* ip_drop_packet and MIB done */ 2036 return; 2037 } 2038 } 2039 2040 /* Determine the minimum required size of the upper-layer header */ 2041 /* Need to do this for at least the set of ULPs that TX handles. 
*/ 2042 switch (protocol) { 2043 case IPPROTO_TCP: 2044 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2045 break; 2046 case IPPROTO_SCTP: 2047 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2048 break; 2049 case IPPROTO_UDP: 2050 min_ulp_header_length = UDPH_SIZE; 2051 break; 2052 case IPPROTO_ICMP: 2053 case IPPROTO_ICMPV6: 2054 min_ulp_header_length = ICMPH_SIZE; 2055 break; 2056 case IPPROTO_FRAGMENT: 2057 case IPPROTO_DSTOPTS: 2058 case IPPROTO_ROUTING: 2059 min_ulp_header_length = MIN_EHDR_LEN; 2060 break; 2061 default: 2062 min_ulp_header_length = 0; 2063 break; 2064 } 2065 /* Make sure we have the min ULP header length */ 2066 len = mp->b_wptr - rptr; 2067 if (len < ip_hdr_length + min_ulp_header_length) { 2068 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) 2069 goto pkt_too_short; 2070 2071 IP6_STAT(ipst, ip6_recv_pullup); 2072 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2073 ira); 2074 if (ip6h == NULL) 2075 goto discard; 2076 len = mp->b_wptr - rptr; 2077 } 2078 2079 /* 2080 * If trusted extensions then determine the zoneid and TX specific 2081 * ira_flags. 2082 */ 2083 if (iraflags & IRAF_SYSTEM_LABELED) { 2084 /* This can update ira->ira_flags and ira->ira_zoneid */ 2085 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira); 2086 iraflags = ira->ira_flags; 2087 } 2088 2089 2090 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2091 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2092 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) { 2093 /* Bad checksum. Stats are already incremented */ 2094 ip_drop_input("Bad ULP checksum", mp, ill); 2095 freemsg(mp); 2096 return; 2097 } 2098 /* IRAF_SCTP_CSUM_ERR could have been set */ 2099 iraflags = ira->ira_flags; 2100 } 2101 switch (protocol) { 2102 case IPPROTO_TCP: 2103 /* For TCP, discard multicast packets. 
*/ 2104 if (iraflags & IRAF_MULTIBROADCAST) 2105 goto discard; 2106 2107 /* First mblk contains IP+TCP headers per above check */ 2108 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2109 2110 /* TCP options present? */ 2111 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4; 2112 if (offset != 5) { 2113 if (offset < 5) 2114 goto discard; 2115 2116 /* 2117 * There must be TCP options. 2118 * Make sure we can grab them. 2119 */ 2120 offset <<= 2; 2121 offset += ip_hdr_length; 2122 if (len < offset) { 2123 if (ira->ira_pktlen < offset) 2124 goto pkt_too_short; 2125 2126 IP6_STAT(ipst, ip6_recv_pullup); 2127 ip6h = ip_pullup(mp, offset, ira); 2128 if (ip6h == NULL) 2129 goto discard; 2130 len = mp->b_wptr - rptr; 2131 } 2132 } 2133 2134 /* 2135 * Pass up a squeue hint to tcp. 2136 * If ira_sqp is already set (this is loopback) we leave it 2137 * alone. 2138 */ 2139 if (ira->ira_sqp == NULL) { 2140 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2141 } 2142 2143 /* Look for AF_INET or AF_INET6 that matches */ 2144 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length, 2145 ira, ipst); 2146 if (connp == NULL) { 2147 /* Send the TH_RST */ 2148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2149 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2150 return; 2151 } 2152 if (connp->conn_min_ttl != 0 && 2153 connp->conn_min_ttl > ira->ira_ttl) { 2154 CONN_DEC_REF(connp); 2155 goto discard; 2156 } 2157 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2158 (iraflags & IRAF_IPSEC_SECURE)) { 2159 mp = ipsec_check_inbound_policy(mp, connp, 2160 NULL, ip6h, ira); 2161 if (mp == NULL) { 2162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2163 /* Note that mp is NULL */ 2164 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2165 CONN_DEC_REF(connp); 2166 return; 2167 } 2168 } 2169 /* Found a client; up it goes */ 2170 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2171 ira->ira_ill = ira->ira_rill = NULL; 2172 if (!IPCL_IS_TCP(connp)) { 2173 /* Not TCP; must be SOCK_RAW, 
IPPROTO_TCP */ 2174 (connp->conn_recv)(connp, mp, NULL, ira); 2175 CONN_DEC_REF(connp); 2176 ira->ira_ill = ill; 2177 ira->ira_rill = rill; 2178 return; 2179 } 2180 2181 /* 2182 * We do different processing whether called from 2183 * ip_accept_tcp and we match the target, don't match 2184 * the target, and when we are called by ip_input. 2185 */ 2186 if (iraflags & IRAF_TARGET_SQP) { 2187 if (ira->ira_target_sqp == connp->conn_sqp) { 2188 mblk_t *attrmp; 2189 2190 attrmp = ip_recv_attr_to_mblk(ira); 2191 if (attrmp == NULL) { 2192 BUMP_MIB(ill->ill_ip_mib, 2193 ipIfStatsInDiscards); 2194 ip_drop_input("ipIfStatsInDiscards", 2195 mp, ill); 2196 freemsg(mp); 2197 CONN_DEC_REF(connp); 2198 } else { 2199 SET_SQUEUE(attrmp, connp->conn_recv, 2200 connp); 2201 attrmp->b_cont = mp; 2202 ASSERT(ira->ira_target_sqp_mp == NULL); 2203 ira->ira_target_sqp_mp = attrmp; 2204 /* 2205 * Conn ref release when drained from 2206 * the squeue. 2207 */ 2208 } 2209 } else { 2210 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2211 connp->conn_recv, connp, ira, SQ_FILL, 2212 SQTAG_IP6_TCP_INPUT); 2213 } 2214 } else { 2215 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2216 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 2217 } 2218 ira->ira_ill = ill; 2219 ira->ira_rill = rill; 2220 return; 2221 2222 case IPPROTO_SCTP: { 2223 sctp_hdr_t *sctph; 2224 uint32_t ports; /* Source and destination ports */ 2225 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2226 2227 /* For SCTP, discard multicast packets. */ 2228 if (iraflags & IRAF_MULTIBROADCAST) 2229 goto discard; 2230 2231 /* 2232 * Since there is no SCTP h/w cksum support yet, just 2233 * clear the flag. 
2234 */ 2235 DB_CKSUMFLAGS(mp) = 0; 2236 2237 /* Length ensured above */ 2238 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2239 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2240 2241 /* get the ports */ 2242 ports = *(uint32_t *)&sctph->sh_sport; 2243 2244 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2245 /* 2246 * No potential sctp checksum errors go to the Sun 2247 * sctp stack however they might be Adler-32 summed 2248 * packets a userland stack bound to a raw IP socket 2249 * could reasonably use. Note though that Adler-32 is 2250 * a long deprecated algorithm and customer sctp 2251 * networks should eventually migrate to CRC-32 at 2252 * which time this facility should be removed. 2253 */ 2254 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2255 return; 2256 } 2257 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports, 2258 ira, mp, sctps, sctph); 2259 if (connp == NULL) { 2260 /* Check for raw socket or OOTB handling */ 2261 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2262 return; 2263 } 2264 if (connp->conn_incoming_ifindex != 0 && 2265 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2266 CONN_DEC_REF(connp); 2267 2268 /* Check for raw socket or OOTB handling */ 2269 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2270 return; 2271 } 2272 2273 /* Found a client; up it goes */ 2274 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2275 sctp_input(connp, NULL, ip6h, mp, ira); 2276 /* sctp_input does a rele of the sctp_t */ 2277 return; 2278 } 2279 2280 case IPPROTO_UDP: 2281 /* First mblk contains IP+UDP headers as checked above */ 2282 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2283 2284 if (iraflags & IRAF_MULTIBROADCAST) { 2285 uint16_t *up; /* Pointer to ports in ULP header */ 2286 2287 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 2288 2289 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira); 2290 return; 2291 } 2292 2293 /* Look for AF_INET or AF_INET6 that matches */ 2294 connp = ipcl_classify_v6(mp, IPPROTO_UDP, 
ip_hdr_length, 2295 ira, ipst); 2296 if (connp == NULL) { 2297 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP]. 2298 connf_head != NULL) { 2299 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2300 ip_fanout_proto_v6(mp, ip6h, ira); 2301 } else { 2302 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2303 ICMP6_DST_UNREACH_NOPORT, ira); 2304 } 2305 return; 2306 2307 } 2308 if (connp->conn_min_ttl != 0 && 2309 connp->conn_min_ttl > ira->ira_ttl) { 2310 CONN_DEC_REF(connp); 2311 goto discard; 2312 } 2313 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2314 !canputnext(connp->conn_rq)) { 2315 CONN_DEC_REF(connp); 2316 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2317 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2318 freemsg(mp); 2319 return; 2320 } 2321 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2322 (iraflags & IRAF_IPSEC_SECURE)) { 2323 mp = ipsec_check_inbound_policy(mp, connp, 2324 NULL, ip6h, ira); 2325 if (mp == NULL) { 2326 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2327 /* Note that mp is NULL */ 2328 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2329 CONN_DEC_REF(connp); 2330 return; 2331 } 2332 } 2333 2334 /* Found a client; up it goes */ 2335 IP6_STAT(ipst, ip6_udp_fannorm); 2336 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2337 ira->ira_ill = ira->ira_rill = NULL; 2338 (connp->conn_recv)(connp, mp, NULL, ira); 2339 CONN_DEC_REF(connp); 2340 ira->ira_ill = ill; 2341 ira->ira_rill = rill; 2342 return; 2343 default: 2344 break; 2345 } 2346 2347 /* 2348 * Clear hardware checksumming flag as it is currently only 2349 * used by TCP and UDP. 
2350 */ 2351 DB_CKSUMFLAGS(mp) = 0; 2352 2353 switch (protocol) { 2354 case IPPROTO_ICMPV6: 2355 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 2356 2357 /* Check variable for testing applications */ 2358 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 2359 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill); 2360 freemsg(mp); 2361 return; 2362 } 2363 /* 2364 * We need to accomodate icmp messages coming in clear 2365 * until we get everything secure from the wire. If 2366 * icmp_accept_clear_messages is zero we check with 2367 * the global policy and act accordingly. If it is 2368 * non-zero, we accept the message without any checks. 2369 * But *this does not mean* that this will be delivered 2370 * to RAW socket clients. By accepting we might send 2371 * replies back, change our MTU value etc., 2372 * but delivery to the ULP/clients depends on their 2373 * policy dispositions. 2374 */ 2375 if (ipst->ips_icmp_accept_clear_messages == 0) { 2376 mp = ipsec_check_global_policy(mp, NULL, 2377 NULL, ip6h, ira, ns); 2378 if (mp == NULL) 2379 return; 2380 } 2381 2382 /* 2383 * On a labeled system, we have to check whether the zone 2384 * itself is permitted to receive raw traffic. 2385 */ 2386 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2387 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2388 BUMP_MIB(ill->ill_icmp6_mib, 2389 ipv6IfIcmpInErrors); 2390 ip_drop_input("tsol_can_accept_raw", mp, ill); 2391 freemsg(mp); 2392 return; 2393 } 2394 } 2395 2396 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2397 mp = icmp_inbound_v6(mp, ira); 2398 if (mp == NULL) { 2399 /* No need to pass to RAW sockets */ 2400 return; 2401 } 2402 break; 2403 2404 case IPPROTO_DSTOPTS: { 2405 ip6_dest_t *desthdr; 2406 uint_t ehdrlen; 2407 uint8_t *optptr; 2408 2409 /* We already check for MIN_EHDR_LEN above */ 2410 2411 /* Check if AH is present and needs to be processed. 
*/ 2412 mp = ipsec_early_ah_v6(mp, ira); 2413 if (mp == NULL) 2414 return; 2415 2416 /* 2417 * Reinitialize pointers, as ipsec_early_ah_v6() does 2418 * complete pullups. We don't have to do more pullups 2419 * as a result. 2420 */ 2421 ip6h = (ip6_t *)mp->b_rptr; 2422 2423 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2424 goto pkt_too_short; 2425 2426 if (mp->b_cont != NULL && 2427 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2428 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2429 if (ip6h == NULL) 2430 goto discard; 2431 } 2432 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2433 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2434 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2435 goto pkt_too_short; 2436 if (mp->b_cont != NULL && 2437 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2438 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2439 if (ip6h == NULL) 2440 goto discard; 2441 2442 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2443 } 2444 optptr = (uint8_t *)&desthdr[1]; 2445 2446 /* 2447 * Update ira_ip_hdr_length to skip the destination header 2448 * when we repeat. 2449 */ 2450 ira->ira_ip_hdr_length += ehdrlen; 2451 2452 ira->ira_protocol = desthdr->ip6d_nxt; 2453 2454 /* 2455 * Note: XXX This code does not seem to make 2456 * distinction between Destination Options Header 2457 * being before/after Routing Header which can 2458 * happen if we are at the end of source route. 2459 * This may become significant in future. 2460 * (No real significant Destination Options are 2461 * defined/implemented yet ). 2462 */ 2463 switch (ip_process_options_v6(mp, ip6h, optptr, 2464 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) { 2465 case -1: 2466 /* 2467 * Packet has been consumed and any needed 2468 * ICMP errors sent. 
2469 */ 2470 return; 2471 case 0: 2472 /* No action needed continue */ 2473 break; 2474 case 1: 2475 /* 2476 * Unnexpected return value 2477 * (Router alert is a Hop-by-Hop option) 2478 */ 2479 #ifdef DEBUG 2480 panic("ip_fanout_v6: router " 2481 "alert hbh opt indication in dest opt"); 2482 /*NOTREACHED*/ 2483 #else 2484 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2485 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2486 freemsg(mp); 2487 return; 2488 #endif 2489 } 2490 goto repeat; 2491 } 2492 case IPPROTO_FRAGMENT: { 2493 ip6_frag_t *fraghdr; 2494 2495 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t)) 2496 goto pkt_too_short; 2497 2498 if (mp->b_cont != NULL && 2499 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) { 2500 ip6h = ip_pullup(mp, 2501 ip_hdr_length + sizeof (ip6_frag_t), ira); 2502 if (ip6h == NULL) 2503 goto discard; 2504 } 2505 2506 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length); 2507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 2508 2509 /* 2510 * Invoke the CGTP (multirouting) filtering module to 2511 * process the incoming packet. Packets identified as 2512 * duplicates must be discarded. Filtering is active 2513 * only if the ip_cgtp_filter ndd variable is 2514 * non-zero. 2515 */ 2516 if (ipst->ips_ip_cgtp_filter && 2517 ipst->ips_ip_cgtp_filter_ops != NULL) { 2518 int cgtp_flt_pkt; 2519 netstackid_t stackid; 2520 2521 stackid = ipst->ips_netstack->netstack_stackid; 2522 2523 /* 2524 * CGTP and IPMP are mutually exclusive so 2525 * phyint_ifindex is fine here. 
2526 */ 2527 cgtp_flt_pkt = 2528 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 2529 stackid, ill->ill_phyint->phyint_ifindex, 2530 ip6h, fraghdr); 2531 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 2532 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 2533 freemsg(mp); 2534 return; 2535 } 2536 } 2537 2538 /* 2539 * Update ip_hdr_length to skip the frag header 2540 * ip_input_fragment_v6 will determine the extension header 2541 * prior to the fragment header and update its nexthdr value, 2542 * and also set ira_protocol to the nexthdr that follows the 2543 * completed fragment. 2544 */ 2545 ip_hdr_length += sizeof (ip6_frag_t); 2546 2547 /* 2548 * Make sure we have ira_l2src before we loose the original 2549 * mblk 2550 */ 2551 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 2552 ip_setl2src(mp, ira, ira->ira_rill); 2553 2554 mp = ip_input_fragment_v6(mp, ip6h, fraghdr, 2555 ira->ira_pktlen - ip_hdr_length, ira); 2556 if (mp == NULL) { 2557 /* Reassembly is still pending */ 2558 return; 2559 } 2560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 2561 2562 /* 2563 * The mblk chain has the frag header removed and 2564 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the 2565 * IP header has been updated to refleact the result. 2566 */ 2567 ip6h = (ip6_t *)mp->b_rptr; 2568 ip_hdr_length = ira->ira_ip_hdr_length; 2569 goto repeat; 2570 } 2571 case IPPROTO_HOPOPTS: 2572 /* 2573 * Illegal header sequence. 2574 * (Hop-by-hop headers are processed above 2575 * and required to immediately follow IPv6 header) 2576 */ 2577 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2578 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2579 return; 2580 2581 case IPPROTO_ROUTING: { 2582 uint_t ehdrlen; 2583 ip6_rthdr_t *rthdr; 2584 2585 /* Check if AH is present and needs to be processed. */ 2586 mp = ipsec_early_ah_v6(mp, ira); 2587 if (mp == NULL) 2588 return; 2589 2590 /* 2591 * Reinitialize pointers, as ipsec_early_ah_v6() does 2592 * complete pullups. 
We don't have to do more pullups 2593 * as a result. 2594 */ 2595 ip6h = (ip6_t *)mp->b_rptr; 2596 2597 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2598 goto pkt_too_short; 2599 2600 if (mp->b_cont != NULL && 2601 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2602 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2603 if (ip6h == NULL) 2604 goto discard; 2605 } 2606 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2607 protocol = ira->ira_protocol = rthdr->ip6r_nxt; 2608 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2609 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2610 goto pkt_too_short; 2611 if (mp->b_cont != NULL && 2612 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2613 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2614 if (ip6h == NULL) 2615 goto discard; 2616 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2617 } 2618 if (rthdr->ip6r_segleft != 0) { 2619 /* Not end of source route */ 2620 if (ira->ira_flags & 2621 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 2622 BUMP_MIB(ill->ill_ip_mib, 2623 ipIfStatsForwProhibits); 2624 ip_drop_input("ipIfStatsInForwProhibits", 2625 mp, ill); 2626 freemsg(mp); 2627 return; 2628 } 2629 ip_process_rthdr(mp, ip6h, rthdr, ira); 2630 return; 2631 } 2632 ira->ira_ip_hdr_length += ehdrlen; 2633 goto repeat; 2634 } 2635 2636 case IPPROTO_AH: 2637 case IPPROTO_ESP: { 2638 /* 2639 * Fast path for AH/ESP. 
2640 */ 2641 netstack_t *ns = ipst->ips_netstack; 2642 ipsec_stack_t *ipss = ns->netstack_ipsec; 2643 2644 IP_STAT(ipst, ipsec_proto_ahesp); 2645 2646 if (!ipsec_loaded(ipss)) { 2647 ip_proto_not_sup(mp, ira); 2648 return; 2649 } 2650 2651 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2652 /* select inbound SA and have IPsec process the pkt */ 2653 if (protocol == IPPROTO_ESP) { 2654 esph_t *esph; 2655 2656 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2657 if (mp == NULL) 2658 return; 2659 2660 ASSERT(esph != NULL); 2661 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2662 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2663 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2664 2665 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2666 ira); 2667 } else { 2668 ah_t *ah; 2669 2670 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2671 if (mp == NULL) 2672 return; 2673 2674 ASSERT(ah != NULL); 2675 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2676 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2677 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2678 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2679 ira); 2680 } 2681 2682 if (mp == NULL) { 2683 /* 2684 * Either it failed or is pending. In the former case 2685 * ipIfStatsInDiscards was increased. 2686 */ 2687 return; 2688 } 2689 /* we're done with IPsec processing, send it up */ 2690 ip_input_post_ipsec(mp, ira); 2691 return; 2692 } 2693 case IPPROTO_NONE: 2694 /* All processing is done. Count as "delivered". 
*/ 2695 freemsg(mp); 2696 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2697 return; 2698 2699 case IPPROTO_ENCAP: 2700 case IPPROTO_IPV6: 2701 /* iptun will verify trusted label */ 2702 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length, 2703 ira, ipst); 2704 if (connp != NULL) { 2705 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2706 ira->ira_ill = ira->ira_rill = NULL; 2707 connp->conn_recv(connp, mp, NULL, ira); 2708 CONN_DEC_REF(connp); 2709 ira->ira_ill = ill; 2710 ira->ira_rill = rill; 2711 return; 2712 } 2713 /* FALLTHRU */ 2714 default: 2715 /* 2716 * On a labeled system, we have to check whether the zone 2717 * itself is permitted to receive raw traffic. 2718 */ 2719 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2720 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2721 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2722 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2723 freemsg(mp); 2724 return; 2725 } 2726 } 2727 break; 2728 } 2729 2730 /* 2731 * The above input functions may have returned the pulled up message. 2732 * So ip6h need to be reinitialized. 2733 */ 2734 ip6h = (ip6_t *)mp->b_rptr; 2735 ira->ira_protocol = protocol; 2736 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) { 2737 /* No user-level listener for these packets packets */ 2738 ip_proto_not_sup(mp, ira); 2739 return; 2740 } 2741 2742 /* 2743 * Handle fanout to raw sockets. There 2744 * can be more than one stream bound to a particular 2745 * protocol. When this is the case, each one gets a copy 2746 * of any incoming packets. 2747 */ 2748 ASSERT(ira->ira_protocol == protocol); 2749 ip_fanout_proto_v6(mp, ip6h, ira); 2750 return; 2751 2752 pkt_too_short: 2753 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2754 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2755 freemsg(mp); 2756 return; 2757 2758 discard: 2759 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2760 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2761 freemsg(mp); 2762 #undef rptr 2763 } 2764