1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/sysmacros.h> 32 #include <sys/strsubr.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #include <sys/zone.h> 36 #define _SUN_TPI_VERSION 2 37 #include <sys/tihdr.h> 38 #include <sys/xti_inet.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kobj.h> 44 #include <sys/modctl.h> 45 #include <sys/atomic.h> 46 #include <sys/policy.h> 47 #include <sys/priv.h> 48 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/kmem.h> 52 #include <sys/sdt.h> 53 #include <sys/socket.h> 54 #include <sys/vtrace.h> 55 #include <sys/isa_defs.h> 56 #include <sys/mac.h> 57 #include <net/if.h> 58 #include <net/if_arp.h> 59 #include <net/route.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <net/if_dl.h> 63 64 #include <inet/common.h> 65 #include <inet/mi.h> 66 #include <inet/mib2.h> 67 #include <inet/nd.h> 68 #include <inet/arp.h> 69 #include <inet/snmpcom.h> 70 #include <inet/kstatcom.h> 71 72 #include <netinet/igmp_var.h> 73 #include <netinet/ip6.h> 74 #include <netinet/icmp6.h> 75 #include <netinet/sctp.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/optcom.h> 82 #include <inet/tcp.h> 83 #include <inet/tcp_impl.h> 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_ftable.h> 88 #include <inet/ip_rts.h> 89 #include <inet/ip_ndp.h> 90 #include <inet/ip_listutils.h> 91 #include <netinet/igmp.h> 92 #include <netinet/ip_mroute.h> 93 #include <inet/ipp_common.h> 94 95 #include <net/pfkeyv2.h> 96 #include <inet/sadb.h> 97 #include <inet/ipsec_impl.h> 98 #include <inet/ipdrop.h> 99 #include <inet/ip_netinfo.h> 100 #include <inet/ilb_ip.h> 101 #include <sys/squeue_impl.h> 102 #include <sys/squeue.h> 103 104 #include <sys/ethernet.h> 105 #include <net/if_types.h> 106 #include <sys/cpuvar.h> 107 108 #include <ipp/ipp.h> 109 #include <ipp/ipp_impl.h> 110 #include <ipp/ipgpc/ipgpc.h> 111 112 #include <sys/pattr.h> 113 #include <inet/ipclassifier.h> 114 #include <inet/sctp_ip.h> 115 #include <inet/sctp/sctp_impl.h> 116 #include <inet/udp_impl.h> 117 #include <sys/sunddi.h> 118 119 #include <sys/tsol/label.h> 120 #include <sys/tsol/tnet.h> 121 122 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 123 124 #ifdef DEBUG 125 extern boolean_t skip_sctp_cksum; 126 #endif 127 128 static void ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *); 129 130 static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *, 131 ip_recv_attr_t *); 132 133 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6) 134 135 /* 136 * Direct read side procedure capable of dealing with chains. GLDv3 based 137 * drivers call this function directly with mblk chains while STREAMS 138 * read side procedure ip_rput() calls this for single packet with ip_ring 139 * set to NULL to process one packet at a time. 140 * 141 * The ill will always be valid if this function is called directly from 142 * the driver. 143 * 144 * If ip_input_v6() is called from GLDv3: 145 * 146 * - This must be a non-VLAN IP stream. 147 * - 'mp' is either an untagged or a special priority-tagged packet. 148 * - Any VLAN tag that was in the MAC header has been stripped. 149 * 150 * If the IP header in packet is not 32-bit aligned, every message in the 151 * chain will be aligned before further operations. This is required on SPARC 152 * platform. 153 */ 154 void 155 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 156 struct mac_header_info_s *mhip) 157 { 158 (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL, 159 NULL); 160 } 161 162 /* 163 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves 164 * a chain of packets in the poll mode. The packets have gone through the 165 * data link processing but not IP processing. For performance and latency 166 * reasons, the squeue wants to process the chain in line instead of feeding 167 * it back via ip_input path. 168 * 169 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6 170 * will pass back any TCP packets matching the target sqp to 171 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by 172 * ip_input_v6 and ip_fanout_v6 as normal. 173 * The TCP packets that match the target squeue are returned to the caller 174 * as a b_next chain after each packet has been prepend with an mblk 175 * from ip_recv_attr_to_mblk. 176 */ 177 mblk_t * 178 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 179 mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 180 { 181 return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp, 182 last, cnt)); 183 } 184 185 /* 186 * Used by ip_input_v6 and ip_accept_tcp_v6 187 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is 188 * only used by ip_input_v6. 189 */ 190 mblk_t * 191 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 192 struct mac_header_info_s *mhip, squeue_t *target_sqp, 193 mblk_t **last, uint_t *cnt) 194 { 195 mblk_t *mp; 196 ip6_t *ip6h; 197 ip_recv_attr_t iras; /* Receive attributes */ 198 rtc_t rtc; 199 iaflags_t chain_flags = 0; /* Fixed for chain */ 200 mblk_t *ahead = NULL; /* Accepted head */ 201 mblk_t *atail = NULL; /* Accepted tail */ 202 uint_t acnt = 0; /* Accepted count */ 203 204 ASSERT(mp_chain != NULL); 205 ASSERT(ill != NULL); 206 207 /* These ones do not change as we loop over packets */ 208 iras.ira_ill = iras.ira_rill = ill; 209 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 210 iras.ira_rifindex = iras.ira_ruifindex; 211 iras.ira_sqp = NULL; 212 iras.ira_ring = ip_ring; 213 /* For ECMP and outbound transmit ring selection */ 214 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring); 215 216 iras.ira_target_sqp = target_sqp; 217 iras.ira_target_sqp_mp = NULL; 218 if (target_sqp != NULL) 219 chain_flags |= IRAF_TARGET_SQP; 220 221 /* 222 * We try to have a mhip pointer when possible, but 223 * it might be NULL in some cases. In those cases we 224 * have to assume unicast. 225 */ 226 iras.ira_mhip = mhip; 227 iras.ira_flags = 0; 228 if (mhip != NULL) { 229 switch (mhip->mhi_dsttype) { 230 case MAC_ADDRTYPE_MULTICAST : 231 chain_flags |= IRAF_L2DST_MULTICAST; 232 break; 233 case MAC_ADDRTYPE_BROADCAST : 234 chain_flags |= IRAF_L2DST_BROADCAST; 235 break; 236 } 237 } 238 239 /* 240 * Initialize the one-element route cache. 241 * 242 * We do ire caching from one iteration to 243 * another. In the event the packet chain contains 244 * all packets from the same dst, this caching saves 245 * an ire_route_recursive for each of the succeeding 246 * packets in a packet chain. 247 */ 248 rtc.rtc_ire = NULL; 249 rtc.rtc_ip6addr = ipv6_all_zeros; 250 251 /* Loop over b_next */ 252 for (mp = mp_chain; mp != NULL; mp = mp_chain) { 253 mp_chain = mp->b_next; 254 mp->b_next = NULL; 255 256 /* 257 * if db_ref > 1 then copymsg and free original. Packet 258 * may be changed and we do not want the other entity 259 * who has a reference to this message to trip over the 260 * changes. This is a blind change because trying to 261 * catch all places that might change the packet is too 262 * difficult. 263 * 264 * This corresponds to the fast path case, where we have 265 * a chain of M_DATA mblks. We check the db_ref count 266 * of only the 1st data block in the mblk chain. There 267 * doesn't seem to be a reason why a device driver would 268 * send up data with varying db_ref counts in the mblk 269 * chain. In any case the Fast path is a private 270 * interface, and our drivers don't do such a thing. 271 * Given the above assumption, there is no need to walk 272 * down the entire mblk chain (which could have a 273 * potential performance problem) 274 * 275 * The "(DB_REF(mp) > 1)" check was moved from ip_rput() 276 * to here because of exclusive ip stacks and vnics. 277 * Packets transmitted from exclusive stack over vnic 278 * can have db_ref > 1 and when it gets looped back to 279 * another vnic in a different zone, you have ip_input() 280 * getting dblks with db_ref > 1. So if someone 281 * complains of TCP performance under this scenario, 282 * take a serious look here on the impact of copymsg(). 283 */ 284 if (DB_REF(mp) > 1) { 285 if ((mp = ip_fix_dbref(mp, &iras)) == NULL) 286 continue; 287 } 288 289 /* 290 * IP header ptr not aligned? 291 * OR IP header not complete in first mblk 292 */ 293 ip6h = (ip6_t *)mp->b_rptr; 294 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) { 295 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras); 296 if (mp == NULL) 297 continue; 298 ip6h = (ip6_t *)mp->b_rptr; 299 } 300 301 /* Protect against a mix of Ethertypes and IP versions */ 302 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) { 303 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 304 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 305 freemsg(mp); 306 /* mhip might point into 1st packet in the chain. */ 307 iras.ira_mhip = NULL; 308 continue; 309 } 310 311 /* 312 * Check for Martian addrs; we have to explicitly 313 * test for for zero dst since this is also used as 314 * an indication that the rtc is not used. 315 */ 316 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) { 317 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 318 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 319 freemsg(mp); 320 /* mhip might point into 1st packet in the chain. */ 321 iras.ira_mhip = NULL; 322 continue; 323 } 324 /* 325 * Keep L2SRC from a previous packet in chain since mhip 326 * might point into an earlier packet in the chain. 327 */ 328 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET); 329 330 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags; 331 iras.ira_free_flags = 0; 332 iras.ira_cred = NULL; 333 iras.ira_cpid = NOPID; 334 iras.ira_tsl = NULL; 335 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */ 336 337 /* 338 * We must count all incoming packets, even if they end 339 * up being dropped later on. Defer counting bytes until 340 * we have the whole IP header in first mblk. 341 */ 342 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 343 344 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 345 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 346 iras.ira_pktlen); 347 348 /* 349 * Call one of: 350 * ill_input_full_v6 351 * ill_input_short_v6 352 * The former is used in the case of TX. See ill_set_inputfn(). 353 */ 354 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 355 356 /* Any references to clean up? No hold on ira_ill */ 357 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 358 ira_cleanup(&iras, B_FALSE); 359 360 if (iras.ira_target_sqp_mp != NULL) { 361 /* Better be called from ip_accept_tcp */ 362 ASSERT(target_sqp != NULL); 363 364 /* Found one packet to accept */ 365 mp = iras.ira_target_sqp_mp; 366 iras.ira_target_sqp_mp = NULL; 367 ASSERT(ip_recv_attr_is_mblk(mp)); 368 369 if (atail != NULL) 370 atail->b_next = mp; 371 else 372 ahead = mp; 373 atail = mp; 374 acnt++; 375 mp = NULL; 376 } 377 /* mhip might point into 1st packet in the chain. */ 378 iras.ira_mhip = NULL; 379 } 380 /* Any remaining references to the route cache? */ 381 if (rtc.rtc_ire != NULL) { 382 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 383 ire_refrele(rtc.rtc_ire); 384 } 385 386 if (ahead != NULL) { 387 /* Better be called from ip_accept_tcp */ 388 ASSERT(target_sqp != NULL); 389 *last = atail; 390 *cnt = acnt; 391 return (ahead); 392 } 393 394 return (NULL); 395 } 396 397 /* 398 * This input function is used when 399 * - is_system_labeled() 400 * 401 * Note that for IPv6 CGTP filtering is handled only when receiving fragment 402 * headers, and RSVP uses router alert options, thus we don't need anything 403 * extra for them. 404 */ 405 void 406 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 407 ip_recv_attr_t *ira, rtc_t *rtc) 408 { 409 ip6_t *ip6h = (ip6_t *)iph_arg; 410 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg; 411 ill_t *ill = ira->ira_ill; 412 413 ASSERT(ira->ira_tsl == NULL); 414 415 /* 416 * Attach any necessary label information to 417 * this packet 418 */ 419 if (is_system_labeled()) { 420 ira->ira_flags |= IRAF_SYSTEM_LABELED; 421 422 /* 423 * This updates ira_cred, ira_tsl and ira_free_flags based 424 * on the label. 425 */ 426 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) { 427 if (ip6opt_ls != 0) 428 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 429 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 430 ip_drop_input("ipIfStatsInDiscards", mp, ill); 431 freemsg(mp); 432 return; 433 } 434 /* Note that ira_tsl can be NULL here. */ 435 436 /* tsol_get_pkt_label sometimes does pullupmsg */ 437 ip6h = (ip6_t *)mp->b_rptr; 438 } 439 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc); 440 } 441 442 /* 443 * Check for IPv6 addresses that should not appear on the wire 444 * as either source or destination. 445 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have 446 * to revisit the IPv4-mapped part. 447 */ 448 static boolean_t 449 ip6_bad_address(in6_addr_t *addr, boolean_t is_src) 450 { 451 if (IN6_IS_ADDR_V4MAPPED(addr)) { 452 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr")); 453 return (B_TRUE); 454 } 455 if (IN6_IS_ADDR_LOOPBACK(addr)) { 456 ip1dbg(("ip_input_v6: pkt with loopback addr")); 457 return (B_TRUE); 458 } 459 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) { 460 /* 461 * having :: in the src is ok: it's used for DAD. 462 */ 463 ip1dbg(("ip_input_v6: pkt with unspecified addr")); 464 return (B_TRUE); 465 } 466 return (B_FALSE); 467 } 468 469 /* 470 * Routing lookup for IPv6 link-locals. 471 * First we look on the inbound interface, then we check for IPMP and 472 * look on the upper interface. 473 * We update ira_ruifindex if we find the IRE on the upper interface. 474 */ 475 static ire_t * 476 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira, 477 uint_t irr_flags, ip_stack_t *ipst) 478 { 479 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL; 480 ire_t *ire; 481 482 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop)); 483 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 484 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 485 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 486 !IS_UNDER_IPMP(ill)) 487 return (ire); 488 489 /* 490 * When we are using IMP we need to look for an IRE on both the 491 * under and upper interfaces since there are different 492 * link-local addresses for the under and upper. 493 */ 494 ill = ipmp_ill_hold_ipmp_ill(ill); 495 if (ill == NULL) 496 return (ire); 497 498 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 499 500 ire_refrele(ire); 501 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 502 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 503 ill_refrele(ill); 504 return (ire); 505 } 506 507 /* 508 * This is the tail-end of the full receive side packet handling. 509 * It can be used directly when the configuration is simple. 510 */ 511 void 512 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 513 ip_recv_attr_t *ira, rtc_t *rtc) 514 { 515 ire_t *ire; 516 ill_t *ill = ira->ira_ill; 517 ip_stack_t *ipst = ill->ill_ipst; 518 uint_t pkt_len; 519 ssize_t len; 520 ip6_t *ip6h = (ip6_t *)iph_arg; 521 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg; 522 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 523 uint_t irr_flags; 524 #define rptr ((uchar_t *)ip6h) 525 526 ASSERT(DB_TYPE(mp) == M_DATA); 527 528 /* 529 * Check for source/dest being a bad address: loopback, any, or 530 * v4mapped. All of them start with a 64 bits of zero. 531 */ 532 if (ip6h->ip6_src.s6_addr32[0] == 0 && 533 ip6h->ip6_src.s6_addr32[1] == 0) { 534 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) { 535 ip1dbg(("ip_input_v6: pkt with bad src addr\n")); 536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 537 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 538 freemsg(mp); 539 return; 540 } 541 } 542 if (ip6h->ip6_dst.s6_addr32[0] == 0 && 543 ip6h->ip6_dst.s6_addr32[1] == 0) { 544 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) { 545 ip1dbg(("ip_input_v6: pkt with bad dst addr\n")); 546 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 547 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 548 freemsg(mp); 549 return; 550 } 551 } 552 553 len = mp->b_wptr - rptr; 554 pkt_len = ira->ira_pktlen; 555 556 /* multiple mblk or too short */ 557 len -= pkt_len; 558 if (len != 0) { 559 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira); 560 if (mp == NULL) 561 return; 562 ip6h = (ip6_t *)mp->b_rptr; 563 } 564 565 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 566 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 567 int, 0); 568 /* 569 * The event for packets being received from a 'physical' 570 * interface is placed after validation of the source and/or 571 * destination address as being local so that packets can be 572 * redirected to loopback addresses using ipnat. 573 */ 574 DTRACE_PROBE4(ip6__physical__in__start, 575 ill_t *, ill, ill_t *, NULL, 576 ip6_t *, ip6h, mblk_t *, mp); 577 578 if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) { 579 int ll_multicast = 0; 580 int error; 581 in6_addr_t orig_dst = ip6h->ip6_dst; 582 583 if (ira->ira_flags & IRAF_L2DST_MULTICAST) 584 ll_multicast = HPE_MULTICAST; 585 else if (ira->ira_flags & IRAF_L2DST_BROADCAST) 586 ll_multicast = HPE_BROADCAST; 587 588 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 589 ipst->ips_ipv6firewall_physical_in, 590 ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error); 591 592 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp); 593 594 if (mp == NULL) 595 return; 596 597 /* The length could have changed */ 598 ip6h = (ip6_t *)mp->b_rptr; 599 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 600 pkt_len = ira->ira_pktlen; 601 602 /* 603 * In case the destination changed we override any previous 604 * change to nexthop. 605 */ 606 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst)) 607 nexthop = ip6h->ip6_dst; 608 609 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) { 610 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 611 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 612 freemsg(mp); 613 return; 614 } 615 616 } 617 618 if (ipst->ips_ip6_observe.he_interested) { 619 zoneid_t dzone; 620 621 /* 622 * On the inbound path the src zone will be unknown as 623 * this packet has come from the wire. 624 */ 625 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES); 626 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst); 627 } 628 629 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) != 630 IPV6_DEFAULT_VERS_AND_FLOW) { 631 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 632 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 633 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill); 634 freemsg(mp); 635 return; 636 } 637 638 /* 639 * For IPv6 we update ira_ip_hdr_length and ira_protocol as 640 * we parse the headers, starting with the hop-by-hop options header. 641 */ 642 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 643 if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) { 644 ip6_hbh_t *hbhhdr; 645 uint_t ehdrlen; 646 uint8_t *optptr; 647 648 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) { 649 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 650 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 651 freemsg(mp); 652 return; 653 } 654 if (mp->b_cont != NULL && 655 rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) { 656 ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira); 657 if (ip6h == NULL) { 658 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 659 ip_drop_input("ipIfStatsInDiscards", mp, ill); 660 freemsg(mp); 661 return; 662 } 663 } 664 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 665 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 666 667 if (pkt_len < IPV6_HDR_LEN + ehdrlen) { 668 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 669 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 670 freemsg(mp); 671 return; 672 } 673 if (mp->b_cont != NULL && 674 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 675 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 676 if (ip6h == NULL) { 677 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 678 ip_drop_input("ipIfStatsInDiscards", mp, ill); 679 freemsg(mp); 680 return; 681 } 682 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 683 } 684 685 /* 686 * Update ira_ip_hdr_length to skip the hop-by-hop header 687 * once we get to ip_fanout_v6 688 */ 689 ira->ira_ip_hdr_length += ehdrlen; 690 ira->ira_protocol = hbhhdr->ip6h_nxt; 691 692 optptr = (uint8_t *)&hbhhdr[1]; 693 switch (ip_process_options_v6(mp, ip6h, optptr, 694 ehdrlen - 2, IPPROTO_HOPOPTS, ira)) { 695 case -1: 696 /* 697 * Packet has been consumed and any 698 * needed ICMP messages sent. 699 */ 700 return; 701 case 0: 702 /* no action needed */ 703 break; 704 case 1: 705 /* 706 * Known router alert. Make use handle it as local 707 * by setting the nexthop to be the all-host multicast 708 * address, and skip multicast membership filter by 709 * marking as a router alert. 710 */ 711 ira->ira_flags |= IRAF_ROUTER_ALERT; 712 nexthop = ipv6_all_hosts_mcast; 713 break; 714 } 715 } 716 717 /* 718 * Here we check to see if we machine is setup as 719 * L3 loadbalancer and if the incoming packet is for a VIP 720 * 721 * Check the following: 722 * - there is at least a rule 723 * - protocol of the packet is supported 724 * 725 * We don't load balance IPv6 link-locals. 726 */ 727 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) && 728 !IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 729 in6_addr_t lb_dst; 730 int lb_ret; 731 732 /* For convenience, we just pull up the mblk. */ 733 if (mp->b_cont != NULL) { 734 if (pullupmsg(mp, -1) == 0) { 735 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 736 ip_drop_input("ipIfStatsInDiscards - pullupmsg", 737 mp, ill); 738 freemsg(mp); 739 return; 740 } 741 ip6h = (ip6_t *)mp->b_rptr; 742 } 743 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol, 744 (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst); 745 if (lb_ret == ILB_DROPPED) { 746 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 747 ip_drop_input("ILB_DROPPED", mp, ill); 748 freemsg(mp); 749 return; 750 } 751 if (lb_ret == ILB_BALANCED) { 752 /* Set the dst to that of the chosen server */ 753 nexthop = lb_dst; 754 DB_CKSUMFLAGS(mp) = 0; 755 } 756 } 757 758 if (ill->ill_flags & ILLF_ROUTER) 759 irr_flags = IRR_ALLOCATE; 760 else 761 irr_flags = IRR_NONE; 762 763 /* Can not use route cache with TX since the labels can differ */ 764 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 765 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 766 ire = ire_multicast(ill); 767 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 768 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 769 ipst); 770 } else { 771 /* Match destination and label */ 772 ire = ire_route_recursive_v6(&nexthop, 0, NULL, 773 ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR, 774 irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, 775 NULL); 776 } 777 /* Update the route cache so we do the ire_refrele */ 778 ASSERT(ire != NULL); 779 if (rtc->rtc_ire != NULL) 780 ire_refrele(rtc->rtc_ire); 781 rtc->rtc_ire = ire; 782 rtc->rtc_ip6addr = nexthop; 783 } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr)) { 784 /* Use the route cache */ 785 ASSERT(rtc->rtc_ire != NULL); 786 ire = rtc->rtc_ire; 787 } else { 788 /* Update the route cache */ 789 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 790 ire = ire_multicast(ill); 791 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 792 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 793 ipst); 794 } else { 795 ire = ire_route_recursive_dstonly_v6(&nexthop, 796 irr_flags, ira->ira_xmit_hint, ipst); 797 } 798 ASSERT(ire != NULL); 799 if (rtc->rtc_ire != NULL) 800 ire_refrele(rtc->rtc_ire); 801 rtc->rtc_ire = ire; 802 rtc->rtc_ip6addr = nexthop; 803 } 804 805 ire->ire_ib_pkt_count++; 806 807 /* 808 * Based on ire_type and ire_flags call one of: 809 * ire_recv_local_v6 - for IRE_LOCAL 810 * ire_recv_loopback_v6 - for IRE_LOOPBACK 811 * ire_recv_multirt_v6 - if RTF_MULTIRT 812 * ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE 813 * ire_recv_multicast_v6 - for IRE_MULTICAST 814 * ire_recv_noaccept_v6 - for ire_noaccept ones 815 * ire_recv_forward_v6 - for the rest. 816 */ 817 818 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 819 } 820 #undef rptr 821 822 /* 823 * ire_recvfn for IREs that need forwarding 824 */ 825 void 826 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 827 { 828 ip6_t *ip6h = (ip6_t *)iph_arg; 829 ill_t *ill = ira->ira_ill; 830 ip_stack_t *ipst = ill->ill_ipst; 831 iaflags_t iraflags = ira->ira_flags; 832 ill_t *dst_ill; 833 nce_t *nce; 834 uint32_t added_tx_len; 835 uint32_t mtu, iremtu; 836 837 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 838 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 839 ip_drop_input("l2 multicast not forwarded", mp, ill); 840 freemsg(mp); 841 return; 842 } 843 844 if (!(ill->ill_flags & ILLF_ROUTER)) { 845 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 846 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 847 freemsg(mp); 848 return; 849 } 850 851 /* 852 * Either ire_nce_capable or ire_dep_parent would be set for the IRE 853 * when it is found by ire_route_recursive, but that some other thread 854 * could have changed the routes with the effect of clearing 855 * ire_dep_parent. In that case we'd end up dropping the packet, or 856 * finding a new nce below. 857 * Get, allocate, or update the nce. 858 * We get a refhold on ire_nce_cache as a result of this to avoid races 859 * where ire_nce_cache is deleted. 860 * 861 * This ensures that we don't forward if the interface is down since 862 * ipif_down removes all the nces. 863 */ 864 mutex_enter(&ire->ire_lock); 865 nce = ire->ire_nce_cache; 866 if (nce == NULL) { 867 /* Not yet set up - try to set one up */ 868 mutex_exit(&ire->ire_lock); 869 (void) ire_revalidate_nce(ire); 870 mutex_enter(&ire->ire_lock); 871 nce = ire->ire_nce_cache; 872 if (nce == NULL) { 873 mutex_exit(&ire->ire_lock); 874 /* The ire_dep_parent chain went bad, or no memory */ 875 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 876 ip_drop_input("No ire_dep_parent", mp, ill); 877 freemsg(mp); 878 return; 879 } 880 } 881 nce_refhold(nce); 882 mutex_exit(&ire->ire_lock); 883 884 if (nce->nce_is_condemned) { 885 nce_t *nce1; 886 887 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE); 888 nce_refrele(nce); 889 if (nce1 == NULL) { 890 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 891 ip_drop_input("No nce", mp, ill); 892 freemsg(mp); 893 return; 894 } 895 nce = nce1; 896 } 897 dst_ill = nce->nce_ill; 898 899 /* 900 * Unless we are forwarding, drop the packet. 901 * Unlike IPv4 we don't allow source routed packets out the same 902 * interface when we are not a router. 903 * Note that ill_forward_set() will set the ILLF_ROUTER on 904 * all the group members when it gets an ipmp-ill or under-ill. 905 */ 906 if (!(dst_ill->ill_flags & ILLF_ROUTER)) { 907 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 908 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 909 freemsg(mp); 910 nce_refrele(nce); 911 return; 912 } 913 914 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) { 915 ire->ire_ib_pkt_count--; 916 /* 917 * Should only use IREs that are visible from the 918 * global zone for forwarding. 919 * For IPv6 any source route would have already been 920 * advanced in ip_fanout_v6 921 */ 922 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL, 923 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR, 924 (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE, 925 ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 926 ire->ire_ib_pkt_count++; 927 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 928 ire_refrele(ire); 929 nce_refrele(nce); 930 return; 931 } 932 /* 933 * ipIfStatsHCInForwDatagrams should only be increment if there 934 * will be an attempt to forward the packet, which is why we 935 * increment after the above condition has been checked. 936 */ 937 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 938 939 /* Initiate Read side IPPF processing */ 940 if (IPP_ENABLED(IPP_FWD_IN, ipst)) { 941 /* ip_process translates an IS_UNDER_IPMP */ 942 mp = ip_process(IPP_FWD_IN, mp, ill, ill); 943 if (mp == NULL) { 944 /* ip_drop_packet and MIB done */ 945 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred " 946 "during IPPF processing\n")); 947 nce_refrele(nce); 948 return; 949 } 950 } 951 952 DTRACE_PROBE4(ip6__forwarding__start, 953 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp); 954 955 if (HOOKS6_INTERESTED_FORWARDING(ipst)) { 956 int error; 957 958 FW_HOOKS(ipst->ips_ip6_forwarding_event, 959 ipst->ips_ipv6firewall_forwarding, 960 ill, dst_ill, ip6h, mp, mp, 0, ipst, error); 961 962 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 963 964 if (mp == NULL) { 965 nce_refrele(nce); 966 return; 967 } 968 /* 969 * Even if the destination was changed by the filter we use the 970 * forwarding decision that was made based on the address 971 * in ip_input. 972 */ 973 974 /* Might have changed */ 975 ip6h = (ip6_t *)mp->b_rptr; 976 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 977 } 978 979 /* Packet is being forwarded. Turning off hwcksum flag. */ 980 DB_CKSUMFLAGS(mp) = 0; 981 982 /* 983 * Per RFC 3513 section 2.5.2, we must not forward packets with 984 * an unspecified source address. 985 * The loopback address check for both src and dst has already 986 * been checked in ip_input_v6 987 * In the future one can envision adding RPF checks using number 3. 988 */ 989 switch (ipst->ips_src_check) { 990 case 0: 991 break; 992 case 1: 993 case 2: 994 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || 995 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 996 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 998 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 999 nce_refrele(nce); 1000 freemsg(mp); 1001 return; 1002 } 1003 break; 1004 } 1005 1006 /* 1007 * Check to see if we're forwarding the packet to a 1008 * different link from which it came. If so, check the 1009 * source and destination addresses since routers must not 1010 * forward any packets with link-local source or 1011 * destination addresses to other links. Otherwise (if 1012 * we're forwarding onto the same link), conditionally send 1013 * a redirect message. 1014 */ 1015 if (!IS_ON_SAME_LAN(dst_ill, ill)) { 1016 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 1017 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 1018 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1019 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1020 freemsg(mp); 1021 nce_refrele(nce); 1022 return; 1023 } 1024 /* TBD add site-local check at site boundary? */ 1025 } else if (ipst->ips_ipv6_send_redirects) { 1026 ip_send_potential_redirect_v6(mp, ip6h, ire, ira); 1027 } 1028 1029 added_tx_len = 0; 1030 if (iraflags & IRAF_SYSTEM_LABELED) { 1031 mblk_t *mp1; 1032 uint32_t old_pkt_len = ira->ira_pktlen; 1033 1034 /* 1035 * Check if it can be forwarded and add/remove 1036 * CIPSO options as needed. 1037 */ 1038 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) { 1039 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1040 ip_drop_input("tsol_ip_forward", mp, ill); 1041 freemsg(mp); 1042 nce_refrele(nce); 1043 return; 1044 } 1045 /* 1046 * Size may have changed. Remember amount added in case 1047 * ip_fragment needs to send an ICMP too big. 1048 */ 1049 mp = mp1; 1050 ip6h = (ip6_t *)mp->b_rptr; 1051 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 1052 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 1053 if (ira->ira_pktlen > old_pkt_len) 1054 added_tx_len = ira->ira_pktlen - old_pkt_len; 1055 } 1056 1057 mtu = dst_ill->ill_mtu; 1058 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu) 1059 mtu = iremtu; 1060 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len); 1061 nce_refrele(nce); 1062 return; 1063 1064 } 1065 1066 /* 1067 * Used for sending out unicast and multicast packets that are 1068 * forwarded. 1069 */ 1070 void 1071 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira, 1072 uint32_t mtu, uint32_t added_tx_len) 1073 { 1074 ill_t *dst_ill = nce->nce_ill; 1075 uint32_t pkt_len; 1076 iaflags_t iraflags = ira->ira_flags; 1077 ip_stack_t *ipst = dst_ill->ill_ipst; 1078 1079 if (ip6h->ip6_hops-- <= 1) { 1080 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1081 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill); 1082 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE, 1083 ira); 1084 return; 1085 } 1086 1087 /* Initiate Write side IPPF processing before any fragmentation */ 1088 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1089 /* ip_process translates an IS_UNDER_IPMP */ 1090 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1091 if (mp == NULL) { 1092 /* ip_drop_packet and MIB done */ 1093 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \ 1094 " during IPPF processing\n")); 1095 return; 1096 } 1097 } 1098 1099 pkt_len = ira->ira_pktlen; 1100 1101 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1102 1103 if (pkt_len > mtu) { 1104 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1105 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1106 if (iraflags & IRAF_SYSTEM_LABELED) { 1107 /* 1108 * Remove any CIPSO option added by 1109 * tsol_ip_forward, and make sure we report 1110 * a path MTU so that there 1111 * is room to add such a CIPSO option for future 1112 * packets. 1113 */ 1114 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6); 1115 } 1116 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira); 1117 return; 1118 } 1119 1120 ASSERT(pkt_len == 1121 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN); 1122 1123 if (iraflags & IRAF_LOOPBACK_COPY) { 1124 /* 1125 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg 1126 * is don't care 1127 */ 1128 (void) ip_postfrag_loopcheck(mp, nce, 1129 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL), 1130 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1131 } else { 1132 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL, 1133 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1134 } 1135 } 1136 1137 /* 1138 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1139 * which is what ire_route_recursive returns when there is no matching ire. 1140 * Send ICMP unreachable unless blackhole. 1141 */ 1142 void 1143 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1144 { 1145 ip6_t *ip6h = (ip6_t *)iph_arg; 1146 ill_t *ill = ira->ira_ill; 1147 ip_stack_t *ipst = ill->ill_ipst; 1148 1149 /* Would we have forwarded this packet if we had a route? */ 1150 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1151 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1152 ip_drop_input("l2 multicast not forwarded", mp, ill); 1153 freemsg(mp); 1154 return; 1155 } 1156 1157 if (!(ill->ill_flags & ILLF_ROUTER)) { 1158 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1159 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1160 freemsg(mp); 1161 return; 1162 } 1163 /* 1164 * If we had a route this could have been forwarded. Count as such. 1165 * 1166 * ipIfStatsHCInForwDatagrams should only be increment if there 1167 * will be an attempt to forward the packet, which is why we 1168 * increment after the above condition has been checked. 1169 */ 1170 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1171 1172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1173 1174 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1175 ipst); 1176 1177 if (ire->ire_flags & RTF_BLACKHOLE) { 1178 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1179 freemsg(mp); 1180 } else { 1181 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1182 1183 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, 1184 ira); 1185 } 1186 } 1187 1188 /* 1189 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1190 * VRRP when in noaccept mode. 1191 * We silently drop packets except for Neighbor Solicitations and 1192 * Neighbor Advertisements. 1193 */ 1194 void 1195 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1196 ip_recv_attr_t *ira) 1197 { 1198 ip6_t *ip6h = (ip6_t *)iph_arg; 1199 ill_t *ill = ira->ira_ill; 1200 icmp6_t *icmp6; 1201 int ip_hdr_length; 1202 1203 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1204 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1205 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1206 freemsg(mp); 1207 return; 1208 } 1209 ip_hdr_length = ira->ira_ip_hdr_length; 1210 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 1211 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 1212 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 1213 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 1214 freemsg(mp); 1215 return; 1216 } 1217 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 1218 if (ip6h == NULL) { 1219 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1220 freemsg(mp); 1221 return; 1222 } 1223 } 1224 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 1225 1226 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT && 1227 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) { 1228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1229 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1230 freemsg(mp); 1231 return; 1232 } 1233 ire_recv_local_v6(ire, mp, ip6h, ira); 1234 } 1235 1236 /* 1237 * ire_recvfn for IRE_MULTICAST. 1238 */ 1239 void 1240 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1241 ip_recv_attr_t *ira) 1242 { 1243 ip6_t *ip6h = (ip6_t *)iph_arg; 1244 ill_t *ill = ira->ira_ill; 1245 1246 ASSERT(ire->ire_ill == ira->ira_ill); 1247 1248 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1249 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1250 1251 /* Tag for higher-level protocols */ 1252 ira->ira_flags |= IRAF_MULTICAST; 1253 1254 /* 1255 * So that we don't end up with dups, only one ill an IPMP group is 1256 * nominated to receive multicast traffic. 1257 * If we have no cast_ill we are liberal and accept everything. 1258 */ 1259 if (IS_UNDER_IPMP(ill)) { 1260 ip_stack_t *ipst = ill->ill_ipst; 1261 1262 /* For an under ill_grp can change under lock */ 1263 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1264 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1265 ill->ill_grp->ig_cast_ill != NULL) { 1266 rw_exit(&ipst->ips_ill_g_lock); 1267 ip_drop_input("not on cast ill", mp, ill); 1268 freemsg(mp); 1269 return; 1270 } 1271 rw_exit(&ipst->ips_ill_g_lock); 1272 /* 1273 * We switch to the upper ill so that mrouter and hasmembers 1274 * can operate on upper here and in ip_input_multicast. 1275 */ 1276 ill = ipmp_ill_hold_ipmp_ill(ill); 1277 if (ill != NULL) { 1278 ASSERT(ill != ira->ira_ill); 1279 ASSERT(ire->ire_ill == ira->ira_ill); 1280 ira->ira_ill = ill; 1281 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1282 } else { 1283 ill = ira->ira_ill; 1284 } 1285 } 1286 1287 #ifdef notdef 1288 /* 1289 * Check if we are a multicast router - send ip_mforward a copy of 1290 * the packet. 1291 * Due to mroute_decap tunnels we consider forwarding packets even if 1292 * mrouted has not joined the allmulti group on this interface. 1293 */ 1294 if (ipst->ips_ip_g_mrouter) { 1295 int retval; 1296 1297 /* 1298 * Clear the indication that this may have hardware 1299 * checksum as we are not using it for forwarding. 1300 */ 1301 DB_CKSUMFLAGS(mp) = 0; 1302 1303 /* 1304 * ip_mforward helps us make these distinctions: If received 1305 * on tunnel and not IGMP, then drop. 1306 * If IGMP packet, then don't check membership 1307 * If received on a phyint and IGMP or PIM, then 1308 * don't check membership 1309 */ 1310 retval = ip_mforward_v6(mp, ira); 1311 /* ip_mforward updates mib variables if needed */ 1312 1313 switch (retval) { 1314 case 0: 1315 /* 1316 * pkt is okay and arrived on phyint. 1317 */ 1318 break; 1319 case -1: 1320 /* pkt is mal-formed, toss it */ 1321 freemsg(mp); 1322 goto done; 1323 case 1: 1324 /* 1325 * pkt is okay and arrived on a tunnel 1326 * 1327 * If we are running a multicast router 1328 * we need to see all mld packets, which 1329 * are marked with router alerts. 1330 */ 1331 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1332 goto forus; 1333 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1334 freemsg(mp); 1335 goto done; 1336 } 1337 } 1338 #endif /* notdef */ 1339 1340 /* 1341 * If this was a router alert we skip the group membership check. 1342 */ 1343 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1344 goto forus; 1345 1346 /* 1347 * Check if we have members on this ill. This is not necessary for 1348 * correctness because even if the NIC/GLD had a leaky filter, we 1349 * filter before passing to each conn_t. 1350 */ 1351 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) { 1352 /* 1353 * Nobody interested 1354 * 1355 * This might just be caused by the fact that 1356 * multiple IP Multicast addresses map to the same 1357 * link layer multicast - no need to increment counter! 1358 */ 1359 ip_drop_input("Multicast with no members", mp, ill); 1360 freemsg(mp); 1361 goto done; 1362 } 1363 forus: 1364 ip2dbg(("ire_recv_multicast_v6: multicast for us\n")); 1365 1366 /* 1367 * After reassembly and IPsec we will need to duplicate the 1368 * multicast packet for all matching zones on the ill. 1369 */ 1370 ira->ira_zoneid = ALL_ZONES; 1371 1372 /* Reassemble on the ill on which the packet arrived */ 1373 ip_input_local_v6(ire, mp, ip6h, ira); 1374 done: 1375 if (ill != ire->ire_ill) { 1376 ill_refrele(ill); 1377 ira->ira_ill = ire->ire_ill; 1378 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1379 } 1380 } 1381 1382 /* 1383 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1384 * Drop packets since we don't forward out multirt routes. 1385 */ 1386 /* ARGSUSED */ 1387 void 1388 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1389 { 1390 ill_t *ill = ira->ira_ill; 1391 1392 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1393 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1394 freemsg(mp); 1395 } 1396 1397 /* 1398 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1399 * has rewritten the packet to have a loopback destination address (We 1400 * filter out packet with a loopback destination from arriving over the wire). 1401 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1402 */ 1403 void 1404 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1405 { 1406 ip6_t *ip6h = (ip6_t *)iph_arg; 1407 ill_t *ill = ira->ira_ill; 1408 ill_t *ire_ill = ire->ire_ill; 1409 1410 ira->ira_zoneid = GLOBAL_ZONEID; 1411 1412 /* Switch to the lo0 ill for further processing */ 1413 if (ire_ill != ill) { 1414 /* 1415 * Update ira_ill to be the ILL on which the IP address 1416 * is hosted. 1417 * No need to hold the ill since we have a hold on the ire 1418 */ 1419 ASSERT(ira->ira_ill == ira->ira_rill); 1420 ira->ira_ill = ire_ill; 1421 1422 ip_input_local_v6(ire, mp, ip6h, ira); 1423 1424 /* Restore */ 1425 ASSERT(ira->ira_ill == ire_ill); 1426 ira->ira_ill = ill; 1427 return; 1428 1429 } 1430 ip_input_local_v6(ire, mp, ip6h, ira); 1431 } 1432 1433 /* 1434 * ire_recvfn for IRE_LOCAL. 1435 */ 1436 void 1437 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1438 { 1439 ip6_t *ip6h = (ip6_t *)iph_arg; 1440 ill_t *ill = ira->ira_ill; 1441 ill_t *ire_ill = ire->ire_ill; 1442 1443 /* Make a note for DAD that this address is in use */ 1444 ire->ire_last_used_time = LBOLT_FASTPATH; 1445 1446 /* Only target the IRE_LOCAL with the right zoneid. */ 1447 ira->ira_zoneid = ire->ire_zoneid; 1448 1449 /* 1450 * If the packet arrived on the wrong ill, we check that 1451 * this is ok. 1452 * If it is, then we ensure that we do the reassembly on 1453 * the ill on which the address is hosted. We keep ira_rill as 1454 * the one on which the packet arrived, so that IP_PKTINFO and 1455 * friends can report this. 1456 */ 1457 if (ire_ill != ill) { 1458 ire_t *new_ire; 1459 1460 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill); 1461 if (new_ire == NULL) { 1462 /* Drop packet */ 1463 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1464 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1465 freemsg(mp); 1466 return; 1467 } 1468 /* 1469 * Update ira_ill to be the ILL on which the IP address 1470 * is hosted. No need to hold the ill since we have a 1471 * hold on the ire. Note that we do the switch even if 1472 * new_ire == ire (for IPMP, ire would be the one corresponding 1473 * to the IPMP ill). 1474 */ 1475 ASSERT(ira->ira_ill == ira->ira_rill); 1476 ira->ira_ill = new_ire->ire_ill; 1477 1478 /* ira_ruifindex tracks the upper for ira_rill */ 1479 if (IS_UNDER_IPMP(ill)) 1480 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1481 1482 ip_input_local_v6(new_ire, mp, ip6h, ira); 1483 1484 /* Restore */ 1485 ASSERT(ira->ira_ill == new_ire->ire_ill); 1486 ira->ira_ill = ill; 1487 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1488 1489 if (new_ire != ire) 1490 ire_refrele(new_ire); 1491 return; 1492 } 1493 1494 ip_input_local_v6(ire, mp, ip6h, ira); 1495 } 1496 1497 /* 1498 * Common function for packets arriving for the host. Handles 1499 * checksum verification, reassembly checks, etc. 1500 */ 1501 static void 1502 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1503 { 1504 iaflags_t iraflags = ira->ira_flags; 1505 1506 /* 1507 * For multicast we need some extra work before 1508 * we call ip_fanout_v6(), since in the case of shared-IP zones 1509 * we need to pretend that a packet arrived for each zoneid. 1510 */ 1511 if (iraflags & IRAF_MULTICAST) { 1512 ip_input_multicast_v6(ire, mp, ip6h, ira); 1513 return; 1514 } 1515 ip_fanout_v6(mp, ip6h, ira); 1516 } 1517 1518 /* 1519 * Handle multiple zones which want to receive the same multicast packets 1520 * on this ill by delivering a packet to each of them. 1521 * 1522 * Note that for packets delivered to transports we could instead do this 1523 * as part of the fanout code, but since we need to handle icmp_inbound 1524 * it is simpler to have multicast work the same as IPv4 broadcast. 1525 * 1526 * The ip_fanout matching for multicast matches based on ilm independent of 1527 * zoneid since the zoneid restriction is applied when joining a multicast 1528 * group. 1529 */ 1530 /* ARGSUSED */ 1531 static void 1532 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1533 { 1534 ill_t *ill = ira->ira_ill; 1535 iaflags_t iraflags = ira->ira_flags; 1536 ip_stack_t *ipst = ill->ill_ipst; 1537 netstack_t *ns = ipst->ips_netstack; 1538 zoneid_t zoneid; 1539 mblk_t *mp1; 1540 ip6_t *ip6h1; 1541 uint_t ira_pktlen = ira->ira_pktlen; 1542 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1543 1544 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1545 ASSERT(!IS_UNDER_IPMP(ill)); 1546 1547 /* 1548 * If we don't have more than one shared-IP zone, or if 1549 * there are no members in anything but the global zone, 1550 * then just set the zoneid and proceed. 1551 */ 1552 if (ns->netstack_numzones == 1 || 1553 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 1554 GLOBAL_ZONEID)) { 1555 ira->ira_zoneid = GLOBAL_ZONEID; 1556 1557 /* If sender didn't want this zone to receive it, drop */ 1558 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1559 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1560 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1561 freemsg(mp); 1562 return; 1563 } 1564 ip_fanout_v6(mp, ip6h, ira); 1565 return; 1566 } 1567 1568 /* 1569 * Here we loop over all zoneids that have members in the group 1570 * and deliver a packet to ip_fanout for each zoneid. 1571 * 1572 * First find any members in the lowest numeric zoneid by looking for 1573 * first zoneid larger than -1 (ALL_ZONES). 1574 * We terminate the loop when we receive -1 (ALL_ZONES). 1575 */ 1576 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 1577 for (; zoneid != ALL_ZONES; 1578 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) { 1579 /* 1580 * Avoid an extra copymsg/freemsg by skipping global zone here 1581 * and doing that at the end. 1582 */ 1583 if (zoneid == GLOBAL_ZONEID) 1584 continue; 1585 1586 ira->ira_zoneid = zoneid; 1587 1588 /* If sender didn't want this zone to receive it, skip */ 1589 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1590 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1591 continue; 1592 1593 mp1 = copymsg(mp); 1594 if (mp1 == NULL) { 1595 /* Failed to deliver to one zone */ 1596 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1597 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1598 continue; 1599 } 1600 ip6h1 = (ip6_t *)mp1->b_rptr; 1601 ip_fanout_v6(mp1, ip6h1, ira); 1602 /* 1603 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1604 * so we restore them for a potential next iteration 1605 */ 1606 ira->ira_pktlen = ira_pktlen; 1607 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1608 } 1609 1610 /* Do the main ire */ 1611 ira->ira_zoneid = GLOBAL_ZONEID; 1612 /* If sender didn't want this zone to receive it, drop */ 1613 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1614 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1615 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1616 freemsg(mp); 1617 } else { 1618 ip_fanout_v6(mp, ip6h, ira); 1619 } 1620 } 1621 1622 1623 /* 1624 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions 1625 * is in use. Updates ira_zoneid and ira_flags as a result. 1626 */ 1627 static void 1628 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length, 1629 ip_recv_attr_t *ira) 1630 { 1631 uint16_t *up; 1632 uint16_t lport; 1633 zoneid_t zoneid; 1634 1635 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1636 1637 /* 1638 * If the packet is unlabeled we might allow read-down 1639 * for MAC_EXEMPT. Below we clear this if it is a multi-level 1640 * port (MLP). 1641 * Note that ira_tsl can be NULL here. 1642 */ 1643 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 1644 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 1645 1646 if (ira->ira_zoneid != ALL_ZONES) 1647 return; 1648 1649 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 1650 1651 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 1652 switch (protocol) { 1653 case IPPROTO_TCP: 1654 case IPPROTO_SCTP: 1655 case IPPROTO_UDP: 1656 /* Caller ensures this */ 1657 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr); 1658 1659 /* 1660 * Only these transports support MLP. 1661 * We know their destination port numbers is in 1662 * the same place in the header. 1663 */ 1664 lport = up[1]; 1665 1666 /* 1667 * No need to handle exclusive-stack zones 1668 * since ALL_ZONES only applies to the shared IP instance. 1669 */ 1670 zoneid = tsol_mlp_findzone(protocol, lport); 1671 /* 1672 * If no shared MLP is found, tsol_mlp_findzone returns 1673 * ALL_ZONES. In that case, we assume it's SLP, and 1674 * search for the zone based on the packet label. 1675 * 1676 * If there is such a zone, we prefer to find a 1677 * connection in it. Otherwise, we look for a 1678 * MAC-exempt connection in any zone whose label 1679 * dominates the default label on the packet. 1680 */ 1681 if (zoneid == ALL_ZONES) 1682 zoneid = tsol_attr_to_zoneid(ira); 1683 else 1684 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 1685 break; 1686 default: 1687 /* Handle shared address for other protocols */ 1688 zoneid = tsol_attr_to_zoneid(ira); 1689 break; 1690 } 1691 ira->ira_zoneid = zoneid; 1692 } 1693 1694 /* 1695 * Increment checksum failure statistics 1696 */ 1697 static void 1698 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 1699 { 1700 ip_stack_t *ipst = ill->ill_ipst; 1701 1702 switch (protocol) { 1703 case IPPROTO_TCP: 1704 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 1705 1706 if (hck_flags & HCK_FULLCKSUM) 1707 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err); 1708 else if (hck_flags & HCK_PARTIALCKSUM) 1709 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err); 1710 else 1711 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 1712 break; 1713 case IPPROTO_UDP: 1714 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1715 if (hck_flags & HCK_FULLCKSUM) 1716 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err); 1717 else if (hck_flags & HCK_PARTIALCKSUM) 1718 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err); 1719 else 1720 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 1721 break; 1722 case IPPROTO_ICMPV6: 1723 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 1724 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1725 break; 1726 default: 1727 ASSERT(0); 1728 break; 1729 } 1730 } 1731 1732 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */ 1733 uint32_t 1734 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira) 1735 { 1736 uint_t ulp_len; 1737 uint32_t cksum; 1738 uint8_t protocol = ira->ira_protocol; 1739 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1740 1741 #define iphs ((uint16_t *)ip6h) 1742 1743 switch (protocol) { 1744 case IPPROTO_TCP: 1745 ulp_len = ira->ira_pktlen - ip_hdr_length; 1746 1747 /* Protocol and length */ 1748 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 1749 /* IP addresses */ 1750 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1751 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1752 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1753 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1754 break; 1755 1756 case IPPROTO_UDP: { 1757 udpha_t *udpha; 1758 1759 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1760 1761 /* Protocol and length */ 1762 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 1763 /* IP addresses */ 1764 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1765 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1766 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1767 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1768 break; 1769 } 1770 case IPPROTO_ICMPV6: 1771 ulp_len = ira->ira_pktlen - ip_hdr_length; 1772 1773 /* Protocol and length */ 1774 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP; 1775 /* IP addresses */ 1776 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1777 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1778 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1779 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1780 break; 1781 default: 1782 cksum = 0; 1783 break; 1784 } 1785 #undef iphs 1786 return (cksum); 1787 } 1788 1789 1790 /* 1791 * Software verification of the ULP checksums. 1792 * Returns B_TRUE if ok. 1793 * Increments statistics of failed. 1794 */ 1795 static boolean_t 1796 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1797 { 1798 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1799 uint32_t cksum; 1800 uint8_t protocol = ira->ira_protocol; 1801 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1802 1803 IP6_STAT(ipst, ip6_in_sw_cksum); 1804 1805 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 1806 protocol == IPPROTO_ICMPV6); 1807 1808 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1809 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1810 if (cksum == 0) 1811 return (B_TRUE); 1812 1813 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill); 1814 return (B_FALSE); 1815 } 1816 1817 /* 1818 * Verify the ULP checksums. 1819 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 1820 * algorithm. 1821 * Increments statistics if failed. 1822 */ 1823 static boolean_t 1824 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, 1825 ip_recv_attr_t *ira) 1826 { 1827 ill_t *ill = ira->ira_rill; 1828 uint16_t hck_flags; 1829 uint32_t cksum; 1830 mblk_t *mp1; 1831 uint_t len; 1832 uint8_t protocol = ira->ira_protocol; 1833 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1834 1835 1836 switch (protocol) { 1837 case IPPROTO_TCP: 1838 case IPPROTO_ICMPV6: 1839 break; 1840 1841 case IPPROTO_UDP: { 1842 udpha_t *udpha; 1843 1844 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1845 /* 1846 * Before going through the regular checksum 1847 * calculation, make sure the received checksum 1848 * is non-zero. RFC 2460 says, a 0x0000 checksum 1849 * in a UDP packet (within IPv6 packet) is invalid 1850 * and should be replaced by 0xffff. This makes 1851 * sense as regular checksum calculation will 1852 * pass for both the cases i.e. 0x0000 and 0xffff. 1853 * Removing one of the case makes error detection 1854 * stronger. 1855 */ 1856 if (udpha->uha_checksum == 0) { 1857 /* 0x0000 checksum is invalid */ 1858 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1859 return (B_FALSE); 1860 } 1861 break; 1862 } 1863 case IPPROTO_SCTP: { 1864 sctp_hdr_t *sctph; 1865 uint32_t pktsum; 1866 1867 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length); 1868 #ifdef DEBUG 1869 if (skip_sctp_cksum) 1870 return (B_TRUE); 1871 #endif 1872 pktsum = sctph->sh_chksum; 1873 sctph->sh_chksum = 0; 1874 cksum = sctp_cksum(mp, ip_hdr_length); 1875 sctph->sh_chksum = pktsum; 1876 if (cksum == pktsum) 1877 return (B_TRUE); 1878 1879 /* 1880 * Defer until later whether a bad checksum is ok 1881 * in order to allow RAW sockets to use Adler checksum 1882 * with SCTP. 1883 */ 1884 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 1885 return (B_TRUE); 1886 } 1887 1888 default: 1889 /* No ULP checksum to verify. */ 1890 return (B_TRUE); 1891 } 1892 1893 /* 1894 * Revert to software checksum calculation if the interface 1895 * isn't capable of checksum offload. 1896 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 1897 * Note: IRAF_NO_HW_CKSUM is not currently used. 1898 */ 1899 ASSERT(!IS_IPMP(ill)); 1900 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1901 !dohwcksum) { 1902 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1903 } 1904 1905 /* 1906 * We apply this for all ULP protocols. Does the HW know to 1907 * not set the flags for SCTP and other protocols. 1908 */ 1909 1910 hck_flags = DB_CKSUMFLAGS(mp); 1911 1912 if (hck_flags & HCK_FULLCKSUM_OK) { 1913 /* 1914 * Hardware has already verified the checksum. 1915 */ 1916 return (B_TRUE); 1917 } 1918 1919 if (hck_flags & HCK_FULLCKSUM) { 1920 /* 1921 * Full checksum has been computed by the hardware 1922 * and has been attached. If the driver wants us to 1923 * verify the correctness of the attached value, in 1924 * order to protect against faulty hardware, compare 1925 * it against -0 (0xFFFF) to see if it's valid. 1926 */ 1927 cksum = DB_CKSUM16(mp); 1928 if (cksum == 0xFFFF) 1929 return (B_TRUE); 1930 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1931 return (B_FALSE); 1932 } 1933 1934 mp1 = mp->b_cont; 1935 if ((hck_flags & HCK_PARTIALCKSUM) && 1936 (mp1 == NULL || mp1->b_cont == NULL) && 1937 ip_hdr_length >= DB_CKSUMSTART(mp) && 1938 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 1939 uint32_t adj; 1940 uchar_t *cksum_start; 1941 1942 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1943 1944 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp)); 1945 1946 /* 1947 * Partial checksum has been calculated by hardware 1948 * and attached to the packet; in addition, any 1949 * prepended extraneous data is even byte aligned, 1950 * and there are at most two mblks associated with 1951 * the packet. If any such data exists, we adjust 1952 * the checksum; also take care any postpended data. 1953 */ 1954 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 1955 /* 1956 * One's complement subtract extraneous checksum 1957 */ 1958 cksum += DB_CKSUM16(mp); 1959 if (adj >= cksum) 1960 cksum = ~(adj - cksum) & 0xFFFF; 1961 else 1962 cksum -= adj; 1963 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1964 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1965 if (!(~cksum & 0xFFFF)) 1966 return (B_TRUE); 1967 1968 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1969 return (B_FALSE); 1970 } 1971 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1972 } 1973 1974 1975 /* 1976 * Handle fanout of received packets. 1977 * Unicast packets that are looped back (from ire_send_local_v6) and packets 1978 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 1979 * 1980 * IPQoS Notes 1981 * Before sending it to the client, invoke IPPF processing. Policy processing 1982 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 1983 */ 1984 void 1985 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1986 { 1987 ill_t *ill = ira->ira_ill; 1988 iaflags_t iraflags = ira->ira_flags; 1989 ip_stack_t *ipst = ill->ill_ipst; 1990 uint8_t protocol; 1991 conn_t *connp; 1992 #define rptr ((uchar_t *)ip6h) 1993 uint_t ip_hdr_length; 1994 uint_t min_ulp_header_length; 1995 int offset; 1996 ssize_t len; 1997 netstack_t *ns = ipst->ips_netstack; 1998 ipsec_stack_t *ipss = ns->netstack_ipsec; 1999 ill_t *rill = ira->ira_rill; 2000 2001 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 2002 2003 /* 2004 * We repeat this as we parse over destination options header and 2005 * fragment headers (earlier we've handled any hop-by-hop options 2006 * header.) 2007 * We update ira_protocol and ira_ip_hdr_length as we skip past 2008 * the intermediate headers; they already point past any 2009 * hop-by-hop header. 2010 */ 2011 repeat: 2012 protocol = ira->ira_protocol; 2013 ip_hdr_length = ira->ira_ip_hdr_length; 2014 2015 /* 2016 * Time for IPP once we've done reassembly and IPsec. 2017 * We skip this for loopback packets since we don't do IPQoS 2018 * on loopback. 2019 */ 2020 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2021 !(iraflags & IRAF_LOOPBACK) && 2022 (protocol != IPPROTO_ESP || protocol != IPPROTO_AH || 2023 protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING || 2024 protocol != IPPROTO_FRAGMENT)) { 2025 /* 2026 * Use the interface on which the packet arrived - not where 2027 * the IP address is hosted. 2028 */ 2029 /* ip_process translates an IS_UNDER_IPMP */ 2030 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2031 if (mp == NULL) { 2032 /* ip_drop_packet and MIB done */ 2033 return; 2034 } 2035 } 2036 2037 /* Determine the minimum required size of the upper-layer header */ 2038 /* Need to do this for at least the set of ULPs that TX handles. */ 2039 switch (protocol) { 2040 case IPPROTO_TCP: 2041 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2042 break; 2043 case IPPROTO_SCTP: 2044 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2045 break; 2046 case IPPROTO_UDP: 2047 min_ulp_header_length = UDPH_SIZE; 2048 break; 2049 case IPPROTO_ICMP: 2050 case IPPROTO_ICMPV6: 2051 min_ulp_header_length = ICMPH_SIZE; 2052 break; 2053 case IPPROTO_FRAGMENT: 2054 case IPPROTO_DSTOPTS: 2055 case IPPROTO_ROUTING: 2056 min_ulp_header_length = MIN_EHDR_LEN; 2057 break; 2058 default: 2059 min_ulp_header_length = 0; 2060 break; 2061 } 2062 /* Make sure we have the min ULP header length */ 2063 len = mp->b_wptr - rptr; 2064 if (len < ip_hdr_length + min_ulp_header_length) { 2065 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) 2066 goto pkt_too_short; 2067 2068 IP6_STAT(ipst, ip6_recv_pullup); 2069 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2070 ira); 2071 if (ip6h == NULL) 2072 goto discard; 2073 len = mp->b_wptr - rptr; 2074 } 2075 2076 /* 2077 * If trusted extensions then determine the zoneid and TX specific 2078 * ira_flags. 2079 */ 2080 if (iraflags & IRAF_SYSTEM_LABELED) { 2081 /* This can update ira->ira_flags and ira->ira_zoneid */ 2082 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira); 2083 iraflags = ira->ira_flags; 2084 } 2085 2086 2087 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2088 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2089 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) { 2090 /* Bad checksum. Stats are already incremented */ 2091 ip_drop_input("Bad ULP checksum", mp, ill); 2092 freemsg(mp); 2093 return; 2094 } 2095 /* IRAF_SCTP_CSUM_ERR could have been set */ 2096 iraflags = ira->ira_flags; 2097 } 2098 switch (protocol) { 2099 case IPPROTO_TCP: 2100 /* For TCP, discard multicast packets. */ 2101 if (iraflags & IRAF_MULTIBROADCAST) 2102 goto discard; 2103 2104 /* First mblk contains IP+TCP headers per above check */ 2105 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2106 2107 /* TCP options present? */ 2108 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4; 2109 if (offset != 5) { 2110 if (offset < 5) 2111 goto discard; 2112 2113 /* 2114 * There must be TCP options. 2115 * Make sure we can grab them. 2116 */ 2117 offset <<= 2; 2118 offset += ip_hdr_length; 2119 if (len < offset) { 2120 if (ira->ira_pktlen < offset) 2121 goto pkt_too_short; 2122 2123 IP6_STAT(ipst, ip6_recv_pullup); 2124 ip6h = ip_pullup(mp, offset, ira); 2125 if (ip6h == NULL) 2126 goto discard; 2127 len = mp->b_wptr - rptr; 2128 } 2129 } 2130 2131 /* 2132 * Pass up a squeue hint to tcp. 2133 * If ira_sqp is already set (this is loopback) we leave it 2134 * alone. 2135 */ 2136 if (ira->ira_sqp == NULL) { 2137 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2138 } 2139 2140 /* Look for AF_INET or AF_INET6 that matches */ 2141 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length, 2142 ira, ipst); 2143 if (connp == NULL) { 2144 /* Send the TH_RST */ 2145 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2146 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2147 return; 2148 } 2149 if (connp->conn_incoming_ifindex != 0 && 2150 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2151 CONN_DEC_REF(connp); 2152 2153 /* Send the TH_RST */ 2154 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2155 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2156 return; 2157 } 2158 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2159 (iraflags & IRAF_IPSEC_SECURE)) { 2160 mp = ipsec_check_inbound_policy(mp, connp, 2161 NULL, ip6h, ira); 2162 if (mp == NULL) { 2163 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2164 /* Note that mp is NULL */ 2165 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2166 CONN_DEC_REF(connp); 2167 return; 2168 } 2169 } 2170 /* Found a client; up it goes */ 2171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2172 ira->ira_ill = ira->ira_rill = NULL; 2173 if (!IPCL_IS_TCP(connp)) { 2174 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2175 (connp->conn_recv)(connp, mp, NULL, ira); 2176 CONN_DEC_REF(connp); 2177 ira->ira_ill = ill; 2178 ira->ira_rill = rill; 2179 return; 2180 } 2181 2182 /* 2183 * We do different processing whether called from 2184 * ip_accept_tcp and we match the target, don't match 2185 * the target, and when we are called by ip_input. 2186 */ 2187 if (iraflags & IRAF_TARGET_SQP) { 2188 if (ira->ira_target_sqp == connp->conn_sqp) { 2189 mblk_t *attrmp; 2190 2191 attrmp = ip_recv_attr_to_mblk(ira); 2192 if (attrmp == NULL) { 2193 BUMP_MIB(ill->ill_ip_mib, 2194 ipIfStatsInDiscards); 2195 ip_drop_input("ipIfStatsInDiscards", 2196 mp, ill); 2197 freemsg(mp); 2198 CONN_DEC_REF(connp); 2199 } else { 2200 SET_SQUEUE(attrmp, connp->conn_recv, 2201 connp); 2202 attrmp->b_cont = mp; 2203 ASSERT(ira->ira_target_sqp_mp == NULL); 2204 ira->ira_target_sqp_mp = attrmp; 2205 /* 2206 * Conn ref release when drained from 2207 * the squeue. 2208 */ 2209 } 2210 } else { 2211 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2212 connp->conn_recv, connp, ira, SQ_FILL, 2213 SQTAG_IP6_TCP_INPUT); 2214 } 2215 } else { 2216 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2217 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 2218 } 2219 ira->ira_ill = ill; 2220 ira->ira_rill = rill; 2221 return; 2222 2223 case IPPROTO_SCTP: { 2224 sctp_hdr_t *sctph; 2225 uint32_t ports; /* Source and destination ports */ 2226 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2227 2228 /* For SCTP, discard multicast packets. */ 2229 if (iraflags & IRAF_MULTIBROADCAST) 2230 goto discard; 2231 2232 /* 2233 * Since there is no SCTP h/w cksum support yet, just 2234 * clear the flag. 2235 */ 2236 DB_CKSUMFLAGS(mp) = 0; 2237 2238 /* Length ensured above */ 2239 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2240 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2241 2242 /* get the ports */ 2243 ports = *(uint32_t *)&sctph->sh_sport; 2244 2245 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2246 /* 2247 * No potential sctp checksum errors go to the Sun 2248 * sctp stack however they might be Adler-32 summed 2249 * packets a userland stack bound to a raw IP socket 2250 * could reasonably use. Note though that Adler-32 is 2251 * a long deprecated algorithm and customer sctp 2252 * networks should eventually migrate to CRC-32 at 2253 * which time this facility should be removed. 2254 */ 2255 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2256 return; 2257 } 2258 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports, 2259 ira, mp, sctps, sctph); 2260 if (connp == NULL) { 2261 /* Check for raw socket or OOTB handling */ 2262 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2263 return; 2264 } 2265 if (connp->conn_incoming_ifindex != 0 && 2266 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2267 CONN_DEC_REF(connp); 2268 2269 /* Check for raw socket or OOTB handling */ 2270 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2271 return; 2272 } 2273 2274 /* Found a client; up it goes */ 2275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2276 sctp_input(connp, NULL, ip6h, mp, ira); 2277 /* sctp_input does a rele of the sctp_t */ 2278 return; 2279 } 2280 2281 case IPPROTO_UDP: 2282 /* First mblk contains IP+UDP headers as checked above */ 2283 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2284 2285 if (iraflags & IRAF_MULTIBROADCAST) { 2286 uint16_t *up; /* Pointer to ports in ULP header */ 2287 2288 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 2289 2290 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira); 2291 return; 2292 } 2293 2294 /* Look for AF_INET or AF_INET6 that matches */ 2295 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length, 2296 ira, ipst); 2297 if (connp == NULL) { 2298 no_udp_match: 2299 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP]. 2300 connf_head != NULL) { 2301 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2302 ip_fanout_proto_v6(mp, ip6h, ira); 2303 } else { 2304 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2305 ICMP6_DST_UNREACH_NOPORT, ira); 2306 } 2307 return; 2308 2309 } 2310 if (connp->conn_incoming_ifindex != 0 && 2311 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2312 CONN_DEC_REF(connp); 2313 goto no_udp_match; 2314 } 2315 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2316 !canputnext(connp->conn_rq)) { 2317 CONN_DEC_REF(connp); 2318 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2319 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2320 freemsg(mp); 2321 return; 2322 } 2323 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2324 (iraflags & IRAF_IPSEC_SECURE)) { 2325 mp = ipsec_check_inbound_policy(mp, connp, 2326 NULL, ip6h, ira); 2327 if (mp == NULL) { 2328 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2329 /* Note that mp is NULL */ 2330 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2331 CONN_DEC_REF(connp); 2332 return; 2333 } 2334 } 2335 2336 /* Found a client; up it goes */ 2337 IP6_STAT(ipst, ip6_udp_fannorm); 2338 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2339 ira->ira_ill = ira->ira_rill = NULL; 2340 (connp->conn_recv)(connp, mp, NULL, ira); 2341 CONN_DEC_REF(connp); 2342 ira->ira_ill = ill; 2343 ira->ira_rill = rill; 2344 return; 2345 default: 2346 break; 2347 } 2348 2349 /* 2350 * Clear hardware checksumming flag as it is currently only 2351 * used by TCP and UDP. 2352 */ 2353 DB_CKSUMFLAGS(mp) = 0; 2354 2355 switch (protocol) { 2356 case IPPROTO_ICMPV6: 2357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 2358 2359 /* Check variable for testing applications */ 2360 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 2361 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill); 2362 freemsg(mp); 2363 return; 2364 } 2365 /* 2366 * We need to accomodate icmp messages coming in clear 2367 * until we get everything secure from the wire. If 2368 * icmp_accept_clear_messages is zero we check with 2369 * the global policy and act accordingly. If it is 2370 * non-zero, we accept the message without any checks. 2371 * But *this does not mean* that this will be delivered 2372 * to RAW socket clients. By accepting we might send 2373 * replies back, change our MTU value etc., 2374 * but delivery to the ULP/clients depends on their 2375 * policy dispositions. 2376 */ 2377 if (ipst->ips_icmp_accept_clear_messages == 0) { 2378 mp = ipsec_check_global_policy(mp, NULL, 2379 NULL, ip6h, ira, ns); 2380 if (mp == NULL) 2381 return; 2382 } 2383 2384 /* 2385 * On a labeled system, we have to check whether the zone 2386 * itself is permitted to receive raw traffic. 2387 */ 2388 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2389 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2390 BUMP_MIB(ill->ill_icmp6_mib, 2391 ipv6IfIcmpInErrors); 2392 ip_drop_input("tsol_can_accept_raw", mp, ill); 2393 freemsg(mp); 2394 return; 2395 } 2396 } 2397 2398 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2399 mp = icmp_inbound_v6(mp, ira); 2400 if (mp == NULL) { 2401 /* No need to pass to RAW sockets */ 2402 return; 2403 } 2404 break; 2405 2406 case IPPROTO_DSTOPTS: { 2407 ip6_dest_t *desthdr; 2408 uint_t ehdrlen; 2409 uint8_t *optptr; 2410 2411 /* We already check for MIN_EHDR_LEN above */ 2412 2413 /* Check if AH is present and needs to be processed. */ 2414 mp = ipsec_early_ah_v6(mp, ira); 2415 if (mp == NULL) 2416 return; 2417 2418 /* 2419 * Reinitialize pointers, as ipsec_early_ah_v6() does 2420 * complete pullups. We don't have to do more pullups 2421 * as a result. 2422 */ 2423 ip6h = (ip6_t *)mp->b_rptr; 2424 2425 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2426 goto pkt_too_short; 2427 2428 if (mp->b_cont != NULL && 2429 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2430 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2431 if (ip6h == NULL) 2432 goto discard; 2433 } 2434 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2435 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2436 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2437 goto pkt_too_short; 2438 if (mp->b_cont != NULL && 2439 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2440 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2441 if (ip6h == NULL) 2442 goto discard; 2443 2444 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2445 } 2446 optptr = (uint8_t *)&desthdr[1]; 2447 2448 /* 2449 * Update ira_ip_hdr_length to skip the destination header 2450 * when we repeat. 2451 */ 2452 ira->ira_ip_hdr_length += ehdrlen; 2453 2454 ira->ira_protocol = desthdr->ip6d_nxt; 2455 2456 /* 2457 * Note: XXX This code does not seem to make 2458 * distinction between Destination Options Header 2459 * being before/after Routing Header which can 2460 * happen if we are at the end of source route. 2461 * This may become significant in future. 2462 * (No real significant Destination Options are 2463 * defined/implemented yet ). 2464 */ 2465 switch (ip_process_options_v6(mp, ip6h, optptr, 2466 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) { 2467 case -1: 2468 /* 2469 * Packet has been consumed and any needed 2470 * ICMP errors sent. 2471 */ 2472 return; 2473 case 0: 2474 /* No action needed continue */ 2475 break; 2476 case 1: 2477 /* 2478 * Unnexpected return value 2479 * (Router alert is a Hop-by-Hop option) 2480 */ 2481 #ifdef DEBUG 2482 panic("ip_fanout_v6: router " 2483 "alert hbh opt indication in dest opt"); 2484 /*NOTREACHED*/ 2485 #else 2486 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2487 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2488 freemsg(mp); 2489 return; 2490 #endif 2491 } 2492 goto repeat; 2493 } 2494 case IPPROTO_FRAGMENT: { 2495 ip6_frag_t *fraghdr; 2496 2497 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t)) 2498 goto pkt_too_short; 2499 2500 if (mp->b_cont != NULL && 2501 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) { 2502 ip6h = ip_pullup(mp, 2503 ip_hdr_length + sizeof (ip6_frag_t), ira); 2504 if (ip6h == NULL) 2505 goto discard; 2506 } 2507 2508 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length); 2509 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 2510 2511 /* 2512 * Invoke the CGTP (multirouting) filtering module to 2513 * process the incoming packet. Packets identified as 2514 * duplicates must be discarded. Filtering is active 2515 * only if the ip_cgtp_filter ndd variable is 2516 * non-zero. 2517 */ 2518 if (ipst->ips_ip_cgtp_filter && 2519 ipst->ips_ip_cgtp_filter_ops != NULL) { 2520 int cgtp_flt_pkt; 2521 netstackid_t stackid; 2522 2523 stackid = ipst->ips_netstack->netstack_stackid; 2524 2525 /* 2526 * CGTP and IPMP are mutually exclusive so 2527 * phyint_ifindex is fine here. 2528 */ 2529 cgtp_flt_pkt = 2530 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 2531 stackid, ill->ill_phyint->phyint_ifindex, 2532 ip6h, fraghdr); 2533 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 2534 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 2535 freemsg(mp); 2536 return; 2537 } 2538 } 2539 2540 /* 2541 * Update ip_hdr_length to skip the frag header 2542 * ip_input_fragment_v6 will determine the extension header 2543 * prior to the fragment header and update its nexthdr value, 2544 * and also set ira_protocol to the nexthdr that follows the 2545 * completed fragment. 2546 */ 2547 ip_hdr_length += sizeof (ip6_frag_t); 2548 2549 /* 2550 * Make sure we have ira_l2src before we loose the original 2551 * mblk 2552 */ 2553 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 2554 ip_setl2src(mp, ira, ira->ira_rill); 2555 2556 mp = ip_input_fragment_v6(mp, ip6h, fraghdr, 2557 ira->ira_pktlen - ip_hdr_length, ira); 2558 if (mp == NULL) { 2559 /* Reassembly is still pending */ 2560 return; 2561 } 2562 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 2563 2564 /* 2565 * The mblk chain has the frag header removed and 2566 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the 2567 * IP header has been updated to refleact the result. 2568 */ 2569 ip6h = (ip6_t *)mp->b_rptr; 2570 ip_hdr_length = ira->ira_ip_hdr_length; 2571 goto repeat; 2572 } 2573 case IPPROTO_HOPOPTS: 2574 /* 2575 * Illegal header sequence. 2576 * (Hop-by-hop headers are processed above 2577 * and required to immediately follow IPv6 header) 2578 */ 2579 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2580 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2581 return; 2582 2583 case IPPROTO_ROUTING: { 2584 uint_t ehdrlen; 2585 ip6_rthdr_t *rthdr; 2586 2587 /* Check if AH is present and needs to be processed. */ 2588 mp = ipsec_early_ah_v6(mp, ira); 2589 if (mp == NULL) 2590 return; 2591 2592 /* 2593 * Reinitialize pointers, as ipsec_early_ah_v6() does 2594 * complete pullups. We don't have to do more pullups 2595 * as a result. 2596 */ 2597 ip6h = (ip6_t *)mp->b_rptr; 2598 2599 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2600 goto pkt_too_short; 2601 2602 if (mp->b_cont != NULL && 2603 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2604 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2605 if (ip6h == NULL) 2606 goto discard; 2607 } 2608 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2609 protocol = ira->ira_protocol = rthdr->ip6r_nxt; 2610 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2611 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2612 goto pkt_too_short; 2613 if (mp->b_cont != NULL && 2614 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2615 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2616 if (ip6h == NULL) 2617 goto discard; 2618 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2619 } 2620 if (rthdr->ip6r_segleft != 0) { 2621 /* Not end of source route */ 2622 if (ira->ira_flags & 2623 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 2624 BUMP_MIB(ill->ill_ip_mib, 2625 ipIfStatsForwProhibits); 2626 ip_drop_input("ipIfStatsInForwProhibits", 2627 mp, ill); 2628 freemsg(mp); 2629 return; 2630 } 2631 ip_process_rthdr(mp, ip6h, rthdr, ira); 2632 return; 2633 } 2634 ira->ira_ip_hdr_length += ehdrlen; 2635 goto repeat; 2636 } 2637 2638 case IPPROTO_AH: 2639 case IPPROTO_ESP: { 2640 /* 2641 * Fast path for AH/ESP. 2642 */ 2643 netstack_t *ns = ipst->ips_netstack; 2644 ipsec_stack_t *ipss = ns->netstack_ipsec; 2645 2646 IP_STAT(ipst, ipsec_proto_ahesp); 2647 2648 if (!ipsec_loaded(ipss)) { 2649 ip_proto_not_sup(mp, ira); 2650 return; 2651 } 2652 2653 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2654 /* select inbound SA and have IPsec process the pkt */ 2655 if (protocol == IPPROTO_ESP) { 2656 esph_t *esph; 2657 2658 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2659 if (mp == NULL) 2660 return; 2661 2662 ASSERT(esph != NULL); 2663 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2664 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2665 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2666 2667 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2668 ira); 2669 } else { 2670 ah_t *ah; 2671 2672 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2673 if (mp == NULL) 2674 return; 2675 2676 ASSERT(ah != NULL); 2677 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2678 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2679 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2680 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2681 ira); 2682 } 2683 2684 if (mp == NULL) { 2685 /* 2686 * Either it failed or is pending. In the former case 2687 * ipIfStatsInDiscards was increased. 2688 */ 2689 return; 2690 } 2691 /* we're done with IPsec processing, send it up */ 2692 ip_input_post_ipsec(mp, ira); 2693 return; 2694 } 2695 case IPPROTO_NONE: 2696 /* All processing is done. Count as "delivered". */ 2697 freemsg(mp); 2698 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2699 return; 2700 2701 case IPPROTO_ENCAP: 2702 case IPPROTO_IPV6: 2703 /* iptun will verify trusted label */ 2704 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length, 2705 ira, ipst); 2706 if (connp != NULL) { 2707 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2708 ira->ira_ill = ira->ira_rill = NULL; 2709 connp->conn_recv(connp, mp, NULL, ira); 2710 CONN_DEC_REF(connp); 2711 ira->ira_ill = ill; 2712 ira->ira_rill = rill; 2713 return; 2714 } 2715 /* FALLTHRU */ 2716 default: 2717 /* 2718 * On a labeled system, we have to check whether the zone 2719 * itself is permitted to receive raw traffic. 2720 */ 2721 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2722 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2723 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2724 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2725 freemsg(mp); 2726 return; 2727 } 2728 } 2729 break; 2730 } 2731 2732 /* 2733 * The above input functions may have returned the pulled up message. 2734 * So ip6h need to be reinitialized. 2735 */ 2736 ip6h = (ip6_t *)mp->b_rptr; 2737 ira->ira_protocol = protocol; 2738 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) { 2739 /* No user-level listener for these packets packets */ 2740 ip_proto_not_sup(mp, ira); 2741 return; 2742 } 2743 2744 /* 2745 * Handle fanout to raw sockets. There 2746 * can be more than one stream bound to a particular 2747 * protocol. When this is the case, each one gets a copy 2748 * of any incoming packets. 2749 */ 2750 ASSERT(ira->ira_protocol == protocol); 2751 ip_fanout_proto_v6(mp, ip6h, ira); 2752 return; 2753 2754 pkt_too_short: 2755 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2756 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2757 freemsg(mp); 2758 return; 2759 2760 discard: 2761 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2762 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2763 freemsg(mp); 2764 #undef rptr 2765 } 2766