1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/dlpi.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/strsubr.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #include <sys/zone.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/xti_inet.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/debug.h> 44 #include <sys/kobj.h> 45 #include <sys/modctl.h> 46 #include <sys/atomic.h> 47 #include <sys/policy.h> 48 #include <sys/priv.h> 49 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/kmem.h> 53 #include <sys/sdt.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_arp.h> 60 #include <net/route.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <net/if_dl.h> 64 65 #include <inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/mib2.h> 68 #include <inet/nd.h> 69 #include <inet/arp.h> 70 #include <inet/snmpcom.h> 71 #include <inet/kstatcom.h> 72 73 #include <netinet/igmp_var.h> 74 #include <netinet/ip6.h> 75 #include <netinet/icmp6.h> 76 #include <netinet/sctp.h> 77 78 #include <inet/ip.h> 79 #include <inet/ip_impl.h> 80 #include <inet/ip6.h> 81 #include <inet/ip6_asp.h> 82 #include <inet/optcom.h> 83 #include <inet/tcp.h> 84 #include <inet/tcp_impl.h> 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_ftable.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <inet/ip_listutils.h> 92 #include <netinet/igmp.h> 93 #include <netinet/ip_mroute.h> 94 #include <inet/ipp_common.h> 95 96 #include <net/pfkeyv2.h> 97 #include <inet/sadb.h> 98 #include <inet/ipsec_impl.h> 99 #include <inet/ipdrop.h> 100 #include <inet/ip_netinfo.h> 101 #include <inet/ilb_ip.h> 102 #include <sys/squeue_impl.h> 103 #include <sys/squeue.h> 104 105 #include <sys/ethernet.h> 106 #include <net/if_types.h> 107 #include <sys/cpuvar.h> 108 109 #include <ipp/ipp.h> 110 #include <ipp/ipp_impl.h> 111 #include <ipp/ipgpc/ipgpc.h> 112 113 #include <sys/pattr.h> 114 #include <inet/ipclassifier.h> 115 #include <inet/sctp_ip.h> 116 #include <inet/sctp/sctp_impl.h> 117 #include <inet/udp_impl.h> 118 #include <sys/sunddi.h> 119 120 #include <sys/tsol/label.h> 121 #include <sys/tsol/tnet.h> 122 123 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 124 125 #ifdef DEBUG 126 extern boolean_t skip_sctp_cksum; 127 #endif 128 129 static void ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *); 130 131 static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *, 132 ip_recv_attr_t *); 133 134 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6) 135 136 /* 137 * Direct read side procedure capable of dealing with chains. GLDv3 based 138 * drivers call this function directly with mblk chains while STREAMS 139 * read side procedure ip_rput() calls this for single packet with ip_ring 140 * set to NULL to process one packet at a time. 141 * 142 * The ill will always be valid if this function is called directly from 143 * the driver. 144 * 145 * If ip_input_v6() is called from GLDv3: 146 * 147 * - This must be a non-VLAN IP stream. 148 * - 'mp' is either an untagged or a special priority-tagged packet. 149 * - Any VLAN tag that was in the MAC header has been stripped. 150 * 151 * If the IP header in packet is not 32-bit aligned, every message in the 152 * chain will be aligned before further operations. This is required on SPARC 153 * platform. 154 */ 155 void 156 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 157 struct mac_header_info_s *mhip) 158 { 159 (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL, 160 NULL); 161 } 162 163 /* 164 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves 165 * a chain of packets in the poll mode. The packets have gone through the 166 * data link processing but not IP processing. For performance and latency 167 * reasons, the squeue wants to process the chain in line instead of feeding 168 * it back via ip_input path. 169 * 170 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6 171 * will pass back any TCP packets matching the target sqp to 172 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by 173 * ip_input_v6 and ip_fanout_v6 as normal. 174 * The TCP packets that match the target squeue are returned to the caller 175 * as a b_next chain after each packet has been prepend with an mblk 176 * from ip_recv_attr_to_mblk. 177 */ 178 mblk_t * 179 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 180 mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 181 { 182 return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp, 183 last, cnt)); 184 } 185 186 /* 187 * Used by ip_input_v6 and ip_accept_tcp_v6 188 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is 189 * only used by ip_input_v6. 190 */ 191 mblk_t * 192 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 193 struct mac_header_info_s *mhip, squeue_t *target_sqp, 194 mblk_t **last, uint_t *cnt) 195 { 196 mblk_t *mp; 197 ip6_t *ip6h; 198 ip_recv_attr_t iras; /* Receive attributes */ 199 rtc_t rtc; 200 iaflags_t chain_flags = 0; /* Fixed for chain */ 201 mblk_t *ahead = NULL; /* Accepted head */ 202 mblk_t *atail = NULL; /* Accepted tail */ 203 uint_t acnt = 0; /* Accepted count */ 204 205 ASSERT(mp_chain != NULL); 206 ASSERT(ill != NULL); 207 208 /* These ones do not change as we loop over packets */ 209 iras.ira_ill = iras.ira_rill = ill; 210 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 211 iras.ira_rifindex = iras.ira_ruifindex; 212 iras.ira_sqp = NULL; 213 iras.ira_ring = ip_ring; 214 /* For ECMP and outbound transmit ring selection */ 215 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring); 216 217 iras.ira_target_sqp = target_sqp; 218 iras.ira_target_sqp_mp = NULL; 219 if (target_sqp != NULL) 220 chain_flags |= IRAF_TARGET_SQP; 221 222 /* 223 * We try to have a mhip pointer when possible, but 224 * it might be NULL in some cases. In those cases we 225 * have to assume unicast. 226 */ 227 iras.ira_mhip = mhip; 228 iras.ira_flags = 0; 229 if (mhip != NULL) { 230 switch (mhip->mhi_dsttype) { 231 case MAC_ADDRTYPE_MULTICAST : 232 chain_flags |= IRAF_L2DST_MULTICAST; 233 break; 234 case MAC_ADDRTYPE_BROADCAST : 235 chain_flags |= IRAF_L2DST_BROADCAST; 236 break; 237 } 238 } 239 240 /* 241 * Initialize the one-element route cache. 242 * 243 * We do ire caching from one iteration to 244 * another. In the event the packet chain contains 245 * all packets from the same dst, this caching saves 246 * an ire_route_recursive for each of the succeeding 247 * packets in a packet chain. 248 */ 249 rtc.rtc_ire = NULL; 250 rtc.rtc_ip6addr = ipv6_all_zeros; 251 252 /* Loop over b_next */ 253 for (mp = mp_chain; mp != NULL; mp = mp_chain) { 254 mp_chain = mp->b_next; 255 mp->b_next = NULL; 256 257 /* 258 * if db_ref > 1 then copymsg and free original. Packet 259 * may be changed and we do not want the other entity 260 * who has a reference to this message to trip over the 261 * changes. This is a blind change because trying to 262 * catch all places that might change the packet is too 263 * difficult. 264 * 265 * This corresponds to the fast path case, where we have 266 * a chain of M_DATA mblks. We check the db_ref count 267 * of only the 1st data block in the mblk chain. There 268 * doesn't seem to be a reason why a device driver would 269 * send up data with varying db_ref counts in the mblk 270 * chain. In any case the Fast path is a private 271 * interface, and our drivers don't do such a thing. 272 * Given the above assumption, there is no need to walk 273 * down the entire mblk chain (which could have a 274 * potential performance problem) 275 * 276 * The "(DB_REF(mp) > 1)" check was moved from ip_rput() 277 * to here because of exclusive ip stacks and vnics. 278 * Packets transmitted from exclusive stack over vnic 279 * can have db_ref > 1 and when it gets looped back to 280 * another vnic in a different zone, you have ip_input() 281 * getting dblks with db_ref > 1. So if someone 282 * complains of TCP performance under this scenario, 283 * take a serious look here on the impact of copymsg(). 284 */ 285 if (DB_REF(mp) > 1) { 286 if ((mp = ip_fix_dbref(mp, &iras)) == NULL) 287 continue; 288 } 289 290 /* 291 * IP header ptr not aligned? 292 * OR IP header not complete in first mblk 293 */ 294 ip6h = (ip6_t *)mp->b_rptr; 295 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) { 296 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras); 297 if (mp == NULL) 298 continue; 299 ip6h = (ip6_t *)mp->b_rptr; 300 } 301 302 /* Protect against a mix of Ethertypes and IP versions */ 303 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) { 304 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 305 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 306 freemsg(mp); 307 /* mhip might point into 1st packet in the chain. */ 308 iras.ira_mhip = NULL; 309 continue; 310 } 311 312 /* 313 * Check for Martian addrs; we have to explicitly 314 * test for for zero dst since this is also used as 315 * an indication that the rtc is not used. 316 */ 317 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) { 318 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 319 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 320 freemsg(mp); 321 /* mhip might point into 1st packet in the chain. */ 322 iras.ira_mhip = NULL; 323 continue; 324 } 325 /* 326 * Keep L2SRC from a previous packet in chain since mhip 327 * might point into an earlier packet in the chain. 328 */ 329 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET); 330 331 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags; 332 iras.ira_free_flags = 0; 333 iras.ira_cred = NULL; 334 iras.ira_cpid = NOPID; 335 iras.ira_tsl = NULL; 336 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */ 337 338 /* 339 * We must count all incoming packets, even if they end 340 * up being dropped later on. Defer counting bytes until 341 * we have the whole IP header in first mblk. 342 */ 343 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 344 345 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 346 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 347 iras.ira_pktlen); 348 349 /* 350 * Call one of: 351 * ill_input_full_v6 352 * ill_input_short_v6 353 * The former is used in the case of TX. See ill_set_inputfn(). 354 */ 355 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 356 357 /* Any references to clean up? No hold on ira_ill */ 358 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 359 ira_cleanup(&iras, B_FALSE); 360 361 if (iras.ira_target_sqp_mp != NULL) { 362 /* Better be called from ip_accept_tcp */ 363 ASSERT(target_sqp != NULL); 364 365 /* Found one packet to accept */ 366 mp = iras.ira_target_sqp_mp; 367 iras.ira_target_sqp_mp = NULL; 368 ASSERT(ip_recv_attr_is_mblk(mp)); 369 370 if (atail != NULL) 371 atail->b_next = mp; 372 else 373 ahead = mp; 374 atail = mp; 375 acnt++; 376 mp = NULL; 377 } 378 /* mhip might point into 1st packet in the chain. */ 379 iras.ira_mhip = NULL; 380 } 381 /* Any remaining references to the route cache? */ 382 if (rtc.rtc_ire != NULL) { 383 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 384 ire_refrele(rtc.rtc_ire); 385 } 386 387 if (ahead != NULL) { 388 /* Better be called from ip_accept_tcp */ 389 ASSERT(target_sqp != NULL); 390 *last = atail; 391 *cnt = acnt; 392 return (ahead); 393 } 394 395 return (NULL); 396 } 397 398 /* 399 * This input function is used when 400 * - is_system_labeled() 401 * 402 * Note that for IPv6 CGTP filtering is handled only when receiving fragment 403 * headers, and RSVP uses router alert options, thus we don't need anything 404 * extra for them. 405 */ 406 void 407 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 408 ip_recv_attr_t *ira, rtc_t *rtc) 409 { 410 ip6_t *ip6h = (ip6_t *)iph_arg; 411 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg; 412 ill_t *ill = ira->ira_ill; 413 414 ASSERT(ira->ira_tsl == NULL); 415 416 /* 417 * Attach any necessary label information to 418 * this packet 419 */ 420 if (is_system_labeled()) { 421 ira->ira_flags |= IRAF_SYSTEM_LABELED; 422 423 /* 424 * This updates ira_cred, ira_tsl and ira_free_flags based 425 * on the label. 426 */ 427 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) { 428 if (ip6opt_ls != 0) 429 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 430 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 431 ip_drop_input("ipIfStatsInDiscards", mp, ill); 432 freemsg(mp); 433 return; 434 } 435 /* Note that ira_tsl can be NULL here. */ 436 437 /* tsol_get_pkt_label sometimes does pullupmsg */ 438 ip6h = (ip6_t *)mp->b_rptr; 439 } 440 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc); 441 } 442 443 /* 444 * Check for IPv6 addresses that should not appear on the wire 445 * as either source or destination. 446 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have 447 * to revisit the IPv4-mapped part. 448 */ 449 static boolean_t 450 ip6_bad_address(in6_addr_t *addr, boolean_t is_src) 451 { 452 if (IN6_IS_ADDR_V4MAPPED(addr)) { 453 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr")); 454 return (B_TRUE); 455 } 456 if (IN6_IS_ADDR_LOOPBACK(addr)) { 457 ip1dbg(("ip_input_v6: pkt with loopback addr")); 458 return (B_TRUE); 459 } 460 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) { 461 /* 462 * having :: in the src is ok: it's used for DAD. 463 */ 464 ip1dbg(("ip_input_v6: pkt with unspecified addr")); 465 return (B_TRUE); 466 } 467 return (B_FALSE); 468 } 469 470 /* 471 * Routing lookup for IPv6 link-locals. 472 * First we look on the inbound interface, then we check for IPMP and 473 * look on the upper interface. 474 * We update ira_ruifindex if we find the IRE on the upper interface. 475 */ 476 static ire_t * 477 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira, 478 uint_t irr_flags, ip_stack_t *ipst) 479 { 480 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL; 481 ire_t *ire; 482 483 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop)); 484 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 485 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 486 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 487 !IS_UNDER_IPMP(ill)) 488 return (ire); 489 490 /* 491 * When we are using IMP we need to look for an IRE on both the 492 * under and upper interfaces since there are different 493 * link-local addresses for the under and upper. 494 */ 495 ill = ipmp_ill_hold_ipmp_ill(ill); 496 if (ill == NULL) 497 return (ire); 498 499 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 500 501 ire_refrele(ire); 502 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 503 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 504 ill_refrele(ill); 505 return (ire); 506 } 507 508 /* 509 * This is the tail-end of the full receive side packet handling. 510 * It can be used directly when the configuration is simple. 511 */ 512 void 513 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 514 ip_recv_attr_t *ira, rtc_t *rtc) 515 { 516 ire_t *ire; 517 ill_t *ill = ira->ira_ill; 518 ip_stack_t *ipst = ill->ill_ipst; 519 uint_t pkt_len; 520 ssize_t len; 521 ip6_t *ip6h = (ip6_t *)iph_arg; 522 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg; 523 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 524 uint_t irr_flags; 525 #define rptr ((uchar_t *)ip6h) 526 527 ASSERT(DB_TYPE(mp) == M_DATA); 528 529 /* 530 * Check for source/dest being a bad address: loopback, any, or 531 * v4mapped. All of them start with a 64 bits of zero. 532 */ 533 if (ip6h->ip6_src.s6_addr32[0] == 0 && 534 ip6h->ip6_src.s6_addr32[1] == 0) { 535 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) { 536 ip1dbg(("ip_input_v6: pkt with bad src addr\n")); 537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 538 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 539 freemsg(mp); 540 return; 541 } 542 } 543 if (ip6h->ip6_dst.s6_addr32[0] == 0 && 544 ip6h->ip6_dst.s6_addr32[1] == 0) { 545 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) { 546 ip1dbg(("ip_input_v6: pkt with bad dst addr\n")); 547 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 548 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 549 freemsg(mp); 550 return; 551 } 552 } 553 554 len = mp->b_wptr - rptr; 555 pkt_len = ira->ira_pktlen; 556 557 /* multiple mblk or too short */ 558 len -= pkt_len; 559 if (len != 0) { 560 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira); 561 if (mp == NULL) 562 return; 563 ip6h = (ip6_t *)mp->b_rptr; 564 } 565 566 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 567 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 568 int, 0); 569 /* 570 * The event for packets being received from a 'physical' 571 * interface is placed after validation of the source and/or 572 * destination address as being local so that packets can be 573 * redirected to loopback addresses using ipnat. 574 */ 575 DTRACE_PROBE4(ip6__physical__in__start, 576 ill_t *, ill, ill_t *, NULL, 577 ip6_t *, ip6h, mblk_t *, mp); 578 579 if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) { 580 int ll_multicast = 0; 581 int error; 582 in6_addr_t orig_dst = ip6h->ip6_dst; 583 584 if (ira->ira_flags & IRAF_L2DST_MULTICAST) 585 ll_multicast = HPE_MULTICAST; 586 else if (ira->ira_flags & IRAF_L2DST_BROADCAST) 587 ll_multicast = HPE_BROADCAST; 588 589 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 590 ipst->ips_ipv6firewall_physical_in, 591 ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error); 592 593 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp); 594 595 if (mp == NULL) 596 return; 597 598 /* The length could have changed */ 599 ip6h = (ip6_t *)mp->b_rptr; 600 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 601 pkt_len = ira->ira_pktlen; 602 603 /* 604 * In case the destination changed we override any previous 605 * change to nexthop. 606 */ 607 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst)) 608 nexthop = ip6h->ip6_dst; 609 610 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) { 611 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 612 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 613 freemsg(mp); 614 return; 615 } 616 617 } 618 619 if (ipst->ips_ip6_observe.he_interested) { 620 zoneid_t dzone; 621 622 /* 623 * On the inbound path the src zone will be unknown as 624 * this packet has come from the wire. 625 */ 626 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES); 627 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst); 628 } 629 630 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) != 631 IPV6_DEFAULT_VERS_AND_FLOW) { 632 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 633 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 634 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill); 635 freemsg(mp); 636 return; 637 } 638 639 /* 640 * For IPv6 we update ira_ip_hdr_length and ira_protocol as 641 * we parse the headers, starting with the hop-by-hop options header. 642 */ 643 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 644 if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) { 645 ip6_hbh_t *hbhhdr; 646 uint_t ehdrlen; 647 uint8_t *optptr; 648 649 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) { 650 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 651 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 652 freemsg(mp); 653 return; 654 } 655 if (mp->b_cont != NULL && 656 rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) { 657 ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira); 658 if (ip6h == NULL) { 659 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 660 ip_drop_input("ipIfStatsInDiscards", mp, ill); 661 freemsg(mp); 662 return; 663 } 664 } 665 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 666 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 667 668 if (pkt_len < IPV6_HDR_LEN + ehdrlen) { 669 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 670 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 671 freemsg(mp); 672 return; 673 } 674 if (mp->b_cont != NULL && 675 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 676 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 677 if (ip6h == NULL) { 678 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 679 ip_drop_input("ipIfStatsInDiscards", mp, ill); 680 freemsg(mp); 681 return; 682 } 683 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 684 } 685 686 /* 687 * Update ira_ip_hdr_length to skip the hop-by-hop header 688 * once we get to ip_fanout_v6 689 */ 690 ira->ira_ip_hdr_length += ehdrlen; 691 ira->ira_protocol = hbhhdr->ip6h_nxt; 692 693 optptr = (uint8_t *)&hbhhdr[1]; 694 switch (ip_process_options_v6(mp, ip6h, optptr, 695 ehdrlen - 2, IPPROTO_HOPOPTS, ira)) { 696 case -1: 697 /* 698 * Packet has been consumed and any 699 * needed ICMP messages sent. 700 */ 701 return; 702 case 0: 703 /* no action needed */ 704 break; 705 case 1: 706 /* 707 * Known router alert. Make use handle it as local 708 * by setting the nexthop to be the all-host multicast 709 * address, and skip multicast membership filter by 710 * marking as a router alert. 711 */ 712 ira->ira_flags |= IRAF_ROUTER_ALERT; 713 nexthop = ipv6_all_hosts_mcast; 714 break; 715 } 716 } 717 718 /* 719 * Here we check to see if we machine is setup as 720 * L3 loadbalancer and if the incoming packet is for a VIP 721 * 722 * Check the following: 723 * - there is at least a rule 724 * - protocol of the packet is supported 725 * 726 * We don't load balance IPv6 link-locals. 727 */ 728 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) && 729 !IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 730 in6_addr_t lb_dst; 731 int lb_ret; 732 733 /* For convenience, we just pull up the mblk. */ 734 if (mp->b_cont != NULL) { 735 if (pullupmsg(mp, -1) == 0) { 736 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 737 ip_drop_input("ipIfStatsInDiscards - pullupmsg", 738 mp, ill); 739 freemsg(mp); 740 return; 741 } 742 ip6h = (ip6_t *)mp->b_rptr; 743 } 744 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol, 745 (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst); 746 if (lb_ret == ILB_DROPPED) { 747 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 748 ip_drop_input("ILB_DROPPED", mp, ill); 749 freemsg(mp); 750 return; 751 } 752 if (lb_ret == ILB_BALANCED) { 753 /* Set the dst to that of the chosen server */ 754 nexthop = lb_dst; 755 DB_CKSUMFLAGS(mp) = 0; 756 } 757 } 758 759 if (ill->ill_flags & ILLF_ROUTER) 760 irr_flags = IRR_ALLOCATE; 761 else 762 irr_flags = IRR_NONE; 763 764 /* Can not use route cache with TX since the labels can differ */ 765 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 766 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 767 ire = ire_multicast(ill); 768 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 769 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 770 ipst); 771 } else { 772 /* Match destination and label */ 773 ire = ire_route_recursive_v6(&nexthop, 0, NULL, 774 ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR, 775 irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, 776 NULL); 777 } 778 /* Update the route cache so we do the ire_refrele */ 779 ASSERT(ire != NULL); 780 if (rtc->rtc_ire != NULL) 781 ire_refrele(rtc->rtc_ire); 782 rtc->rtc_ire = ire; 783 rtc->rtc_ip6addr = nexthop; 784 } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr)) { 785 /* Use the route cache */ 786 ASSERT(rtc->rtc_ire != NULL); 787 ire = rtc->rtc_ire; 788 } else { 789 /* Update the route cache */ 790 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 791 ire = ire_multicast(ill); 792 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 793 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 794 ipst); 795 } else { 796 ire = ire_route_recursive_dstonly_v6(&nexthop, 797 irr_flags, ira->ira_xmit_hint, ipst); 798 } 799 ASSERT(ire != NULL); 800 if (rtc->rtc_ire != NULL) 801 ire_refrele(rtc->rtc_ire); 802 rtc->rtc_ire = ire; 803 rtc->rtc_ip6addr = nexthop; 804 } 805 806 ire->ire_ib_pkt_count++; 807 808 /* 809 * Based on ire_type and ire_flags call one of: 810 * ire_recv_local_v6 - for IRE_LOCAL 811 * ire_recv_loopback_v6 - for IRE_LOOPBACK 812 * ire_recv_multirt_v6 - if RTF_MULTIRT 813 * ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE 814 * ire_recv_multicast_v6 - for IRE_MULTICAST 815 * ire_recv_noaccept_v6 - for ire_noaccept ones 816 * ire_recv_forward_v6 - for the rest. 817 */ 818 819 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 820 } 821 #undef rptr 822 823 /* 824 * ire_recvfn for IREs that need forwarding 825 */ 826 void 827 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 828 { 829 ip6_t *ip6h = (ip6_t *)iph_arg; 830 ill_t *ill = ira->ira_ill; 831 ip_stack_t *ipst = ill->ill_ipst; 832 iaflags_t iraflags = ira->ira_flags; 833 ill_t *dst_ill; 834 nce_t *nce; 835 uint32_t added_tx_len; 836 uint32_t mtu, iremtu; 837 838 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 839 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 840 ip_drop_input("l2 multicast not forwarded", mp, ill); 841 freemsg(mp); 842 return; 843 } 844 845 if (!(ill->ill_flags & ILLF_ROUTER)) { 846 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 847 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 848 freemsg(mp); 849 return; 850 } 851 852 /* 853 * Either ire_nce_capable or ire_dep_parent would be set for the IRE 854 * when it is found by ire_route_recursive, but that some other thread 855 * could have changed the routes with the effect of clearing 856 * ire_dep_parent. In that case we'd end up dropping the packet, or 857 * finding a new nce below. 858 * Get, allocate, or update the nce. 859 * We get a refhold on ire_nce_cache as a result of this to avoid races 860 * where ire_nce_cache is deleted. 861 * 862 * This ensures that we don't forward if the interface is down since 863 * ipif_down removes all the nces. 864 */ 865 mutex_enter(&ire->ire_lock); 866 nce = ire->ire_nce_cache; 867 if (nce == NULL) { 868 /* Not yet set up - try to set one up */ 869 mutex_exit(&ire->ire_lock); 870 (void) ire_revalidate_nce(ire); 871 mutex_enter(&ire->ire_lock); 872 nce = ire->ire_nce_cache; 873 if (nce == NULL) { 874 mutex_exit(&ire->ire_lock); 875 /* The ire_dep_parent chain went bad, or no memory */ 876 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 877 ip_drop_input("No ire_dep_parent", mp, ill); 878 freemsg(mp); 879 return; 880 } 881 } 882 nce_refhold(nce); 883 mutex_exit(&ire->ire_lock); 884 885 if (nce->nce_is_condemned) { 886 nce_t *nce1; 887 888 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE); 889 nce_refrele(nce); 890 if (nce1 == NULL) { 891 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 892 ip_drop_input("No nce", mp, ill); 893 freemsg(mp); 894 return; 895 } 896 nce = nce1; 897 } 898 dst_ill = nce->nce_ill; 899 900 /* 901 * Unless we are forwarding, drop the packet. 902 * Unlike IPv4 we don't allow source routed packets out the same 903 * interface when we are not a router. 904 * Note that ill_forward_set() will set the ILLF_ROUTER on 905 * all the group members when it gets an ipmp-ill or under-ill. 906 */ 907 if (!(dst_ill->ill_flags & ILLF_ROUTER)) { 908 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 909 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 910 freemsg(mp); 911 nce_refrele(nce); 912 return; 913 } 914 915 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) { 916 ire->ire_ib_pkt_count--; 917 /* 918 * Should only use IREs that are visible from the 919 * global zone for forwarding. 920 * For IPv6 any source route would have already been 921 * advanced in ip_fanout_v6 922 */ 923 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL, 924 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR, 925 (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE, 926 ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 927 ire->ire_ib_pkt_count++; 928 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 929 ire_refrele(ire); 930 nce_refrele(nce); 931 return; 932 } 933 /* 934 * ipIfStatsHCInForwDatagrams should only be increment if there 935 * will be an attempt to forward the packet, which is why we 936 * increment after the above condition has been checked. 937 */ 938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 939 940 /* Initiate Read side IPPF processing */ 941 if (IPP_ENABLED(IPP_FWD_IN, ipst)) { 942 /* ip_process translates an IS_UNDER_IPMP */ 943 mp = ip_process(IPP_FWD_IN, mp, ill, ill); 944 if (mp == NULL) { 945 /* ip_drop_packet and MIB done */ 946 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred " 947 "during IPPF processing\n")); 948 nce_refrele(nce); 949 return; 950 } 951 } 952 953 DTRACE_PROBE4(ip6__forwarding__start, 954 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp); 955 956 if (HOOKS6_INTERESTED_FORWARDING(ipst)) { 957 int error; 958 959 FW_HOOKS(ipst->ips_ip6_forwarding_event, 960 ipst->ips_ipv6firewall_forwarding, 961 ill, dst_ill, ip6h, mp, mp, 0, ipst, error); 962 963 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 964 965 if (mp == NULL) { 966 nce_refrele(nce); 967 return; 968 } 969 /* 970 * Even if the destination was changed by the filter we use the 971 * forwarding decision that was made based on the address 972 * in ip_input. 973 */ 974 975 /* Might have changed */ 976 ip6h = (ip6_t *)mp->b_rptr; 977 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 978 } 979 980 /* Packet is being forwarded. Turning off hwcksum flag. */ 981 DB_CKSUMFLAGS(mp) = 0; 982 983 /* 984 * Per RFC 3513 section 2.5.2, we must not forward packets with 985 * an unspecified source address. 986 * The loopback address check for both src and dst has already 987 * been checked in ip_input_v6 988 * In the future one can envision adding RPF checks using number 3. 989 */ 990 switch (ipst->ips_src_check) { 991 case 0: 992 break; 993 case 1: 994 case 2: 995 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || 996 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 998 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 999 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1000 nce_refrele(nce); 1001 freemsg(mp); 1002 return; 1003 } 1004 break; 1005 } 1006 1007 /* 1008 * Check to see if we're forwarding the packet to a 1009 * different link from which it came. If so, check the 1010 * source and destination addresses since routers must not 1011 * forward any packets with link-local source or 1012 * destination addresses to other links. Otherwise (if 1013 * we're forwarding onto the same link), conditionally send 1014 * a redirect message. 1015 */ 1016 if (!IS_ON_SAME_LAN(dst_ill, ill)) { 1017 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 1018 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 1019 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1020 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1021 freemsg(mp); 1022 nce_refrele(nce); 1023 return; 1024 } 1025 /* TBD add site-local check at site boundary? */ 1026 } else if (ipst->ips_ipv6_send_redirects) { 1027 ip_send_potential_redirect_v6(mp, ip6h, ire, ira); 1028 } 1029 1030 added_tx_len = 0; 1031 if (iraflags & IRAF_SYSTEM_LABELED) { 1032 mblk_t *mp1; 1033 uint32_t old_pkt_len = ira->ira_pktlen; 1034 1035 /* 1036 * Check if it can be forwarded and add/remove 1037 * CIPSO options as needed. 1038 */ 1039 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) { 1040 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1041 ip_drop_input("tsol_ip_forward", mp, ill); 1042 freemsg(mp); 1043 nce_refrele(nce); 1044 return; 1045 } 1046 /* 1047 * Size may have changed. Remember amount added in case 1048 * ip_fragment needs to send an ICMP too big. 1049 */ 1050 mp = mp1; 1051 ip6h = (ip6_t *)mp->b_rptr; 1052 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 1053 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 1054 if (ira->ira_pktlen > old_pkt_len) 1055 added_tx_len = ira->ira_pktlen - old_pkt_len; 1056 } 1057 1058 mtu = dst_ill->ill_mtu; 1059 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu) 1060 mtu = iremtu; 1061 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len); 1062 nce_refrele(nce); 1063 return; 1064 1065 } 1066 1067 /* 1068 * Used for sending out unicast and multicast packets that are 1069 * forwarded. 1070 */ 1071 void 1072 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira, 1073 uint32_t mtu, uint32_t added_tx_len) 1074 { 1075 ill_t *dst_ill = nce->nce_ill; 1076 uint32_t pkt_len; 1077 iaflags_t iraflags = ira->ira_flags; 1078 ip_stack_t *ipst = dst_ill->ill_ipst; 1079 1080 if (ip6h->ip6_hops-- <= 1) { 1081 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1082 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill); 1083 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE, 1084 ira); 1085 return; 1086 } 1087 1088 /* Initiate Write side IPPF processing before any fragmentation */ 1089 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1090 /* ip_process translates an IS_UNDER_IPMP */ 1091 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1092 if (mp == NULL) { 1093 /* ip_drop_packet and MIB done */ 1094 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \ 1095 " during IPPF processing\n")); 1096 return; 1097 } 1098 } 1099 1100 pkt_len = ira->ira_pktlen; 1101 1102 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1103 1104 if (pkt_len > mtu) { 1105 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1106 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1107 if (iraflags & IRAF_SYSTEM_LABELED) { 1108 /* 1109 * Remove any CIPSO option added by 1110 * tsol_ip_forward, and make sure we report 1111 * a path MTU so that there 1112 * is room to add such a CIPSO option for future 1113 * packets. 1114 */ 1115 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6); 1116 } 1117 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira); 1118 return; 1119 } 1120 1121 ASSERT(pkt_len == 1122 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN); 1123 1124 if (iraflags & IRAF_LOOPBACK_COPY) { 1125 /* 1126 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg 1127 * is don't care 1128 */ 1129 (void) ip_postfrag_loopcheck(mp, nce, 1130 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL), 1131 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1132 } else { 1133 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL, 1134 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1135 } 1136 } 1137 1138 /* 1139 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1140 * which is what ire_route_recursive returns when there is no matching ire. 1141 * Send ICMP unreachable unless blackhole. 1142 */ 1143 void 1144 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1145 { 1146 ip6_t *ip6h = (ip6_t *)iph_arg; 1147 ill_t *ill = ira->ira_ill; 1148 ip_stack_t *ipst = ill->ill_ipst; 1149 1150 /* Would we have forwarded this packet if we had a route? */ 1151 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1152 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1153 ip_drop_input("l2 multicast not forwarded", mp, ill); 1154 freemsg(mp); 1155 return; 1156 } 1157 1158 if (!(ill->ill_flags & ILLF_ROUTER)) { 1159 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1160 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1161 freemsg(mp); 1162 return; 1163 } 1164 /* 1165 * If we had a route this could have been forwarded. Count as such. 1166 * 1167 * ipIfStatsHCInForwDatagrams should only be increment if there 1168 * will be an attempt to forward the packet, which is why we 1169 * increment after the above condition has been checked. 1170 */ 1171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1172 1173 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1174 1175 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1176 ipst); 1177 1178 if (ire->ire_flags & RTF_BLACKHOLE) { 1179 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1180 freemsg(mp); 1181 } else { 1182 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1183 1184 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, 1185 ira); 1186 } 1187 } 1188 1189 /* 1190 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1191 * VRRP when in noaccept mode. 1192 * We silently drop packets except for Neighbor Solicitations and 1193 * Neighbor Advertisements. 1194 */ 1195 void 1196 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1197 ip_recv_attr_t *ira) 1198 { 1199 ip6_t *ip6h = (ip6_t *)iph_arg; 1200 ill_t *ill = ira->ira_ill; 1201 icmp6_t *icmp6; 1202 int ip_hdr_length; 1203 1204 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1205 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1206 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1207 freemsg(mp); 1208 return; 1209 } 1210 ip_hdr_length = ira->ira_ip_hdr_length; 1211 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 1212 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 1213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 1214 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 1215 freemsg(mp); 1216 return; 1217 } 1218 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 1219 if (ip6h == NULL) { 1220 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1221 freemsg(mp); 1222 return; 1223 } 1224 } 1225 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 1226 1227 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT && 1228 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) { 1229 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1230 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1231 freemsg(mp); 1232 return; 1233 } 1234 ire_recv_local_v6(ire, mp, ip6h, ira); 1235 } 1236 1237 /* 1238 * ire_recvfn for IRE_MULTICAST. 1239 */ 1240 void 1241 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1242 ip_recv_attr_t *ira) 1243 { 1244 ip6_t *ip6h = (ip6_t *)iph_arg; 1245 ill_t *ill = ira->ira_ill; 1246 1247 ASSERT(ire->ire_ill == ira->ira_ill); 1248 1249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1250 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1251 1252 /* Tag for higher-level protocols */ 1253 ira->ira_flags |= IRAF_MULTICAST; 1254 1255 /* 1256 * So that we don't end up with dups, only one ill an IPMP group is 1257 * nominated to receive multicast traffic. 1258 * If we have no cast_ill we are liberal and accept everything. 1259 */ 1260 if (IS_UNDER_IPMP(ill)) { 1261 ip_stack_t *ipst = ill->ill_ipst; 1262 1263 /* For an under ill_grp can change under lock */ 1264 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1265 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1266 ill->ill_grp->ig_cast_ill != NULL) { 1267 rw_exit(&ipst->ips_ill_g_lock); 1268 ip_drop_input("not on cast ill", mp, ill); 1269 freemsg(mp); 1270 return; 1271 } 1272 rw_exit(&ipst->ips_ill_g_lock); 1273 /* 1274 * We switch to the upper ill so that mrouter and hasmembers 1275 * can operate on upper here and in ip_input_multicast. 1276 */ 1277 ill = ipmp_ill_hold_ipmp_ill(ill); 1278 if (ill != NULL) { 1279 ASSERT(ill != ira->ira_ill); 1280 ASSERT(ire->ire_ill == ira->ira_ill); 1281 ira->ira_ill = ill; 1282 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1283 } else { 1284 ill = ira->ira_ill; 1285 } 1286 } 1287 1288 #ifdef notdef 1289 /* 1290 * Check if we are a multicast router - send ip_mforward a copy of 1291 * the packet. 1292 * Due to mroute_decap tunnels we consider forwarding packets even if 1293 * mrouted has not joined the allmulti group on this interface. 1294 */ 1295 if (ipst->ips_ip_g_mrouter) { 1296 int retval; 1297 1298 /* 1299 * Clear the indication that this may have hardware 1300 * checksum as we are not using it for forwarding. 1301 */ 1302 DB_CKSUMFLAGS(mp) = 0; 1303 1304 /* 1305 * ip_mforward helps us make these distinctions: If received 1306 * on tunnel and not IGMP, then drop. 1307 * If IGMP packet, then don't check membership 1308 * If received on a phyint and IGMP or PIM, then 1309 * don't check membership 1310 */ 1311 retval = ip_mforward_v6(mp, ira); 1312 /* ip_mforward updates mib variables if needed */ 1313 1314 switch (retval) { 1315 case 0: 1316 /* 1317 * pkt is okay and arrived on phyint. 1318 */ 1319 break; 1320 case -1: 1321 /* pkt is mal-formed, toss it */ 1322 freemsg(mp); 1323 goto done; 1324 case 1: 1325 /* 1326 * pkt is okay and arrived on a tunnel 1327 * 1328 * If we are running a multicast router 1329 * we need to see all mld packets, which 1330 * are marked with router alerts. 1331 */ 1332 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1333 goto forus; 1334 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1335 freemsg(mp); 1336 goto done; 1337 } 1338 } 1339 #endif /* notdef */ 1340 1341 /* 1342 * If this was a router alert we skip the group membership check. 1343 */ 1344 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1345 goto forus; 1346 1347 /* 1348 * Check if we have members on this ill. This is not necessary for 1349 * correctness because even if the NIC/GLD had a leaky filter, we 1350 * filter before passing to each conn_t. 1351 */ 1352 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) { 1353 /* 1354 * Nobody interested 1355 * 1356 * This might just be caused by the fact that 1357 * multiple IP Multicast addresses map to the same 1358 * link layer multicast - no need to increment counter! 1359 */ 1360 ip_drop_input("Multicast with no members", mp, ill); 1361 freemsg(mp); 1362 goto done; 1363 } 1364 forus: 1365 ip2dbg(("ire_recv_multicast_v6: multicast for us\n")); 1366 1367 /* 1368 * After reassembly and IPsec we will need to duplicate the 1369 * multicast packet for all matching zones on the ill. 1370 */ 1371 ira->ira_zoneid = ALL_ZONES; 1372 1373 /* Reassemble on the ill on which the packet arrived */ 1374 ip_input_local_v6(ire, mp, ip6h, ira); 1375 done: 1376 if (ill != ire->ire_ill) { 1377 ill_refrele(ill); 1378 ira->ira_ill = ire->ire_ill; 1379 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1380 } 1381 } 1382 1383 /* 1384 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1385 * Drop packets since we don't forward out multirt routes. 1386 */ 1387 /* ARGSUSED */ 1388 void 1389 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1390 { 1391 ill_t *ill = ira->ira_ill; 1392 1393 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1394 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1395 freemsg(mp); 1396 } 1397 1398 /* 1399 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1400 * has rewritten the packet to have a loopback destination address (We 1401 * filter out packet with a loopback destination from arriving over the wire). 1402 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1403 */ 1404 void 1405 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1406 { 1407 ip6_t *ip6h = (ip6_t *)iph_arg; 1408 ill_t *ill = ira->ira_ill; 1409 ill_t *ire_ill = ire->ire_ill; 1410 1411 ira->ira_zoneid = GLOBAL_ZONEID; 1412 1413 /* Switch to the lo0 ill for further processing */ 1414 if (ire_ill != ill) { 1415 /* 1416 * Update ira_ill to be the ILL on which the IP address 1417 * is hosted. 1418 * No need to hold the ill since we have a hold on the ire 1419 */ 1420 ASSERT(ira->ira_ill == ira->ira_rill); 1421 ira->ira_ill = ire_ill; 1422 1423 ip_input_local_v6(ire, mp, ip6h, ira); 1424 1425 /* Restore */ 1426 ASSERT(ira->ira_ill == ire_ill); 1427 ira->ira_ill = ill; 1428 return; 1429 1430 } 1431 ip_input_local_v6(ire, mp, ip6h, ira); 1432 } 1433 1434 /* 1435 * ire_recvfn for IRE_LOCAL. 1436 */ 1437 void 1438 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1439 { 1440 ip6_t *ip6h = (ip6_t *)iph_arg; 1441 ill_t *ill = ira->ira_ill; 1442 ill_t *ire_ill = ire->ire_ill; 1443 1444 /* Make a note for DAD that this address is in use */ 1445 ire->ire_last_used_time = LBOLT_FASTPATH; 1446 1447 /* Only target the IRE_LOCAL with the right zoneid. */ 1448 ira->ira_zoneid = ire->ire_zoneid; 1449 1450 /* 1451 * If the packet arrived on the wrong ill, we check that 1452 * this is ok. 1453 * If it is, then we ensure that we do the reassembly on 1454 * the ill on which the address is hosted. We keep ira_rill as 1455 * the one on which the packet arrived, so that IP_PKTINFO and 1456 * friends can report this. 1457 */ 1458 if (ire_ill != ill) { 1459 ire_t *new_ire; 1460 1461 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill); 1462 if (new_ire == NULL) { 1463 /* Drop packet */ 1464 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1465 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1466 freemsg(mp); 1467 return; 1468 } 1469 /* 1470 * Update ira_ill to be the ILL on which the IP address 1471 * is hosted. No need to hold the ill since we have a 1472 * hold on the ire. Note that we do the switch even if 1473 * new_ire == ire (for IPMP, ire would be the one corresponding 1474 * to the IPMP ill). 1475 */ 1476 ASSERT(ira->ira_ill == ira->ira_rill); 1477 ira->ira_ill = new_ire->ire_ill; 1478 1479 /* ira_ruifindex tracks the upper for ira_rill */ 1480 if (IS_UNDER_IPMP(ill)) 1481 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1482 1483 ip_input_local_v6(new_ire, mp, ip6h, ira); 1484 1485 /* Restore */ 1486 ASSERT(ira->ira_ill == new_ire->ire_ill); 1487 ira->ira_ill = ill; 1488 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1489 1490 if (new_ire != ire) 1491 ire_refrele(new_ire); 1492 return; 1493 } 1494 1495 ip_input_local_v6(ire, mp, ip6h, ira); 1496 } 1497 1498 /* 1499 * Common function for packets arriving for the host. Handles 1500 * checksum verification, reassembly checks, etc. 1501 */ 1502 static void 1503 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1504 { 1505 iaflags_t iraflags = ira->ira_flags; 1506 1507 /* 1508 * For multicast we need some extra work before 1509 * we call ip_fanout_v6(), since in the case of shared-IP zones 1510 * we need to pretend that a packet arrived for each zoneid. 1511 */ 1512 if (iraflags & IRAF_MULTICAST) { 1513 ip_input_multicast_v6(ire, mp, ip6h, ira); 1514 return; 1515 } 1516 ip_fanout_v6(mp, ip6h, ira); 1517 } 1518 1519 /* 1520 * Handle multiple zones which want to receive the same multicast packets 1521 * on this ill by delivering a packet to each of them. 1522 * 1523 * Note that for packets delivered to transports we could instead do this 1524 * as part of the fanout code, but since we need to handle icmp_inbound 1525 * it is simpler to have multicast work the same as IPv4 broadcast. 1526 * 1527 * The ip_fanout matching for multicast matches based on ilm independent of 1528 * zoneid since the zoneid restriction is applied when joining a multicast 1529 * group. 1530 */ 1531 /* ARGSUSED */ 1532 static void 1533 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1534 { 1535 ill_t *ill = ira->ira_ill; 1536 iaflags_t iraflags = ira->ira_flags; 1537 ip_stack_t *ipst = ill->ill_ipst; 1538 netstack_t *ns = ipst->ips_netstack; 1539 zoneid_t zoneid; 1540 mblk_t *mp1; 1541 ip6_t *ip6h1; 1542 uint_t ira_pktlen = ira->ira_pktlen; 1543 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1544 1545 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1546 ASSERT(!IS_UNDER_IPMP(ill)); 1547 1548 /* 1549 * If we don't have more than one shared-IP zone, or if 1550 * there are no members in anything but the global zone, 1551 * then just set the zoneid and proceed. 1552 */ 1553 if (ns->netstack_numzones == 1 || 1554 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 1555 GLOBAL_ZONEID)) { 1556 ira->ira_zoneid = GLOBAL_ZONEID; 1557 1558 /* If sender didn't want this zone to receive it, drop */ 1559 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1560 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1561 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1562 freemsg(mp); 1563 return; 1564 } 1565 ip_fanout_v6(mp, ip6h, ira); 1566 return; 1567 } 1568 1569 /* 1570 * Here we loop over all zoneids that have members in the group 1571 * and deliver a packet to ip_fanout for each zoneid. 1572 * 1573 * First find any members in the lowest numeric zoneid by looking for 1574 * first zoneid larger than -1 (ALL_ZONES). 1575 * We terminate the loop when we receive -1 (ALL_ZONES). 1576 */ 1577 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 1578 for (; zoneid != ALL_ZONES; 1579 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) { 1580 /* 1581 * Avoid an extra copymsg/freemsg by skipping global zone here 1582 * and doing that at the end. 1583 */ 1584 if (zoneid == GLOBAL_ZONEID) 1585 continue; 1586 1587 ira->ira_zoneid = zoneid; 1588 1589 /* If sender didn't want this zone to receive it, skip */ 1590 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1591 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1592 continue; 1593 1594 mp1 = copymsg(mp); 1595 if (mp1 == NULL) { 1596 /* Failed to deliver to one zone */ 1597 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1598 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1599 continue; 1600 } 1601 ip6h1 = (ip6_t *)mp1->b_rptr; 1602 ip_fanout_v6(mp1, ip6h1, ira); 1603 /* 1604 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1605 * so we restore them for a potential next iteration 1606 */ 1607 ira->ira_pktlen = ira_pktlen; 1608 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1609 } 1610 1611 /* Do the main ire */ 1612 ira->ira_zoneid = GLOBAL_ZONEID; 1613 /* If sender didn't want this zone to receive it, drop */ 1614 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1615 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1616 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1617 freemsg(mp); 1618 } else { 1619 ip_fanout_v6(mp, ip6h, ira); 1620 } 1621 } 1622 1623 1624 /* 1625 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions 1626 * is in use. Updates ira_zoneid and ira_flags as a result. 1627 */ 1628 static void 1629 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length, 1630 ip_recv_attr_t *ira) 1631 { 1632 uint16_t *up; 1633 uint16_t lport; 1634 zoneid_t zoneid; 1635 1636 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1637 1638 /* 1639 * If the packet is unlabeled we might allow read-down 1640 * for MAC_EXEMPT. Below we clear this if it is a multi-level 1641 * port (MLP). 1642 * Note that ira_tsl can be NULL here. 1643 */ 1644 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 1645 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 1646 1647 if (ira->ira_zoneid != ALL_ZONES) 1648 return; 1649 1650 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 1651 1652 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 1653 switch (protocol) { 1654 case IPPROTO_TCP: 1655 case IPPROTO_SCTP: 1656 case IPPROTO_UDP: 1657 /* Caller ensures this */ 1658 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr); 1659 1660 /* 1661 * Only these transports support MLP. 1662 * We know their destination port numbers is in 1663 * the same place in the header. 1664 */ 1665 lport = up[1]; 1666 1667 /* 1668 * No need to handle exclusive-stack zones 1669 * since ALL_ZONES only applies to the shared IP instance. 1670 */ 1671 zoneid = tsol_mlp_findzone(protocol, lport); 1672 /* 1673 * If no shared MLP is found, tsol_mlp_findzone returns 1674 * ALL_ZONES. In that case, we assume it's SLP, and 1675 * search for the zone based on the packet label. 1676 * 1677 * If there is such a zone, we prefer to find a 1678 * connection in it. Otherwise, we look for a 1679 * MAC-exempt connection in any zone whose label 1680 * dominates the default label on the packet. 1681 */ 1682 if (zoneid == ALL_ZONES) 1683 zoneid = tsol_attr_to_zoneid(ira); 1684 else 1685 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 1686 break; 1687 default: 1688 /* Handle shared address for other protocols */ 1689 zoneid = tsol_attr_to_zoneid(ira); 1690 break; 1691 } 1692 ira->ira_zoneid = zoneid; 1693 } 1694 1695 /* 1696 * Increment checksum failure statistics 1697 */ 1698 static void 1699 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 1700 { 1701 ip_stack_t *ipst = ill->ill_ipst; 1702 1703 switch (protocol) { 1704 case IPPROTO_TCP: 1705 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 1706 1707 if (hck_flags & HCK_FULLCKSUM) 1708 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err); 1709 else if (hck_flags & HCK_PARTIALCKSUM) 1710 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err); 1711 else 1712 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 1713 break; 1714 case IPPROTO_UDP: 1715 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1716 if (hck_flags & HCK_FULLCKSUM) 1717 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err); 1718 else if (hck_flags & HCK_PARTIALCKSUM) 1719 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err); 1720 else 1721 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 1722 break; 1723 case IPPROTO_ICMPV6: 1724 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 1725 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1726 break; 1727 default: 1728 ASSERT(0); 1729 break; 1730 } 1731 } 1732 1733 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */ 1734 uint32_t 1735 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira) 1736 { 1737 uint_t ulp_len; 1738 uint32_t cksum; 1739 uint8_t protocol = ira->ira_protocol; 1740 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1741 1742 #define iphs ((uint16_t *)ip6h) 1743 1744 switch (protocol) { 1745 case IPPROTO_TCP: 1746 ulp_len = ira->ira_pktlen - ip_hdr_length; 1747 1748 /* Protocol and length */ 1749 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 1750 /* IP addresses */ 1751 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1752 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1753 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1754 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1755 break; 1756 1757 case IPPROTO_UDP: { 1758 udpha_t *udpha; 1759 1760 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1761 1762 /* Protocol and length */ 1763 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 1764 /* IP addresses */ 1765 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1766 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1767 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1768 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1769 break; 1770 } 1771 case IPPROTO_ICMPV6: 1772 ulp_len = ira->ira_pktlen - ip_hdr_length; 1773 1774 /* Protocol and length */ 1775 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP; 1776 /* IP addresses */ 1777 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1778 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1779 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1780 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1781 break; 1782 default: 1783 cksum = 0; 1784 break; 1785 } 1786 #undef iphs 1787 return (cksum); 1788 } 1789 1790 1791 /* 1792 * Software verification of the ULP checksums. 1793 * Returns B_TRUE if ok. 1794 * Increments statistics of failed. 1795 */ 1796 static boolean_t 1797 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1798 { 1799 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1800 uint32_t cksum; 1801 uint8_t protocol = ira->ira_protocol; 1802 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1803 1804 IP6_STAT(ipst, ip6_in_sw_cksum); 1805 1806 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 1807 protocol == IPPROTO_ICMPV6); 1808 1809 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1810 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1811 if (cksum == 0) 1812 return (B_TRUE); 1813 1814 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill); 1815 return (B_FALSE); 1816 } 1817 1818 /* 1819 * Verify the ULP checksums. 1820 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 1821 * algorithm. 1822 * Increments statistics if failed. 1823 */ 1824 static boolean_t 1825 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, 1826 ip_recv_attr_t *ira) 1827 { 1828 ill_t *ill = ira->ira_rill; 1829 uint16_t hck_flags; 1830 uint32_t cksum; 1831 mblk_t *mp1; 1832 uint_t len; 1833 uint8_t protocol = ira->ira_protocol; 1834 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1835 1836 1837 switch (protocol) { 1838 case IPPROTO_TCP: 1839 case IPPROTO_ICMPV6: 1840 break; 1841 1842 case IPPROTO_UDP: { 1843 udpha_t *udpha; 1844 1845 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1846 /* 1847 * Before going through the regular checksum 1848 * calculation, make sure the received checksum 1849 * is non-zero. RFC 2460 says, a 0x0000 checksum 1850 * in a UDP packet (within IPv6 packet) is invalid 1851 * and should be replaced by 0xffff. This makes 1852 * sense as regular checksum calculation will 1853 * pass for both the cases i.e. 0x0000 and 0xffff. 1854 * Removing one of the case makes error detection 1855 * stronger. 1856 */ 1857 if (udpha->uha_checksum == 0) { 1858 /* 0x0000 checksum is invalid */ 1859 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1860 return (B_FALSE); 1861 } 1862 break; 1863 } 1864 case IPPROTO_SCTP: { 1865 sctp_hdr_t *sctph; 1866 uint32_t pktsum; 1867 1868 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length); 1869 #ifdef DEBUG 1870 if (skip_sctp_cksum) 1871 return (B_TRUE); 1872 #endif 1873 pktsum = sctph->sh_chksum; 1874 sctph->sh_chksum = 0; 1875 cksum = sctp_cksum(mp, ip_hdr_length); 1876 sctph->sh_chksum = pktsum; 1877 if (cksum == pktsum) 1878 return (B_TRUE); 1879 1880 /* 1881 * Defer until later whether a bad checksum is ok 1882 * in order to allow RAW sockets to use Adler checksum 1883 * with SCTP. 1884 */ 1885 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 1886 return (B_TRUE); 1887 } 1888 1889 default: 1890 /* No ULP checksum to verify. */ 1891 return (B_TRUE); 1892 } 1893 1894 /* 1895 * Revert to software checksum calculation if the interface 1896 * isn't capable of checksum offload. 1897 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 1898 * Note: IRAF_NO_HW_CKSUM is not currently used. 1899 */ 1900 ASSERT(!IS_IPMP(ill)); 1901 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1902 !dohwcksum) { 1903 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1904 } 1905 1906 /* 1907 * We apply this for all ULP protocols. Does the HW know to 1908 * not set the flags for SCTP and other protocols. 1909 */ 1910 1911 hck_flags = DB_CKSUMFLAGS(mp); 1912 1913 if (hck_flags & HCK_FULLCKSUM) { 1914 /* 1915 * Full checksum has been computed by the hardware 1916 * and has been attached. If the driver wants us to 1917 * verify the correctness of the attached value, in 1918 * order to protect against faulty hardware, compare 1919 * it against -0 (0xFFFF) to see if it's valid. 1920 */ 1921 if (hck_flags & HCK_FULLCKSUM_OK) 1922 return (B_TRUE); 1923 1924 cksum = DB_CKSUM16(mp); 1925 if (cksum == 0xFFFF) 1926 return (B_TRUE); 1927 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1928 return (B_FALSE); 1929 } 1930 1931 mp1 = mp->b_cont; 1932 if ((hck_flags & HCK_PARTIALCKSUM) && 1933 (mp1 == NULL || mp1->b_cont == NULL) && 1934 ip_hdr_length >= DB_CKSUMSTART(mp) && 1935 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 1936 uint32_t adj; 1937 uchar_t *cksum_start; 1938 1939 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1940 1941 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp)); 1942 1943 /* 1944 * Partial checksum has been calculated by hardware 1945 * and attached to the packet; in addition, any 1946 * prepended extraneous data is even byte aligned, 1947 * and there are at most two mblks associated with 1948 * the packet. If any such data exists, we adjust 1949 * the checksum; also take care any postpended data. 1950 */ 1951 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 1952 /* 1953 * One's complement subtract extraneous checksum 1954 */ 1955 cksum += DB_CKSUM16(mp); 1956 if (adj >= cksum) 1957 cksum = ~(adj - cksum) & 0xFFFF; 1958 else 1959 cksum -= adj; 1960 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1961 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1962 if (!(~cksum & 0xFFFF)) 1963 return (B_TRUE); 1964 1965 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1966 return (B_FALSE); 1967 } 1968 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1969 } 1970 1971 1972 /* 1973 * Handle fanout of received packets. 1974 * Unicast packets that are looped back (from ire_send_local_v6) and packets 1975 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 1976 * 1977 * IPQoS Notes 1978 * Before sending it to the client, invoke IPPF processing. Policy processing 1979 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 1980 */ 1981 void 1982 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1983 { 1984 ill_t *ill = ira->ira_ill; 1985 iaflags_t iraflags = ira->ira_flags; 1986 ip_stack_t *ipst = ill->ill_ipst; 1987 uint8_t protocol; 1988 conn_t *connp; 1989 #define rptr ((uchar_t *)ip6h) 1990 uint_t ip_hdr_length; 1991 uint_t min_ulp_header_length; 1992 int offset; 1993 ssize_t len; 1994 netstack_t *ns = ipst->ips_netstack; 1995 ipsec_stack_t *ipss = ns->netstack_ipsec; 1996 ill_t *rill = ira->ira_rill; 1997 1998 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 1999 2000 /* 2001 * We repeat this as we parse over destination options header and 2002 * fragment headers (earlier we've handled any hop-by-hop options 2003 * header.) 2004 * We update ira_protocol and ira_ip_hdr_length as we skip past 2005 * the intermediate headers; they already point past any 2006 * hop-by-hop header. 2007 */ 2008 repeat: 2009 protocol = ira->ira_protocol; 2010 ip_hdr_length = ira->ira_ip_hdr_length; 2011 2012 /* 2013 * Time for IPP once we've done reassembly and IPsec. 2014 * We skip this for loopback packets since we don't do IPQoS 2015 * on loopback. 2016 */ 2017 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2018 !(iraflags & IRAF_LOOPBACK) && 2019 (protocol != IPPROTO_ESP || protocol != IPPROTO_AH || 2020 protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING || 2021 protocol != IPPROTO_FRAGMENT)) { 2022 /* 2023 * Use the interface on which the packet arrived - not where 2024 * the IP address is hosted. 2025 */ 2026 /* ip_process translates an IS_UNDER_IPMP */ 2027 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2028 if (mp == NULL) { 2029 /* ip_drop_packet and MIB done */ 2030 return; 2031 } 2032 } 2033 2034 /* Determine the minimum required size of the upper-layer header */ 2035 /* Need to do this for at least the set of ULPs that TX handles. */ 2036 switch (protocol) { 2037 case IPPROTO_TCP: 2038 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2039 break; 2040 case IPPROTO_SCTP: 2041 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2042 break; 2043 case IPPROTO_UDP: 2044 min_ulp_header_length = UDPH_SIZE; 2045 break; 2046 case IPPROTO_ICMP: 2047 case IPPROTO_ICMPV6: 2048 min_ulp_header_length = ICMPH_SIZE; 2049 break; 2050 case IPPROTO_FRAGMENT: 2051 case IPPROTO_DSTOPTS: 2052 case IPPROTO_ROUTING: 2053 min_ulp_header_length = MIN_EHDR_LEN; 2054 break; 2055 default: 2056 min_ulp_header_length = 0; 2057 break; 2058 } 2059 /* Make sure we have the min ULP header length */ 2060 len = mp->b_wptr - rptr; 2061 if (len < ip_hdr_length + min_ulp_header_length) { 2062 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) 2063 goto pkt_too_short; 2064 2065 IP6_STAT(ipst, ip6_recv_pullup); 2066 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2067 ira); 2068 if (ip6h == NULL) 2069 goto discard; 2070 len = mp->b_wptr - rptr; 2071 } 2072 2073 /* 2074 * If trusted extensions then determine the zoneid and TX specific 2075 * ira_flags. 2076 */ 2077 if (iraflags & IRAF_SYSTEM_LABELED) { 2078 /* This can update ira->ira_flags and ira->ira_zoneid */ 2079 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira); 2080 iraflags = ira->ira_flags; 2081 } 2082 2083 2084 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2085 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2086 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) { 2087 /* Bad checksum. Stats are already incremented */ 2088 ip_drop_input("Bad ULP checksum", mp, ill); 2089 freemsg(mp); 2090 return; 2091 } 2092 /* IRAF_SCTP_CSUM_ERR could have been set */ 2093 iraflags = ira->ira_flags; 2094 } 2095 switch (protocol) { 2096 case IPPROTO_TCP: 2097 /* For TCP, discard multicast packets. */ 2098 if (iraflags & IRAF_MULTIBROADCAST) 2099 goto discard; 2100 2101 /* First mblk contains IP+TCP headers per above check */ 2102 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2103 2104 /* TCP options present? */ 2105 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4; 2106 if (offset != 5) { 2107 if (offset < 5) 2108 goto discard; 2109 2110 /* 2111 * There must be TCP options. 2112 * Make sure we can grab them. 2113 */ 2114 offset <<= 2; 2115 offset += ip_hdr_length; 2116 if (len < offset) { 2117 if (ira->ira_pktlen < offset) 2118 goto pkt_too_short; 2119 2120 IP6_STAT(ipst, ip6_recv_pullup); 2121 ip6h = ip_pullup(mp, offset, ira); 2122 if (ip6h == NULL) 2123 goto discard; 2124 len = mp->b_wptr - rptr; 2125 } 2126 } 2127 2128 /* 2129 * Pass up a squeue hint to tcp. 2130 * If ira_sqp is already set (this is loopback) we leave it 2131 * alone. 2132 */ 2133 if (ira->ira_sqp == NULL) { 2134 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2135 } 2136 2137 /* Look for AF_INET or AF_INET6 that matches */ 2138 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length, 2139 ira, ipst); 2140 if (connp == NULL) { 2141 /* Send the TH_RST */ 2142 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2143 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2144 return; 2145 } 2146 if (connp->conn_incoming_ifindex != 0 && 2147 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2148 CONN_DEC_REF(connp); 2149 2150 /* Send the TH_RST */ 2151 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2152 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2153 return; 2154 } 2155 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2156 (iraflags & IRAF_IPSEC_SECURE)) { 2157 mp = ipsec_check_inbound_policy(mp, connp, 2158 NULL, ip6h, ira); 2159 if (mp == NULL) { 2160 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2161 /* Note that mp is NULL */ 2162 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2163 CONN_DEC_REF(connp); 2164 return; 2165 } 2166 } 2167 /* Found a client; up it goes */ 2168 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2169 ira->ira_ill = ira->ira_rill = NULL; 2170 if (!IPCL_IS_TCP(connp)) { 2171 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2172 (connp->conn_recv)(connp, mp, NULL, ira); 2173 CONN_DEC_REF(connp); 2174 ira->ira_ill = ill; 2175 ira->ira_rill = rill; 2176 return; 2177 } 2178 2179 /* 2180 * We do different processing whether called from 2181 * ip_accept_tcp and we match the target, don't match 2182 * the target, and when we are called by ip_input. 2183 */ 2184 if (iraflags & IRAF_TARGET_SQP) { 2185 if (ira->ira_target_sqp == connp->conn_sqp) { 2186 mblk_t *attrmp; 2187 2188 attrmp = ip_recv_attr_to_mblk(ira); 2189 if (attrmp == NULL) { 2190 BUMP_MIB(ill->ill_ip_mib, 2191 ipIfStatsInDiscards); 2192 ip_drop_input("ipIfStatsInDiscards", 2193 mp, ill); 2194 freemsg(mp); 2195 CONN_DEC_REF(connp); 2196 } else { 2197 SET_SQUEUE(attrmp, connp->conn_recv, 2198 connp); 2199 attrmp->b_cont = mp; 2200 ASSERT(ira->ira_target_sqp_mp == NULL); 2201 ira->ira_target_sqp_mp = attrmp; 2202 /* 2203 * Conn ref release when drained from 2204 * the squeue. 2205 */ 2206 } 2207 } else { 2208 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2209 connp->conn_recv, connp, ira, SQ_FILL, 2210 SQTAG_IP6_TCP_INPUT); 2211 } 2212 } else { 2213 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2214 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 2215 } 2216 ira->ira_ill = ill; 2217 ira->ira_rill = rill; 2218 return; 2219 2220 case IPPROTO_SCTP: { 2221 sctp_hdr_t *sctph; 2222 uint32_t ports; /* Source and destination ports */ 2223 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2224 2225 /* For SCTP, discard multicast packets. */ 2226 if (iraflags & IRAF_MULTIBROADCAST) 2227 goto discard; 2228 2229 /* 2230 * Since there is no SCTP h/w cksum support yet, just 2231 * clear the flag. 2232 */ 2233 DB_CKSUMFLAGS(mp) = 0; 2234 2235 /* Length ensured above */ 2236 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2237 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2238 2239 /* get the ports */ 2240 ports = *(uint32_t *)&sctph->sh_sport; 2241 2242 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2243 /* 2244 * No potential sctp checksum errors go to the Sun 2245 * sctp stack however they might be Adler-32 summed 2246 * packets a userland stack bound to a raw IP socket 2247 * could reasonably use. Note though that Adler-32 is 2248 * a long deprecated algorithm and customer sctp 2249 * networks should eventually migrate to CRC-32 at 2250 * which time this facility should be removed. 2251 */ 2252 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2253 return; 2254 } 2255 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports, 2256 ira, mp, sctps); 2257 if (connp == NULL) { 2258 /* Check for raw socket or OOTB handling */ 2259 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2260 return; 2261 } 2262 if (connp->conn_incoming_ifindex != 0 && 2263 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2264 CONN_DEC_REF(connp); 2265 2266 /* Check for raw socket or OOTB handling */ 2267 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2268 return; 2269 } 2270 2271 /* Found a client; up it goes */ 2272 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2273 sctp_input(connp, NULL, ip6h, mp, ira); 2274 /* sctp_input does a rele of the sctp_t */ 2275 return; 2276 } 2277 2278 case IPPROTO_UDP: 2279 /* First mblk contains IP+UDP headers as checked above */ 2280 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2281 2282 if (iraflags & IRAF_MULTIBROADCAST) { 2283 uint16_t *up; /* Pointer to ports in ULP header */ 2284 2285 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 2286 2287 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira); 2288 return; 2289 } 2290 2291 /* Look for AF_INET or AF_INET6 that matches */ 2292 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length, 2293 ira, ipst); 2294 if (connp == NULL) { 2295 no_udp_match: 2296 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP]. 2297 connf_head != NULL) { 2298 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2299 ip_fanout_proto_v6(mp, ip6h, ira); 2300 } else { 2301 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2302 ICMP6_DST_UNREACH_NOPORT, ira); 2303 } 2304 return; 2305 2306 } 2307 if (connp->conn_incoming_ifindex != 0 && 2308 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2309 CONN_DEC_REF(connp); 2310 goto no_udp_match; 2311 } 2312 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2313 !canputnext(connp->conn_rq)) { 2314 CONN_DEC_REF(connp); 2315 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2316 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2317 freemsg(mp); 2318 return; 2319 } 2320 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2321 (iraflags & IRAF_IPSEC_SECURE)) { 2322 mp = ipsec_check_inbound_policy(mp, connp, 2323 NULL, ip6h, ira); 2324 if (mp == NULL) { 2325 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2326 /* Note that mp is NULL */ 2327 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2328 CONN_DEC_REF(connp); 2329 return; 2330 } 2331 } 2332 2333 /* Found a client; up it goes */ 2334 IP6_STAT(ipst, ip6_udp_fannorm); 2335 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2336 ira->ira_ill = ira->ira_rill = NULL; 2337 (connp->conn_recv)(connp, mp, NULL, ira); 2338 CONN_DEC_REF(connp); 2339 ira->ira_ill = ill; 2340 ira->ira_rill = rill; 2341 return; 2342 default: 2343 break; 2344 } 2345 2346 /* 2347 * Clear hardware checksumming flag as it is currently only 2348 * used by TCP and UDP. 2349 */ 2350 DB_CKSUMFLAGS(mp) = 0; 2351 2352 switch (protocol) { 2353 case IPPROTO_ICMPV6: 2354 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 2355 2356 /* Check variable for testing applications */ 2357 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 2358 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill); 2359 freemsg(mp); 2360 return; 2361 } 2362 /* 2363 * We need to accomodate icmp messages coming in clear 2364 * until we get everything secure from the wire. If 2365 * icmp_accept_clear_messages is zero we check with 2366 * the global policy and act accordingly. If it is 2367 * non-zero, we accept the message without any checks. 2368 * But *this does not mean* that this will be delivered 2369 * to RAW socket clients. By accepting we might send 2370 * replies back, change our MTU value etc., 2371 * but delivery to the ULP/clients depends on their 2372 * policy dispositions. 2373 */ 2374 if (ipst->ips_icmp_accept_clear_messages == 0) { 2375 mp = ipsec_check_global_policy(mp, NULL, 2376 NULL, ip6h, ira, ns); 2377 if (mp == NULL) 2378 return; 2379 } 2380 2381 /* 2382 * On a labeled system, we have to check whether the zone 2383 * itself is permitted to receive raw traffic. 2384 */ 2385 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2386 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2387 BUMP_MIB(ill->ill_icmp6_mib, 2388 ipv6IfIcmpInErrors); 2389 ip_drop_input("tsol_can_accept_raw", mp, ill); 2390 freemsg(mp); 2391 return; 2392 } 2393 } 2394 2395 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2396 mp = icmp_inbound_v6(mp, ira); 2397 if (mp == NULL) { 2398 /* No need to pass to RAW sockets */ 2399 return; 2400 } 2401 break; 2402 2403 case IPPROTO_DSTOPTS: { 2404 ip6_dest_t *desthdr; 2405 uint_t ehdrlen; 2406 uint8_t *optptr; 2407 2408 /* We already check for MIN_EHDR_LEN above */ 2409 2410 /* Check if AH is present and needs to be processed. */ 2411 mp = ipsec_early_ah_v6(mp, ira); 2412 if (mp == NULL) 2413 return; 2414 2415 /* 2416 * Reinitialize pointers, as ipsec_early_ah_v6() does 2417 * complete pullups. We don't have to do more pullups 2418 * as a result. 2419 */ 2420 ip6h = (ip6_t *)mp->b_rptr; 2421 2422 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2423 goto pkt_too_short; 2424 2425 if (mp->b_cont != NULL && 2426 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2427 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2428 if (ip6h == NULL) 2429 goto discard; 2430 } 2431 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2432 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2433 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2434 goto pkt_too_short; 2435 if (mp->b_cont != NULL && 2436 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2437 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2438 if (ip6h == NULL) 2439 goto discard; 2440 2441 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2442 } 2443 optptr = (uint8_t *)&desthdr[1]; 2444 2445 /* 2446 * Update ira_ip_hdr_length to skip the destination header 2447 * when we repeat. 2448 */ 2449 ira->ira_ip_hdr_length += ehdrlen; 2450 2451 ira->ira_protocol = desthdr->ip6d_nxt; 2452 2453 /* 2454 * Note: XXX This code does not seem to make 2455 * distinction between Destination Options Header 2456 * being before/after Routing Header which can 2457 * happen if we are at the end of source route. 2458 * This may become significant in future. 2459 * (No real significant Destination Options are 2460 * defined/implemented yet ). 2461 */ 2462 switch (ip_process_options_v6(mp, ip6h, optptr, 2463 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) { 2464 case -1: 2465 /* 2466 * Packet has been consumed and any needed 2467 * ICMP errors sent. 2468 */ 2469 return; 2470 case 0: 2471 /* No action needed continue */ 2472 break; 2473 case 1: 2474 /* 2475 * Unnexpected return value 2476 * (Router alert is a Hop-by-Hop option) 2477 */ 2478 #ifdef DEBUG 2479 panic("ip_fanout_v6: router " 2480 "alert hbh opt indication in dest opt"); 2481 /*NOTREACHED*/ 2482 #else 2483 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2484 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2485 freemsg(mp); 2486 return; 2487 #endif 2488 } 2489 goto repeat; 2490 } 2491 case IPPROTO_FRAGMENT: { 2492 ip6_frag_t *fraghdr; 2493 2494 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t)) 2495 goto pkt_too_short; 2496 2497 if (mp->b_cont != NULL && 2498 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) { 2499 ip6h = ip_pullup(mp, 2500 ip_hdr_length + sizeof (ip6_frag_t), ira); 2501 if (ip6h == NULL) 2502 goto discard; 2503 } 2504 2505 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length); 2506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 2507 2508 /* 2509 * Invoke the CGTP (multirouting) filtering module to 2510 * process the incoming packet. Packets identified as 2511 * duplicates must be discarded. Filtering is active 2512 * only if the ip_cgtp_filter ndd variable is 2513 * non-zero. 2514 */ 2515 if (ipst->ips_ip_cgtp_filter && 2516 ipst->ips_ip_cgtp_filter_ops != NULL) { 2517 int cgtp_flt_pkt; 2518 netstackid_t stackid; 2519 2520 stackid = ipst->ips_netstack->netstack_stackid; 2521 2522 /* 2523 * CGTP and IPMP are mutually exclusive so 2524 * phyint_ifindex is fine here. 2525 */ 2526 cgtp_flt_pkt = 2527 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 2528 stackid, ill->ill_phyint->phyint_ifindex, 2529 ip6h, fraghdr); 2530 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 2531 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 2532 freemsg(mp); 2533 return; 2534 } 2535 } 2536 2537 /* 2538 * Update ip_hdr_length to skip the frag header 2539 * ip_input_fragment_v6 will determine the extension header 2540 * prior to the fragment header and update its nexthdr value, 2541 * and also set ira_protocol to the nexthdr that follows the 2542 * completed fragment. 2543 */ 2544 ip_hdr_length += sizeof (ip6_frag_t); 2545 2546 /* 2547 * Make sure we have ira_l2src before we loose the original 2548 * mblk 2549 */ 2550 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 2551 ip_setl2src(mp, ira, ira->ira_rill); 2552 2553 mp = ip_input_fragment_v6(mp, ip6h, fraghdr, 2554 ira->ira_pktlen - ip_hdr_length, ira); 2555 if (mp == NULL) { 2556 /* Reassembly is still pending */ 2557 return; 2558 } 2559 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 2560 2561 /* 2562 * The mblk chain has the frag header removed and 2563 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the 2564 * IP header has been updated to refleact the result. 2565 */ 2566 ip6h = (ip6_t *)mp->b_rptr; 2567 ip_hdr_length = ira->ira_ip_hdr_length; 2568 goto repeat; 2569 } 2570 case IPPROTO_HOPOPTS: 2571 /* 2572 * Illegal header sequence. 2573 * (Hop-by-hop headers are processed above 2574 * and required to immediately follow IPv6 header) 2575 */ 2576 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2577 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2578 return; 2579 2580 case IPPROTO_ROUTING: { 2581 uint_t ehdrlen; 2582 ip6_rthdr_t *rthdr; 2583 2584 /* Check if AH is present and needs to be processed. */ 2585 mp = ipsec_early_ah_v6(mp, ira); 2586 if (mp == NULL) 2587 return; 2588 2589 /* 2590 * Reinitialize pointers, as ipsec_early_ah_v6() does 2591 * complete pullups. We don't have to do more pullups 2592 * as a result. 2593 */ 2594 ip6h = (ip6_t *)mp->b_rptr; 2595 2596 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2597 goto pkt_too_short; 2598 2599 if (mp->b_cont != NULL && 2600 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2601 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2602 if (ip6h == NULL) 2603 goto discard; 2604 } 2605 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2606 protocol = ira->ira_protocol = rthdr->ip6r_nxt; 2607 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2608 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2609 goto pkt_too_short; 2610 if (mp->b_cont != NULL && 2611 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2612 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2613 if (ip6h == NULL) 2614 goto discard; 2615 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2616 } 2617 if (rthdr->ip6r_segleft != 0) { 2618 /* Not end of source route */ 2619 if (ira->ira_flags & 2620 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 2621 BUMP_MIB(ill->ill_ip_mib, 2622 ipIfStatsForwProhibits); 2623 ip_drop_input("ipIfStatsInForwProhibits", 2624 mp, ill); 2625 freemsg(mp); 2626 return; 2627 } 2628 ip_process_rthdr(mp, ip6h, rthdr, ira); 2629 return; 2630 } 2631 ira->ira_ip_hdr_length += ehdrlen; 2632 goto repeat; 2633 } 2634 2635 case IPPROTO_AH: 2636 case IPPROTO_ESP: { 2637 /* 2638 * Fast path for AH/ESP. 2639 */ 2640 netstack_t *ns = ipst->ips_netstack; 2641 ipsec_stack_t *ipss = ns->netstack_ipsec; 2642 2643 IP_STAT(ipst, ipsec_proto_ahesp); 2644 2645 if (!ipsec_loaded(ipss)) { 2646 ip_proto_not_sup(mp, ira); 2647 return; 2648 } 2649 2650 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2651 /* select inbound SA and have IPsec process the pkt */ 2652 if (protocol == IPPROTO_ESP) { 2653 esph_t *esph; 2654 2655 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2656 if (mp == NULL) 2657 return; 2658 2659 ASSERT(esph != NULL); 2660 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2661 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2662 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2663 2664 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2665 ira); 2666 } else { 2667 ah_t *ah; 2668 2669 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2670 if (mp == NULL) 2671 return; 2672 2673 ASSERT(ah != NULL); 2674 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2675 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2676 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2677 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2678 ira); 2679 } 2680 2681 if (mp == NULL) { 2682 /* 2683 * Either it failed or is pending. In the former case 2684 * ipIfStatsInDiscards was increased. 2685 */ 2686 return; 2687 } 2688 /* we're done with IPsec processing, send it up */ 2689 ip_input_post_ipsec(mp, ira); 2690 return; 2691 } 2692 case IPPROTO_NONE: 2693 /* All processing is done. Count as "delivered". */ 2694 freemsg(mp); 2695 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2696 return; 2697 2698 case IPPROTO_ENCAP: 2699 case IPPROTO_IPV6: 2700 /* iptun will verify trusted label */ 2701 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length, 2702 ira, ipst); 2703 if (connp != NULL) { 2704 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2705 ira->ira_ill = ira->ira_rill = NULL; 2706 connp->conn_recv(connp, mp, NULL, ira); 2707 CONN_DEC_REF(connp); 2708 ira->ira_ill = ill; 2709 ira->ira_rill = rill; 2710 return; 2711 } 2712 /* FALLTHRU */ 2713 default: 2714 /* 2715 * On a labeled system, we have to check whether the zone 2716 * itself is permitted to receive raw traffic. 2717 */ 2718 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2719 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2720 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2721 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2722 freemsg(mp); 2723 return; 2724 } 2725 } 2726 break; 2727 } 2728 2729 /* 2730 * The above input functions may have returned the pulled up message. 2731 * So ip6h need to be reinitialized. 2732 */ 2733 ip6h = (ip6_t *)mp->b_rptr; 2734 ira->ira_protocol = protocol; 2735 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) { 2736 /* No user-level listener for these packets packets */ 2737 ip_proto_not_sup(mp, ira); 2738 return; 2739 } 2740 2741 /* 2742 * Handle fanout to raw sockets. There 2743 * can be more than one stream bound to a particular 2744 * protocol. When this is the case, each one gets a copy 2745 * of any incoming packets. 2746 */ 2747 ASSERT(ira->ira_protocol == protocol); 2748 ip_fanout_proto_v6(mp, ip6h, ira); 2749 return; 2750 2751 pkt_too_short: 2752 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2753 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2754 freemsg(mp); 2755 return; 2756 2757 discard: 2758 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2759 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2760 freemsg(mp); 2761 #undef rptr 2762 } 2763