1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 */ 27 /* Copyright (c) 1990 Mentat Inc. 
*/ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsubr.h> 35 #include <sys/strlog.h> 36 #include <sys/strsun.h> 37 #include <sys/zone.h> 38 #define _SUN_TPI_VERSION 2 39 #include <sys/tihdr.h> 40 #include <sys/xti_inet.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/modctl.h> 47 #include <sys/atomic.h> 48 #include <sys/policy.h> 49 #include <sys/priv.h> 50 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/kmem.h> 54 #include <sys/sdt.h> 55 #include <sys/socket.h> 56 #include <sys/vtrace.h> 57 #include <sys/isa_defs.h> 58 #include <sys/mac.h> 59 #include <net/if.h> 60 #include <net/if_arp.h> 61 #include <net/route.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <net/if_dl.h> 65 66 #include <inet/common.h> 67 #include <inet/mi.h> 68 #include <inet/mib2.h> 69 #include <inet/nd.h> 70 #include <inet/arp.h> 71 #include <inet/snmpcom.h> 72 #include <inet/kstatcom.h> 73 74 #include <netinet/igmp_var.h> 75 #include <netinet/ip6.h> 76 #include <netinet/icmp6.h> 77 #include <netinet/sctp.h> 78 79 #include <inet/ip.h> 80 #include <inet/ip_impl.h> 81 #include <inet/ip6.h> 82 #include <inet/ip6_asp.h> 83 #include <inet/optcom.h> 84 #include <inet/tcp.h> 85 #include <inet/tcp_impl.h> 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_ftable.h> 90 #include <inet/ip_rts.h> 91 #include <inet/ip_ndp.h> 92 #include <inet/ip_listutils.h> 93 #include <netinet/igmp.h> 94 #include <netinet/ip_mroute.h> 95 #include <inet/ipp_common.h> 96 97 #include <net/pfkeyv2.h> 98 #include <inet/sadb.h> 99 #include <inet/ipsec_impl.h> 100 #include <inet/ipdrop.h> 101 #include <inet/ip_netinfo.h> 102 #include <inet/ilb_ip.h> 103 #include <sys/squeue_impl.h> 104 #include <sys/squeue.h> 105 106 
#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <ipp/ipp.h>
#include <ipp/ipp_impl.h>
#include <ipp/ipgpc/ipgpc.h>

#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/udp_impl.h>
#include <sys/sunddi.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <sys/clock_impl.h>	/* For LBOLT_FASTPATH{,64} */

#ifdef DEBUG
extern boolean_t skip_sctp_cksum;
#endif

static void	ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);

static void	ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
    ip_recv_attr_t *);

#pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)

/*
 * Direct read side procedure capable of dealing with chains. GLDv3 based
 * drivers call this function directly with mblk chains while STREAMS
 * read side procedure ip_rput() calls this for single packet with ip_ring
 * set to NULL to process one packet at a time.
 *
 * The ill will always be valid if this function is called directly from
 * the driver.
 *
 * If ip_input_v6() is called from GLDv3:
 *
 *   - This must be a non-VLAN IP stream.
 *   - 'mp' is either an untagged or a special priority-tagged packet.
 *   - Any VLAN tag that was in the MAC header has been stripped.
 *
 * If the IP header in packet is not 32-bit aligned, every message in the
 * chain will be aligned before further operations. This is required on SPARC
 * platform.
 *
 * This is a thin wrapper: it calls ip_input_common_v6() with no target
 * squeue and no 'last'/'cnt' out-parameters, and discards the return value.
 */
void
ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	(void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
	    NULL);
}

/*
 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode. The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v6 and ip_fanout_v6 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 *
 * This is a thin wrapper: it calls ip_input_common_v6() with a NULL mhip and
 * returns whatever that function returns; 'last' and 'cnt' are passed
 * straight through.
 */
mblk_t *
ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
	    last, cnt));
}

/*
 * Used by ip_input_v6 and ip_accept_tcp_v6
 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 * only used by ip_input_v6.
191 */ 192 mblk_t * 193 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 194 struct mac_header_info_s *mhip, squeue_t *target_sqp, 195 mblk_t **last, uint_t *cnt) 196 { 197 mblk_t *mp; 198 ip6_t *ip6h; 199 ip_recv_attr_t iras; /* Receive attributes */ 200 rtc_t rtc; 201 iaflags_t chain_flags = 0; /* Fixed for chain */ 202 mblk_t *ahead = NULL; /* Accepted head */ 203 mblk_t *atail = NULL; /* Accepted tail */ 204 uint_t acnt = 0; /* Accepted count */ 205 206 ASSERT(mp_chain != NULL); 207 ASSERT(ill != NULL); 208 209 /* These ones do not change as we loop over packets */ 210 iras.ira_ill = iras.ira_rill = ill; 211 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 212 iras.ira_rifindex = iras.ira_ruifindex; 213 iras.ira_sqp = NULL; 214 iras.ira_ring = ip_ring; 215 /* For ECMP and outbound transmit ring selection */ 216 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring); 217 218 iras.ira_target_sqp = target_sqp; 219 iras.ira_target_sqp_mp = NULL; 220 if (target_sqp != NULL) 221 chain_flags |= IRAF_TARGET_SQP; 222 223 /* 224 * We try to have a mhip pointer when possible, but 225 * it might be NULL in some cases. In those cases we 226 * have to assume unicast. 227 */ 228 iras.ira_mhip = mhip; 229 iras.ira_flags = 0; 230 if (mhip != NULL) { 231 switch (mhip->mhi_dsttype) { 232 case MAC_ADDRTYPE_MULTICAST : 233 chain_flags |= IRAF_L2DST_MULTICAST; 234 break; 235 case MAC_ADDRTYPE_BROADCAST : 236 chain_flags |= IRAF_L2DST_BROADCAST; 237 break; 238 } 239 } 240 241 /* 242 * Initialize the one-element route cache. 243 * 244 * We do ire caching from one iteration to 245 * another. In the event the packet chain contains 246 * all packets from the same dst, this caching saves 247 * an ire_route_recursive for each of the succeeding 248 * packets in a packet chain. 
249 */ 250 rtc.rtc_ire = NULL; 251 rtc.rtc_ip6addr = ipv6_all_zeros; 252 253 /* Loop over b_next */ 254 for (mp = mp_chain; mp != NULL; mp = mp_chain) { 255 mp_chain = mp->b_next; 256 mp->b_next = NULL; 257 258 /* 259 * if db_ref > 1 then copymsg and free original. Packet 260 * may be changed and we do not want the other entity 261 * who has a reference to this message to trip over the 262 * changes. This is a blind change because trying to 263 * catch all places that might change the packet is too 264 * difficult. 265 * 266 * This corresponds to the fast path case, where we have 267 * a chain of M_DATA mblks. We check the db_ref count 268 * of only the 1st data block in the mblk chain. There 269 * doesn't seem to be a reason why a device driver would 270 * send up data with varying db_ref counts in the mblk 271 * chain. In any case the Fast path is a private 272 * interface, and our drivers don't do such a thing. 273 * Given the above assumption, there is no need to walk 274 * down the entire mblk chain (which could have a 275 * potential performance problem) 276 * 277 * The "(DB_REF(mp) > 1)" check was moved from ip_rput() 278 * to here because of exclusive ip stacks and vnics. 279 * Packets transmitted from exclusive stack over vnic 280 * can have db_ref > 1 and when it gets looped back to 281 * another vnic in a different zone, you have ip_input() 282 * getting dblks with db_ref > 1. So if someone 283 * complains of TCP performance under this scenario, 284 * take a serious look here on the impact of copymsg(). 285 */ 286 if (DB_REF(mp) > 1) { 287 if ((mp = ip_fix_dbref(mp, &iras)) == NULL) 288 continue; 289 } 290 291 /* 292 * IP header ptr not aligned? 
293 * OR IP header not complete in first mblk 294 */ 295 ip6h = (ip6_t *)mp->b_rptr; 296 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) { 297 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras); 298 if (mp == NULL) 299 continue; 300 ip6h = (ip6_t *)mp->b_rptr; 301 } 302 303 /* Protect against a mix of Ethertypes and IP versions */ 304 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) { 305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 306 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 307 freemsg(mp); 308 /* mhip might point into 1st packet in the chain. */ 309 iras.ira_mhip = NULL; 310 continue; 311 } 312 313 /* 314 * Check for Martian addrs; we have to explicitly 315 * test for for zero dst since this is also used as 316 * an indication that the rtc is not used. 317 */ 318 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) { 319 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 320 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 321 freemsg(mp); 322 /* mhip might point into 1st packet in the chain. */ 323 iras.ira_mhip = NULL; 324 continue; 325 } 326 /* 327 * Keep L2SRC from a previous packet in chain since mhip 328 * might point into an earlier packet in the chain. 329 */ 330 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET); 331 332 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags; 333 iras.ira_free_flags = 0; 334 iras.ira_cred = NULL; 335 iras.ira_cpid = NOPID; 336 iras.ira_tsl = NULL; 337 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */ 338 339 /* 340 * We must count all incoming packets, even if they end 341 * up being dropped later on. Defer counting bytes until 342 * we have the whole IP header in first mblk. 343 */ 344 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 345 346 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 347 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 348 iras.ira_pktlen); 349 350 /* 351 * Call one of: 352 * ill_input_full_v6 353 * ill_input_short_v6 354 * The former is used in the case of TX. 
See ill_set_inputfn(). 355 */ 356 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 357 358 /* Any references to clean up? No hold on ira_ill */ 359 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 360 ira_cleanup(&iras, B_FALSE); 361 362 if (iras.ira_target_sqp_mp != NULL) { 363 /* Better be called from ip_accept_tcp */ 364 ASSERT(target_sqp != NULL); 365 366 /* Found one packet to accept */ 367 mp = iras.ira_target_sqp_mp; 368 iras.ira_target_sqp_mp = NULL; 369 ASSERT(ip_recv_attr_is_mblk(mp)); 370 371 if (atail != NULL) 372 atail->b_next = mp; 373 else 374 ahead = mp; 375 atail = mp; 376 acnt++; 377 mp = NULL; 378 } 379 /* mhip might point into 1st packet in the chain. */ 380 iras.ira_mhip = NULL; 381 } 382 /* Any remaining references to the route cache? */ 383 if (rtc.rtc_ire != NULL) { 384 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 385 ire_refrele(rtc.rtc_ire); 386 } 387 388 if (ahead != NULL) { 389 /* Better be called from ip_accept_tcp */ 390 ASSERT(target_sqp != NULL); 391 *last = atail; 392 *cnt = acnt; 393 return (ahead); 394 } 395 396 return (NULL); 397 } 398 399 /* 400 * This input function is used when 401 * - is_system_labeled() 402 * 403 * Note that for IPv6 CGTP filtering is handled only when receiving fragment 404 * headers, and RSVP uses router alert options, thus we don't need anything 405 * extra for them. 406 */ 407 void 408 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 409 ip_recv_attr_t *ira, rtc_t *rtc) 410 { 411 ip6_t *ip6h = (ip6_t *)iph_arg; 412 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg; 413 ill_t *ill = ira->ira_ill; 414 415 ASSERT(ira->ira_tsl == NULL); 416 417 /* 418 * Attach any necessary label information to 419 * this packet 420 */ 421 if (is_system_labeled()) { 422 ira->ira_flags |= IRAF_SYSTEM_LABELED; 423 424 /* 425 * This updates ira_cred, ira_tsl and ira_free_flags based 426 * on the label. 
427 */ 428 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) { 429 if (ip6opt_ls != 0) 430 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 431 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 432 ip_drop_input("ipIfStatsInDiscards", mp, ill); 433 freemsg(mp); 434 return; 435 } 436 /* Note that ira_tsl can be NULL here. */ 437 438 /* tsol_get_pkt_label sometimes does pullupmsg */ 439 ip6h = (ip6_t *)mp->b_rptr; 440 } 441 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc); 442 } 443 444 /* 445 * Check for IPv6 addresses that should not appear on the wire 446 * as either source or destination. 447 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have 448 * to revisit the IPv4-mapped part. 449 */ 450 static boolean_t 451 ip6_bad_address(in6_addr_t *addr, boolean_t is_src) 452 { 453 if (IN6_IS_ADDR_V4MAPPED(addr)) { 454 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr")); 455 return (B_TRUE); 456 } 457 if (IN6_IS_ADDR_LOOPBACK(addr)) { 458 ip1dbg(("ip_input_v6: pkt with loopback addr")); 459 return (B_TRUE); 460 } 461 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) { 462 /* 463 * having :: in the src is ok: it's used for DAD. 464 */ 465 ip1dbg(("ip_input_v6: pkt with unspecified addr")); 466 return (B_TRUE); 467 } 468 return (B_FALSE); 469 } 470 471 /* 472 * Routing lookup for IPv6 link-locals. 473 * First we look on the inbound interface, then we check for IPMP and 474 * look on the upper interface. 475 * We update ira_ruifindex if we find the IRE on the upper interface. 
476 */ 477 static ire_t * 478 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira, 479 uint_t irr_flags, ip_stack_t *ipst) 480 { 481 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL; 482 ire_t *ire; 483 484 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop)); 485 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 486 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 487 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 488 !IS_UNDER_IPMP(ill)) 489 return (ire); 490 491 /* 492 * When we are using IMP we need to look for an IRE on both the 493 * under and upper interfaces since there are different 494 * link-local addresses for the under and upper. 495 */ 496 ill = ipmp_ill_hold_ipmp_ill(ill); 497 if (ill == NULL) 498 return (ire); 499 500 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 501 502 ire_refrele(ire); 503 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 504 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 505 ill_refrele(ill); 506 return (ire); 507 } 508 509 /* 510 * This is the tail-end of the full receive side packet handling. 511 * It can be used directly when the configuration is simple. 512 */ 513 void 514 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 515 ip_recv_attr_t *ira, rtc_t *rtc) 516 { 517 ire_t *ire; 518 ill_t *ill = ira->ira_ill; 519 ip_stack_t *ipst = ill->ill_ipst; 520 uint_t pkt_len; 521 ssize_t len; 522 ip6_t *ip6h = (ip6_t *)iph_arg; 523 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg; 524 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 525 uint_t irr_flags; 526 #define rptr ((uchar_t *)ip6h) 527 528 ASSERT(DB_TYPE(mp) == M_DATA); 529 530 /* 531 * Check for source/dest being a bad address: loopback, any, or 532 * v4mapped. All of them start with a 64 bits of zero. 
533 */ 534 if (ip6h->ip6_src.s6_addr32[0] == 0 && 535 ip6h->ip6_src.s6_addr32[1] == 0) { 536 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) { 537 ip1dbg(("ip_input_v6: pkt with bad src addr\n")); 538 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 539 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 540 freemsg(mp); 541 return; 542 } 543 } 544 if (ip6h->ip6_dst.s6_addr32[0] == 0 && 545 ip6h->ip6_dst.s6_addr32[1] == 0) { 546 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) { 547 ip1dbg(("ip_input_v6: pkt with bad dst addr\n")); 548 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 549 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 550 freemsg(mp); 551 return; 552 } 553 } 554 555 len = mp->b_wptr - rptr; 556 pkt_len = ira->ira_pktlen; 557 558 /* multiple mblk or too short */ 559 len -= pkt_len; 560 if (len != 0) { 561 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira); 562 if (mp == NULL) 563 return; 564 ip6h = (ip6_t *)mp->b_rptr; 565 } 566 567 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 568 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 569 int, 0); 570 /* 571 * The event for packets being received from a 'physical' 572 * interface is placed after validation of the source and/or 573 * destination address as being local so that packets can be 574 * redirected to loopback addresses using ipnat. 
575 */ 576 DTRACE_PROBE4(ip6__physical__in__start, 577 ill_t *, ill, ill_t *, NULL, 578 ip6_t *, ip6h, mblk_t *, mp); 579 580 if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) { 581 int ll_multicast = 0; 582 int error; 583 in6_addr_t orig_dst = ip6h->ip6_dst; 584 585 if (ira->ira_flags & IRAF_L2DST_MULTICAST) 586 ll_multicast = HPE_MULTICAST; 587 else if (ira->ira_flags & IRAF_L2DST_BROADCAST) 588 ll_multicast = HPE_BROADCAST; 589 590 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 591 ipst->ips_ipv6firewall_physical_in, 592 ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error); 593 594 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp); 595 596 if (mp == NULL) 597 return; 598 599 /* The length could have changed */ 600 ip6h = (ip6_t *)mp->b_rptr; 601 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 602 pkt_len = ira->ira_pktlen; 603 604 /* 605 * In case the destination changed we override any previous 606 * change to nexthop. 607 */ 608 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst)) 609 nexthop = ip6h->ip6_dst; 610 611 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) { 612 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 613 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 614 freemsg(mp); 615 return; 616 } 617 618 } 619 620 if (ipst->ips_ip6_observe.he_interested) { 621 zoneid_t dzone; 622 623 /* 624 * On the inbound path the src zone will be unknown as 625 * this packet has come from the wire. 626 */ 627 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES); 628 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst); 629 } 630 631 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) != 632 IPV6_DEFAULT_VERS_AND_FLOW) { 633 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 634 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 635 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill); 636 freemsg(mp); 637 return; 638 } 639 640 /* 641 * For IPv6 we update ira_ip_hdr_length and ira_protocol as 642 * we parse the headers, starting with the hop-by-hop options header. 
643 */ 644 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 645 if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) { 646 ip6_hbh_t *hbhhdr; 647 uint_t ehdrlen; 648 uint8_t *optptr; 649 650 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) { 651 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 652 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 653 freemsg(mp); 654 return; 655 } 656 if (mp->b_cont != NULL && 657 rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) { 658 ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira); 659 if (ip6h == NULL) { 660 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 661 ip_drop_input("ipIfStatsInDiscards", mp, ill); 662 freemsg(mp); 663 return; 664 } 665 } 666 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 667 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 668 669 if (pkt_len < IPV6_HDR_LEN + ehdrlen) { 670 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 671 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 672 freemsg(mp); 673 return; 674 } 675 if (mp->b_cont != NULL && 676 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 677 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 678 if (ip6h == NULL) { 679 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 680 ip_drop_input("ipIfStatsInDiscards", mp, ill); 681 freemsg(mp); 682 return; 683 } 684 hbhhdr = (ip6_hbh_t *)&ip6h[1]; 685 } 686 687 /* 688 * Update ira_ip_hdr_length to skip the hop-by-hop header 689 * once we get to ip_fanout_v6 690 */ 691 ira->ira_ip_hdr_length += ehdrlen; 692 ira->ira_protocol = hbhhdr->ip6h_nxt; 693 694 optptr = (uint8_t *)&hbhhdr[1]; 695 switch (ip_process_options_v6(mp, ip6h, optptr, 696 ehdrlen - 2, IPPROTO_HOPOPTS, ira)) { 697 case -1: 698 /* 699 * Packet has been consumed and any 700 * needed ICMP messages sent. 701 */ 702 return; 703 case 0: 704 /* no action needed */ 705 break; 706 case 1: 707 /* 708 * Known router alert. 
Make use handle it as local 709 * by setting the nexthop to be the all-host multicast 710 * address, and skip multicast membership filter by 711 * marking as a router alert. 712 */ 713 ira->ira_flags |= IRAF_ROUTER_ALERT; 714 nexthop = ipv6_all_hosts_mcast; 715 break; 716 } 717 } 718 719 /* 720 * Here we check to see if we machine is setup as 721 * L3 loadbalancer and if the incoming packet is for a VIP 722 * 723 * Check the following: 724 * - there is at least a rule 725 * - protocol of the packet is supported 726 * 727 * We don't load balance IPv6 link-locals. 728 */ 729 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) && 730 !IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 731 in6_addr_t lb_dst; 732 int lb_ret; 733 734 /* For convenience, we just pull up the mblk. */ 735 if (mp->b_cont != NULL) { 736 if (pullupmsg(mp, -1) == 0) { 737 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 738 ip_drop_input("ipIfStatsInDiscards - pullupmsg", 739 mp, ill); 740 freemsg(mp); 741 return; 742 } 743 ip6h = (ip6_t *)mp->b_rptr; 744 } 745 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol, 746 (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst); 747 if (lb_ret == ILB_DROPPED) { 748 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 749 ip_drop_input("ILB_DROPPED", mp, ill); 750 freemsg(mp); 751 return; 752 } 753 if (lb_ret == ILB_BALANCED) { 754 /* Set the dst to that of the chosen server */ 755 nexthop = lb_dst; 756 DB_CKSUMFLAGS(mp) = 0; 757 } 758 } 759 760 if (ill->ill_flags & ILLF_ROUTER) 761 irr_flags = IRR_ALLOCATE; 762 else 763 irr_flags = IRR_NONE; 764 765 /* Can not use route cache with TX since the labels can differ */ 766 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 767 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 768 ire = ire_multicast(ill); 769 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 770 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 771 ipst); 772 } else { 773 /* Match destination and label */ 774 ire = ire_route_recursive_v6(&nexthop, 0, NULL, 775 ALL_ZONES, 
ira->ira_tsl, MATCH_IRE_SECATTR, 776 irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, 777 NULL); 778 } 779 /* Update the route cache so we do the ire_refrele */ 780 ASSERT(ire != NULL); 781 if (rtc->rtc_ire != NULL) 782 ire_refrele(rtc->rtc_ire); 783 rtc->rtc_ire = ire; 784 rtc->rtc_ip6addr = nexthop; 785 } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) && 786 rtc->rtc_ire != NULL) { 787 /* Use the route cache */ 788 ire = rtc->rtc_ire; 789 } else { 790 /* Update the route cache */ 791 if (IN6_IS_ADDR_MULTICAST(&nexthop)) { 792 ire = ire_multicast(ill); 793 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) { 794 ire = ire_linklocal(&nexthop, ill, ira, irr_flags, 795 ipst); 796 } else { 797 ire = ire_route_recursive_dstonly_v6(&nexthop, 798 irr_flags, ira->ira_xmit_hint, ipst); 799 } 800 ASSERT(ire != NULL); 801 if (rtc->rtc_ire != NULL) 802 ire_refrele(rtc->rtc_ire); 803 rtc->rtc_ire = ire; 804 rtc->rtc_ip6addr = nexthop; 805 } 806 807 ire->ire_ib_pkt_count++; 808 809 /* 810 * Based on ire_type and ire_flags call one of: 811 * ire_recv_local_v6 - for IRE_LOCAL 812 * ire_recv_loopback_v6 - for IRE_LOOPBACK 813 * ire_recv_multirt_v6 - if RTF_MULTIRT 814 * ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE 815 * ire_recv_multicast_v6 - for IRE_MULTICAST 816 * ire_recv_noaccept_v6 - for ire_noaccept ones 817 * ire_recv_forward_v6 - for the rest. 
818 */ 819 820 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 821 } 822 #undef rptr 823 824 /* 825 * ire_recvfn for IREs that need forwarding 826 */ 827 void 828 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 829 { 830 ip6_t *ip6h = (ip6_t *)iph_arg; 831 ill_t *ill = ira->ira_ill; 832 ip_stack_t *ipst = ill->ill_ipst; 833 iaflags_t iraflags = ira->ira_flags; 834 ill_t *dst_ill; 835 nce_t *nce; 836 uint32_t added_tx_len; 837 uint32_t mtu, iremtu; 838 839 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 840 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 841 ip_drop_input("l2 multicast not forwarded", mp, ill); 842 freemsg(mp); 843 return; 844 } 845 846 if (!(ill->ill_flags & ILLF_ROUTER)) { 847 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 848 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 849 freemsg(mp); 850 return; 851 } 852 853 /* 854 * Either ire_nce_capable or ire_dep_parent would be set for the IRE 855 * when it is found by ire_route_recursive, but that some other thread 856 * could have changed the routes with the effect of clearing 857 * ire_dep_parent. In that case we'd end up dropping the packet, or 858 * finding a new nce below. 859 * Get, allocate, or update the nce. 860 * We get a refhold on ire_nce_cache as a result of this to avoid races 861 * where ire_nce_cache is deleted. 862 * 863 * This ensures that we don't forward if the interface is down since 864 * ipif_down removes all the nces. 
865 */ 866 mutex_enter(&ire->ire_lock); 867 nce = ire->ire_nce_cache; 868 if (nce == NULL) { 869 /* Not yet set up - try to set one up */ 870 mutex_exit(&ire->ire_lock); 871 (void) ire_revalidate_nce(ire); 872 mutex_enter(&ire->ire_lock); 873 nce = ire->ire_nce_cache; 874 if (nce == NULL) { 875 mutex_exit(&ire->ire_lock); 876 /* The ire_dep_parent chain went bad, or no memory */ 877 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 878 ip_drop_input("No ire_dep_parent", mp, ill); 879 freemsg(mp); 880 return; 881 } 882 } 883 nce_refhold(nce); 884 mutex_exit(&ire->ire_lock); 885 886 if (nce->nce_is_condemned) { 887 nce_t *nce1; 888 889 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE); 890 nce_refrele(nce); 891 if (nce1 == NULL) { 892 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 893 ip_drop_input("No nce", mp, ill); 894 freemsg(mp); 895 return; 896 } 897 nce = nce1; 898 } 899 dst_ill = nce->nce_ill; 900 901 /* 902 * Unless we are forwarding, drop the packet. 903 * Unlike IPv4 we don't allow source routed packets out the same 904 * interface when we are not a router. 905 * Note that ill_forward_set() will set the ILLF_ROUTER on 906 * all the group members when it gets an ipmp-ill or under-ill. 907 */ 908 if (!(dst_ill->ill_flags & ILLF_ROUTER)) { 909 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 910 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 911 freemsg(mp); 912 nce_refrele(nce); 913 return; 914 } 915 916 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) { 917 ire->ire_ib_pkt_count--; 918 /* 919 * Should only use IREs that are visible from the 920 * global zone for forwarding. 921 * For IPv6 any source route would have already been 922 * advanced in ip_fanout_v6 923 */ 924 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL, 925 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR, 926 (ill->ill_flags & ILLF_ROUTER) ? 
IRR_ALLOCATE : IRR_NONE, 927 ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 928 ire->ire_ib_pkt_count++; 929 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 930 ire_refrele(ire); 931 nce_refrele(nce); 932 return; 933 } 934 /* 935 * ipIfStatsHCInForwDatagrams should only be increment if there 936 * will be an attempt to forward the packet, which is why we 937 * increment after the above condition has been checked. 938 */ 939 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 940 941 /* Initiate Read side IPPF processing */ 942 if (IPP_ENABLED(IPP_FWD_IN, ipst)) { 943 /* ip_process translates an IS_UNDER_IPMP */ 944 mp = ip_process(IPP_FWD_IN, mp, ill, ill); 945 if (mp == NULL) { 946 /* ip_drop_packet and MIB done */ 947 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred " 948 "during IPPF processing\n")); 949 nce_refrele(nce); 950 return; 951 } 952 } 953 954 DTRACE_PROBE4(ip6__forwarding__start, 955 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp); 956 957 if (HOOKS6_INTERESTED_FORWARDING(ipst)) { 958 int error; 959 960 FW_HOOKS(ipst->ips_ip6_forwarding_event, 961 ipst->ips_ipv6firewall_forwarding, 962 ill, dst_ill, ip6h, mp, mp, 0, ipst, error); 963 964 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 965 966 if (mp == NULL) { 967 nce_refrele(nce); 968 return; 969 } 970 /* 971 * Even if the destination was changed by the filter we use the 972 * forwarding decision that was made based on the address 973 * in ip_input. 974 */ 975 976 /* Might have changed */ 977 ip6h = (ip6_t *)mp->b_rptr; 978 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 979 } 980 981 /* Packet is being forwarded. Turning off hwcksum flag. */ 982 DB_CKSUMFLAGS(mp) = 0; 983 984 /* 985 * Per RFC 3513 section 2.5.2, we must not forward packets with 986 * an unspecified source address. 987 * The loopback address check for both src and dst has already 988 * been checked in ip_input_v6 989 * In the future one can envision adding RPF checks using number 3. 
990 */ 991 switch (ipst->ips_src_check) { 992 case 0: 993 break; 994 case 1: 995 case 2: 996 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || 997 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 998 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 999 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1000 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1001 nce_refrele(nce); 1002 freemsg(mp); 1003 return; 1004 } 1005 break; 1006 } 1007 1008 /* 1009 * Check to see if we're forwarding the packet to a 1010 * different link from which it came. If so, check the 1011 * source and destination addresses since routers must not 1012 * forward any packets with link-local source or 1013 * destination addresses to other links. Otherwise (if 1014 * we're forwarding onto the same link), conditionally send 1015 * a redirect message. 1016 */ 1017 if (!IS_ON_SAME_LAN(dst_ill, ill)) { 1018 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 1019 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 1020 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1021 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1022 freemsg(mp); 1023 nce_refrele(nce); 1024 return; 1025 } 1026 /* TBD add site-local check at site boundary? */ 1027 } else if (ipst->ips_ipv6_send_redirects) { 1028 ip_send_potential_redirect_v6(mp, ip6h, ire, ira); 1029 } 1030 1031 added_tx_len = 0; 1032 if (iraflags & IRAF_SYSTEM_LABELED) { 1033 mblk_t *mp1; 1034 uint32_t old_pkt_len = ira->ira_pktlen; 1035 1036 /* 1037 * Check if it can be forwarded and add/remove 1038 * CIPSO options as needed. 1039 */ 1040 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) { 1041 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1042 ip_drop_input("tsol_ip_forward", mp, ill); 1043 freemsg(mp); 1044 nce_refrele(nce); 1045 return; 1046 } 1047 /* 1048 * Size may have changed. Remember amount added in case 1049 * ip_fragment needs to send an ICMP too big. 
1050 */ 1051 mp = mp1; 1052 ip6h = (ip6_t *)mp->b_rptr; 1053 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 1054 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 1055 if (ira->ira_pktlen > old_pkt_len) 1056 added_tx_len = ira->ira_pktlen - old_pkt_len; 1057 } 1058 1059 mtu = dst_ill->ill_mtu; 1060 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu) 1061 mtu = iremtu; 1062 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len); 1063 nce_refrele(nce); 1064 return; 1065 1066 } 1067 1068 /* 1069 * Used for sending out unicast and multicast packets that are 1070 * forwarded. 1071 */ 1072 void 1073 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira, 1074 uint32_t mtu, uint32_t added_tx_len) 1075 { 1076 ill_t *dst_ill = nce->nce_ill; 1077 uint32_t pkt_len; 1078 iaflags_t iraflags = ira->ira_flags; 1079 ip_stack_t *ipst = dst_ill->ill_ipst; 1080 1081 if (ip6h->ip6_hops-- <= 1) { 1082 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1083 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill); 1084 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE, 1085 ira); 1086 return; 1087 } 1088 1089 /* Initiate Write side IPPF processing before any fragmentation */ 1090 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1091 /* ip_process translates an IS_UNDER_IPMP */ 1092 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1093 if (mp == NULL) { 1094 /* ip_drop_packet and MIB done */ 1095 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \ 1096 " during IPPF processing\n")); 1097 return; 1098 } 1099 } 1100 1101 pkt_len = ira->ira_pktlen; 1102 1103 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1104 1105 if (pkt_len > mtu) { 1106 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1107 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1108 if (iraflags & IRAF_SYSTEM_LABELED) { 1109 /* 1110 * Remove any CIPSO option added by 1111 * tsol_ip_forward, and make sure we report 1112 * a path MTU so that there 1113 * is 
room to add such a CIPSO option for future 1114 * packets. 1115 */ 1116 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6); 1117 } 1118 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira); 1119 return; 1120 } 1121 1122 ASSERT(pkt_len == 1123 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN); 1124 1125 if (iraflags & IRAF_LOOPBACK_COPY) { 1126 /* 1127 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg 1128 * is don't care 1129 */ 1130 (void) ip_postfrag_loopcheck(mp, nce, 1131 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL), 1132 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1133 } else { 1134 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL, 1135 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1136 } 1137 } 1138 1139 /* 1140 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1141 * which is what ire_route_recursive returns when there is no matching ire. 1142 * Send ICMP unreachable unless blackhole. 1143 */ 1144 void 1145 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1146 { 1147 ip6_t *ip6h = (ip6_t *)iph_arg; 1148 ill_t *ill = ira->ira_ill; 1149 ip_stack_t *ipst = ill->ill_ipst; 1150 1151 /* Would we have forwarded this packet if we had a route? */ 1152 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1153 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1154 ip_drop_input("l2 multicast not forwarded", mp, ill); 1155 freemsg(mp); 1156 return; 1157 } 1158 1159 if (!(ill->ill_flags & ILLF_ROUTER)) { 1160 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1161 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1162 freemsg(mp); 1163 return; 1164 } 1165 /* 1166 * If we had a route this could have been forwarded. Count as such. 1167 * 1168 * ipIfStatsHCInForwDatagrams should only be increment if there 1169 * will be an attempt to forward the packet, which is why we 1170 * increment after the above condition has been checked. 
1171 */ 1172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1173 1174 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1175 1176 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1177 ipst); 1178 1179 if (ire->ire_flags & RTF_BLACKHOLE) { 1180 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1181 freemsg(mp); 1182 } else { 1183 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1184 1185 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, 1186 ira); 1187 } 1188 } 1189 1190 /* 1191 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1192 * VRRP when in noaccept mode. 1193 * We silently drop packets except for Neighbor Solicitations and 1194 * Neighbor Advertisements. 1195 */ 1196 void 1197 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1198 ip_recv_attr_t *ira) 1199 { 1200 ip6_t *ip6h = (ip6_t *)iph_arg; 1201 ill_t *ill = ira->ira_ill; 1202 icmp6_t *icmp6; 1203 int ip_hdr_length; 1204 1205 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1206 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1207 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1208 freemsg(mp); 1209 return; 1210 } 1211 ip_hdr_length = ira->ira_ip_hdr_length; 1212 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 1213 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 1214 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 1215 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 1216 freemsg(mp); 1217 return; 1218 } 1219 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 1220 if (ip6h == NULL) { 1221 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1222 freemsg(mp); 1223 return; 1224 } 1225 } 1226 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 1227 1228 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT && 1229 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) { 1230 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1231 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1232 freemsg(mp); 
1233 return; 1234 } 1235 ire_recv_local_v6(ire, mp, ip6h, ira); 1236 } 1237 1238 /* 1239 * ire_recvfn for IRE_MULTICAST. 1240 */ 1241 void 1242 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1243 ip_recv_attr_t *ira) 1244 { 1245 ip6_t *ip6h = (ip6_t *)iph_arg; 1246 ill_t *ill = ira->ira_ill; 1247 1248 ASSERT(ire->ire_ill == ira->ira_ill); 1249 1250 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1251 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1252 1253 /* Tag for higher-level protocols */ 1254 ira->ira_flags |= IRAF_MULTICAST; 1255 1256 /* 1257 * So that we don't end up with dups, only one ill an IPMP group is 1258 * nominated to receive multicast traffic. 1259 * If we have no cast_ill we are liberal and accept everything. 1260 */ 1261 if (IS_UNDER_IPMP(ill)) { 1262 ip_stack_t *ipst = ill->ill_ipst; 1263 1264 /* For an under ill_grp can change under lock */ 1265 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1266 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1267 ill->ill_grp->ig_cast_ill != NULL) { 1268 rw_exit(&ipst->ips_ill_g_lock); 1269 ip_drop_input("not on cast ill", mp, ill); 1270 freemsg(mp); 1271 return; 1272 } 1273 rw_exit(&ipst->ips_ill_g_lock); 1274 /* 1275 * We switch to the upper ill so that mrouter and hasmembers 1276 * can operate on upper here and in ip_input_multicast. 1277 */ 1278 ill = ipmp_ill_hold_ipmp_ill(ill); 1279 if (ill != NULL) { 1280 ASSERT(ill != ira->ira_ill); 1281 ASSERT(ire->ire_ill == ira->ira_ill); 1282 ira->ira_ill = ill; 1283 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1284 } else { 1285 ill = ira->ira_ill; 1286 } 1287 } 1288 1289 #ifdef notdef 1290 /* 1291 * Check if we are a multicast router - send ip_mforward a copy of 1292 * the packet. 1293 * Due to mroute_decap tunnels we consider forwarding packets even if 1294 * mrouted has not joined the allmulti group on this interface. 
1295 */ 1296 if (ipst->ips_ip_g_mrouter) { 1297 int retval; 1298 1299 /* 1300 * Clear the indication that this may have hardware 1301 * checksum as we are not using it for forwarding. 1302 */ 1303 DB_CKSUMFLAGS(mp) = 0; 1304 1305 /* 1306 * ip_mforward helps us make these distinctions: If received 1307 * on tunnel and not IGMP, then drop. 1308 * If IGMP packet, then don't check membership 1309 * If received on a phyint and IGMP or PIM, then 1310 * don't check membership 1311 */ 1312 retval = ip_mforward_v6(mp, ira); 1313 /* ip_mforward updates mib variables if needed */ 1314 1315 switch (retval) { 1316 case 0: 1317 /* 1318 * pkt is okay and arrived on phyint. 1319 */ 1320 break; 1321 case -1: 1322 /* pkt is mal-formed, toss it */ 1323 freemsg(mp); 1324 goto done; 1325 case 1: 1326 /* 1327 * pkt is okay and arrived on a tunnel 1328 * 1329 * If we are running a multicast router 1330 * we need to see all mld packets, which 1331 * are marked with router alerts. 1332 */ 1333 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1334 goto forus; 1335 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1336 freemsg(mp); 1337 goto done; 1338 } 1339 } 1340 #endif /* notdef */ 1341 1342 /* 1343 * If this was a router alert we skip the group membership check. 1344 */ 1345 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1346 goto forus; 1347 1348 /* 1349 * Check if we have members on this ill. This is not necessary for 1350 * correctness because even if the NIC/GLD had a leaky filter, we 1351 * filter before passing to each conn_t. 1352 */ 1353 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) { 1354 /* 1355 * Nobody interested 1356 * 1357 * This might just be caused by the fact that 1358 * multiple IP Multicast addresses map to the same 1359 * link layer multicast - no need to increment counter! 
1360 */ 1361 ip_drop_input("Multicast with no members", mp, ill); 1362 freemsg(mp); 1363 goto done; 1364 } 1365 forus: 1366 ip2dbg(("ire_recv_multicast_v6: multicast for us\n")); 1367 1368 /* 1369 * After reassembly and IPsec we will need to duplicate the 1370 * multicast packet for all matching zones on the ill. 1371 */ 1372 ira->ira_zoneid = ALL_ZONES; 1373 1374 /* Reassemble on the ill on which the packet arrived */ 1375 ip_input_local_v6(ire, mp, ip6h, ira); 1376 done: 1377 if (ill != ire->ire_ill) { 1378 ill_refrele(ill); 1379 ira->ira_ill = ire->ire_ill; 1380 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1381 } 1382 } 1383 1384 /* 1385 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1386 * Drop packets since we don't forward out multirt routes. 1387 */ 1388 /* ARGSUSED */ 1389 void 1390 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1391 { 1392 ill_t *ill = ira->ira_ill; 1393 1394 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1395 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1396 freemsg(mp); 1397 } 1398 1399 /* 1400 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1401 * has rewritten the packet to have a loopback destination address (We 1402 * filter out packet with a loopback destination from arriving over the wire). 1403 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1404 */ 1405 void 1406 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1407 { 1408 ip6_t *ip6h = (ip6_t *)iph_arg; 1409 ill_t *ill = ira->ira_ill; 1410 ill_t *ire_ill = ire->ire_ill; 1411 1412 ira->ira_zoneid = GLOBAL_ZONEID; 1413 1414 /* Switch to the lo0 ill for further processing */ 1415 if (ire_ill != ill) { 1416 /* 1417 * Update ira_ill to be the ILL on which the IP address 1418 * is hosted. 
1419 * No need to hold the ill since we have a hold on the ire 1420 */ 1421 ASSERT(ira->ira_ill == ira->ira_rill); 1422 ira->ira_ill = ire_ill; 1423 1424 ip_input_local_v6(ire, mp, ip6h, ira); 1425 1426 /* Restore */ 1427 ASSERT(ira->ira_ill == ire_ill); 1428 ira->ira_ill = ill; 1429 return; 1430 1431 } 1432 ip_input_local_v6(ire, mp, ip6h, ira); 1433 } 1434 1435 /* 1436 * ire_recvfn for IRE_LOCAL. 1437 */ 1438 void 1439 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1440 { 1441 ip6_t *ip6h = (ip6_t *)iph_arg; 1442 ill_t *ill = ira->ira_ill; 1443 ill_t *ire_ill = ire->ire_ill; 1444 1445 /* Make a note for DAD that this address is in use */ 1446 ire->ire_last_used_time = LBOLT_FASTPATH; 1447 1448 /* Only target the IRE_LOCAL with the right zoneid. */ 1449 ira->ira_zoneid = ire->ire_zoneid; 1450 1451 /* 1452 * If the packet arrived on the wrong ill, we check that 1453 * this is ok. 1454 * If it is, then we ensure that we do the reassembly on 1455 * the ill on which the address is hosted. We keep ira_rill as 1456 * the one on which the packet arrived, so that IP_PKTINFO and 1457 * friends can report this. 1458 */ 1459 if (ire_ill != ill) { 1460 ire_t *new_ire; 1461 1462 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill); 1463 if (new_ire == NULL) { 1464 /* Drop packet */ 1465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1466 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1467 freemsg(mp); 1468 return; 1469 } 1470 /* 1471 * Update ira_ill to be the ILL on which the IP address 1472 * is hosted. No need to hold the ill since we have a 1473 * hold on the ire. Note that we do the switch even if 1474 * new_ire == ire (for IPMP, ire would be the one corresponding 1475 * to the IPMP ill). 
1476 */ 1477 ASSERT(ira->ira_ill == ira->ira_rill); 1478 ira->ira_ill = new_ire->ire_ill; 1479 1480 /* ira_ruifindex tracks the upper for ira_rill */ 1481 if (IS_UNDER_IPMP(ill)) 1482 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1483 1484 ip_input_local_v6(new_ire, mp, ip6h, ira); 1485 1486 /* Restore */ 1487 ASSERT(ira->ira_ill == new_ire->ire_ill); 1488 ira->ira_ill = ill; 1489 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1490 1491 if (new_ire != ire) 1492 ire_refrele(new_ire); 1493 return; 1494 } 1495 1496 ip_input_local_v6(ire, mp, ip6h, ira); 1497 } 1498 1499 /* 1500 * Common function for packets arriving for the host. Handles 1501 * checksum verification, reassembly checks, etc. 1502 */ 1503 static void 1504 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1505 { 1506 iaflags_t iraflags = ira->ira_flags; 1507 1508 /* 1509 * For multicast we need some extra work before 1510 * we call ip_fanout_v6(), since in the case of shared-IP zones 1511 * we need to pretend that a packet arrived for each zoneid. 1512 */ 1513 if (iraflags & IRAF_MULTICAST) { 1514 ip_input_multicast_v6(ire, mp, ip6h, ira); 1515 return; 1516 } 1517 ip_fanout_v6(mp, ip6h, ira); 1518 } 1519 1520 /* 1521 * Handle multiple zones which want to receive the same multicast packets 1522 * on this ill by delivering a packet to each of them. 1523 * 1524 * Note that for packets delivered to transports we could instead do this 1525 * as part of the fanout code, but since we need to handle icmp_inbound 1526 * it is simpler to have multicast work the same as IPv4 broadcast. 1527 * 1528 * The ip_fanout matching for multicast matches based on ilm independent of 1529 * zoneid since the zoneid restriction is applied when joining a multicast 1530 * group. 
1531 */ 1532 /* ARGSUSED */ 1533 static void 1534 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1535 { 1536 ill_t *ill = ira->ira_ill; 1537 iaflags_t iraflags = ira->ira_flags; 1538 ip_stack_t *ipst = ill->ill_ipst; 1539 netstack_t *ns = ipst->ips_netstack; 1540 zoneid_t zoneid; 1541 mblk_t *mp1; 1542 ip6_t *ip6h1; 1543 uint_t ira_pktlen = ira->ira_pktlen; 1544 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1545 1546 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1547 ASSERT(!IS_UNDER_IPMP(ill)); 1548 1549 /* 1550 * If we don't have more than one shared-IP zone, or if 1551 * there are no members in anything but the global zone, 1552 * then just set the zoneid and proceed. 1553 */ 1554 if (ns->netstack_numzones == 1 || 1555 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 1556 GLOBAL_ZONEID)) { 1557 ira->ira_zoneid = GLOBAL_ZONEID; 1558 1559 /* If sender didn't want this zone to receive it, drop */ 1560 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1561 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1562 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1563 freemsg(mp); 1564 return; 1565 } 1566 ip_fanout_v6(mp, ip6h, ira); 1567 return; 1568 } 1569 1570 /* 1571 * Here we loop over all zoneids that have members in the group 1572 * and deliver a packet to ip_fanout for each zoneid. 1573 * 1574 * First find any members in the lowest numeric zoneid by looking for 1575 * first zoneid larger than -1 (ALL_ZONES). 1576 * We terminate the loop when we receive -1 (ALL_ZONES). 1577 */ 1578 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 1579 for (; zoneid != ALL_ZONES; 1580 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) { 1581 /* 1582 * Avoid an extra copymsg/freemsg by skipping global zone here 1583 * and doing that at the end. 
1584 */ 1585 if (zoneid == GLOBAL_ZONEID) 1586 continue; 1587 1588 ira->ira_zoneid = zoneid; 1589 1590 /* If sender didn't want this zone to receive it, skip */ 1591 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1592 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1593 continue; 1594 1595 mp1 = copymsg(mp); 1596 if (mp1 == NULL) { 1597 /* Failed to deliver to one zone */ 1598 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1599 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1600 continue; 1601 } 1602 ip6h1 = (ip6_t *)mp1->b_rptr; 1603 ip_fanout_v6(mp1, ip6h1, ira); 1604 /* 1605 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1606 * so we restore them for a potential next iteration 1607 */ 1608 ira->ira_pktlen = ira_pktlen; 1609 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1610 } 1611 1612 /* Do the main ire */ 1613 ira->ira_zoneid = GLOBAL_ZONEID; 1614 /* If sender didn't want this zone to receive it, drop */ 1615 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1616 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1617 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1618 freemsg(mp); 1619 } else { 1620 ip_fanout_v6(mp, ip6h, ira); 1621 } 1622 } 1623 1624 1625 /* 1626 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions 1627 * is in use. Updates ira_zoneid and ira_flags as a result. 1628 */ 1629 static void 1630 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length, 1631 ip_recv_attr_t *ira) 1632 { 1633 uint16_t *up; 1634 uint16_t lport; 1635 zoneid_t zoneid; 1636 1637 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1638 1639 /* 1640 * If the packet is unlabeled we might allow read-down 1641 * for MAC_EXEMPT. Below we clear this if it is a multi-level 1642 * port (MLP). 1643 * Note that ira_tsl can be NULL here. 
1644 */ 1645 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 1646 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 1647 1648 if (ira->ira_zoneid != ALL_ZONES) 1649 return; 1650 1651 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 1652 1653 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 1654 switch (protocol) { 1655 case IPPROTO_TCP: 1656 case IPPROTO_SCTP: 1657 case IPPROTO_UDP: 1658 /* Caller ensures this */ 1659 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr); 1660 1661 /* 1662 * Only these transports support MLP. 1663 * We know their destination port numbers is in 1664 * the same place in the header. 1665 */ 1666 lport = up[1]; 1667 1668 /* 1669 * No need to handle exclusive-stack zones 1670 * since ALL_ZONES only applies to the shared IP instance. 1671 */ 1672 zoneid = tsol_mlp_findzone(protocol, lport); 1673 /* 1674 * If no shared MLP is found, tsol_mlp_findzone returns 1675 * ALL_ZONES. In that case, we assume it's SLP, and 1676 * search for the zone based on the packet label. 1677 * 1678 * If there is such a zone, we prefer to find a 1679 * connection in it. Otherwise, we look for a 1680 * MAC-exempt connection in any zone whose label 1681 * dominates the default label on the packet. 
1682 */ 1683 if (zoneid == ALL_ZONES) 1684 zoneid = tsol_attr_to_zoneid(ira); 1685 else 1686 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 1687 break; 1688 default: 1689 /* Handle shared address for other protocols */ 1690 zoneid = tsol_attr_to_zoneid(ira); 1691 break; 1692 } 1693 ira->ira_zoneid = zoneid; 1694 } 1695 1696 /* 1697 * Increment checksum failure statistics 1698 */ 1699 static void 1700 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 1701 { 1702 ip_stack_t *ipst = ill->ill_ipst; 1703 1704 switch (protocol) { 1705 case IPPROTO_TCP: 1706 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 1707 1708 if (hck_flags & HCK_FULLCKSUM) 1709 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err); 1710 else if (hck_flags & HCK_PARTIALCKSUM) 1711 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err); 1712 else 1713 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 1714 break; 1715 case IPPROTO_UDP: 1716 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1717 if (hck_flags & HCK_FULLCKSUM) 1718 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err); 1719 else if (hck_flags & HCK_PARTIALCKSUM) 1720 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err); 1721 else 1722 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 1723 break; 1724 case IPPROTO_ICMPV6: 1725 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 1726 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1727 break; 1728 default: 1729 ASSERT(0); 1730 break; 1731 } 1732 } 1733 1734 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */ 1735 uint32_t 1736 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira) 1737 { 1738 uint_t ulp_len; 1739 uint32_t cksum; 1740 uint8_t protocol = ira->ira_protocol; 1741 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1742 1743 #define iphs ((uint16_t *)ip6h) 1744 1745 switch (protocol) { 1746 case IPPROTO_TCP: 1747 ulp_len = ira->ira_pktlen - ip_hdr_length; 1748 1749 /* Protocol and length */ 1750 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 1751 /* IP addresses */ 1752 cksum += iphs[4] + iphs[5] + 
iphs[6] + iphs[7] + 1753 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1754 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1755 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1756 break; 1757 1758 case IPPROTO_UDP: { 1759 udpha_t *udpha; 1760 1761 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1762 1763 /* Protocol and length */ 1764 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 1765 /* IP addresses */ 1766 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1767 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1768 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1769 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1770 break; 1771 } 1772 case IPPROTO_ICMPV6: 1773 ulp_len = ira->ira_pktlen - ip_hdr_length; 1774 1775 /* Protocol and length */ 1776 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP; 1777 /* IP addresses */ 1778 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1779 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1780 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1781 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1782 break; 1783 default: 1784 cksum = 0; 1785 break; 1786 } 1787 #undef iphs 1788 return (cksum); 1789 } 1790 1791 1792 /* 1793 * Software verification of the ULP checksums. 1794 * Returns B_TRUE if ok. 1795 * Increments statistics of failed. 1796 */ 1797 static boolean_t 1798 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1799 { 1800 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1801 uint32_t cksum; 1802 uint8_t protocol = ira->ira_protocol; 1803 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1804 1805 IP6_STAT(ipst, ip6_in_sw_cksum); 1806 1807 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 1808 protocol == IPPROTO_ICMPV6); 1809 1810 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1811 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1812 if (cksum == 0) 1813 return (B_TRUE); 1814 1815 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill); 1816 return (B_FALSE); 1817 } 1818 1819 /* 1820 * Verify the ULP checksums. 
1821 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 1822 * algorithm. 1823 * Increments statistics if failed. 1824 */ 1825 static boolean_t 1826 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, 1827 ip_recv_attr_t *ira) 1828 { 1829 ill_t *ill = ira->ira_rill; 1830 uint16_t hck_flags; 1831 uint32_t cksum; 1832 mblk_t *mp1; 1833 uint_t len; 1834 uint8_t protocol = ira->ira_protocol; 1835 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1836 1837 1838 switch (protocol) { 1839 case IPPROTO_TCP: 1840 case IPPROTO_ICMPV6: 1841 break; 1842 1843 case IPPROTO_UDP: { 1844 udpha_t *udpha; 1845 1846 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1847 /* 1848 * Before going through the regular checksum 1849 * calculation, make sure the received checksum 1850 * is non-zero. RFC 2460 says, a 0x0000 checksum 1851 * in a UDP packet (within IPv6 packet) is invalid 1852 * and should be replaced by 0xffff. This makes 1853 * sense as regular checksum calculation will 1854 * pass for both the cases i.e. 0x0000 and 0xffff. 1855 * Removing one of the case makes error detection 1856 * stronger. 1857 */ 1858 if (udpha->uha_checksum == 0) { 1859 /* 0x0000 checksum is invalid */ 1860 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1861 return (B_FALSE); 1862 } 1863 break; 1864 } 1865 case IPPROTO_SCTP: { 1866 sctp_hdr_t *sctph; 1867 uint32_t pktsum; 1868 1869 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length); 1870 #ifdef DEBUG 1871 if (skip_sctp_cksum) 1872 return (B_TRUE); 1873 #endif 1874 pktsum = sctph->sh_chksum; 1875 sctph->sh_chksum = 0; 1876 cksum = sctp_cksum(mp, ip_hdr_length); 1877 sctph->sh_chksum = pktsum; 1878 if (cksum == pktsum) 1879 return (B_TRUE); 1880 1881 /* 1882 * Defer until later whether a bad checksum is ok 1883 * in order to allow RAW sockets to use Adler checksum 1884 * with SCTP. 
1885 */ 1886 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 1887 return (B_TRUE); 1888 } 1889 1890 default: 1891 /* No ULP checksum to verify. */ 1892 return (B_TRUE); 1893 } 1894 1895 /* 1896 * Revert to software checksum calculation if the interface 1897 * isn't capable of checksum offload. 1898 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 1899 * Note: IRAF_NO_HW_CKSUM is not currently used. 1900 */ 1901 ASSERT(!IS_IPMP(ill)); 1902 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1903 !dohwcksum) { 1904 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1905 } 1906 1907 /* 1908 * We apply this for all ULP protocols. Does the HW know to 1909 * not set the flags for SCTP and other protocols. 1910 */ 1911 1912 hck_flags = DB_CKSUMFLAGS(mp); 1913 1914 if (hck_flags & HCK_FULLCKSUM_OK) { 1915 /* 1916 * Hardware has already verified the checksum. 1917 */ 1918 return (B_TRUE); 1919 } 1920 1921 if (hck_flags & HCK_FULLCKSUM) { 1922 /* 1923 * Full checksum has been computed by the hardware 1924 * and has been attached. If the driver wants us to 1925 * verify the correctness of the attached value, in 1926 * order to protect against faulty hardware, compare 1927 * it against -0 (0xFFFF) to see if it's valid. 
1928 */ 1929 cksum = DB_CKSUM16(mp); 1930 if (cksum == 0xFFFF) 1931 return (B_TRUE); 1932 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1933 return (B_FALSE); 1934 } 1935 1936 mp1 = mp->b_cont; 1937 if ((hck_flags & HCK_PARTIALCKSUM) && 1938 (mp1 == NULL || mp1->b_cont == NULL) && 1939 ip_hdr_length >= DB_CKSUMSTART(mp) && 1940 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 1941 uint32_t adj; 1942 uchar_t *cksum_start; 1943 1944 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1945 1946 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp)); 1947 1948 /* 1949 * Partial checksum has been calculated by hardware 1950 * and attached to the packet; in addition, any 1951 * prepended extraneous data is even byte aligned, 1952 * and there are at most two mblks associated with 1953 * the packet. If any such data exists, we adjust 1954 * the checksum; also take care any postpended data. 1955 */ 1956 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 1957 /* 1958 * One's complement subtract extraneous checksum 1959 */ 1960 cksum += DB_CKSUM16(mp); 1961 if (adj >= cksum) 1962 cksum = ~(adj - cksum) & 0xFFFF; 1963 else 1964 cksum -= adj; 1965 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1966 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1967 if (!(~cksum & 0xFFFF)) 1968 return (B_TRUE); 1969 1970 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1971 return (B_FALSE); 1972 } 1973 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1974 } 1975 1976 1977 /* 1978 * Handle fanout of received packets. 1979 * Unicast packets that are looped back (from ire_send_local_v6) and packets 1980 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 1981 * 1982 * IPQoS Notes 1983 * Before sending it to the client, invoke IPPF processing. Policy processing 1984 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 
1985 */ 1986 void 1987 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1988 { 1989 ill_t *ill = ira->ira_ill; 1990 iaflags_t iraflags = ira->ira_flags; 1991 ip_stack_t *ipst = ill->ill_ipst; 1992 uint8_t protocol; 1993 conn_t *connp; 1994 #define rptr ((uchar_t *)ip6h) 1995 uint_t ip_hdr_length; 1996 uint_t min_ulp_header_length; 1997 int offset; 1998 ssize_t len; 1999 netstack_t *ns = ipst->ips_netstack; 2000 ipsec_stack_t *ipss = ns->netstack_ipsec; 2001 ill_t *rill = ira->ira_rill; 2002 2003 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 2004 2005 /* 2006 * We repeat this as we parse over destination options header and 2007 * fragment headers (earlier we've handled any hop-by-hop options 2008 * header.) 2009 * We update ira_protocol and ira_ip_hdr_length as we skip past 2010 * the intermediate headers; they already point past any 2011 * hop-by-hop header. 2012 */ 2013 repeat: 2014 protocol = ira->ira_protocol; 2015 ip_hdr_length = ira->ira_ip_hdr_length; 2016 2017 /* 2018 * Time for IPP once we've done reassembly and IPsec. 2019 * We skip this for loopback packets since we don't do IPQoS 2020 * on loopback. 2021 */ 2022 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2023 !(iraflags & IRAF_LOOPBACK) && 2024 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH && 2025 protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING && 2026 protocol != IPPROTO_FRAGMENT)) { 2027 /* 2028 * Use the interface on which the packet arrived - not where 2029 * the IP address is hosted. 2030 */ 2031 /* ip_process translates an IS_UNDER_IPMP */ 2032 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2033 if (mp == NULL) { 2034 /* ip_drop_packet and MIB done */ 2035 return; 2036 } 2037 } 2038 2039 /* Determine the minimum required size of the upper-layer header */ 2040 /* Need to do this for at least the set of ULPs that TX handles. 
*/ 2041 switch (protocol) { 2042 case IPPROTO_TCP: 2043 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2044 break; 2045 case IPPROTO_SCTP: 2046 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2047 break; 2048 case IPPROTO_UDP: 2049 min_ulp_header_length = UDPH_SIZE; 2050 break; 2051 case IPPROTO_ICMP: 2052 case IPPROTO_ICMPV6: 2053 min_ulp_header_length = ICMPH_SIZE; 2054 break; 2055 case IPPROTO_FRAGMENT: 2056 case IPPROTO_DSTOPTS: 2057 case IPPROTO_ROUTING: 2058 min_ulp_header_length = MIN_EHDR_LEN; 2059 break; 2060 default: 2061 min_ulp_header_length = 0; 2062 break; 2063 } 2064 /* Make sure we have the min ULP header length */ 2065 len = mp->b_wptr - rptr; 2066 if (len < ip_hdr_length + min_ulp_header_length) { 2067 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) 2068 goto pkt_too_short; 2069 2070 IP6_STAT(ipst, ip6_recv_pullup); 2071 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2072 ira); 2073 if (ip6h == NULL) 2074 goto discard; 2075 len = mp->b_wptr - rptr; 2076 } 2077 2078 /* 2079 * If trusted extensions then determine the zoneid and TX specific 2080 * ira_flags. 2081 */ 2082 if (iraflags & IRAF_SYSTEM_LABELED) { 2083 /* This can update ira->ira_flags and ira->ira_zoneid */ 2084 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira); 2085 iraflags = ira->ira_flags; 2086 } 2087 2088 2089 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2090 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2091 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) { 2092 /* Bad checksum. Stats are already incremented */ 2093 ip_drop_input("Bad ULP checksum", mp, ill); 2094 freemsg(mp); 2095 return; 2096 } 2097 /* IRAF_SCTP_CSUM_ERR could have been set */ 2098 iraflags = ira->ira_flags; 2099 } 2100 switch (protocol) { 2101 case IPPROTO_TCP: 2102 /* For TCP, discard multicast packets. 
*/ 2103 if (iraflags & IRAF_MULTIBROADCAST) 2104 goto discard; 2105 2106 /* First mblk contains IP+TCP headers per above check */ 2107 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2108 2109 /* TCP options present? */ 2110 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4; 2111 if (offset != 5) { 2112 if (offset < 5) 2113 goto discard; 2114 2115 /* 2116 * There must be TCP options. 2117 * Make sure we can grab them. 2118 */ 2119 offset <<= 2; 2120 offset += ip_hdr_length; 2121 if (len < offset) { 2122 if (ira->ira_pktlen < offset) 2123 goto pkt_too_short; 2124 2125 IP6_STAT(ipst, ip6_recv_pullup); 2126 ip6h = ip_pullup(mp, offset, ira); 2127 if (ip6h == NULL) 2128 goto discard; 2129 len = mp->b_wptr - rptr; 2130 } 2131 } 2132 2133 /* 2134 * Pass up a squeue hint to tcp. 2135 * If ira_sqp is already set (this is loopback) we leave it 2136 * alone. 2137 */ 2138 if (ira->ira_sqp == NULL) { 2139 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2140 } 2141 2142 /* Look for AF_INET or AF_INET6 that matches */ 2143 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length, 2144 ira, ipst); 2145 if (connp == NULL) { 2146 /* Send the TH_RST */ 2147 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2148 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2149 return; 2150 } 2151 if (connp->conn_incoming_ifindex != 0 && 2152 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2153 CONN_DEC_REF(connp); 2154 2155 /* Send the TH_RST */ 2156 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2157 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2158 return; 2159 } 2160 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2161 (iraflags & IRAF_IPSEC_SECURE)) { 2162 mp = ipsec_check_inbound_policy(mp, connp, 2163 NULL, ip6h, ira); 2164 if (mp == NULL) { 2165 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2166 /* Note that mp is NULL */ 2167 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2168 CONN_DEC_REF(connp); 2169 return; 2170 } 2171 } 2172 /* Found a client; up it goes */ 2173 
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2174 ira->ira_ill = ira->ira_rill = NULL; 2175 if (!IPCL_IS_TCP(connp)) { 2176 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2177 (connp->conn_recv)(connp, mp, NULL, ira); 2178 CONN_DEC_REF(connp); 2179 ira->ira_ill = ill; 2180 ira->ira_rill = rill; 2181 return; 2182 } 2183 2184 /* 2185 * We do different processing whether called from 2186 * ip_accept_tcp and we match the target, don't match 2187 * the target, and when we are called by ip_input. 2188 */ 2189 if (iraflags & IRAF_TARGET_SQP) { 2190 if (ira->ira_target_sqp == connp->conn_sqp) { 2191 mblk_t *attrmp; 2192 2193 attrmp = ip_recv_attr_to_mblk(ira); 2194 if (attrmp == NULL) { 2195 BUMP_MIB(ill->ill_ip_mib, 2196 ipIfStatsInDiscards); 2197 ip_drop_input("ipIfStatsInDiscards", 2198 mp, ill); 2199 freemsg(mp); 2200 CONN_DEC_REF(connp); 2201 } else { 2202 SET_SQUEUE(attrmp, connp->conn_recv, 2203 connp); 2204 attrmp->b_cont = mp; 2205 ASSERT(ira->ira_target_sqp_mp == NULL); 2206 ira->ira_target_sqp_mp = attrmp; 2207 /* 2208 * Conn ref release when drained from 2209 * the squeue. 2210 */ 2211 } 2212 } else { 2213 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2214 connp->conn_recv, connp, ira, SQ_FILL, 2215 SQTAG_IP6_TCP_INPUT); 2216 } 2217 } else { 2218 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2219 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 2220 } 2221 ira->ira_ill = ill; 2222 ira->ira_rill = rill; 2223 return; 2224 2225 case IPPROTO_SCTP: { 2226 sctp_hdr_t *sctph; 2227 uint32_t ports; /* Source and destination ports */ 2228 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2229 2230 /* For SCTP, discard multicast packets. */ 2231 if (iraflags & IRAF_MULTIBROADCAST) 2232 goto discard; 2233 2234 /* 2235 * Since there is no SCTP h/w cksum support yet, just 2236 * clear the flag. 
2237 */ 2238 DB_CKSUMFLAGS(mp) = 0; 2239 2240 /* Length ensured above */ 2241 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2242 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2243 2244 /* get the ports */ 2245 ports = *(uint32_t *)&sctph->sh_sport; 2246 2247 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2248 /* 2249 * No potential sctp checksum errors go to the Sun 2250 * sctp stack however they might be Adler-32 summed 2251 * packets a userland stack bound to a raw IP socket 2252 * could reasonably use. Note though that Adler-32 is 2253 * a long deprecated algorithm and customer sctp 2254 * networks should eventually migrate to CRC-32 at 2255 * which time this facility should be removed. 2256 */ 2257 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2258 return; 2259 } 2260 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports, 2261 ira, mp, sctps, sctph); 2262 if (connp == NULL) { 2263 /* Check for raw socket or OOTB handling */ 2264 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2265 return; 2266 } 2267 if (connp->conn_incoming_ifindex != 0 && 2268 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2269 CONN_DEC_REF(connp); 2270 2271 /* Check for raw socket or OOTB handling */ 2272 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2273 return; 2274 } 2275 2276 /* Found a client; up it goes */ 2277 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2278 sctp_input(connp, NULL, ip6h, mp, ira); 2279 /* sctp_input does a rele of the sctp_t */ 2280 return; 2281 } 2282 2283 case IPPROTO_UDP: 2284 /* First mblk contains IP+UDP headers as checked above */ 2285 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2286 2287 if (iraflags & IRAF_MULTIBROADCAST) { 2288 uint16_t *up; /* Pointer to ports in ULP header */ 2289 2290 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 2291 2292 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira); 2293 return; 2294 } 2295 2296 /* Look for AF_INET or AF_INET6 that matches */ 2297 connp = ipcl_classify_v6(mp, IPPROTO_UDP, 
ip_hdr_length, 2298 ira, ipst); 2299 if (connp == NULL) { 2300 no_udp_match: 2301 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP]. 2302 connf_head != NULL) { 2303 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2304 ip_fanout_proto_v6(mp, ip6h, ira); 2305 } else { 2306 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2307 ICMP6_DST_UNREACH_NOPORT, ira); 2308 } 2309 return; 2310 2311 } 2312 if (connp->conn_incoming_ifindex != 0 && 2313 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2314 CONN_DEC_REF(connp); 2315 goto no_udp_match; 2316 } 2317 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2318 !canputnext(connp->conn_rq)) { 2319 CONN_DEC_REF(connp); 2320 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2321 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2322 freemsg(mp); 2323 return; 2324 } 2325 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2326 (iraflags & IRAF_IPSEC_SECURE)) { 2327 mp = ipsec_check_inbound_policy(mp, connp, 2328 NULL, ip6h, ira); 2329 if (mp == NULL) { 2330 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2331 /* Note that mp is NULL */ 2332 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2333 CONN_DEC_REF(connp); 2334 return; 2335 } 2336 } 2337 2338 /* Found a client; up it goes */ 2339 IP6_STAT(ipst, ip6_udp_fannorm); 2340 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2341 ira->ira_ill = ira->ira_rill = NULL; 2342 (connp->conn_recv)(connp, mp, NULL, ira); 2343 CONN_DEC_REF(connp); 2344 ira->ira_ill = ill; 2345 ira->ira_rill = rill; 2346 return; 2347 default: 2348 break; 2349 } 2350 2351 /* 2352 * Clear hardware checksumming flag as it is currently only 2353 * used by TCP and UDP. 
2354 */ 2355 DB_CKSUMFLAGS(mp) = 0; 2356 2357 switch (protocol) { 2358 case IPPROTO_ICMPV6: 2359 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 2360 2361 /* Check variable for testing applications */ 2362 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 2363 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill); 2364 freemsg(mp); 2365 return; 2366 } 2367 /* 2368 * We need to accomodate icmp messages coming in clear 2369 * until we get everything secure from the wire. If 2370 * icmp_accept_clear_messages is zero we check with 2371 * the global policy and act accordingly. If it is 2372 * non-zero, we accept the message without any checks. 2373 * But *this does not mean* that this will be delivered 2374 * to RAW socket clients. By accepting we might send 2375 * replies back, change our MTU value etc., 2376 * but delivery to the ULP/clients depends on their 2377 * policy dispositions. 2378 */ 2379 if (ipst->ips_icmp_accept_clear_messages == 0) { 2380 mp = ipsec_check_global_policy(mp, NULL, 2381 NULL, ip6h, ira, ns); 2382 if (mp == NULL) 2383 return; 2384 } 2385 2386 /* 2387 * On a labeled system, we have to check whether the zone 2388 * itself is permitted to receive raw traffic. 2389 */ 2390 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2391 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2392 BUMP_MIB(ill->ill_icmp6_mib, 2393 ipv6IfIcmpInErrors); 2394 ip_drop_input("tsol_can_accept_raw", mp, ill); 2395 freemsg(mp); 2396 return; 2397 } 2398 } 2399 2400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2401 mp = icmp_inbound_v6(mp, ira); 2402 if (mp == NULL) { 2403 /* No need to pass to RAW sockets */ 2404 return; 2405 } 2406 break; 2407 2408 case IPPROTO_DSTOPTS: { 2409 ip6_dest_t *desthdr; 2410 uint_t ehdrlen; 2411 uint8_t *optptr; 2412 2413 /* We already check for MIN_EHDR_LEN above */ 2414 2415 /* Check if AH is present and needs to be processed. 
*/ 2416 mp = ipsec_early_ah_v6(mp, ira); 2417 if (mp == NULL) 2418 return; 2419 2420 /* 2421 * Reinitialize pointers, as ipsec_early_ah_v6() does 2422 * complete pullups. We don't have to do more pullups 2423 * as a result. 2424 */ 2425 ip6h = (ip6_t *)mp->b_rptr; 2426 2427 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2428 goto pkt_too_short; 2429 2430 if (mp->b_cont != NULL && 2431 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2432 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2433 if (ip6h == NULL) 2434 goto discard; 2435 } 2436 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2437 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2438 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2439 goto pkt_too_short; 2440 if (mp->b_cont != NULL && 2441 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2442 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2443 if (ip6h == NULL) 2444 goto discard; 2445 2446 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2447 } 2448 optptr = (uint8_t *)&desthdr[1]; 2449 2450 /* 2451 * Update ira_ip_hdr_length to skip the destination header 2452 * when we repeat. 2453 */ 2454 ira->ira_ip_hdr_length += ehdrlen; 2455 2456 ira->ira_protocol = desthdr->ip6d_nxt; 2457 2458 /* 2459 * Note: XXX This code does not seem to make 2460 * distinction between Destination Options Header 2461 * being before/after Routing Header which can 2462 * happen if we are at the end of source route. 2463 * This may become significant in future. 2464 * (No real significant Destination Options are 2465 * defined/implemented yet ). 2466 */ 2467 switch (ip_process_options_v6(mp, ip6h, optptr, 2468 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) { 2469 case -1: 2470 /* 2471 * Packet has been consumed and any needed 2472 * ICMP errors sent. 
2473 */ 2474 return; 2475 case 0: 2476 /* No action needed continue */ 2477 break; 2478 case 1: 2479 /* 2480 * Unnexpected return value 2481 * (Router alert is a Hop-by-Hop option) 2482 */ 2483 #ifdef DEBUG 2484 panic("ip_fanout_v6: router " 2485 "alert hbh opt indication in dest opt"); 2486 /*NOTREACHED*/ 2487 #else 2488 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2489 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2490 freemsg(mp); 2491 return; 2492 #endif 2493 } 2494 goto repeat; 2495 } 2496 case IPPROTO_FRAGMENT: { 2497 ip6_frag_t *fraghdr; 2498 2499 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t)) 2500 goto pkt_too_short; 2501 2502 if (mp->b_cont != NULL && 2503 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) { 2504 ip6h = ip_pullup(mp, 2505 ip_hdr_length + sizeof (ip6_frag_t), ira); 2506 if (ip6h == NULL) 2507 goto discard; 2508 } 2509 2510 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length); 2511 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 2512 2513 /* 2514 * Invoke the CGTP (multirouting) filtering module to 2515 * process the incoming packet. Packets identified as 2516 * duplicates must be discarded. Filtering is active 2517 * only if the ip_cgtp_filter ndd variable is 2518 * non-zero. 2519 */ 2520 if (ipst->ips_ip_cgtp_filter && 2521 ipst->ips_ip_cgtp_filter_ops != NULL) { 2522 int cgtp_flt_pkt; 2523 netstackid_t stackid; 2524 2525 stackid = ipst->ips_netstack->netstack_stackid; 2526 2527 /* 2528 * CGTP and IPMP are mutually exclusive so 2529 * phyint_ifindex is fine here. 
2530 */ 2531 cgtp_flt_pkt = 2532 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 2533 stackid, ill->ill_phyint->phyint_ifindex, 2534 ip6h, fraghdr); 2535 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 2536 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 2537 freemsg(mp); 2538 return; 2539 } 2540 } 2541 2542 /* 2543 * Update ip_hdr_length to skip the frag header 2544 * ip_input_fragment_v6 will determine the extension header 2545 * prior to the fragment header and update its nexthdr value, 2546 * and also set ira_protocol to the nexthdr that follows the 2547 * completed fragment. 2548 */ 2549 ip_hdr_length += sizeof (ip6_frag_t); 2550 2551 /* 2552 * Make sure we have ira_l2src before we loose the original 2553 * mblk 2554 */ 2555 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 2556 ip_setl2src(mp, ira, ira->ira_rill); 2557 2558 mp = ip_input_fragment_v6(mp, ip6h, fraghdr, 2559 ira->ira_pktlen - ip_hdr_length, ira); 2560 if (mp == NULL) { 2561 /* Reassembly is still pending */ 2562 return; 2563 } 2564 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 2565 2566 /* 2567 * The mblk chain has the frag header removed and 2568 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the 2569 * IP header has been updated to refleact the result. 2570 */ 2571 ip6h = (ip6_t *)mp->b_rptr; 2572 ip_hdr_length = ira->ira_ip_hdr_length; 2573 goto repeat; 2574 } 2575 case IPPROTO_HOPOPTS: 2576 /* 2577 * Illegal header sequence. 2578 * (Hop-by-hop headers are processed above 2579 * and required to immediately follow IPv6 header) 2580 */ 2581 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2582 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2583 return; 2584 2585 case IPPROTO_ROUTING: { 2586 uint_t ehdrlen; 2587 ip6_rthdr_t *rthdr; 2588 2589 /* Check if AH is present and needs to be processed. */ 2590 mp = ipsec_early_ah_v6(mp, ira); 2591 if (mp == NULL) 2592 return; 2593 2594 /* 2595 * Reinitialize pointers, as ipsec_early_ah_v6() does 2596 * complete pullups. 
We don't have to do more pullups 2597 * as a result. 2598 */ 2599 ip6h = (ip6_t *)mp->b_rptr; 2600 2601 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2602 goto pkt_too_short; 2603 2604 if (mp->b_cont != NULL && 2605 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2606 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2607 if (ip6h == NULL) 2608 goto discard; 2609 } 2610 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2611 protocol = ira->ira_protocol = rthdr->ip6r_nxt; 2612 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2613 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2614 goto pkt_too_short; 2615 if (mp->b_cont != NULL && 2616 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2617 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2618 if (ip6h == NULL) 2619 goto discard; 2620 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2621 } 2622 if (rthdr->ip6r_segleft != 0) { 2623 /* Not end of source route */ 2624 if (ira->ira_flags & 2625 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 2626 BUMP_MIB(ill->ill_ip_mib, 2627 ipIfStatsForwProhibits); 2628 ip_drop_input("ipIfStatsInForwProhibits", 2629 mp, ill); 2630 freemsg(mp); 2631 return; 2632 } 2633 ip_process_rthdr(mp, ip6h, rthdr, ira); 2634 return; 2635 } 2636 ira->ira_ip_hdr_length += ehdrlen; 2637 goto repeat; 2638 } 2639 2640 case IPPROTO_AH: 2641 case IPPROTO_ESP: { 2642 /* 2643 * Fast path for AH/ESP. 
2644 */ 2645 netstack_t *ns = ipst->ips_netstack; 2646 ipsec_stack_t *ipss = ns->netstack_ipsec; 2647 2648 IP_STAT(ipst, ipsec_proto_ahesp); 2649 2650 if (!ipsec_loaded(ipss)) { 2651 ip_proto_not_sup(mp, ira); 2652 return; 2653 } 2654 2655 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2656 /* select inbound SA and have IPsec process the pkt */ 2657 if (protocol == IPPROTO_ESP) { 2658 esph_t *esph; 2659 2660 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2661 if (mp == NULL) 2662 return; 2663 2664 ASSERT(esph != NULL); 2665 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2666 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2667 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2668 2669 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2670 ira); 2671 } else { 2672 ah_t *ah; 2673 2674 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2675 if (mp == NULL) 2676 return; 2677 2678 ASSERT(ah != NULL); 2679 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2680 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2681 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2682 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2683 ira); 2684 } 2685 2686 if (mp == NULL) { 2687 /* 2688 * Either it failed or is pending. In the former case 2689 * ipIfStatsInDiscards was increased. 2690 */ 2691 return; 2692 } 2693 /* we're done with IPsec processing, send it up */ 2694 ip_input_post_ipsec(mp, ira); 2695 return; 2696 } 2697 case IPPROTO_NONE: 2698 /* All processing is done. Count as "delivered". 
*/ 2699 freemsg(mp); 2700 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2701 return; 2702 2703 case IPPROTO_ENCAP: 2704 case IPPROTO_IPV6: 2705 /* iptun will verify trusted label */ 2706 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length, 2707 ira, ipst); 2708 if (connp != NULL) { 2709 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2710 ira->ira_ill = ira->ira_rill = NULL; 2711 connp->conn_recv(connp, mp, NULL, ira); 2712 CONN_DEC_REF(connp); 2713 ira->ira_ill = ill; 2714 ira->ira_rill = rill; 2715 return; 2716 } 2717 /* FALLTHRU */ 2718 default: 2719 /* 2720 * On a labeled system, we have to check whether the zone 2721 * itself is permitted to receive raw traffic. 2722 */ 2723 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2724 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2725 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2726 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2727 freemsg(mp); 2728 return; 2729 } 2730 } 2731 break; 2732 } 2733 2734 /* 2735 * The above input functions may have returned the pulled up message. 2736 * So ip6h need to be reinitialized. 2737 */ 2738 ip6h = (ip6_t *)mp->b_rptr; 2739 ira->ira_protocol = protocol; 2740 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) { 2741 /* No user-level listener for these packets packets */ 2742 ip_proto_not_sup(mp, ira); 2743 return; 2744 } 2745 2746 /* 2747 * Handle fanout to raw sockets. There 2748 * can be more than one stream bound to a particular 2749 * protocol. When this is the case, each one gets a copy 2750 * of any incoming packets. 2751 */ 2752 ASSERT(ira->ira_protocol == protocol); 2753 ip_fanout_proto_v6(mp, ip6h, ira); 2754 return; 2755 2756 pkt_too_short: 2757 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2758 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2759 freemsg(mp); 2760 return; 2761 2762 discard: 2763 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2764 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2765 freemsg(mp); 2766 #undef rptr 2767 } 2768