1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. 
*/ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/dlpi.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/strsubr.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #include <sys/zone.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/xti_inet.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/debug.h> 44 #include <sys/kobj.h> 45 #include <sys/modctl.h> 46 #include <sys/atomic.h> 47 #include <sys/policy.h> 48 #include <sys/priv.h> 49 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/kmem.h> 53 #include <sys/sdt.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_arp.h> 60 #include <net/route.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <net/if_dl.h> 64 65 #include <inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/mib2.h> 68 #include <inet/nd.h> 69 #include <inet/arp.h> 70 #include <inet/snmpcom.h> 71 #include <inet/kstatcom.h> 72 73 #include <netinet/igmp_var.h> 74 #include <netinet/ip6.h> 75 #include <netinet/icmp6.h> 76 #include <netinet/sctp.h> 77 78 #include <inet/ip.h> 79 #include <inet/ip_impl.h> 80 #include <inet/ip6.h> 81 #include <inet/ip6_asp.h> 82 #include <inet/optcom.h> 83 #include <inet/tcp.h> 84 #include <inet/tcp_impl.h> 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_ftable.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <inet/ip_listutils.h> 92 #include <netinet/igmp.h> 93 #include <netinet/ip_mroute.h> 94 #include <inet/ipp_common.h> 95 96 #include <net/pfkeyv2.h> 97 #include <inet/sadb.h> 98 #include <inet/ipsec_impl.h> 99 #include <inet/ipdrop.h> 100 #include <inet/ip_netinfo.h> 101 #include <inet/ilb_ip.h> 102 #include <sys/squeue_impl.h> 103 #include <sys/squeue.h> 104 105 #include 
<sys/ethernet.h> 106 #include <net/if_types.h> 107 #include <sys/cpuvar.h> 108 109 #include <ipp/ipp.h> 110 #include <ipp/ipp_impl.h> 111 #include <ipp/ipgpc/ipgpc.h> 112 113 #include <sys/pattr.h> 114 #include <inet/ipclassifier.h> 115 #include <inet/sctp_ip.h> 116 #include <inet/sctp/sctp_impl.h> 117 #include <inet/udp_impl.h> 118 #include <sys/sunddi.h> 119 120 #include <sys/tsol/label.h> 121 #include <sys/tsol/tnet.h> 122 123 #include <rpc/pmap_prot.h> 124 125 #ifdef DEBUG 126 extern boolean_t skip_sctp_cksum; 127 #endif 128 129 static void ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *); 130 131 static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *, 132 ip_recv_attr_t *); 133 134 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6) 135 136 /* 137 * Direct read side procedure capable of dealing with chains. GLDv3 based 138 * drivers call this function directly with mblk chains while STREAMS 139 * read side procedure ip_rput() calls this for single packet with ip_ring 140 * set to NULL to process one packet at a time. 141 * 142 * The ill will always be valid if this function is called directly from 143 * the driver. 144 * 145 * If ip_input_v6() is called from GLDv3: 146 * 147 * - This must be a non-VLAN IP stream. 148 * - 'mp' is either an untagged or a special priority-tagged packet. 149 * - Any VLAN tag that was in the MAC header has been stripped. 150 * 151 * If the IP header in packet is not 32-bit aligned, every message in the 152 * chain will be aligned before further operations. This is required on SPARC 153 * platform. 154 */ 155 void 156 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 157 struct mac_header_info_s *mhip) 158 { 159 (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL, 160 NULL); 161 } 162 163 /* 164 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves 165 * a chain of packets in the poll mode. 
The packets have gone through the 166 * data link processing but not IP processing. For performance and latency 167 * reasons, the squeue wants to process the chain in line instead of feeding 168 * it back via ip_input path. 169 * 170 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6 171 * will pass back any TCP packets matching the target sqp to 172 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by 173 * ip_input_v6 and ip_fanout_v6 as normal. 174 * The TCP packets that match the target squeue are returned to the caller 175 * as a b_next chain after each packet has been prepend with an mblk 176 * from ip_recv_attr_to_mblk. 177 */ 178 mblk_t * 179 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 180 mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 181 { 182 return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp, 183 last, cnt)); 184 } 185 186 /* 187 * Used by ip_input_v6 and ip_accept_tcp_v6 188 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is 189 * only used by ip_input_v6. 
/*
 * Common receive loop used by ip_input_v6 and ip_accept_tcp_v6.
 *
 * Walks the b_next chain starting at mp_chain, unlinking one packet at a
 * time, validating the fixed IPv6 header and handing each packet to the
 * ill's input function (ill_inputfn) together with a receive-attribute
 * structure that lives on this function's stack.
 *
 * Arguments:
 *   ill         receiving interface; must not be NULL.
 *   ip_ring     rx ring; stored in ira_ring and used to derive the xmit hint.
 *   mp_chain    b_next-linked chain of packets; must not be NULL.
 *   mhip        MAC header info, or NULL.  Only used by ip_input_v6.  It is
 *               treated as valid for the first packet only and is cleared
 *               after each iteration.
 *   target_sqp  squeue of interest, or NULL.  Only used by ip_accept_tcp_v6.
 *   last, cnt   out-parameters for the accepted chain's tail and length.
 *               Only used by ip_accept_tcp_v6, and only written when at
 *               least one packet was accepted.
 *
 * Returns the head of the b_next chain of packets handed back through
 * ira_target_sqp_mp, or NULL if there were none.
 */
mblk_t *
ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ip6_t		*ip6h;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		/* Any other destination type leaves the L2DST flags clear. */
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ip6addr = ipv6_all_zeros;

	/*
	 * Loop over b_next.  Each packet is unlinked from the chain before
	 * it is processed, so mp_chain always names the unprocessed rest.
	 */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks.  We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
				continue;
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ip6h = (ip6_t *)mp->b_rptr;
		if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
			mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
			if (mp == NULL)
				continue;
			ip6h = (ip6_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}
		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 */
		chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);

		/* Reset the per-packet receive attributes. */
		iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		/* Length is taken from the header's payload length field. */
		iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v6
		 *	ill_input_short_v6
		 * The former is used in the case of TX. See ill_set_inputfn().
		 * The initial nexthop is the packet's own destination.
		 */
		(*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted b_next chain. */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	/* Nothing accepted; *last and *cnt are left untouched. */
	return (NULL);
}
See ill_set_inputfn(). 354 */ 355 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 356 357 /* Any references to clean up? No hold on ira_ill */ 358 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 359 ira_cleanup(&iras, B_FALSE); 360 361 if (iras.ira_target_sqp_mp != NULL) { 362 /* Better be called from ip_accept_tcp */ 363 ASSERT(target_sqp != NULL); 364 365 /* Found one packet to accept */ 366 mp = iras.ira_target_sqp_mp; 367 iras.ira_target_sqp_mp = NULL; 368 ASSERT(ip_recv_attr_is_mblk(mp)); 369 370 if (atail != NULL) 371 atail->b_next = mp; 372 else 373 ahead = mp; 374 atail = mp; 375 acnt++; 376 mp = NULL; 377 } 378 /* mhip might point into 1st packet in the chain. */ 379 iras.ira_mhip = NULL; 380 } 381 /* Any remaining references to the route cache? */ 382 if (rtc.rtc_ire != NULL) { 383 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 384 ire_refrele(rtc.rtc_ire); 385 } 386 387 if (ahead != NULL) { 388 /* Better be called from ip_accept_tcp */ 389 ASSERT(target_sqp != NULL); 390 *last = atail; 391 *cnt = acnt; 392 return (ahead); 393 } 394 395 return (NULL); 396 } 397 398 /* 399 * This input function is used when 400 * - is_system_labeled() 401 * 402 * Note that for IPv6 CGTP filtering is handled only when receiving fragment 403 * headers, and RSVP uses router alert options, thus we don't need anything 404 * extra for them. 405 */ 406 void 407 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 408 ip_recv_attr_t *ira, rtc_t *rtc) 409 { 410 ip6_t *ip6h = (ip6_t *)iph_arg; 411 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg; 412 ill_t *ill = ira->ira_ill; 413 414 ASSERT(ira->ira_tsl == NULL); 415 416 /* 417 * Attach any necessary label information to 418 * this packet 419 */ 420 if (is_system_labeled()) { 421 ira->ira_flags |= IRAF_SYSTEM_LABELED; 422 423 /* 424 * This updates ira_cred, ira_tsl and ira_free_flags based 425 * on the label. 
426 */ 427 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) { 428 if (ip6opt_ls != 0) 429 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 430 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 431 ip_drop_input("ipIfStatsInDiscards", mp, ill); 432 freemsg(mp); 433 return; 434 } 435 /* Note that ira_tsl can be NULL here. */ 436 437 /* tsol_get_pkt_label sometimes does pullupmsg */ 438 ip6h = (ip6_t *)mp->b_rptr; 439 } 440 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc); 441 } 442 443 /* 444 * Check for IPv6 addresses that should not appear on the wire 445 * as either source or destination. 446 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have 447 * to revisit the IPv4-mapped part. 448 */ 449 static boolean_t 450 ip6_bad_address(in6_addr_t *addr, boolean_t is_src) 451 { 452 if (IN6_IS_ADDR_V4MAPPED(addr)) { 453 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr")); 454 return (B_TRUE); 455 } 456 if (IN6_IS_ADDR_LOOPBACK(addr)) { 457 ip1dbg(("ip_input_v6: pkt with loopback addr")); 458 return (B_TRUE); 459 } 460 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) { 461 /* 462 * having :: in the src is ok: it's used for DAD. 463 */ 464 ip1dbg(("ip_input_v6: pkt with unspecified addr")); 465 return (B_TRUE); 466 } 467 return (B_FALSE); 468 } 469 470 /* 471 * Routing lookup for IPv6 link-locals. 472 * First we look on the inbound interface, then we check for IPMP and 473 * look on the upper interface. 474 * We update ira_ruifindex if we find the IRE on the upper interface. 
475 */ 476 static ire_t * 477 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira, 478 boolean_t allocate, ip_stack_t *ipst) 479 { 480 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL; 481 ire_t *ire; 482 483 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop)); 484 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 485 match_flags, allocate, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 486 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 487 !IS_UNDER_IPMP(ill)) 488 return (ire); 489 490 /* 491 * When we are using IMP we need to look for an IRE on both the 492 * under and upper interfaces since there are different 493 * link-local addresses for the under and upper. 494 */ 495 ill = ipmp_ill_hold_ipmp_ill(ill); 496 if (ill == NULL) 497 return (ire); 498 499 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 500 501 ire_refrele(ire); 502 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl, 503 match_flags, allocate, ira->ira_xmit_hint, ipst, NULL, NULL, NULL); 504 ill_refrele(ill); 505 return (ire); 506 } 507 508 /* 509 * This is the tail-end of the full receive side packet handling. 510 * It can be used directly when the configuration is simple. 511 */ 512 void 513 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg, 514 ip_recv_attr_t *ira, rtc_t *rtc) 515 { 516 ire_t *ire; 517 ill_t *ill = ira->ira_ill; 518 ip_stack_t *ipst = ill->ill_ipst; 519 uint_t pkt_len; 520 ssize_t len; 521 ip6_t *ip6h = (ip6_t *)iph_arg; 522 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg; 523 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 524 #define rptr ((uchar_t *)ip6h) 525 526 ASSERT(DB_TYPE(mp) == M_DATA); 527 528 /* 529 * Check for source/dest being a bad address: loopback, any, or 530 * v4mapped. All of them start with a 64 bits of zero. 
/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 *
 * In order, it: rejects bad source/destination addresses, checks the
 * packet length against ira_pktlen, runs the physical-in firewall hooks and
 * the observability hook, verifies the version field, parses a leading
 * hop-by-hop options header, consults the load balancer (ILB), selects an
 * IRE for the nexthop (using the one-element route cache when the system is
 * not labeled) and finally calls that IRE's ire_recvfn.
 *
 * Arguments:
 *   mp           the packet (M_DATA); consumed on every path.
 *   iph_arg      pointer to the IPv6 header (ip6_t *).
 *   nexthop_arg  pointer to the initial nexthop; copied on entry, so later
 *                changes to the packet do not affect it.
 *   ira          receive attributes; ira_pktlen, ira_ip_hdr_length,
 *                ira_protocol and ira_flags may be updated here.
 *   rtc          one-element route cache; on return it holds the IRE used
 *                for this packet.
 *
 * Note that 'rptr' is a macro over ip6h, so it follows ip6h whenever ip6h
 * is re-derived after a pullup.
 */
void
ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ip6_t		*ip6h = (ip6_t *)iph_arg;
	in6_addr_t	nexthop = *(in6_addr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
#define	rptr	((uchar_t *)ip6h)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * Check for source/dest being a bad address: loopback, any, or
	 * v4mapped. All of them start with 64 bits of zero, so that cheap
	 * test gates the full check.
	 */
	if (ip6h->ip6_src.s6_addr32[0] == 0 &&
	    ip6h->ip6_src.s6_addr32[1] == 0) {
		if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
			ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}
	if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
	    ip6h->ip6_dst.s6_addr32[1] == 0) {
		if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
			ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/* Bytes in the first mblk from the start of the IPv6 header. */
	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
		if (mp == NULL)
			return;
		ip6h = (ip6_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
	    int, 0);
	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip6__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ip6_t *, ip6h, mblk_t *, mp);

	if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		in6_addr_t orig_dst = ip6h->ip6_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS6(ipst->ips_ip6_physical_in_event,
		    ipst->ips_ipv6firewall_physical_in,
		    ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);

		/* mp is NULL here when the hook left us no packet. */
		if (mp == NULL)
			return;

		/* The length could have changed */
		ip6h = (ip6_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
			nexthop = ip6h->ip6_dst;

		/*
		 * An all-zero nexthop must not reach the route cache logic
		 * below, where all-zeros is the initial (unused) cache key.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}

	}

	if (ipst->ips_ip6_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
	    IPV6_DEFAULT_VERS_AND_FLOW) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
		ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * For IPv6 we update ira_ip_hdr_length and ira_protocol as
	 * we parse the headers, starting with the hop-by-hop options header.
	 */
	ira->ira_ip_hdr_length = IPV6_HDR_LEN;
	if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
		ip6_hbh_t	*hbhhdr;
		uint_t		ehdrlen;
		uint8_t		*optptr;

		/* First make sure the minimal extension header is there. */
		if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return;
		}
		if (mp->b_cont != NULL &&
		    rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
			ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
			if (ip6h == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
		}
		hbhhdr = (ip6_hbh_t *)&ip6h[1];
		/* ip6h_len counts 8-octet units beyond the first 8 octets. */
		ehdrlen = 8 * (hbhhdr->ip6h_len + 1);

		/* Then make sure the whole header, as it declares, is there. */
		if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return;
		}
		if (mp->b_cont != NULL &&
		    rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
			ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
			if (ip6h == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
			/* ip6h moved, so re-derive the header pointer. */
			hbhhdr = (ip6_hbh_t *)&ip6h[1];
		}

		/*
		 * Update ira_ip_hdr_length to skip the hop-by-hop header
		 * once we get to ip_fanout_v6
		 */
		ira->ira_ip_hdr_length += ehdrlen;
		ira->ira_protocol = hbhhdr->ip6h_nxt;

		/* Options follow the 2-byte (next header, length) prefix. */
		optptr = (uint8_t *)&hbhhdr[1];
		switch (ip_process_options_v6(mp, ip6h, optptr,
		    ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
		case -1:
			/*
			 * Packet has been consumed and any
			 * needed ICMP messages sent.
			 */
			return;
		case 0:
			/* no action needed */
			break;
		case 1:
			/*
			 * Known router alert. Make us handle it as local
			 * by setting the nexthop to be the all-host multicast
			 * address, and skip multicast membership filter by
			 * marking as a router alert.
			 */
			ira->ira_flags |= IRAF_ROUTER_ALERT;
			nexthop = ipv6_all_hosts_mcast;
			break;
		}
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 *
	 * We don't load balance IPv6 link-locals.
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
	    !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
		in6_addr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we just pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ip6h = (ip6_t *)mp->b_rptr;
		}
		lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
		    (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
		if (lb_ret == ILB_DROPPED) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
			ire = ire_multicast(ill);
		} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
			ire = ire_linklocal(&nexthop, ill, ira,
			    (ill->ill_flags & ILLF_ROUTER), ipst);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v6(&nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    (ill->ill_flags & ILLF_ROUTER), ira->ira_xmit_hint,
			    ipst, NULL, NULL, NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ip6addr = nexthop;
	} else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr)) {
		/* Use the route cache */
		ASSERT(rtc->rtc_ire != NULL);
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
			ire = ire_multicast(ill);
		} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
			ire = ire_linklocal(&nexthop, ill, ira,
			    (ill->ill_flags & ILLF_ROUTER), ipst);
		} else {
			ire = ire_route_recursive_dstonly_v6(&nexthop,
			    (ill->ill_flags & ILLF_ROUTER), ira->ira_xmit_hint,
			    ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ip6addr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v6 - for IRE_LOCAL
	 *	ire_recv_loopback_v6 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v6 - if RTF_MULTIRT
	 *	ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v6 - for IRE_MULTICAST
	 *	ire_recv_noaccept_v6 - for ire_noaccept ones
	 *	ire_recv_forward_v6 - for the rest.
	 *
	 * The IRE stays referenced by the route cache; the owner of the
	 * rtc releases it.
	 */

	(*ire->ire_recvfn)(ire, mp, ip6h, ira);
}
#undef rptr
812 */ 813 814 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 815 } 816 #undef rptr 817 818 /* 819 * ire_recvfn for IREs that need forwarding 820 */ 821 void 822 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 823 { 824 ip6_t *ip6h = (ip6_t *)iph_arg; 825 ill_t *ill = ira->ira_ill; 826 ip_stack_t *ipst = ill->ill_ipst; 827 iaflags_t iraflags = ira->ira_flags; 828 ill_t *dst_ill; 829 nce_t *nce; 830 uint32_t added_tx_len; 831 uint32_t mtu, iremtu; 832 833 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 834 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 835 ip_drop_input("l2 multicast not forwarded", mp, ill); 836 freemsg(mp); 837 return; 838 } 839 840 if (!(ill->ill_flags & ILLF_ROUTER)) { 841 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 842 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 843 freemsg(mp); 844 return; 845 } 846 847 /* 848 * Either ire_nce_capable or ire_dep_parent would be set for the IRE 849 * when it is found by ire_route_recursive, but that some other thread 850 * could have changed the routes with the effect of clearing 851 * ire_dep_parent. In that case we'd end up dropping the packet, or 852 * finding a new nce below. 853 * Get, allocate, or update the nce. 854 * We get a refhold on ire_nce_cache as a result of this to avoid races 855 * where ire_nce_cache is deleted. 856 * 857 * This ensures that we don't forward if the interface is down since 858 * ipif_down removes all the nces. 
859 */ 860 mutex_enter(&ire->ire_lock); 861 nce = ire->ire_nce_cache; 862 if (nce == NULL) { 863 /* Not yet set up - try to set one up */ 864 mutex_exit(&ire->ire_lock); 865 (void) ire_revalidate_nce(ire); 866 mutex_enter(&ire->ire_lock); 867 nce = ire->ire_nce_cache; 868 if (nce == NULL) { 869 mutex_exit(&ire->ire_lock); 870 /* The ire_dep_parent chain went bad, or no memory */ 871 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 872 ip_drop_input("No ire_dep_parent", mp, ill); 873 freemsg(mp); 874 return; 875 } 876 } 877 nce_refhold(nce); 878 mutex_exit(&ire->ire_lock); 879 880 if (nce->nce_is_condemned) { 881 nce_t *nce1; 882 883 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE); 884 nce_refrele(nce); 885 if (nce1 == NULL) { 886 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 887 ip_drop_input("No nce", mp, ill); 888 freemsg(mp); 889 return; 890 } 891 nce = nce1; 892 } 893 dst_ill = nce->nce_ill; 894 895 /* 896 * Unless we are forwarding, drop the packet. 897 * Unlike IPv4 we don't allow source routed packets out the same 898 * interface when we are not a router. 899 * Note that ill_forward_set() will set the ILLF_ROUTER on 900 * all the group members when it gets an ipmp-ill or under-ill. 901 */ 902 if (!(dst_ill->ill_flags & ILLF_ROUTER)) { 903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 904 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 905 freemsg(mp); 906 nce_refrele(nce); 907 return; 908 } 909 910 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) { 911 ire->ire_ib_pkt_count--; 912 /* 913 * Should only use IREs that are visible from the 914 * global zone for forwarding. 
915 * For IPv6 any source route would have already been 916 * advanced in ip_fanout_v6 917 */ 918 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL, 919 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR, 920 (ill->ill_flags & ILLF_ROUTER), ira->ira_xmit_hint, ipst, 921 NULL, NULL, NULL); 922 ire->ire_ib_pkt_count++; 923 (*ire->ire_recvfn)(ire, mp, ip6h, ira); 924 ire_refrele(ire); 925 nce_refrele(nce); 926 return; 927 } 928 /* 929 * ipIfStatsHCInForwDatagrams should only be increment if there 930 * will be an attempt to forward the packet, which is why we 931 * increment after the above condition has been checked. 932 */ 933 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 934 935 /* Initiate Read side IPPF processing */ 936 if (IPP_ENABLED(IPP_FWD_IN, ipst)) { 937 /* ip_process translates an IS_UNDER_IPMP */ 938 mp = ip_process(IPP_FWD_IN, mp, ill, ill); 939 if (mp == NULL) { 940 /* ip_drop_packet and MIB done */ 941 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred " 942 "during IPPF processing\n")); 943 nce_refrele(nce); 944 return; 945 } 946 } 947 948 DTRACE_PROBE4(ip6__forwarding__start, 949 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp); 950 951 if (HOOKS6_INTERESTED_FORWARDING(ipst)) { 952 int error; 953 954 FW_HOOKS(ipst->ips_ip6_forwarding_event, 955 ipst->ips_ipv6firewall_forwarding, 956 ill, dst_ill, ip6h, mp, mp, 0, ipst, error); 957 958 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 959 960 if (mp == NULL) { 961 nce_refrele(nce); 962 return; 963 } 964 /* 965 * Even if the destination was changed by the filter we use the 966 * forwarding decision that was made based on the address 967 * in ip_input. 968 */ 969 970 /* Might have changed */ 971 ip6h = (ip6_t *)mp->b_rptr; 972 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 973 } 974 975 /* Packet is being forwarded. Turning off hwcksum flag. 
*/ 976 DB_CKSUMFLAGS(mp) = 0; 977 978 /* 979 * Per RFC 3513 section 2.5.2, we must not forward packets with 980 * an unspecified source address. 981 * The loopback address check for both src and dst has already 982 * been checked in ip_input_v6 983 * In the future one can envision adding RPF checks using number 3. 984 */ 985 switch (ipst->ips_src_check) { 986 case 0: 987 break; 988 case 1: 989 case 2: 990 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) || 991 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 992 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 993 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 994 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 995 nce_refrele(nce); 996 freemsg(mp); 997 return; 998 } 999 break; 1000 } 1001 1002 /* 1003 * Check to see if we're forwarding the packet to a 1004 * different link from which it came. If so, check the 1005 * source and destination addresses since routers must not 1006 * forward any packets with link-local source or 1007 * destination addresses to other links. Otherwise (if 1008 * we're forwarding onto the same link), conditionally send 1009 * a redirect message. 1010 */ 1011 if (!IS_ON_SAME_LAN(dst_ill, ill)) { 1012 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 1013 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 1014 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1015 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1016 freemsg(mp); 1017 nce_refrele(nce); 1018 return; 1019 } 1020 /* TBD add site-local check at site boundary? */ 1021 } else if (ipst->ips_ipv6_send_redirects) { 1022 ip_send_potential_redirect_v6(mp, ip6h, ire, ira); 1023 } 1024 1025 added_tx_len = 0; 1026 if (iraflags & IRAF_SYSTEM_LABELED) { 1027 mblk_t *mp1; 1028 uint32_t old_pkt_len = ira->ira_pktlen; 1029 1030 /* 1031 * Check if it can be forwarded and add/remove 1032 * CIPSO options as needed. 
1033 */ 1034 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) { 1035 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1036 ip_drop_input("tsol_ip_forward", mp, ill); 1037 freemsg(mp); 1038 nce_refrele(nce); 1039 return; 1040 } 1041 /* 1042 * Size may have changed. Remember amount added in case 1043 * ip_fragment needs to send an ICMP too big. 1044 */ 1045 mp = mp1; 1046 ip6h = (ip6_t *)mp->b_rptr; 1047 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 1048 ira->ira_ip_hdr_length = IPV6_HDR_LEN; 1049 if (ira->ira_pktlen > old_pkt_len) 1050 added_tx_len = ira->ira_pktlen - old_pkt_len; 1051 } 1052 1053 mtu = dst_ill->ill_mtu; 1054 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu) 1055 mtu = iremtu; 1056 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len); 1057 nce_refrele(nce); 1058 return; 1059 1060 } 1061 1062 /* 1063 * Used for sending out unicast and multicast packets that are 1064 * forwarded. 1065 */ 1066 void 1067 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira, 1068 uint32_t mtu, uint32_t added_tx_len) 1069 { 1070 ill_t *dst_ill = nce->nce_ill; 1071 uint32_t pkt_len; 1072 iaflags_t iraflags = ira->ira_flags; 1073 ip_stack_t *ipst = dst_ill->ill_ipst; 1074 1075 if (ip6h->ip6_hops-- <= 1) { 1076 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards); 1077 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill); 1078 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE, 1079 ira); 1080 return; 1081 } 1082 1083 /* Initiate Write side IPPF processing before any fragmentation */ 1084 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1085 /* ip_process translates an IS_UNDER_IPMP */ 1086 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1087 if (mp == NULL) { 1088 /* ip_drop_packet and MIB done */ 1089 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \ 1090 " during IPPF processing\n")); 1091 return; 1092 } 1093 } 1094 1095 pkt_len = ira->ira_pktlen; 1096 1097 BUMP_MIB(dst_ill->ill_ip_mib, 
ipIfStatsHCOutForwDatagrams); 1098 1099 if (pkt_len > mtu) { 1100 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1101 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1102 if (iraflags & IRAF_SYSTEM_LABELED) { 1103 /* 1104 * Remove any CIPSO option added by 1105 * tsol_ip_forward, and make sure we report 1106 * a path MTU so that there 1107 * is room to add such a CIPSO option for future 1108 * packets. 1109 */ 1110 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6); 1111 } 1112 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira); 1113 return; 1114 } 1115 1116 ASSERT(pkt_len == 1117 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN); 1118 1119 if (iraflags & IRAF_LOOPBACK_COPY) { 1120 /* 1121 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg 1122 * is don't care 1123 */ 1124 (void) ip_postfrag_loopcheck(mp, nce, 1125 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL), 1126 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1127 } else { 1128 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL, 1129 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1130 } 1131 } 1132 1133 /* 1134 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1135 * which is what ire_route_recursive returns when there is no matching ire. 1136 * Send ICMP unreachable unless blackhole. 1137 */ 1138 void 1139 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1140 { 1141 ip6_t *ip6h = (ip6_t *)iph_arg; 1142 ill_t *ill = ira->ira_ill; 1143 ip_stack_t *ipst = ill->ill_ipst; 1144 1145 /* Would we have forwarded this packet if we had a route? 
*/ 1146 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1147 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1148 ip_drop_input("l2 multicast not forwarded", mp, ill); 1149 freemsg(mp); 1150 return; 1151 } 1152 1153 if (!(ill->ill_flags & ILLF_ROUTER)) { 1154 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1155 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1156 freemsg(mp); 1157 return; 1158 } 1159 /* 1160 * If we had a route this could have been forwarded. Count as such. 1161 * 1162 * ipIfStatsHCInForwDatagrams should only be increment if there 1163 * will be an attempt to forward the packet, which is why we 1164 * increment after the above condition has been checked. 1165 */ 1166 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1167 1168 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1169 1170 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1171 ipst); 1172 1173 if (ire->ire_flags & RTF_BLACKHOLE) { 1174 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1175 freemsg(mp); 1176 } else { 1177 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1178 1179 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, 1180 ira); 1181 } 1182 } 1183 1184 /* 1185 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1186 * VRRP when in noaccept mode. 1187 * We silently drop packets except for Neighbor Solicitations and 1188 * Neighbor Advertisements. 
1189 */ 1190 void 1191 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1192 ip_recv_attr_t *ira) 1193 { 1194 ip6_t *ip6h = (ip6_t *)iph_arg; 1195 ill_t *ill = ira->ira_ill; 1196 icmp6_t *icmp6; 1197 int ip_hdr_length; 1198 1199 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1200 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1201 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1202 freemsg(mp); 1203 return; 1204 } 1205 ip_hdr_length = ira->ira_ip_hdr_length; 1206 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 1207 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 1208 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 1209 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 1210 freemsg(mp); 1211 return; 1212 } 1213 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 1214 if (ip6h == NULL) { 1215 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1216 freemsg(mp); 1217 return; 1218 } 1219 } 1220 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 1221 1222 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT && 1223 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) { 1224 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1225 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1226 freemsg(mp); 1227 return; 1228 } 1229 ire_recv_local_v6(ire, mp, ip6h, ira); 1230 } 1231 1232 /* 1233 * ire_recvfn for IRE_MULTICAST. 1234 */ 1235 void 1236 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 1237 ip_recv_attr_t *ira) 1238 { 1239 ip6_t *ip6h = (ip6_t *)iph_arg; 1240 ill_t *ill = ira->ira_ill; 1241 1242 ASSERT(ire->ire_ill == ira->ira_ill); 1243 1244 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1245 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1246 1247 /* Tag for higher-level protocols */ 1248 ira->ira_flags |= IRAF_MULTICAST; 1249 1250 /* 1251 * So that we don't end up with dups, only one ill an IPMP group is 1252 * nominated to receive multicast traffic. 
1253 * If we have no cast_ill we are liberal and accept everything. 1254 */ 1255 if (IS_UNDER_IPMP(ill)) { 1256 ip_stack_t *ipst = ill->ill_ipst; 1257 1258 /* For an under ill_grp can change under lock */ 1259 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1260 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1261 ill->ill_grp->ig_cast_ill != NULL) { 1262 rw_exit(&ipst->ips_ill_g_lock); 1263 ip_drop_input("not on cast ill", mp, ill); 1264 freemsg(mp); 1265 return; 1266 } 1267 rw_exit(&ipst->ips_ill_g_lock); 1268 /* 1269 * We switch to the upper ill so that mrouter and hasmembers 1270 * can operate on upper here and in ip_input_multicast. 1271 */ 1272 ill = ipmp_ill_hold_ipmp_ill(ill); 1273 if (ill != NULL) { 1274 ASSERT(ill != ira->ira_ill); 1275 ASSERT(ire->ire_ill == ira->ira_ill); 1276 ira->ira_ill = ill; 1277 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1278 } else { 1279 ill = ira->ira_ill; 1280 } 1281 } 1282 1283 #ifdef notdef 1284 /* 1285 * Check if we are a multicast router - send ip_mforward a copy of 1286 * the packet. 1287 * Due to mroute_decap tunnels we consider forwarding packets even if 1288 * mrouted has not joined the allmulti group on this interface. 1289 */ 1290 if (ipst->ips_ip_g_mrouter) { 1291 int retval; 1292 1293 /* 1294 * Clear the indication that this may have hardware 1295 * checksum as we are not using it for forwarding. 1296 */ 1297 DB_CKSUMFLAGS(mp) = 0; 1298 1299 /* 1300 * ip_mforward helps us make these distinctions: If received 1301 * on tunnel and not IGMP, then drop. 1302 * If IGMP packet, then don't check membership 1303 * If received on a phyint and IGMP or PIM, then 1304 * don't check membership 1305 */ 1306 retval = ip_mforward_v6(mp, ira); 1307 /* ip_mforward updates mib variables if needed */ 1308 1309 switch (retval) { 1310 case 0: 1311 /* 1312 * pkt is okay and arrived on phyint. 
1313 */ 1314 break; 1315 case -1: 1316 /* pkt is mal-formed, toss it */ 1317 freemsg(mp); 1318 goto done; 1319 case 1: 1320 /* 1321 * pkt is okay and arrived on a tunnel 1322 * 1323 * If we are running a multicast router 1324 * we need to see all mld packets, which 1325 * are marked with router alerts. 1326 */ 1327 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1328 goto forus; 1329 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1330 freemsg(mp); 1331 goto done; 1332 } 1333 } 1334 #endif /* notdef */ 1335 1336 /* 1337 * If this was a router alert we skip the group membership check. 1338 */ 1339 if (ira->ira_flags & IRAF_ROUTER_ALERT) 1340 goto forus; 1341 1342 /* 1343 * Check if we have members on this ill. This is not necessary for 1344 * correctness because even if the NIC/GLD had a leaky filter, we 1345 * filter before passing to each conn_t. 1346 */ 1347 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) { 1348 /* 1349 * Nobody interested 1350 * 1351 * This might just be caused by the fact that 1352 * multiple IP Multicast addresses map to the same 1353 * link layer multicast - no need to increment counter! 1354 */ 1355 ip_drop_input("Multicast with no members", mp, ill); 1356 freemsg(mp); 1357 goto done; 1358 } 1359 forus: 1360 ip2dbg(("ire_recv_multicast_v6: multicast for us\n")); 1361 1362 /* 1363 * After reassembly and IPsec we will need to duplicate the 1364 * multicast packet for all matching zones on the ill. 1365 */ 1366 ira->ira_zoneid = ALL_ZONES; 1367 1368 /* Reassemble on the ill on which the packet arrived */ 1369 ip_input_local_v6(ire, mp, ip6h, ira); 1370 done: 1371 if (ill != ire->ire_ill) { 1372 ill_refrele(ill); 1373 ira->ira_ill = ire->ire_ill; 1374 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1375 } 1376 } 1377 1378 /* 1379 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1380 * Drop packets since we don't forward out multirt routes. 
1381 */ 1382 /* ARGSUSED */ 1383 void 1384 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1385 { 1386 ill_t *ill = ira->ira_ill; 1387 1388 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1389 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1390 freemsg(mp); 1391 } 1392 1393 /* 1394 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1395 * has rewritten the packet to have a loopback destination address (We 1396 * filter out packet with a loopback destination from arriving over the wire). 1397 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1398 */ 1399 void 1400 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1401 { 1402 ip6_t *ip6h = (ip6_t *)iph_arg; 1403 ill_t *ill = ira->ira_ill; 1404 ill_t *ire_ill = ire->ire_ill; 1405 1406 ira->ira_zoneid = GLOBAL_ZONEID; 1407 1408 /* Switch to the lo0 ill for further processing */ 1409 if (ire_ill != ill) { 1410 /* 1411 * Update ira_ill to be the ILL on which the IP address 1412 * is hosted. 1413 * No need to hold the ill since we have a hold on the ire 1414 */ 1415 ASSERT(ira->ira_ill == ira->ira_rill); 1416 ira->ira_ill = ire_ill; 1417 1418 ip_input_local_v6(ire, mp, ip6h, ira); 1419 1420 /* Restore */ 1421 ASSERT(ira->ira_ill == ire_ill); 1422 ira->ira_ill = ill; 1423 return; 1424 1425 } 1426 ip_input_local_v6(ire, mp, ip6h, ira); 1427 } 1428 1429 /* 1430 * ire_recvfn for IRE_LOCAL. 1431 */ 1432 void 1433 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1434 { 1435 ip6_t *ip6h = (ip6_t *)iph_arg; 1436 ill_t *ill = ira->ira_ill; 1437 ill_t *ire_ill = ire->ire_ill; 1438 1439 /* Make a note for DAD that this address is in use */ 1440 ire->ire_last_used_time = ddi_get_lbolt(); 1441 1442 /* Only target the IRE_LOCAL with the right zoneid. */ 1443 ira->ira_zoneid = ire->ire_zoneid; 1444 1445 /* 1446 * If the packet arrived on the wrong ill, we check that 1447 * this is ok. 
1448 * If it is, then we ensure that we do the reassembly on 1449 * the ill on which the address is hosted. We keep ira_rill as 1450 * the one on which the packet arrived, so that IP_PKTINFO and 1451 * friends can report this. 1452 */ 1453 if (ire_ill != ill) { 1454 ire_t *new_ire; 1455 1456 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill); 1457 if (new_ire == NULL) { 1458 /* Drop packet */ 1459 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1460 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1461 freemsg(mp); 1462 return; 1463 } 1464 /* 1465 * Update ira_ill to be the ILL on which the IP address 1466 * is hosted. No need to hold the ill since we have a 1467 * hold on the ire. Note that we do the switch even if 1468 * new_ire == ire (for IPMP, ire would be the one corresponding 1469 * to the IPMP ill). 1470 */ 1471 ASSERT(ira->ira_ill == ira->ira_rill); 1472 ira->ira_ill = new_ire->ire_ill; 1473 1474 /* ira_ruifindex tracks the upper for ira_rill */ 1475 if (IS_UNDER_IPMP(ill)) 1476 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1477 1478 ip_input_local_v6(new_ire, mp, ip6h, ira); 1479 1480 /* Restore */ 1481 ASSERT(ira->ira_ill == new_ire->ire_ill); 1482 ira->ira_ill = ill; 1483 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1484 1485 if (new_ire != ire) 1486 ire_refrele(new_ire); 1487 return; 1488 } 1489 1490 ip_input_local_v6(ire, mp, ip6h, ira); 1491 } 1492 1493 /* 1494 * Common function for packets arriving for the host. Handles 1495 * checksum verification, reassembly checks, etc. 1496 */ 1497 static void 1498 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1499 { 1500 iaflags_t iraflags = ira->ira_flags; 1501 1502 /* 1503 * For multicast we need some extra work before 1504 * we call ip_fanout_v6(), since in the case of shared-IP zones 1505 * we need to pretend that a packet arrived for each zoneid. 
1506 */ 1507 if (iraflags & IRAF_MULTICAST) { 1508 ip_input_multicast_v6(ire, mp, ip6h, ira); 1509 return; 1510 } 1511 ip_fanout_v6(mp, ip6h, ira); 1512 } 1513 1514 /* 1515 * Handle multiple zones which want to receive the same multicast packets 1516 * on this ill by delivering a packet to each of them. 1517 * 1518 * Note that for packets delivered to transports we could instead do this 1519 * as part of the fanout code, but since we need to handle icmp_inbound 1520 * it is simpler to have multicast work the same as IPv4 broadcast. 1521 * 1522 * The ip_fanout matching for multicast matches based on ilm independent of 1523 * zoneid since the zoneid restriction is applied when joining a multicast 1524 * group. 1525 */ 1526 /* ARGSUSED */ 1527 static void 1528 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1529 { 1530 ill_t *ill = ira->ira_ill; 1531 iaflags_t iraflags = ira->ira_flags; 1532 ip_stack_t *ipst = ill->ill_ipst; 1533 netstack_t *ns = ipst->ips_netstack; 1534 zoneid_t zoneid; 1535 mblk_t *mp1; 1536 ip6_t *ip6h1; 1537 1538 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1539 ASSERT(!IS_UNDER_IPMP(ill)); 1540 1541 /* 1542 * If we don't have more than one shared-IP zone, or if 1543 * there are no members in anything but the global zone, 1544 * then just set the zoneid and proceed. 1545 */ 1546 if (ns->netstack_numzones == 1 || 1547 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 1548 GLOBAL_ZONEID)) { 1549 ira->ira_zoneid = GLOBAL_ZONEID; 1550 1551 /* If sender didn't want this zone to receive it, drop */ 1552 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1553 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1554 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1555 freemsg(mp); 1556 return; 1557 } 1558 ip_fanout_v6(mp, ip6h, ira); 1559 return; 1560 } 1561 1562 /* 1563 * Here we loop over all zoneids that have members in the group 1564 * and deliver a packet to ip_fanout for each zoneid. 
1565 * 1566 * First find any members in the lowest numeric zoneid by looking for 1567 * first zoneid larger than -1 (ALL_ZONES). 1568 * We terminate the loop when we receive -1 (ALL_ZONES). 1569 */ 1570 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 1571 for (; zoneid != ALL_ZONES; 1572 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) { 1573 /* 1574 * Avoid an extra copymsg/freemsg by skipping global zone here 1575 * and doing that at the end. 1576 */ 1577 if (zoneid == GLOBAL_ZONEID) 1578 continue; 1579 1580 ira->ira_zoneid = zoneid; 1581 1582 /* If sender didn't want this zone to receive it, skip */ 1583 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1584 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1585 continue; 1586 1587 mp1 = copymsg(mp); 1588 if (mp1 == NULL) { 1589 /* Failed to deliver to one zone */ 1590 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1591 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1592 continue; 1593 } 1594 ip6h1 = (ip6_t *)mp1->b_rptr; 1595 ip_fanout_v6(mp1, ip6h1, ira); 1596 } 1597 1598 /* Do the main ire */ 1599 ira->ira_zoneid = GLOBAL_ZONEID; 1600 /* If sender didn't want this zone to receive it, drop */ 1601 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1602 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1603 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1604 freemsg(mp); 1605 } else { 1606 ip_fanout_v6(mp, ip6h, ira); 1607 } 1608 } 1609 1610 1611 /* 1612 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions 1613 * is in use. Updates ira_zoneid and ira_flags as a result. 1614 */ 1615 static void 1616 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length, 1617 ip_recv_attr_t *ira) 1618 { 1619 uint16_t *up; 1620 uint16_t lport; 1621 zoneid_t zoneid; 1622 1623 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1624 1625 /* 1626 * If the packet is unlabeled we might allow read-down 1627 * for MAC_EXEMPT. 
Below we clear this if it is a multi-level 1628 * port (MLP). 1629 * Note that ira_tsl can be NULL here. 1630 */ 1631 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 1632 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 1633 1634 if (ira->ira_zoneid != ALL_ZONES) 1635 return; 1636 1637 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 1638 1639 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 1640 switch (protocol) { 1641 case IPPROTO_TCP: 1642 case IPPROTO_SCTP: 1643 case IPPROTO_UDP: 1644 /* Caller ensures this */ 1645 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr); 1646 1647 /* 1648 * Only these transports support MLP. 1649 * We know their destination port numbers is in 1650 * the same place in the header. 1651 */ 1652 lport = up[1]; 1653 1654 /* 1655 * No need to handle exclusive-stack zones 1656 * since ALL_ZONES only applies to the shared IP instance. 1657 */ 1658 zoneid = tsol_mlp_findzone(protocol, lport); 1659 /* 1660 * If no shared MLP is found, tsol_mlp_findzone returns 1661 * ALL_ZONES. In that case, we assume it's SLP, and 1662 * search for the zone based on the packet label. 1663 * 1664 * If there is such a zone, we prefer to find a 1665 * connection in it. Otherwise, we look for a 1666 * MAC-exempt connection in any zone whose label 1667 * dominates the default label on the packet. 
1668 */ 1669 if (zoneid == ALL_ZONES) 1670 zoneid = tsol_attr_to_zoneid(ira); 1671 else 1672 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 1673 break; 1674 default: 1675 /* Handle shared address for other protocols */ 1676 zoneid = tsol_attr_to_zoneid(ira); 1677 break; 1678 } 1679 ira->ira_zoneid = zoneid; 1680 } 1681 1682 /* 1683 * Increment checksum failure statistics 1684 */ 1685 static void 1686 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 1687 { 1688 ip_stack_t *ipst = ill->ill_ipst; 1689 1690 switch (protocol) { 1691 case IPPROTO_TCP: 1692 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 1693 1694 if (hck_flags & HCK_FULLCKSUM) 1695 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err); 1696 else if (hck_flags & HCK_PARTIALCKSUM) 1697 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err); 1698 else 1699 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 1700 break; 1701 case IPPROTO_UDP: 1702 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1703 if (hck_flags & HCK_FULLCKSUM) 1704 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err); 1705 else if (hck_flags & HCK_PARTIALCKSUM) 1706 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err); 1707 else 1708 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 1709 break; 1710 case IPPROTO_ICMPV6: 1711 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 1712 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1713 break; 1714 default: 1715 ASSERT(0); 1716 break; 1717 } 1718 } 1719 1720 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */ 1721 uint32_t 1722 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira) 1723 { 1724 uint_t ulp_len; 1725 uint32_t cksum; 1726 uint8_t protocol = ira->ira_protocol; 1727 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1728 1729 #define iphs ((uint16_t *)ip6h) 1730 1731 switch (protocol) { 1732 case IPPROTO_TCP: 1733 ulp_len = ira->ira_pktlen - ip_hdr_length; 1734 1735 /* Protocol and length */ 1736 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 1737 /* IP addresses */ 1738 cksum += iphs[4] + iphs[5] + 
iphs[6] + iphs[7] + 1739 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1740 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1741 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1742 break; 1743 1744 case IPPROTO_UDP: { 1745 udpha_t *udpha; 1746 1747 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1748 1749 /* Protocol and length */ 1750 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 1751 /* IP addresses */ 1752 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1753 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1754 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1755 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1756 break; 1757 } 1758 case IPPROTO_ICMPV6: 1759 ulp_len = ira->ira_pktlen - ip_hdr_length; 1760 1761 /* Protocol and length */ 1762 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP; 1763 /* IP addresses */ 1764 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 1765 iphs[8] + iphs[9] + iphs[10] + iphs[11] + 1766 iphs[12] + iphs[13] + iphs[14] + iphs[15] + 1767 iphs[16] + iphs[17] + iphs[18] + iphs[19]; 1768 break; 1769 default: 1770 cksum = 0; 1771 break; 1772 } 1773 #undef iphs 1774 return (cksum); 1775 } 1776 1777 1778 /* 1779 * Software verification of the ULP checksums. 1780 * Returns B_TRUE if ok. 1781 * Increments statistics of failed. 1782 */ 1783 static boolean_t 1784 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1785 { 1786 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1787 uint32_t cksum; 1788 uint8_t protocol = ira->ira_protocol; 1789 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1790 1791 IP6_STAT(ipst, ip6_in_sw_cksum); 1792 1793 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 1794 protocol == IPPROTO_ICMPV6); 1795 1796 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1797 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1798 if (cksum == 0) 1799 return (B_TRUE); 1800 1801 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill); 1802 return (B_FALSE); 1803 } 1804 1805 /* 1806 * Verify the ULP checksums. 
1807 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 1808 * algorithm. 1809 * Increments statistics if failed. 1810 */ 1811 static boolean_t 1812 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h, 1813 ip_recv_attr_t *ira) 1814 { 1815 ill_t *ill = ira->ira_rill; 1816 uint16_t hck_flags; 1817 uint32_t cksum; 1818 mblk_t *mp1; 1819 uint_t len; 1820 uint8_t protocol = ira->ira_protocol; 1821 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 1822 1823 1824 switch (protocol) { 1825 case IPPROTO_TCP: 1826 case IPPROTO_ICMPV6: 1827 break; 1828 1829 case IPPROTO_UDP: { 1830 udpha_t *udpha; 1831 1832 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length); 1833 /* 1834 * Before going through the regular checksum 1835 * calculation, make sure the received checksum 1836 * is non-zero. RFC 2460 says, a 0x0000 checksum 1837 * in a UDP packet (within IPv6 packet) is invalid 1838 * and should be replaced by 0xffff. This makes 1839 * sense as regular checksum calculation will 1840 * pass for both the cases i.e. 0x0000 and 0xffff. 1841 * Removing one of the case makes error detection 1842 * stronger. 1843 */ 1844 if (udpha->uha_checksum == 0) { 1845 /* 0x0000 checksum is invalid */ 1846 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 1847 return (B_FALSE); 1848 } 1849 break; 1850 } 1851 case IPPROTO_SCTP: { 1852 sctp_hdr_t *sctph; 1853 uint32_t pktsum; 1854 1855 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length); 1856 #ifdef DEBUG 1857 if (skip_sctp_cksum) 1858 return (B_TRUE); 1859 #endif 1860 pktsum = sctph->sh_chksum; 1861 sctph->sh_chksum = 0; 1862 cksum = sctp_cksum(mp, ip_hdr_length); 1863 sctph->sh_chksum = pktsum; 1864 if (cksum == pktsum) 1865 return (B_TRUE); 1866 1867 /* 1868 * Defer until later whether a bad checksum is ok 1869 * in order to allow RAW sockets to use Adler checksum 1870 * with SCTP. 
1871 */ 1872 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 1873 return (B_TRUE); 1874 } 1875 1876 default: 1877 /* No ULP checksum to verify. */ 1878 return (B_TRUE); 1879 } 1880 1881 /* 1882 * Revert to software checksum calculation if the interface 1883 * isn't capable of checksum offload. 1884 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 1885 * Note: IRAF_NO_HW_CKSUM is not currently used. 1886 */ 1887 ASSERT(!IS_IPMP(ill)); 1888 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1889 !dohwcksum) { 1890 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1891 } 1892 1893 /* 1894 * We apply this for all ULP protocols. Does the HW know to 1895 * not set the flags for SCTP and other protocols. 1896 */ 1897 1898 hck_flags = DB_CKSUMFLAGS(mp); 1899 1900 if (hck_flags & HCK_FULLCKSUM) { 1901 /* 1902 * Full checksum has been computed by the hardware 1903 * and has been attached. If the driver wants us to 1904 * verify the correctness of the attached value, in 1905 * order to protect against faulty hardware, compare 1906 * it against -0 (0xFFFF) to see if it's valid. 1907 */ 1908 if (hck_flags & HCK_FULLCKSUM_OK) 1909 return (B_TRUE); 1910 1911 cksum = DB_CKSUM16(mp); 1912 if (cksum == 0xFFFF) 1913 return (B_TRUE); 1914 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1915 return (B_FALSE); 1916 } 1917 1918 mp1 = mp->b_cont; 1919 if ((hck_flags & HCK_PARTIALCKSUM) && 1920 (mp1 == NULL || mp1->b_cont == NULL) && 1921 ip_hdr_length >= DB_CKSUMSTART(mp) && 1922 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 1923 uint32_t adj; 1924 uchar_t *cksum_start; 1925 1926 cksum = ip_input_cksum_pseudo_v6(ip6h, ira); 1927 1928 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp)); 1929 1930 /* 1931 * Partial checksum has been calculated by hardware 1932 * and attached to the packet; in addition, any 1933 * prepended extraneous data is even byte aligned, 1934 * and there are at most two mblks associated with 1935 * the packet. 
If any such data exists, we adjust 1936 * the checksum; also take care any postpended data. 1937 */ 1938 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 1939 /* 1940 * One's complement subtract extraneous checksum 1941 */ 1942 cksum += DB_CKSUM16(mp); 1943 if (adj >= cksum) 1944 cksum = ~(adj - cksum) & 0xFFFF; 1945 else 1946 cksum -= adj; 1947 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1948 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 1949 if (!(~cksum & 0xFFFF)) 1950 return (B_TRUE); 1951 1952 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill); 1953 return (B_FALSE); 1954 } 1955 return (ip_input_sw_cksum_v6(mp, ip6h, ira)); 1956 } 1957 1958 1959 /* 1960 * Handle fanout of received packets. 1961 * Unicast packets that are looped back (from ire_send_local_v6) and packets 1962 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 1963 * 1964 * IPQoS Notes 1965 * Before sending it to the client, invoke IPPF processing. Policy processing 1966 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 1967 */ 1968 void 1969 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 1970 { 1971 ill_t *ill = ira->ira_ill; 1972 iaflags_t iraflags = ira->ira_flags; 1973 ip_stack_t *ipst = ill->ill_ipst; 1974 uint8_t protocol; 1975 conn_t *connp; 1976 #define rptr ((uchar_t *)ip6h) 1977 uint_t ip_hdr_length; 1978 uint_t min_ulp_header_length; 1979 int offset; 1980 ssize_t len; 1981 netstack_t *ns = ipst->ips_netstack; 1982 ipsec_stack_t *ipss = ns->netstack_ipsec; 1983 ill_t *rill = ira->ira_rill; 1984 1985 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 1986 1987 /* 1988 * We repeat this as we parse over destination options header and 1989 * fragment headers (earlier we've handled any hop-by-hop options 1990 * header.) 1991 * We update ira_protocol and ira_ip_hdr_length as we skip past 1992 * the intermediate headers; they already point past any 1993 * hop-by-hop header. 
1994 */ 1995 repeat: 1996 protocol = ira->ira_protocol; 1997 ip_hdr_length = ira->ira_ip_hdr_length; 1998 1999 /* 2000 * Time for IPP once we've done reassembly and IPsec. 2001 * We skip this for loopback packets since we don't do IPQoS 2002 * on loopback. 2003 */ 2004 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2005 !(iraflags & IRAF_LOOPBACK) && 2006 (protocol != IPPROTO_ESP || protocol != IPPROTO_AH || 2007 protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING || 2008 protocol != IPPROTO_FRAGMENT)) { 2009 /* 2010 * Use the interface on which the packet arrived - not where 2011 * the IP address is hosted. 2012 */ 2013 /* ip_process translates an IS_UNDER_IPMP */ 2014 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2015 if (mp == NULL) { 2016 /* ip_drop_packet and MIB done */ 2017 return; 2018 } 2019 } 2020 2021 /* Determine the minimum required size of the upper-layer header */ 2022 /* Need to do this for at least the set of ULPs that TX handles. */ 2023 switch (protocol) { 2024 case IPPROTO_TCP: 2025 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2026 break; 2027 case IPPROTO_SCTP: 2028 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2029 break; 2030 case IPPROTO_UDP: 2031 min_ulp_header_length = UDPH_SIZE; 2032 break; 2033 case IPPROTO_ICMP: 2034 case IPPROTO_ICMPV6: 2035 min_ulp_header_length = ICMPH_SIZE; 2036 break; 2037 case IPPROTO_FRAGMENT: 2038 case IPPROTO_DSTOPTS: 2039 case IPPROTO_ROUTING: 2040 min_ulp_header_length = MIN_EHDR_LEN; 2041 break; 2042 default: 2043 min_ulp_header_length = 0; 2044 break; 2045 } 2046 /* Make sure we have the min ULP header length */ 2047 len = mp->b_wptr - rptr; 2048 if (len < ip_hdr_length + min_ulp_header_length) { 2049 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) 2050 goto pkt_too_short; 2051 2052 IP6_STAT(ipst, ip6_recv_pullup); 2053 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2054 ira); 2055 if (ip6h == NULL) 2056 goto discard; 2057 len = mp->b_wptr - rptr; 2058 } 2059 2060 /* 2061 * If 
trusted extensions then determine the zoneid and TX specific 2062 * ira_flags. 2063 */ 2064 if (iraflags & IRAF_SYSTEM_LABELED) { 2065 /* This can update ira->ira_flags and ira->ira_zoneid */ 2066 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira); 2067 iraflags = ira->ira_flags; 2068 } 2069 2070 2071 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2072 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2073 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) { 2074 /* Bad checksum. Stats are already incremented */ 2075 ip_drop_input("Bad ULP checksum", mp, ill); 2076 freemsg(mp); 2077 return; 2078 } 2079 /* IRAF_SCTP_CSUM_ERR could have been set */ 2080 iraflags = ira->ira_flags; 2081 } 2082 switch (protocol) { 2083 case IPPROTO_TCP: 2084 /* For TCP, discard multicast packets. */ 2085 if (iraflags & IRAF_MULTIBROADCAST) 2086 goto discard; 2087 2088 /* First mblk contains IP+TCP headers per above check */ 2089 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2090 2091 /* TCP options present? */ 2092 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4; 2093 if (offset != 5) { 2094 if (offset < 5) 2095 goto discard; 2096 2097 /* 2098 * There must be TCP options. 2099 * Make sure we can grab them. 2100 */ 2101 offset <<= 2; 2102 offset += ip_hdr_length; 2103 if (len < offset) { 2104 if (ira->ira_pktlen < offset) 2105 goto pkt_too_short; 2106 2107 IP6_STAT(ipst, ip6_recv_pullup); 2108 ip6h = ip_pullup(mp, offset, ira); 2109 if (ip6h == NULL) 2110 goto discard; 2111 len = mp->b_wptr - rptr; 2112 } 2113 } 2114 2115 /* 2116 * Pass up a squeue hint to tcp. 2117 * If ira_sqp is already set (this is loopback) we leave it 2118 * alone. 
2119 */ 2120 if (ira->ira_sqp == NULL) { 2121 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2122 } 2123 2124 /* Look for AF_INET or AF_INET6 that matches */ 2125 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length, 2126 ira, ipst); 2127 if (connp == NULL) { 2128 /* Send the TH_RST */ 2129 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2130 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2131 return; 2132 } 2133 if (connp->conn_incoming_ifindex != 0 && 2134 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2135 CONN_DEC_REF(connp); 2136 2137 /* Send the TH_RST */ 2138 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2139 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2140 return; 2141 } 2142 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2143 (iraflags & IRAF_IPSEC_SECURE)) { 2144 mp = ipsec_check_inbound_policy(mp, connp, 2145 NULL, ip6h, ira); 2146 if (mp == NULL) { 2147 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2148 /* Note that mp is NULL */ 2149 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2150 CONN_DEC_REF(connp); 2151 return; 2152 } 2153 } 2154 /* Found a client; up it goes */ 2155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2156 ira->ira_ill = ira->ira_rill = NULL; 2157 if (!IPCL_IS_TCP(connp)) { 2158 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2159 (connp->conn_recv)(connp, mp, NULL, ira); 2160 CONN_DEC_REF(connp); 2161 ira->ira_ill = ill; 2162 ira->ira_rill = rill; 2163 return; 2164 } 2165 2166 /* 2167 * We do different processing whether called from 2168 * ip_accept_tcp and we match the target, don't match 2169 * the target, and when we are called by ip_input. 
2170 */ 2171 if (iraflags & IRAF_TARGET_SQP) { 2172 if (ira->ira_target_sqp == connp->conn_sqp) { 2173 mblk_t *attrmp; 2174 2175 attrmp = ip_recv_attr_to_mblk(ira); 2176 if (attrmp == NULL) { 2177 BUMP_MIB(ill->ill_ip_mib, 2178 ipIfStatsInDiscards); 2179 ip_drop_input("ipIfStatsInDiscards", 2180 mp, ill); 2181 freemsg(mp); 2182 CONN_DEC_REF(connp); 2183 } else { 2184 SET_SQUEUE(attrmp, connp->conn_recv, 2185 connp); 2186 attrmp->b_cont = mp; 2187 ASSERT(ira->ira_target_sqp_mp == NULL); 2188 ira->ira_target_sqp_mp = attrmp; 2189 /* 2190 * Conn ref release when drained from 2191 * the squeue. 2192 */ 2193 } 2194 } else { 2195 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2196 connp->conn_recv, connp, ira, SQ_FILL, 2197 SQTAG_IP6_TCP_INPUT); 2198 } 2199 } else { 2200 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2201 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 2202 } 2203 ira->ira_ill = ill; 2204 ira->ira_rill = rill; 2205 return; 2206 2207 case IPPROTO_SCTP: { 2208 sctp_hdr_t *sctph; 2209 uint32_t ports; /* Source and destination ports */ 2210 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2211 2212 /* For SCTP, discard multicast packets. */ 2213 if (iraflags & IRAF_MULTIBROADCAST) 2214 goto discard; 2215 2216 /* 2217 * Since there is no SCTP h/w cksum support yet, just 2218 * clear the flag. 2219 */ 2220 DB_CKSUMFLAGS(mp) = 0; 2221 2222 /* Length ensured above */ 2223 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2224 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2225 2226 /* get the ports */ 2227 ports = *(uint32_t *)&sctph->sh_sport; 2228 2229 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2230 /* 2231 * No potential sctp checksum errors go to the Sun 2232 * sctp stack however they might be Adler-32 summed 2233 * packets a userland stack bound to a raw IP socket 2234 * could reasonably use. 
Note though that Adler-32 is 2235 * a long deprecated algorithm and customer sctp 2236 * networks should eventually migrate to CRC-32 at 2237 * which time this facility should be removed. 2238 */ 2239 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2240 return; 2241 } 2242 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports, 2243 ira, mp, sctps); 2244 if (connp == NULL) { 2245 /* Check for raw socket or OOTB handling */ 2246 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2247 return; 2248 } 2249 if (connp->conn_incoming_ifindex != 0 && 2250 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2251 CONN_DEC_REF(connp); 2252 2253 /* Check for raw socket or OOTB handling */ 2254 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira); 2255 return; 2256 } 2257 2258 /* Found a client; up it goes */ 2259 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2260 sctp_input(connp, NULL, ip6h, mp, ira); 2261 /* sctp_input does a rele of the sctp_t */ 2262 return; 2263 } 2264 2265 case IPPROTO_UDP: 2266 /* First mblk contains IP+UDP headers as checked above */ 2267 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2268 2269 if (iraflags & IRAF_MULTIBROADCAST) { 2270 uint16_t *up; /* Pointer to ports in ULP header */ 2271 2272 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length); 2273 2274 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira); 2275 return; 2276 } 2277 2278 /* Look for AF_INET or AF_INET6 that matches */ 2279 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length, 2280 ira, ipst); 2281 if (connp == NULL) { 2282 no_udp_match: 2283 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP]. 
2284 connf_head != NULL) { 2285 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2286 ip_fanout_proto_v6(mp, ip6h, ira); 2287 } else { 2288 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2289 ICMP6_DST_UNREACH_NOPORT, ira); 2290 } 2291 return; 2292 2293 } 2294 if (connp->conn_incoming_ifindex != 0 && 2295 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2296 CONN_DEC_REF(connp); 2297 goto no_udp_match; 2298 } 2299 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2300 !canputnext(connp->conn_rq)) { 2301 CONN_DEC_REF(connp); 2302 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2303 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2304 freemsg(mp); 2305 return; 2306 } 2307 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 2308 (iraflags & IRAF_IPSEC_SECURE)) { 2309 mp = ipsec_check_inbound_policy(mp, connp, 2310 NULL, ip6h, ira); 2311 if (mp == NULL) { 2312 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2313 /* Note that mp is NULL */ 2314 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2315 CONN_DEC_REF(connp); 2316 return; 2317 } 2318 } 2319 2320 /* Found a client; up it goes */ 2321 IP6_STAT(ipst, ip6_udp_fannorm); 2322 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2323 ira->ira_ill = ira->ira_rill = NULL; 2324 (connp->conn_recv)(connp, mp, NULL, ira); 2325 CONN_DEC_REF(connp); 2326 ira->ira_ill = ill; 2327 ira->ira_rill = rill; 2328 return; 2329 default: 2330 break; 2331 } 2332 2333 /* 2334 * Clear hardware checksumming flag as it is currently only 2335 * used by TCP and UDP. 2336 */ 2337 DB_CKSUMFLAGS(mp) = 0; 2338 2339 switch (protocol) { 2340 case IPPROTO_ICMPV6: 2341 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 2342 2343 /* Check variable for testing applications */ 2344 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 2345 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill); 2346 freemsg(mp); 2347 return; 2348 } 2349 /* 2350 * We need to accomodate icmp messages coming in clear 2351 * until we get everything secure from the wire. 
If 2352 * icmp_accept_clear_messages is zero we check with 2353 * the global policy and act accordingly. If it is 2354 * non-zero, we accept the message without any checks. 2355 * But *this does not mean* that this will be delivered 2356 * to RAW socket clients. By accepting we might send 2357 * replies back, change our MTU value etc., 2358 * but delivery to the ULP/clients depends on their 2359 * policy dispositions. 2360 */ 2361 if (ipst->ips_icmp_accept_clear_messages == 0) { 2362 mp = ipsec_check_global_policy(mp, NULL, 2363 NULL, ip6h, ira, ns); 2364 if (mp == NULL) 2365 return; 2366 } 2367 2368 /* 2369 * On a labeled system, we have to check whether the zone 2370 * itself is permitted to receive raw traffic. 2371 */ 2372 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2373 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2374 BUMP_MIB(ill->ill_icmp6_mib, 2375 ipv6IfIcmpInErrors); 2376 ip_drop_input("tsol_can_accept_raw", mp, ill); 2377 freemsg(mp); 2378 return; 2379 } 2380 } 2381 2382 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2383 mp = icmp_inbound_v6(mp, ira); 2384 if (mp == NULL) { 2385 /* No need to pass to RAW sockets */ 2386 return; 2387 } 2388 break; 2389 2390 case IPPROTO_DSTOPTS: { 2391 ip6_dest_t *desthdr; 2392 uint_t ehdrlen; 2393 uint8_t *optptr; 2394 2395 /* We already check for MIN_EHDR_LEN above */ 2396 2397 /* Check if AH is present and needs to be processed. */ 2398 mp = ipsec_early_ah_v6(mp, ira); 2399 if (mp == NULL) 2400 return; 2401 2402 /* 2403 * Reinitialize pointers, as ipsec_early_ah_v6() does 2404 * complete pullups. We don't have to do more pullups 2405 * as a result. 
2406 */ 2407 ip6h = (ip6_t *)mp->b_rptr; 2408 2409 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2410 goto pkt_too_short; 2411 2412 if (mp->b_cont != NULL && 2413 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2414 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2415 if (ip6h == NULL) 2416 goto discard; 2417 } 2418 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2419 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2420 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2421 goto pkt_too_short; 2422 if (mp->b_cont != NULL && 2423 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2424 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2425 if (ip6h == NULL) 2426 goto discard; 2427 2428 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length); 2429 } 2430 optptr = (uint8_t *)&desthdr[1]; 2431 2432 /* 2433 * Update ira_ip_hdr_length to skip the destination header 2434 * when we repeat. 2435 */ 2436 ira->ira_ip_hdr_length += ehdrlen; 2437 2438 ira->ira_protocol = desthdr->ip6d_nxt; 2439 2440 /* 2441 * Note: XXX This code does not seem to make 2442 * distinction between Destination Options Header 2443 * being before/after Routing Header which can 2444 * happen if we are at the end of source route. 2445 * This may become significant in future. 2446 * (No real significant Destination Options are 2447 * defined/implemented yet ). 2448 */ 2449 switch (ip_process_options_v6(mp, ip6h, optptr, 2450 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) { 2451 case -1: 2452 /* 2453 * Packet has been consumed and any needed 2454 * ICMP errors sent. 
2455 */ 2456 return; 2457 case 0: 2458 /* No action needed continue */ 2459 break; 2460 case 1: 2461 /* 2462 * Unnexpected return value 2463 * (Router alert is a Hop-by-Hop option) 2464 */ 2465 #ifdef DEBUG 2466 panic("ip_fanout_v6: router " 2467 "alert hbh opt indication in dest opt"); 2468 /*NOTREACHED*/ 2469 #else 2470 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2471 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2472 freemsg(mp); 2473 return; 2474 #endif 2475 } 2476 goto repeat; 2477 } 2478 case IPPROTO_FRAGMENT: { 2479 ip6_frag_t *fraghdr; 2480 2481 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t)) 2482 goto pkt_too_short; 2483 2484 if (mp->b_cont != NULL && 2485 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) { 2486 ip6h = ip_pullup(mp, 2487 ip_hdr_length + sizeof (ip6_frag_t), ira); 2488 if (ip6h == NULL) 2489 goto discard; 2490 } 2491 2492 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length); 2493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 2494 2495 /* 2496 * Invoke the CGTP (multirouting) filtering module to 2497 * process the incoming packet. Packets identified as 2498 * duplicates must be discarded. Filtering is active 2499 * only if the ip_cgtp_filter ndd variable is 2500 * non-zero. 2501 */ 2502 if (ipst->ips_ip_cgtp_filter && 2503 ipst->ips_ip_cgtp_filter_ops != NULL) { 2504 int cgtp_flt_pkt; 2505 netstackid_t stackid; 2506 2507 stackid = ipst->ips_netstack->netstack_stackid; 2508 2509 /* 2510 * CGTP and IPMP are mutually exclusive so 2511 * phyint_ifindex is fine here. 
2512 */ 2513 cgtp_flt_pkt = 2514 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 2515 stackid, ill->ill_phyint->phyint_ifindex, 2516 ip6h, fraghdr); 2517 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 2518 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 2519 freemsg(mp); 2520 return; 2521 } 2522 } 2523 2524 /* 2525 * Update ip_hdr_length to skip the frag header 2526 * ip_input_fragment_v6 will determine the extension header 2527 * prior to the fragment header and update its nexthdr value, 2528 * and also set ira_protocol to the nexthdr that follows the 2529 * completed fragment. 2530 */ 2531 ip_hdr_length += sizeof (ip6_frag_t); 2532 2533 /* 2534 * Make sure we have ira_l2src before we loose the original 2535 * mblk 2536 */ 2537 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 2538 ip_setl2src(mp, ira, ira->ira_rill); 2539 2540 mp = ip_input_fragment_v6(mp, ip6h, fraghdr, 2541 ira->ira_pktlen - ip_hdr_length, ira); 2542 if (mp == NULL) { 2543 /* Reassembly is still pending */ 2544 return; 2545 } 2546 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 2547 2548 /* 2549 * The mblk chain has the frag header removed and 2550 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the 2551 * IP header has been updated to refleact the result. 2552 */ 2553 ip6h = (ip6_t *)mp->b_rptr; 2554 ip_hdr_length = ira->ira_ip_hdr_length; 2555 goto repeat; 2556 } 2557 case IPPROTO_HOPOPTS: 2558 /* 2559 * Illegal header sequence. 2560 * (Hop-by-hop headers are processed above 2561 * and required to immediately follow IPv6 header) 2562 */ 2563 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2564 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2565 return; 2566 2567 case IPPROTO_ROUTING: { 2568 uint_t ehdrlen; 2569 ip6_rthdr_t *rthdr; 2570 2571 /* Check if AH is present and needs to be processed. */ 2572 mp = ipsec_early_ah_v6(mp, ira); 2573 if (mp == NULL) 2574 return; 2575 2576 /* 2577 * Reinitialize pointers, as ipsec_early_ah_v6() does 2578 * complete pullups. 
We don't have to do more pullups 2579 * as a result. 2580 */ 2581 ip6h = (ip6_t *)mp->b_rptr; 2582 2583 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN) 2584 goto pkt_too_short; 2585 2586 if (mp->b_cont != NULL && 2587 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) { 2588 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira); 2589 if (ip6h == NULL) 2590 goto discard; 2591 } 2592 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2593 protocol = ira->ira_protocol = rthdr->ip6r_nxt; 2594 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2595 if (ira->ira_pktlen - ip_hdr_length < ehdrlen) 2596 goto pkt_too_short; 2597 if (mp->b_cont != NULL && 2598 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) { 2599 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira); 2600 if (ip6h == NULL) 2601 goto discard; 2602 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length); 2603 } 2604 if (rthdr->ip6r_segleft != 0) { 2605 /* Not end of source route */ 2606 if (ira->ira_flags & 2607 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 2608 BUMP_MIB(ill->ill_ip_mib, 2609 ipIfStatsForwProhibits); 2610 ip_drop_input("ipIfStatsInForwProhibits", 2611 mp, ill); 2612 freemsg(mp); 2613 return; 2614 } 2615 ip_process_rthdr(mp, ip6h, rthdr, ira); 2616 return; 2617 } 2618 ira->ira_ip_hdr_length += ehdrlen; 2619 goto repeat; 2620 } 2621 2622 case IPPROTO_AH: 2623 case IPPROTO_ESP: { 2624 /* 2625 * Fast path for AH/ESP. 
2626 */ 2627 netstack_t *ns = ipst->ips_netstack; 2628 ipsec_stack_t *ipss = ns->netstack_ipsec; 2629 2630 IP_STAT(ipst, ipsec_proto_ahesp); 2631 2632 if (!ipsec_loaded(ipss)) { 2633 ip_proto_not_sup(mp, ira); 2634 return; 2635 } 2636 2637 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2638 /* select inbound SA and have IPsec process the pkt */ 2639 if (protocol == IPPROTO_ESP) { 2640 esph_t *esph; 2641 2642 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2643 if (mp == NULL) 2644 return; 2645 2646 ASSERT(esph != NULL); 2647 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2648 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2649 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2650 2651 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2652 ira); 2653 } else { 2654 ah_t *ah; 2655 2656 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2657 if (mp == NULL) 2658 return; 2659 2660 ASSERT(ah != NULL); 2661 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2662 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2663 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2664 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2665 ira); 2666 } 2667 2668 if (mp == NULL) { 2669 /* 2670 * Either it failed or is pending. In the former case 2671 * ipIfStatsInDiscards was increased. 2672 */ 2673 return; 2674 } 2675 /* we're done with IPsec processing, send it up */ 2676 ip_input_post_ipsec(mp, ira); 2677 return; 2678 } 2679 case IPPROTO_NONE: 2680 /* All processing is done. Count as "delivered". 
*/ 2681 freemsg(mp); 2682 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2683 return; 2684 2685 case IPPROTO_ENCAP: 2686 case IPPROTO_IPV6: 2687 /* iptun will verify trusted label */ 2688 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length, 2689 ira, ipst); 2690 if (connp != NULL) { 2691 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2692 ira->ira_ill = ira->ira_rill = NULL; 2693 connp->conn_recv(connp, mp, NULL, ira); 2694 CONN_DEC_REF(connp); 2695 ira->ira_ill = ill; 2696 ira->ira_rill = rill; 2697 return; 2698 } 2699 /* FALLTHRU */ 2700 default: 2701 /* 2702 * On a labeled system, we have to check whether the zone 2703 * itself is permitted to receive raw traffic. 2704 */ 2705 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2706 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2707 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2708 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2709 freemsg(mp); 2710 return; 2711 } 2712 } 2713 break; 2714 } 2715 2716 /* 2717 * The above input functions may have returned the pulled up message. 2718 * So ip6h need to be reinitialized. 2719 */ 2720 ip6h = (ip6_t *)mp->b_rptr; 2721 ira->ira_protocol = protocol; 2722 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) { 2723 /* No user-level listener for these packets packets */ 2724 ip_proto_not_sup(mp, ira); 2725 return; 2726 } 2727 2728 /* 2729 * Handle fanout to raw sockets. There 2730 * can be more than one stream bound to a particular 2731 * protocol. When this is the case, each one gets a copy 2732 * of any incoming packets. 2733 */ 2734 ASSERT(ira->ira_protocol == protocol); 2735 ip_fanout_proto_v6(mp, ip6h, ira); 2736 return; 2737 2738 pkt_too_short: 2739 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2740 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2741 freemsg(mp); 2742 return; 2743 2744 discard: 2745 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2746 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2747 freemsg(mp); 2748 #undef rptr 2749 } 2750