1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. 
*/ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/dlpi.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/strsubr.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #include <sys/zone.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/xti_inet.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/cmn_err.h> 43 #include <sys/debug.h> 44 #include <sys/kobj.h> 45 #include <sys/modctl.h> 46 #include <sys/atomic.h> 47 #include <sys/policy.h> 48 #include <sys/priv.h> 49 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/kmem.h> 53 #include <sys/sdt.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_arp.h> 60 #include <net/route.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <net/if_dl.h> 64 65 #include <inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/mib2.h> 68 #include <inet/nd.h> 69 #include <inet/arp.h> 70 #include <inet/snmpcom.h> 71 #include <inet/kstatcom.h> 72 73 #include <netinet/igmp_var.h> 74 #include <netinet/ip6.h> 75 #include <netinet/icmp6.h> 76 #include <netinet/sctp.h> 77 78 #include <inet/ip.h> 79 #include <inet/ip_impl.h> 80 #include <inet/ip6.h> 81 #include <inet/ip6_asp.h> 82 #include <inet/optcom.h> 83 #include <inet/tcp.h> 84 #include <inet/tcp_impl.h> 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_ftable.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <inet/ip_listutils.h> 92 #include <netinet/igmp.h> 93 #include <netinet/ip_mroute.h> 94 #include <inet/ipp_common.h> 95 96 #include <net/pfkeyv2.h> 97 #include <inet/sadb.h> 98 #include <inet/ipsec_impl.h> 99 #include <inet/ipdrop.h> 100 #include <inet/ip_netinfo.h> 101 #include <inet/ilb_ip.h> 102 #include <sys/squeue_impl.h> 103 #include <sys/squeue.h> 104 105 #include 
<sys/ethernet.h> 106 #include <net/if_types.h> 107 #include <sys/cpuvar.h> 108 109 #include <ipp/ipp.h> 110 #include <ipp/ipp_impl.h> 111 #include <ipp/ipgpc/ipgpc.h> 112 113 #include <sys/pattr.h> 114 #include <inet/ipclassifier.h> 115 #include <inet/sctp_ip.h> 116 #include <inet/sctp/sctp_impl.h> 117 #include <inet/udp_impl.h> 118 #include <sys/sunddi.h> 119 120 #include <sys/tsol/label.h> 121 #include <sys/tsol/tnet.h> 122 123 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 124 125 #ifdef DEBUG 126 extern boolean_t skip_sctp_cksum; 127 #endif 128 129 static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *, 130 ip_recv_attr_t *); 131 132 static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *, 133 ip_recv_attr_t *); 134 static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, 135 ip_recv_attr_t *); 136 137 #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4) 138 139 /* 140 * Direct read side procedure capable of dealing with chains. GLDv3 based 141 * drivers call this function directly with mblk chains while STREAMS 142 * read side procedure ip_rput() calls this for single packet with ip_ring 143 * set to NULL to process one packet at a time. 144 * 145 * The ill will always be valid if this function is called directly from 146 * the driver. 147 * 148 * If ip_input() is called from GLDv3: 149 * 150 * - This must be a non-VLAN IP stream. 151 * - 'mp' is either an untagged or a special priority-tagged packet. 152 * - Any VLAN tag that was in the MAC header has been stripped. 153 * 154 * If the IP header in packet is not 32-bit aligned, every message in the 155 * chain will be aligned before further operations. This is required on SPARC 156 * platform. 
 */
void
ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	/* Discard the accept-chain return; only ip_accept_tcp uses it */
	(void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL,
	    NULL);
}

/*
 * ip_accept_tcp() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode. The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v4
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v4 and ip_fanout_v4 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 */
mblk_t *
ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp,
	    last, cnt));
}

/*
 * Used by ip_input and ip_accept_tcp
 * The last three arguments are only used by ip_accept_tcp, and mhip is
 * only used by ip_input.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/* Loop over b_next */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		/* Detach mp from the chain before processing it */
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks. We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		/* Reset the per-packet attributes for this iteration */
		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v4
		 *	ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted b_next chain */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache?
	 */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}

/*
 * This input function is used when
 *  - is_system_labeled()
 *  - CGTP filtering
 *  - DHCP unicast before we have an IP address configured
 *  - there is a listener for IPPROTO_RSVP
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
		 */
		cgtp_flt_pkt =
		    ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid,
		    ill->ill_phyint->phyint_ifindex, mp);
		if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
			ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP
	 * server to unicast DHCP packets to a DHCP client using the
	 * IP address it is offering to the client. This can be
	 * disabled through the "broadcast bit", but not all DHCP
	 * servers honor that bit. Therefore, to interoperate with as
	 * many DHCP servers as possible, the DHCP client allows the
	 * server to unicast, but we treat those packets as broadcast
	 * here. Note that we don't rewrite the packet itself since
	 * (a) that would mess up the checksums and (b) the DHCP
	 * client conn is bound to INADDR_ANY so ip_fanout_udp() will
	 * hand it the packet regardless.
	 */
	if (ill->ill_dhcpinit != 0 &&
	    ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION &&
	    ipha->ipha_protocol == IPPROTO_UDP) {
		udpha_t *udpha;

		ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira);
		if (ipha == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill);
			freemsg(mp);
			return;
		}
		/* Reload since pullupmsg() can change b_rptr. */
		udpha = (udpha_t *)&ipha[1];

		if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) {
			DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill,
			    mblk_t *, mp);
			/*
			 * This assumes that we deliver to all conns for
			 * multicast and broadcast packets.
			 */
			nexthop = INADDR_BROADCAST;
			ira->ira_flags |= IRAF_DHCP_UNICAST;
		}
	}

	/*
	 * If rsvpd is running, let RSVP daemon handle its processing
	 * and forwarding of RSVP multicast/unicast packets.
	 * If rsvpd is not running but mrouted is running, RSVP
	 * multicast packets are forwarded as multicast traffic
	 * and RSVP unicast packets are forwarded by unicast router.
	 * If neither rsvpd nor mrouted is running, RSVP multicast
	 * packets are not forwarded, but the unicast packets are
	 * forwarded like unicast traffic.
	 */
	if (ipha->ipha_protocol == IPPROTO_RSVP &&
	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
		/* RSVP packet and rsvpd running. Treat as ours */
		ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop)));
		/*
		 * We use a multicast address to get the packet to
		 * ire_recv_multicast_v4. There will not be a membership
		 * check since we set IRAF_RSVP
		 */
		nexthop = htonl(INADDR_UNSPEC_GROUP);
		ira->ira_flags |= IRAF_RSVP;
	}

	/* Continue with the common (short) input path */
	ill_input_short_v4(mp, ipha, &nexthop, ira, rtc);
}

/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 */
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == 127) ||
	    ((*(uchar_t *)&ipha->ipha_src) == 127)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If there is a good HW IP header checksum we clear the need
	 * to look at the IP header checksum.
	 */
	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t lb_dst;
		int lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		/* opt_len is in units of 32-bit words */
		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present! Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}
	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    (ill->ill_flags & ILLF_ROUTER),
			    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr) {
		/* Use the route cache */
		ASSERT(rtc->rtc_ire != NULL);
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop,
			    (ill->ill_flags & ILLF_ROUTER), ira->ira_xmit_hint,
			    ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef	rptr

/*
 * ire_recvfn for IREs that need forwarding
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	/* Never forward packets that arrived as link-layer multi/broadcast */
	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but note that some other
	 * thread could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	/* Hold the nce while we still have ire_lock, then drop the lock */
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER), ira->ira_xmit_hint, ipst,
		    NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag.
	 */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address checks for both src and dst have already
	 * been done in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the smaller of the interface MTU and any route metric MTU */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1107 */ 1108 void 1109 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1110 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1111 { 1112 ill_t *dst_ill = nce->nce_ill; 1113 uint32_t pkt_len; 1114 uint32_t sum; 1115 iaflags_t iraflags = ira->ira_flags; 1116 ip_stack_t *ipst = ill->ill_ipst; 1117 iaflags_t ixaflags; 1118 1119 if (ipha->ipha_ttl <= 1) { 1120 /* Perhaps the checksum was bad */ 1121 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1123 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1124 freemsg(mp); 1125 return; 1126 } 1127 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1128 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1129 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1130 return; 1131 } 1132 ipha->ipha_ttl--; 1133 /* Adjust the checksum to reflect the ttl decrement. */ 1134 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1135 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1136 1137 /* Check if there are options to update */ 1138 if (iraflags & IRAF_IPV4_OPTIONS) { 1139 ASSERT(ipha->ipha_version_and_hdr_length != 1140 IP_SIMPLE_HDR_VERSION); 1141 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1142 1143 if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1144 /* ipIfStatsForwProhibits and ip_drop_input done */ 1145 return; 1146 } 1147 1148 ipha->ipha_hdr_checksum = 0; 1149 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1150 } 1151 1152 /* Initiate Write side IPPF processing before any fragmentation */ 1153 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1154 /* ip_process translates an IS_UNDER_IPMP */ 1155 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1156 if (mp == NULL) { 1157 /* ip_drop_packet and MIB done */ 1158 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1159 " during IPPF processing\n")); 1160 return; 1161 } 1162 } 1163 1164 pkt_len = ira->ira_pktlen; 1165 1166 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 
1167 1168 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1169 1170 if (pkt_len > mtu) { 1171 /* 1172 * It needs fragging on its way out. If we haven't 1173 * verified the header checksum yet we do it now since 1174 * are going to put a surely good checksum in the 1175 * outgoing header, we have to make sure that it 1176 * was good coming in. 1177 */ 1178 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1179 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1180 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1181 freemsg(mp); 1182 return; 1183 } 1184 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1185 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1186 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1187 if (iraflags & IRAF_SYSTEM_LABELED) { 1188 /* 1189 * Remove any CIPSO option added by 1190 * tsol_ip_forward, and make sure we report 1191 * a path MTU so that there 1192 * is room to add such a CIPSO option for future 1193 * packets. 1194 */ 1195 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1196 AF_INET); 1197 } 1198 1199 icmp_frag_needed(mp, mtu, ira); 1200 return; 1201 } 1202 1203 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1204 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1205 return; 1206 } 1207 1208 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1209 if (iraflags & IRAF_LOOPBACK_COPY) { 1210 /* 1211 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1212 * is don't care 1213 */ 1214 (void) ip_postfrag_loopcheck(mp, nce, 1215 ixaflags | IXAF_LOOPBACK_COPY, 1216 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1217 } else { 1218 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1219 GLOBAL_ZONEID, 0, NULL); 1220 } 1221 } 1222 1223 /* 1224 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1225 * which is what ire_route_recursive returns when there is no matching ire. 1226 * Send ICMP unreachable unless blackhole. 
1227 */ 1228 void 1229 ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1230 { 1231 ipha_t *ipha = (ipha_t *)iph_arg; 1232 ill_t *ill = ira->ira_ill; 1233 ip_stack_t *ipst = ill->ill_ipst; 1234 1235 /* Would we have forwarded this packet if we had a route? */ 1236 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1237 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1238 ip_drop_input("l2 multicast not forwarded", mp, ill); 1239 freemsg(mp); 1240 return; 1241 } 1242 1243 if (!(ill->ill_flags & ILLF_ROUTER)) { 1244 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1245 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1246 freemsg(mp); 1247 return; 1248 } 1249 /* 1250 * If we had a route this could have been forwarded. Count as such. 1251 * 1252 * ipIfStatsHCInForwDatagrams should only be increment if there 1253 * will be an attempt to forward the packet, which is why we 1254 * increment after the above condition has been checked. 1255 */ 1256 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1257 1258 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1259 1260 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1261 ipst); 1262 1263 if (ire->ire_flags & RTF_BLACKHOLE) { 1264 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1265 freemsg(mp); 1266 } else { 1267 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1268 1269 if (ip_source_routed(ipha, ipst)) { 1270 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira); 1271 } else { 1272 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira); 1273 } 1274 } 1275 } 1276 1277 /* 1278 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1279 * VRRP when in noaccept mode. 1280 * We silently drop the packet. ARP handles packets even if noaccept is set. 
1281 */ 1282 /* ARGSUSED */ 1283 void 1284 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1285 ip_recv_attr_t *ira) 1286 { 1287 ill_t *ill = ira->ira_ill; 1288 1289 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1290 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1291 freemsg(mp); 1292 } 1293 1294 /* 1295 * ire_recvfn for IRE_BROADCAST. 1296 */ 1297 void 1298 ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1299 ip_recv_attr_t *ira) 1300 { 1301 ipha_t *ipha = (ipha_t *)iph_arg; 1302 ill_t *ill = ira->ira_ill; 1303 ill_t *dst_ill = ire->ire_ill; 1304 ip_stack_t *ipst = ill->ill_ipst; 1305 ire_t *alt_ire; 1306 nce_t *nce; 1307 ipaddr_t ipha_dst; 1308 1309 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1310 1311 /* Tag for higher-level protocols */ 1312 ira->ira_flags |= IRAF_BROADCAST; 1313 1314 /* 1315 * Whether local or directed broadcast forwarding: don't allow 1316 * for TCP. 1317 */ 1318 if (ipha->ipha_protocol == IPPROTO_TCP) { 1319 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1320 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1321 freemsg(mp); 1322 return; 1323 } 1324 1325 /* 1326 * So that we don't end up with dups, only one ill an IPMP group is 1327 * nominated to receive broadcast traffic. 1328 * If we have no cast_ill we are liberal and accept everything. 1329 */ 1330 if (IS_UNDER_IPMP(ill)) { 1331 /* For an under ill_grp can change under lock */ 1332 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1333 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1334 ill->ill_grp->ig_cast_ill != NULL) { 1335 rw_exit(&ipst->ips_ill_g_lock); 1336 /* No MIB since this is normal operation */ 1337 ip_drop_input("not nom_cast", mp, ill); 1338 freemsg(mp); 1339 return; 1340 } 1341 rw_exit(&ipst->ips_ill_g_lock); 1342 1343 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1344 } 1345 1346 /* 1347 * After reassembly and IPsec we will need to duplicate the 1348 * broadcast packet for all matching zones on the ill. 
1349 */ 1350 ira->ira_zoneid = ALL_ZONES; 1351 1352 /* 1353 * Check for directed broadcast i.e. ire->ire_ill is different than 1354 * the incoming ill. 1355 * The same broadcast address can be assigned to multiple interfaces 1356 * so have to check explicitly for that case by looking up the alt_ire 1357 */ 1358 if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1359 /* Reassemble on the ill on which the packet arrived */ 1360 ip_input_local_v4(ire, mp, ipha, ira); 1361 /* Restore */ 1362 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1363 return; 1364 } 1365 1366 /* Is there an IRE_BROADCAST on the incoming ill? */ 1367 ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1368 ipha->ipha_dst); 1369 alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1370 ALL_ZONES, ira->ira_tsl, 1371 MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1372 if (alt_ire != NULL) { 1373 /* Not a directed broadcast */ 1374 /* 1375 * In the special case of multirouted broadcast 1376 * packets, we unconditionally need to "gateway" 1377 * them to the appropriate interface here so that reassembly 1378 * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1379 * have RTF_MULTIRT set so we look for such an IRE in the 1380 * bucket. 
1381 */ 1382 if (alt_ire->ire_flags & RTF_MULTIRT) { 1383 irb_t *irb; 1384 ire_t *ire1; 1385 1386 irb = ire->ire_bucket; 1387 irb_refhold(irb); 1388 for (ire1 = irb->irb_ire; ire1 != NULL; 1389 ire1 = ire1->ire_next) { 1390 if (IRE_IS_CONDEMNED(ire1)) 1391 continue; 1392 if (!(ire1->ire_type & IRE_BROADCAST) || 1393 (ire1->ire_flags & RTF_MULTIRT)) 1394 continue; 1395 ill = ire1->ire_ill; 1396 ill_refhold(ill); 1397 break; 1398 } 1399 irb_refrele(irb); 1400 if (ire1 != NULL) { 1401 ill_t *orig_ill = ira->ira_ill; 1402 1403 ire_refrele(alt_ire); 1404 /* Reassemble on the new ill */ 1405 ira->ira_ill = ill; 1406 ip_input_local_v4(ire, mp, ipha, ira); 1407 ill_refrele(ill); 1408 /* Restore */ 1409 ira->ira_ill = orig_ill; 1410 ira->ira_ruifindex = 1411 orig_ill->ill_phyint->phyint_ifindex; 1412 return; 1413 } 1414 } 1415 ire_refrele(alt_ire); 1416 /* Reassemble on the ill on which the packet arrived */ 1417 ip_input_local_v4(ire, mp, ipha, ira); 1418 goto done; 1419 } 1420 1421 /* 1422 * This is a directed broadcast 1423 * 1424 * If directed broadcast is allowed, then forward the packet out 1425 * the destination interface with IXAF_LOOPBACK_COPY set. That will 1426 * result in ip_input() receiving a copy of the packet on the 1427 * appropriate ill. (We could optimize this to avoid the extra trip 1428 * via ip_input(), but since directed broadcasts are normally disabled 1429 * it doesn't make sense to optimize it.) 
1430 */ 1431 if (!ipst->ips_ip_g_forward_directed_bcast || 1432 (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1433 ip_drop_input("directed broadcast not allowed", mp, ill); 1434 freemsg(mp); 1435 goto done; 1436 } 1437 if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1438 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1439 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1440 freemsg(mp); 1441 goto done; 1442 } 1443 1444 /* 1445 * Clear the indication that this may have hardware 1446 * checksum as we are not using it for forwarding. 1447 */ 1448 DB_CKSUMFLAGS(mp) = 0; 1449 1450 /* 1451 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 1452 */ 1453 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1454 ipha->ipha_hdr_checksum = 0; 1455 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1456 1457 /* 1458 * We use ip_forward_xmit to do any fragmentation. 1459 * and loopback copy on the outbound interface. 1460 * 1461 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1462 */ 1463 ira->ira_flags |= IRAF_LOOPBACK_COPY; 1464 1465 nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1466 if (nce == NULL) { 1467 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1468 ip_drop_output("No nce", mp, dst_ill); 1469 freemsg(mp); 1470 goto done; 1471 } 1472 1473 ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mtu, 0); 1474 nce_refrele(nce); 1475 done: 1476 /* Restore */ 1477 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1478 } 1479 1480 /* 1481 * ire_recvfn for IRE_MULTICAST. 
1482 */ 1483 void 1484 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1485 ip_recv_attr_t *ira) 1486 { 1487 ipha_t *ipha = (ipha_t *)iph_arg; 1488 ill_t *ill = ira->ira_ill; 1489 ip_stack_t *ipst = ill->ill_ipst; 1490 1491 ASSERT(ire->ire_ill == ira->ira_ill); 1492 1493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1494 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1495 1496 /* RSVP hook */ 1497 if (ira->ira_flags & IRAF_RSVP) 1498 goto forus; 1499 1500 /* Tag for higher-level protocols */ 1501 ira->ira_flags |= IRAF_MULTICAST; 1502 1503 /* 1504 * So that we don't end up with dups, only one ill an IPMP group is 1505 * nominated to receive multicast traffic. 1506 * If we have no cast_ill we are liberal and accept everything. 1507 */ 1508 if (IS_UNDER_IPMP(ill)) { 1509 ip_stack_t *ipst = ill->ill_ipst; 1510 1511 /* For an under ill_grp can change under lock */ 1512 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1513 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1514 ill->ill_grp->ig_cast_ill != NULL) { 1515 rw_exit(&ipst->ips_ill_g_lock); 1516 ip_drop_input("not on cast ill", mp, ill); 1517 freemsg(mp); 1518 return; 1519 } 1520 rw_exit(&ipst->ips_ill_g_lock); 1521 /* 1522 * We switch to the upper ill so that mrouter and hasmembers 1523 * can operate on upper here and in ip_input_multicast. 1524 */ 1525 ill = ipmp_ill_hold_ipmp_ill(ill); 1526 if (ill != NULL) { 1527 ASSERT(ill != ira->ira_ill); 1528 ASSERT(ire->ire_ill == ira->ira_ill); 1529 ira->ira_ill = ill; 1530 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1531 } else { 1532 ill = ira->ira_ill; 1533 } 1534 } 1535 1536 /* 1537 * Check if we are a multicast router - send ip_mforward a copy of 1538 * the packet. 1539 * Due to mroute_decap tunnels we consider forwarding packets even if 1540 * mrouted has not joined the allmulti group on this interface. 
1541 */ 1542 if (ipst->ips_ip_g_mrouter) { 1543 int retval; 1544 1545 /* 1546 * Clear the indication that this may have hardware 1547 * checksum as we are not using it for forwarding. 1548 */ 1549 DB_CKSUMFLAGS(mp) = 0; 1550 1551 /* 1552 * ip_mforward helps us make these distinctions: If received 1553 * on tunnel and not IGMP, then drop. 1554 * If IGMP packet, then don't check membership 1555 * If received on a phyint and IGMP or PIM, then 1556 * don't check membership 1557 */ 1558 retval = ip_mforward(mp, ira); 1559 /* ip_mforward updates mib variables if needed */ 1560 1561 switch (retval) { 1562 case 0: 1563 /* 1564 * pkt is okay and arrived on phyint. 1565 * 1566 * If we are running as a multicast router 1567 * we need to see all IGMP and/or PIM packets. 1568 */ 1569 if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1570 (ipha->ipha_protocol == IPPROTO_PIM)) { 1571 goto forus; 1572 } 1573 break; 1574 case -1: 1575 /* pkt is mal-formed, toss it */ 1576 freemsg(mp); 1577 goto done; 1578 case 1: 1579 /* 1580 * pkt is okay and arrived on a tunnel 1581 * 1582 * If we are running a multicast router 1583 * we need to see all igmp packets. 1584 */ 1585 if (ipha->ipha_protocol == IPPROTO_IGMP) { 1586 goto forus; 1587 } 1588 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1589 freemsg(mp); 1590 goto done; 1591 } 1592 } 1593 1594 /* 1595 * Check if we have members on this ill. This is not necessary for 1596 * correctness because even if the NIC/GLD had a leaky filter, we 1597 * filter before passing to each conn_t. 1598 */ 1599 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1600 /* 1601 * Nobody interested 1602 * 1603 * This might just be caused by the fact that 1604 * multiple IP Multicast addresses map to the same 1605 * link layer multicast - no need to increment counter! 
1606 */ 1607 ip_drop_input("Multicast with no members", mp, ill); 1608 freemsg(mp); 1609 goto done; 1610 } 1611 forus: 1612 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1613 ntohl(ipha->ipha_dst))); 1614 1615 /* 1616 * After reassembly and IPsec we will need to duplicate the 1617 * multicast packet for all matching zones on the ill. 1618 */ 1619 ira->ira_zoneid = ALL_ZONES; 1620 1621 /* Reassemble on the ill on which the packet arrived */ 1622 ip_input_local_v4(ire, mp, ipha, ira); 1623 done: 1624 if (ill != ire->ire_ill) { 1625 ill_refrele(ill); 1626 ira->ira_ill = ire->ire_ill; 1627 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1628 } 1629 } 1630 1631 /* 1632 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1633 * Drop packets since we don't forward out multirt routes. 1634 */ 1635 /* ARGSUSED */ 1636 void 1637 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1638 { 1639 ill_t *ill = ira->ira_ill; 1640 1641 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1642 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1643 freemsg(mp); 1644 } 1645 1646 /* 1647 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1648 * has rewritten the packet to have a loopback destination address (We 1649 * filter out packet with a loopback destination from arriving over the wire). 1650 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1651 */ 1652 void 1653 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1654 { 1655 ipha_t *ipha = (ipha_t *)iph_arg; 1656 ill_t *ill = ira->ira_ill; 1657 ill_t *ire_ill = ire->ire_ill; 1658 1659 ira->ira_zoneid = GLOBAL_ZONEID; 1660 1661 /* Switch to the lo0 ill for further processing */ 1662 if (ire_ill != ill) { 1663 /* 1664 * Update ira_ill to be the ILL on which the IP address 1665 * is hosted. 
1666 * No need to hold the ill since we have a hold on the ire 1667 */ 1668 ASSERT(ira->ira_ill == ira->ira_rill); 1669 ira->ira_ill = ire_ill; 1670 1671 ip_input_local_v4(ire, mp, ipha, ira); 1672 1673 /* Restore */ 1674 ASSERT(ira->ira_ill == ire_ill); 1675 ira->ira_ill = ill; 1676 return; 1677 1678 } 1679 ip_input_local_v4(ire, mp, ipha, ira); 1680 } 1681 1682 /* 1683 * ire_recvfn for IRE_LOCAL. 1684 */ 1685 void 1686 ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1687 { 1688 ipha_t *ipha = (ipha_t *)iph_arg; 1689 ill_t *ill = ira->ira_ill; 1690 ill_t *ire_ill = ire->ire_ill; 1691 1692 /* Make a note for DAD that this address is in use */ 1693 ire->ire_last_used_time = LBOLT_FASTPATH; 1694 1695 /* Only target the IRE_LOCAL with the right zoneid. */ 1696 ira->ira_zoneid = ire->ire_zoneid; 1697 1698 /* 1699 * If the packet arrived on the wrong ill, we check that 1700 * this is ok. 1701 * If it is, then we ensure that we do the reassembly on 1702 * the ill on which the address is hosted. We keep ira_rill as 1703 * the one on which the packet arrived, so that IP_PKTINFO and 1704 * friends can report this. 1705 */ 1706 if (ire_ill != ill) { 1707 ire_t *new_ire; 1708 1709 new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1710 if (new_ire == NULL) { 1711 /* Drop packet */ 1712 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1713 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1714 freemsg(mp); 1715 return; 1716 } 1717 /* 1718 * Update ira_ill to be the ILL on which the IP address 1719 * is hosted. No need to hold the ill since we have a 1720 * hold on the ire. Note that we do the switch even if 1721 * new_ire == ire (for IPMP, ire would be the one corresponding 1722 * to the IPMP ill). 
1723 */ 1724 ASSERT(ira->ira_ill == ira->ira_rill); 1725 ira->ira_ill = new_ire->ire_ill; 1726 1727 /* ira_ruifindex tracks the upper for ira_rill */ 1728 if (IS_UNDER_IPMP(ill)) 1729 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1730 1731 ip_input_local_v4(new_ire, mp, ipha, ira); 1732 1733 /* Restore */ 1734 ASSERT(ira->ira_ill == new_ire->ire_ill); 1735 ira->ira_ill = ill; 1736 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1737 1738 if (new_ire != ire) 1739 ire_refrele(new_ire); 1740 return; 1741 } 1742 1743 ip_input_local_v4(ire, mp, ipha, ira); 1744 } 1745 1746 /* 1747 * Common function for packets arriving for the host. Handles 1748 * checksum verification, reassembly checks, etc. 1749 */ 1750 static void 1751 ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1752 { 1753 ill_t *ill = ira->ira_ill; 1754 iaflags_t iraflags = ira->ira_flags; 1755 1756 /* 1757 * Verify IP header checksum. If the packet was AH or ESP then 1758 * this flag has already been cleared. Likewise if the packet 1759 * had a hardware checksum. 1760 */ 1761 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1762 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1763 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1764 freemsg(mp); 1765 return; 1766 } 1767 1768 if (iraflags & IRAF_IPV4_OPTIONS) { 1769 if (!ip_input_local_options(mp, ipha, ira)) { 1770 /* Error has been sent and mp consumed */ 1771 return; 1772 } 1773 /* 1774 * Some old hardware does partial checksum by including the 1775 * whole IP header, so the partial checksum value might have 1776 * become invalid if any option in the packet have been 1777 * updated. Always clear partial checksum flag here. 1778 */ 1779 DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1780 } 1781 1782 /* 1783 * Is packet part of fragmented IP packet? 
1784 * We compare against defined values in network byte order 1785 */ 1786 if (ipha->ipha_fragment_offset_and_flags & 1787 (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1788 /* 1789 * Make sure we have ira_l2src before we loose the original 1790 * mblk 1791 */ 1792 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1793 ip_setl2src(mp, ira, ira->ira_rill); 1794 1795 mp = ip_input_fragment(mp, ipha, ira); 1796 if (mp == NULL) 1797 return; 1798 /* Completed reassembly */ 1799 ipha = (ipha_t *)mp->b_rptr; 1800 } 1801 1802 /* 1803 * For broadcast and multicast we need some extra work before 1804 * we call ip_fanout_v4(), since in the case of shared-IP zones 1805 * we need to pretend that a packet arrived for each zoneid. 1806 */ 1807 if (iraflags & IRAF_MULTIBROADCAST) { 1808 if (iraflags & IRAF_BROADCAST) 1809 ip_input_broadcast_v4(ire, mp, ipha, ira); 1810 else 1811 ip_input_multicast_v4(ire, mp, ipha, ira); 1812 return; 1813 } 1814 ip_fanout_v4(mp, ipha, ira); 1815 } 1816 1817 1818 /* 1819 * Handle multiple zones which match the same broadcast address 1820 * and ill by delivering a packet to each of them. 1821 * Walk the bucket and look for different ire_zoneid but otherwise 1822 * the same IRE (same ill/addr/mask/type). 1823 * Note that ire_add() tracks IREs that are identical in all 1824 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1825 * increasing ire_identical_cnt. Thus we don't need to be concerned 1826 * about those. 1827 */ 1828 static void 1829 ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1830 { 1831 ill_t *ill = ira->ira_ill; 1832 ip_stack_t *ipst = ill->ill_ipst; 1833 netstack_t *ns = ipst->ips_netstack; 1834 irb_t *irb; 1835 ire_t *ire1; 1836 mblk_t *mp1; 1837 ipha_t *ipha1; 1838 1839 irb = ire->ire_bucket; 1840 1841 /* 1842 * If we don't have more than one shared-IP zone, or if 1843 * there can't be more than one IRE_BROADCAST for this 1844 * IP address, then just set the zoneid and proceed. 
1845 */ 1846 if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1847 ira->ira_zoneid = ire->ire_zoneid; 1848 1849 ip_fanout_v4(mp, ipha, ira); 1850 return; 1851 } 1852 irb_refhold(irb); 1853 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1854 /* We do the main IRE after the end of the loop */ 1855 if (ire1 == ire) 1856 continue; 1857 1858 /* 1859 * Only IREs for the same IP address should be in the same 1860 * bucket. 1861 * But could have IRE_HOSTs in the case of CGTP. 1862 */ 1863 ASSERT(ire1->ire_addr == ire->ire_addr); 1864 if (!(ire1->ire_type & IRE_BROADCAST)) 1865 continue; 1866 1867 if (IRE_IS_CONDEMNED(ire1)) 1868 continue; 1869 1870 mp1 = copymsg(mp); 1871 if (mp1 == NULL) { 1872 /* Failed to deliver to one zone */ 1873 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1874 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1875 continue; 1876 } 1877 ira->ira_zoneid = ire1->ire_zoneid; 1878 ipha1 = (ipha_t *)mp1->b_rptr; 1879 ip_fanout_v4(mp1, ipha1, ira); 1880 } 1881 irb_refrele(irb); 1882 /* Do the main ire */ 1883 ira->ira_zoneid = ire->ire_zoneid; 1884 ip_fanout_v4(mp, ipha, ira); 1885 } 1886 1887 /* 1888 * Handle multiple zones which want to receive the same multicast packets 1889 * on this ill by delivering a packet to each of them. 1890 * 1891 * Note that for packets delivered to transports we could instead do this 1892 * as part of the fanout code, but since we need to handle icmp_inbound 1893 * it is simpler to have multicast work the same as broadcast. 1894 * 1895 * The ip_fanout matching for multicast matches based on ilm independent of 1896 * zoneid since the zoneid restriction is applied when joining a multicast 1897 * group. 
1898 */ 1899 /* ARGSUSED */ 1900 static void 1901 ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1902 { 1903 ill_t *ill = ira->ira_ill; 1904 iaflags_t iraflags = ira->ira_flags; 1905 ip_stack_t *ipst = ill->ill_ipst; 1906 netstack_t *ns = ipst->ips_netstack; 1907 zoneid_t zoneid; 1908 mblk_t *mp1; 1909 ipha_t *ipha1; 1910 1911 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1912 ASSERT(!IS_UNDER_IPMP(ill)); 1913 1914 /* 1915 * If we don't have more than one shared-IP zone, or if 1916 * there are no members in anything but the global zone, 1917 * then just set the zoneid and proceed. 1918 */ 1919 if (ns->netstack_numzones == 1 || 1920 !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1921 GLOBAL_ZONEID)) { 1922 ira->ira_zoneid = GLOBAL_ZONEID; 1923 1924 /* If sender didn't want this zone to receive it, drop */ 1925 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1926 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1927 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1928 freemsg(mp); 1929 return; 1930 } 1931 ip_fanout_v4(mp, ipha, ira); 1932 return; 1933 } 1934 1935 /* 1936 * Here we loop over all zoneids that have members in the group 1937 * and deliver a packet to ip_fanout for each zoneid. 1938 * 1939 * First find any members in the lowest numeric zoneid by looking for 1940 * first zoneid larger than -1 (ALL_ZONES). 1941 * We terminate the loop when we receive -1 (ALL_ZONES). 1942 */ 1943 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1944 for (; zoneid != ALL_ZONES; 1945 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1946 /* 1947 * Avoid an extra copymsg/freemsg by skipping global zone here 1948 * and doing that at the end. 
1949 */ 1950 if (zoneid == GLOBAL_ZONEID) 1951 continue; 1952 1953 ira->ira_zoneid = zoneid; 1954 1955 /* If sender didn't want this zone to receive it, skip */ 1956 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1957 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1958 continue; 1959 1960 mp1 = copymsg(mp); 1961 if (mp1 == NULL) { 1962 /* Failed to deliver to one zone */ 1963 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1964 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1965 continue; 1966 } 1967 ipha1 = (ipha_t *)mp1->b_rptr; 1968 ip_fanout_v4(mp1, ipha1, ira); 1969 } 1970 1971 /* Do the main ire */ 1972 ira->ira_zoneid = GLOBAL_ZONEID; 1973 /* If sender didn't want this zone to receive it, drop */ 1974 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1975 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1976 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1977 freemsg(mp); 1978 } else { 1979 ip_fanout_v4(mp, ipha, ira); 1980 } 1981 } 1982 1983 1984 /* 1985 * Determine the zoneid and IRAF_TX_* flags if trusted extensions 1986 * is in use. Updates ira_zoneid and ira_flags as a result. 1987 */ 1988 static void 1989 ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 1990 uint_t ip_hdr_length, ip_recv_attr_t *ira) 1991 { 1992 uint16_t *up; 1993 uint16_t lport; 1994 zoneid_t zoneid; 1995 1996 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 1997 1998 /* 1999 * If the packet is unlabeled we might allow read-down 2000 * for MAC_EXEMPT. Below we clear this if it is a multi-level 2001 * port (MLP). 2002 * Note that ira_tsl can be NULL here. 
2003 */ 2004 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2005 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2006 2007 if (ira->ira_zoneid != ALL_ZONES) 2008 return; 2009 2010 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2011 2012 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2013 switch (protocol) { 2014 case IPPROTO_TCP: 2015 case IPPROTO_SCTP: 2016 case IPPROTO_UDP: 2017 /* Caller ensures this */ 2018 ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2019 2020 /* 2021 * Only these transports support MLP. 2022 * We know their destination port numbers is in 2023 * the same place in the header. 2024 */ 2025 lport = up[1]; 2026 2027 /* 2028 * No need to handle exclusive-stack zones 2029 * since ALL_ZONES only applies to the shared IP instance. 2030 */ 2031 zoneid = tsol_mlp_findzone(protocol, lport); 2032 /* 2033 * If no shared MLP is found, tsol_mlp_findzone returns 2034 * ALL_ZONES. In that case, we assume it's SLP, and 2035 * search for the zone based on the packet label. 2036 * 2037 * If there is such a zone, we prefer to find a 2038 * connection in it. Otherwise, we look for a 2039 * MAC-exempt connection in any zone whose label 2040 * dominates the default label on the packet. 
2041 */ 2042 if (zoneid == ALL_ZONES) 2043 zoneid = tsol_attr_to_zoneid(ira); 2044 else 2045 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2046 break; 2047 default: 2048 /* Handle shared address for other protocols */ 2049 zoneid = tsol_attr_to_zoneid(ira); 2050 break; 2051 } 2052 ira->ira_zoneid = zoneid; 2053 } 2054 2055 /* 2056 * Increment checksum failure statistics 2057 */ 2058 static void 2059 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2060 { 2061 ip_stack_t *ipst = ill->ill_ipst; 2062 2063 switch (protocol) { 2064 case IPPROTO_TCP: 2065 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2066 2067 if (hck_flags & HCK_FULLCKSUM) 2068 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2069 else if (hck_flags & HCK_PARTIALCKSUM) 2070 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2071 else 2072 IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2073 break; 2074 case IPPROTO_UDP: 2075 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2076 if (hck_flags & HCK_FULLCKSUM) 2077 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); 2078 else if (hck_flags & HCK_PARTIALCKSUM) 2079 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2080 else 2081 IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2082 break; 2083 case IPPROTO_ICMP: 2084 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2085 break; 2086 default: 2087 ASSERT(0); 2088 break; 2089 } 2090 } 2091 2092 /* Calculate the IPv4 pseudo-header checksum */ 2093 uint32_t 2094 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2095 { 2096 uint_t ulp_len; 2097 uint32_t cksum; 2098 uint8_t protocol = ira->ira_protocol; 2099 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2100 2101 #define iphs ((uint16_t *)ipha) 2102 2103 switch (protocol) { 2104 case IPPROTO_TCP: 2105 ulp_len = ira->ira_pktlen - ip_hdr_length; 2106 2107 /* Protocol and length */ 2108 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2109 /* IP addresses */ 2110 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2111 break; 2112 2113 case IPPROTO_UDP: { 2114 udpha_t *udpha; 2115 2116 
udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2117 2118 /* Protocol and length */ 2119 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2120 /* IP addresses */ 2121 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2122 break; 2123 } 2124 2125 default: 2126 cksum = 0; 2127 break; 2128 } 2129 #undef iphs 2130 return (cksum); 2131 } 2132 2133 2134 /* 2135 * Software verification of the ULP checksums. 2136 * Returns B_TRUE if ok. 2137 * Increments statistics of failed. 2138 */ 2139 static boolean_t 2140 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2141 { 2142 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2143 uint32_t cksum; 2144 uint8_t protocol = ira->ira_protocol; 2145 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2146 2147 IP_STAT(ipst, ip_in_sw_cksum); 2148 2149 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2150 2151 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2152 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2153 if (cksum == 0) 2154 return (B_TRUE); 2155 2156 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2157 return (B_FALSE); 2158 } 2159 2160 /* 2161 * Verify the ULP checksums. 2162 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2163 * algorithm. 2164 * Increments statistics if failed. 
 */
static boolean_t
ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha,
    ip_recv_attr_t *ira)
{
	/*
	 * Use the receive (underlying) ill for the hardware checksum
	 * capability check — the offload capability belongs to the
	 * interface the packet actually arrived on.
	 */
	ill_t		*ill = ira->ira_rill;
	uint16_t	hck_flags;
	uint32_t	cksum;
	mblk_t		*mp1;
	int32_t		len;
	uint8_t		protocol = ira->ira_protocol;
	uint16_t	ip_hdr_length = ira->ira_ip_hdr_length;


	switch (protocol) {
	case IPPROTO_TCP:
		break;

	case IPPROTO_UDP: {
		udpha_t	*udpha;

		udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length);
		if (udpha->uha_checksum == 0) {
			/* Packet doesn't have a UDP checksum */
			return (B_TRUE);
		}
		break;
	}
	case IPPROTO_SCTP: {
		sctp_hdr_t	*sctph;
		uint32_t	pktsum;

		sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length);
#ifdef	DEBUG
		if (skip_sctp_cksum)
			return (B_TRUE);
#endif
		/*
		 * SCTP's CRC is computed with the checksum field zeroed;
		 * save and restore the on-wire value around the computation.
		 */
		pktsum = sctph->sh_chksum;
		sctph->sh_chksum = 0;
		cksum = sctp_cksum(mp, ip_hdr_length);
		sctph->sh_chksum = pktsum;
		if (cksum == pktsum)
			return (B_TRUE);

		/*
		 * Defer until later whether a bad checksum is ok
		 * in order to allow RAW sockets to use Adler checksum
		 * with SCTP.
		 */
		ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
		return (B_TRUE);
	}

	default:
		/* No ULP checksum to verify. */
		return (B_TRUE);
	}
	/*
	 * Revert to software checksum calculation if the interface
	 * isn't capable of checksum offload.
	 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
	 * Note: IRAF_NO_HW_CKSUM is not currently used.
	 */
	ASSERT(!IS_IPMP(ill));
	if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
	    !dohwcksum) {
		return (ip_input_sw_cksum_v4(mp, ipha, ira));
	}

	/*
	 * We apply this for all ULP protocols. Does the HW know to
	 * not set the flags for SCTP and other protocols.
	 */

	hck_flags = DB_CKSUMFLAGS(mp);

	if (hck_flags & HCK_FULLCKSUM) {
		/*
		 * Full checksum has been computed by the hardware
		 * and has been attached.  If the driver wants us to
		 * verify the correctness of the attached value, in
		 * order to protect against faulty hardware, compare
		 * it against -0 (0xFFFF) to see if it's valid.
		 */
		if (hck_flags & HCK_FULLCKSUM_OK)
			return (B_TRUE);

		cksum = DB_CKSUM16(mp);
		if (cksum == 0xFFFF)
			return (B_TRUE);
		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}

	mp1 = mp->b_cont;
	if ((hck_flags & HCK_PARTIALCKSUM) &&
	    (mp1 == NULL || mp1->b_cont == NULL) &&
	    ip_hdr_length >= DB_CKSUMSTART(mp) &&
	    ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
		uint32_t	adj;
		uchar_t		*cksum_start;

		cksum = ip_input_cksum_pseudo_v4(ipha, ira);

		cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp));

		/*
		 * Partial checksum has been calculated by hardware
		 * and attached to the packet; in addition, any
		 * prepended extraneous data is even byte aligned,
		 * and there are at most two mblks associated with
		 * the packet.  If any such data exists, we adjust
		 * the checksum; also take care any postpended data.
		 */
		IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
		/*
		 * One's complement subtract extraneous checksum
		 */
		cksum += DB_CKSUM16(mp);
		if (adj >= cksum)
			cksum = ~(adj - cksum) & 0xFFFF;
		else
			cksum -= adj;
		/* Fold the carries twice; a good checksum yields -0 */
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		if (!(~cksum & 0xFFFF))
			return (B_TRUE);

		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}
	/* Partial-checksum preconditions not met: verify in software */
	return (ip_input_sw_cksum_v4(mp, ipha, ira));
}


/*
 * Handle fanout of received packets.
2302 * Unicast packets that are looped back (from ire_send_local_v4) and packets 2303 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 2304 * 2305 * IPQoS Notes 2306 * Before sending it to the client, invoke IPPF processing. Policy processing 2307 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 2308 */ 2309 void 2310 ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2311 { 2312 ill_t *ill = ira->ira_ill; 2313 iaflags_t iraflags = ira->ira_flags; 2314 ip_stack_t *ipst = ill->ill_ipst; 2315 uint8_t protocol = ipha->ipha_protocol; 2316 conn_t *connp; 2317 #define rptr ((uchar_t *)ipha) 2318 uint_t ip_hdr_length; 2319 uint_t min_ulp_header_length; 2320 int offset; 2321 ssize_t len; 2322 netstack_t *ns = ipst->ips_netstack; 2323 ipsec_stack_t *ipss = ns->netstack_ipsec; 2324 ill_t *rill = ira->ira_rill; 2325 2326 ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length)); 2327 2328 ip_hdr_length = ira->ira_ip_hdr_length; 2329 ira->ira_protocol = protocol; 2330 2331 /* 2332 * Time for IPP once we've done reassembly and IPsec. 2333 * We skip this for loopback packets since we don't do IPQoS 2334 * on loopback. 2335 */ 2336 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2337 !(iraflags & IRAF_LOOPBACK) && 2338 (protocol != IPPROTO_ESP || protocol != IPPROTO_AH)) { 2339 /* 2340 * Use the interface on which the packet arrived - not where 2341 * the IP address is hosted. 2342 */ 2343 /* ip_process translates an IS_UNDER_IPMP */ 2344 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2345 if (mp == NULL) { 2346 /* ip_drop_packet and MIB done */ 2347 return; 2348 } 2349 } 2350 2351 /* Determine the minimum required size of the upper-layer header */ 2352 /* Need to do this for at least the set of ULPs that TX handles. 
*/ 2353 switch (protocol) { 2354 case IPPROTO_TCP: 2355 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2356 break; 2357 case IPPROTO_SCTP: 2358 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2359 break; 2360 case IPPROTO_UDP: 2361 min_ulp_header_length = UDPH_SIZE; 2362 break; 2363 case IPPROTO_ICMP: 2364 min_ulp_header_length = ICMPH_SIZE; 2365 break; 2366 default: 2367 min_ulp_header_length = 0; 2368 break; 2369 } 2370 /* Make sure we have the min ULP header length */ 2371 len = mp->b_wptr - rptr; 2372 if (len < ip_hdr_length + min_ulp_header_length) { 2373 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) { 2374 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2375 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2376 freemsg(mp); 2377 return; 2378 } 2379 IP_STAT(ipst, ip_recv_pullup); 2380 ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2381 ira); 2382 if (ipha == NULL) 2383 goto discard; 2384 len = mp->b_wptr - rptr; 2385 } 2386 2387 /* 2388 * If trusted extensions then determine the zoneid and TX specific 2389 * ira_flags. 2390 */ 2391 if (iraflags & IRAF_SYSTEM_LABELED) { 2392 /* This can update ira->ira_flags and ira->ira_zoneid */ 2393 ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira); 2394 iraflags = ira->ira_flags; 2395 } 2396 2397 2398 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2399 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2400 if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) { 2401 /* Bad checksum. Stats are already incremented */ 2402 ip_drop_input("Bad ULP checksum", mp, ill); 2403 freemsg(mp); 2404 return; 2405 } 2406 /* IRAF_SCTP_CSUM_ERR could have been set */ 2407 iraflags = ira->ira_flags; 2408 } 2409 switch (protocol) { 2410 case IPPROTO_TCP: 2411 /* For TCP, discard broadcast and multicast packets. 
*/ 2412 if (iraflags & IRAF_MULTIBROADCAST) 2413 goto discard; 2414 2415 /* First mblk contains IP+TCP headers per above check */ 2416 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2417 2418 /* TCP options present? */ 2419 offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4; 2420 if (offset != 5) { 2421 if (offset < 5) 2422 goto discard; 2423 2424 /* 2425 * There must be TCP options. 2426 * Make sure we can grab them. 2427 */ 2428 offset <<= 2; 2429 offset += ip_hdr_length; 2430 if (len < offset) { 2431 if (ira->ira_pktlen < offset) { 2432 BUMP_MIB(ill->ill_ip_mib, 2433 ipIfStatsInTruncatedPkts); 2434 ip_drop_input( 2435 "ipIfStatsInTruncatedPkts", 2436 mp, ill); 2437 freemsg(mp); 2438 return; 2439 } 2440 IP_STAT(ipst, ip_recv_pullup); 2441 ipha = ip_pullup(mp, offset, ira); 2442 if (ipha == NULL) 2443 goto discard; 2444 len = mp->b_wptr - rptr; 2445 } 2446 } 2447 2448 /* 2449 * Pass up a squeue hint to tcp. 2450 * If ira_sqp is already set (this is loopback) we leave it 2451 * alone. 
2452 */ 2453 if (ira->ira_sqp == NULL) { 2454 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2455 } 2456 2457 /* Look for AF_INET or AF_INET6 that matches */ 2458 connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length, 2459 ira, ipst); 2460 if (connp == NULL) { 2461 /* Send the TH_RST */ 2462 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2463 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2464 return; 2465 } 2466 if (connp->conn_incoming_ifindex != 0 && 2467 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2468 CONN_DEC_REF(connp); 2469 2470 /* Send the TH_RST */ 2471 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2472 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2473 return; 2474 } 2475 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2476 (iraflags & IRAF_IPSEC_SECURE)) { 2477 mp = ipsec_check_inbound_policy(mp, connp, 2478 ipha, NULL, ira); 2479 if (mp == NULL) { 2480 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2481 /* Note that mp is NULL */ 2482 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2483 CONN_DEC_REF(connp); 2484 return; 2485 } 2486 } 2487 /* Found a client; up it goes */ 2488 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2489 ira->ira_ill = ira->ira_rill = NULL; 2490 if (!IPCL_IS_TCP(connp)) { 2491 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2492 (connp->conn_recv)(connp, mp, NULL, ira); 2493 CONN_DEC_REF(connp); 2494 ira->ira_ill = ill; 2495 ira->ira_rill = rill; 2496 return; 2497 } 2498 2499 /* 2500 * We do different processing whether called from 2501 * ip_accept_tcp and we match the target, don't match 2502 * the target, and when we are called by ip_input. 
2503 */ 2504 if (iraflags & IRAF_TARGET_SQP) { 2505 if (ira->ira_target_sqp == connp->conn_sqp) { 2506 mblk_t *attrmp; 2507 2508 attrmp = ip_recv_attr_to_mblk(ira); 2509 if (attrmp == NULL) { 2510 BUMP_MIB(ill->ill_ip_mib, 2511 ipIfStatsInDiscards); 2512 ip_drop_input("ipIfStatsInDiscards", 2513 mp, ill); 2514 freemsg(mp); 2515 CONN_DEC_REF(connp); 2516 } else { 2517 SET_SQUEUE(attrmp, connp->conn_recv, 2518 connp); 2519 attrmp->b_cont = mp; 2520 ASSERT(ira->ira_target_sqp_mp == NULL); 2521 ira->ira_target_sqp_mp = attrmp; 2522 /* 2523 * Conn ref release when drained from 2524 * the squeue. 2525 */ 2526 } 2527 } else { 2528 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2529 connp->conn_recv, connp, ira, SQ_FILL, 2530 SQTAG_IP_TCP_INPUT); 2531 } 2532 } else { 2533 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2534 connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT); 2535 } 2536 ira->ira_ill = ill; 2537 ira->ira_rill = rill; 2538 return; 2539 2540 case IPPROTO_SCTP: { 2541 sctp_hdr_t *sctph; 2542 in6_addr_t map_src, map_dst; 2543 uint32_t ports; /* Source and destination ports */ 2544 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2545 2546 /* For SCTP, discard broadcast and multicast packets. */ 2547 if (iraflags & IRAF_MULTIBROADCAST) 2548 goto discard; 2549 2550 /* 2551 * Since there is no SCTP h/w cksum support yet, just 2552 * clear the flag. 
2553 */ 2554 DB_CKSUMFLAGS(mp) = 0; 2555 2556 /* Length ensured above */ 2557 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2558 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2559 2560 /* get the ports */ 2561 ports = *(uint32_t *)&sctph->sh_sport; 2562 2563 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); 2564 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); 2565 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2566 /* 2567 * No potential sctp checksum errors go to the Sun 2568 * sctp stack however they might be Adler-32 summed 2569 * packets a userland stack bound to a raw IP socket 2570 * could reasonably use. Note though that Adler-32 is 2571 * a long deprecated algorithm and customer sctp 2572 * networks should eventually migrate to CRC-32 at 2573 * which time this facility should be removed. 2574 */ 2575 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2576 return; 2577 } 2578 connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp, sctps); 2579 if (connp == NULL) { 2580 /* Check for raw socket or OOTB handling */ 2581 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2582 return; 2583 } 2584 if (connp->conn_incoming_ifindex != 0 && 2585 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2586 CONN_DEC_REF(connp); 2587 /* Check for raw socket or OOTB handling */ 2588 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2589 return; 2590 } 2591 2592 /* Found a client; up it goes */ 2593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2594 sctp_input(connp, ipha, NULL, mp, ira); 2595 /* sctp_input does a rele of the sctp_t */ 2596 return; 2597 } 2598 2599 case IPPROTO_UDP: 2600 /* First mblk contains IP+UDP headers as checked above */ 2601 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2602 2603 if (iraflags & IRAF_MULTIBROADCAST) { 2604 uint16_t *up; /* Pointer to ports in ULP header */ 2605 2606 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2607 ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira); 2608 return; 2609 } 2610 2611 /* Look for AF_INET or 
AF_INET6 that matches */ 2612 connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length, 2613 ira, ipst); 2614 if (connp == NULL) { 2615 no_udp_match: 2616 if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP]. 2617 connf_head != NULL) { 2618 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2619 ip_fanout_proto_v4(mp, ipha, ira); 2620 } else { 2621 ip_fanout_send_icmp_v4(mp, 2622 ICMP_DEST_UNREACHABLE, 2623 ICMP_PORT_UNREACHABLE, ira); 2624 } 2625 return; 2626 2627 } 2628 if (connp->conn_incoming_ifindex != 0 && 2629 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2630 CONN_DEC_REF(connp); 2631 goto no_udp_match; 2632 } 2633 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2634 !canputnext(connp->conn_rq)) { 2635 CONN_DEC_REF(connp); 2636 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2637 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2638 freemsg(mp); 2639 return; 2640 } 2641 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2642 (iraflags & IRAF_IPSEC_SECURE)) { 2643 mp = ipsec_check_inbound_policy(mp, connp, 2644 ipha, NULL, ira); 2645 if (mp == NULL) { 2646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2647 /* Note that mp is NULL */ 2648 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2649 CONN_DEC_REF(connp); 2650 return; 2651 } 2652 } 2653 /* 2654 * Remove 0-spi if it's 0, or move everything behind 2655 * the UDP header over it and forward to ESP via 2656 * ip_fanout_v4(). 2657 */ 2658 if (connp->conn_udp->udp_nat_t_endpoint) { 2659 if (iraflags & IRAF_IPSEC_SECURE) { 2660 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2661 DROPPER(ipss, ipds_esp_nat_t_ipsec), 2662 &ipss->ipsec_dropper); 2663 CONN_DEC_REF(connp); 2664 return; 2665 } 2666 2667 mp = zero_spi_check(mp, ira); 2668 if (mp == NULL) { 2669 /* 2670 * Packet was consumed - probably sent to 2671 * ip_fanout_v4. 2672 */ 2673 CONN_DEC_REF(connp); 2674 return; 2675 } 2676 /* Else continue like a normal UDP packet. 
*/ 2677 ipha = (ipha_t *)mp->b_rptr; 2678 protocol = ipha->ipha_protocol; 2679 ira->ira_protocol = protocol; 2680 } 2681 /* Found a client; up it goes */ 2682 IP_STAT(ipst, ip_udp_fannorm); 2683 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2684 ira->ira_ill = ira->ira_rill = NULL; 2685 (connp->conn_recv)(connp, mp, NULL, ira); 2686 CONN_DEC_REF(connp); 2687 ira->ira_ill = ill; 2688 ira->ira_rill = rill; 2689 return; 2690 default: 2691 break; 2692 } 2693 2694 /* 2695 * Clear hardware checksumming flag as it is currently only 2696 * used by TCP and UDP. 2697 */ 2698 DB_CKSUMFLAGS(mp) = 0; 2699 2700 switch (protocol) { 2701 case IPPROTO_ICMP: 2702 /* 2703 * We need to accomodate icmp messages coming in clear 2704 * until we get everything secure from the wire. If 2705 * icmp_accept_clear_messages is zero we check with 2706 * the global policy and act accordingly. If it is 2707 * non-zero, we accept the message without any checks. 2708 * But *this does not mean* that this will be delivered 2709 * to RAW socket clients. By accepting we might send 2710 * replies back, change our MTU value etc., 2711 * but delivery to the ULP/clients depends on their 2712 * policy dispositions. 2713 */ 2714 if (ipst->ips_icmp_accept_clear_messages == 0) { 2715 mp = ipsec_check_global_policy(mp, NULL, 2716 ipha, NULL, ira, ns); 2717 if (mp == NULL) 2718 return; 2719 } 2720 2721 /* 2722 * On a labeled system, we have to check whether the zone 2723 * itself is permitted to receive raw traffic. 2724 */ 2725 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2726 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2727 BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); 2728 ip_drop_input("tsol_can_accept_raw", mp, ill); 2729 freemsg(mp); 2730 return; 2731 } 2732 } 2733 2734 /* 2735 * ICMP header checksum, including checksum field, 2736 * should be zero. 
2737 */ 2738 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2739 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2740 ip_drop_input("icmpInCksumErrs", mp, ill); 2741 freemsg(mp); 2742 return; 2743 } 2744 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2745 mp = icmp_inbound_v4(mp, ira); 2746 if (mp == NULL) { 2747 /* No need to pass to RAW sockets */ 2748 return; 2749 } 2750 break; 2751 2752 case IPPROTO_IGMP: 2753 /* 2754 * If we are not willing to accept IGMP packets in clear, 2755 * then check with global policy. 2756 */ 2757 if (ipst->ips_igmp_accept_clear_messages == 0) { 2758 mp = ipsec_check_global_policy(mp, NULL, 2759 ipha, NULL, ira, ns); 2760 if (mp == NULL) 2761 return; 2762 } 2763 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2764 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2765 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2766 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2767 freemsg(mp); 2768 return; 2769 } 2770 /* 2771 * Validate checksum 2772 */ 2773 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2774 ++ipst->ips_igmpstat.igps_rcv_badsum; 2775 ip_drop_input("igps_rcv_badsum", mp, ill); 2776 freemsg(mp); 2777 return; 2778 } 2779 2780 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2781 mp = igmp_input(mp, ira); 2782 if (mp == NULL) { 2783 /* Bad packet - discarded by igmp_input */ 2784 return; 2785 } 2786 break; 2787 case IPPROTO_PIM: 2788 /* 2789 * If we are not willing to accept PIM packets in clear, 2790 * then check with global policy. 
2791 */ 2792 if (ipst->ips_pim_accept_clear_messages == 0) { 2793 mp = ipsec_check_global_policy(mp, NULL, 2794 ipha, NULL, ira, ns); 2795 if (mp == NULL) 2796 return; 2797 } 2798 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2799 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2800 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2801 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2802 freemsg(mp); 2803 return; 2804 } 2805 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2806 2807 /* Checksum is verified in pim_input */ 2808 mp = pim_input(mp, ira); 2809 if (mp == NULL) { 2810 /* Bad packet - discarded by pim_input */ 2811 return; 2812 } 2813 break; 2814 case IPPROTO_AH: 2815 case IPPROTO_ESP: { 2816 /* 2817 * Fast path for AH/ESP. 2818 */ 2819 netstack_t *ns = ipst->ips_netstack; 2820 ipsec_stack_t *ipss = ns->netstack_ipsec; 2821 2822 IP_STAT(ipst, ipsec_proto_ahesp); 2823 2824 if (!ipsec_loaded(ipss)) { 2825 ip_proto_not_sup(mp, ira); 2826 return; 2827 } 2828 2829 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2830 /* select inbound SA and have IPsec process the pkt */ 2831 if (protocol == IPPROTO_ESP) { 2832 esph_t *esph; 2833 boolean_t esp_in_udp_sa; 2834 boolean_t esp_in_udp_packet; 2835 2836 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2837 if (mp == NULL) 2838 return; 2839 2840 ASSERT(esph != NULL); 2841 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2842 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2843 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2844 2845 esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags & 2846 IPSA_F_NATT) != 0); 2847 esp_in_udp_packet = 2848 (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0; 2849 2850 /* 2851 * The following is a fancy, but quick, way of saying: 2852 * ESP-in-UDP SA and Raw ESP packet --> drop 2853 * OR 2854 * ESP SA and ESP-in-UDP packet --> drop 2855 */ 2856 if (esp_in_udp_sa != esp_in_udp_packet) { 2857 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2858 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2859 DROPPER(ipss, 
ipds_esp_no_sa), 2860 &ipss->ipsec_dropper); 2861 return; 2862 } 2863 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2864 ira); 2865 } else { 2866 ah_t *ah; 2867 2868 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2869 if (mp == NULL) 2870 return; 2871 2872 ASSERT(ah != NULL); 2873 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2874 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2875 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2876 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2877 ira); 2878 } 2879 2880 if (mp == NULL) { 2881 /* 2882 * Either it failed or is pending. In the former case 2883 * ipIfStatsInDiscards was increased. 2884 */ 2885 return; 2886 } 2887 /* we're done with IPsec processing, send it up */ 2888 ip_input_post_ipsec(mp, ira); 2889 return; 2890 } 2891 case IPPROTO_ENCAP: { 2892 ipha_t *inner_ipha; 2893 2894 /* 2895 * Handle self-encapsulated packets (IP-in-IP where 2896 * the inner addresses == the outer addresses). 2897 */ 2898 if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) > 2899 mp->b_wptr) { 2900 if (ira->ira_pktlen < 2901 ip_hdr_length + sizeof (ipha_t)) { 2902 BUMP_MIB(ill->ill_ip_mib, 2903 ipIfStatsInTruncatedPkts); 2904 ip_drop_input("ipIfStatsInTruncatedPkts", 2905 mp, ill); 2906 freemsg(mp); 2907 return; 2908 } 2909 ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length + 2910 sizeof (ipha_t) - mp->b_rptr, ira); 2911 if (ipha == NULL) { 2912 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2913 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2914 freemsg(mp); 2915 return; 2916 } 2917 } 2918 inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length); 2919 /* 2920 * Check the sanity of the inner IP header. 
2921 */ 2922 if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) { 2923 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2924 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2925 freemsg(mp); 2926 return; 2927 } 2928 if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) { 2929 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2930 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2931 freemsg(mp); 2932 return; 2933 } 2934 if (inner_ipha->ipha_src != ipha->ipha_src || 2935 inner_ipha->ipha_dst != ipha->ipha_dst) { 2936 /* We fallthru to iptun fanout below */ 2937 goto iptun; 2938 } 2939 2940 /* 2941 * Self-encapsulated tunnel packet. Remove 2942 * the outer IP header and fanout again. 2943 * We also need to make sure that the inner 2944 * header is pulled up until options. 2945 */ 2946 mp->b_rptr = (uchar_t *)inner_ipha; 2947 ipha = inner_ipha; 2948 ip_hdr_length = IPH_HDR_LENGTH(ipha); 2949 if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) { 2950 if (ira->ira_pktlen < 2951 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) { 2952 BUMP_MIB(ill->ill_ip_mib, 2953 ipIfStatsInTruncatedPkts); 2954 ip_drop_input("ipIfStatsInTruncatedPkts", 2955 mp, ill); 2956 freemsg(mp); 2957 return; 2958 } 2959 ipha = ip_pullup(mp, 2960 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira); 2961 if (ipha == NULL) { 2962 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2963 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2964 freemsg(mp); 2965 return; 2966 } 2967 } 2968 if (ip_hdr_length > sizeof (ipha_t)) { 2969 /* We got options on the inner packet. */ 2970 ipaddr_t dst = ipha->ipha_dst; 2971 int error = 0; 2972 2973 dst = ip_input_options(ipha, dst, mp, ira, &error); 2974 if (error != 0) { 2975 /* 2976 * An ICMP error has been sent and the packet 2977 * has been dropped. 2978 */ 2979 return; 2980 } 2981 if (dst != ipha->ipha_dst) { 2982 /* 2983 * Someone put a source-route in 2984 * the inside header of a self- 2985 * encapsulated packet. 
Drop it 2986 * with extreme prejudice and let 2987 * the sender know. 2988 */ 2989 ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", 2990 mp, ill); 2991 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, 2992 ira); 2993 return; 2994 } 2995 } 2996 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 2997 /* 2998 * This means that somebody is sending 2999 * Self-encapsualted packets without AH/ESP. 3000 * 3001 * Send this packet to find a tunnel endpoint. 3002 * if I can't find one, an ICMP 3003 * PROTOCOL_UNREACHABLE will get sent. 3004 */ 3005 protocol = ipha->ipha_protocol; 3006 ira->ira_protocol = protocol; 3007 goto iptun; 3008 } 3009 3010 /* Update based on removed IP header */ 3011 ira->ira_ip_hdr_length = ip_hdr_length; 3012 ira->ira_pktlen = ntohs(ipha->ipha_length); 3013 3014 if (ira->ira_flags & IRAF_IPSEC_DECAPS) { 3015 /* 3016 * This packet is self-encapsulated multiple 3017 * times. We don't want to recurse infinitely. 3018 * To keep it simple, drop the packet. 3019 */ 3020 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3021 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3022 freemsg(mp); 3023 return; 3024 } 3025 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3026 ira->ira_flags |= IRAF_IPSEC_DECAPS; 3027 3028 ip_input_post_ipsec(mp, ira); 3029 return; 3030 } 3031 3032 iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */ 3033 case IPPROTO_IPV6: 3034 /* iptun will verify trusted label */ 3035 connp = ipcl_classify_v4(mp, protocol, ip_hdr_length, 3036 ira, ipst); 3037 if (connp != NULL) { 3038 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3039 ira->ira_ill = ira->ira_rill = NULL; 3040 (connp->conn_recv)(connp, mp, NULL, ira); 3041 CONN_DEC_REF(connp); 3042 ira->ira_ill = ill; 3043 ira->ira_rill = rill; 3044 return; 3045 } 3046 /* FALLTHRU */ 3047 default: 3048 /* 3049 * On a labeled system, we have to check whether the zone 3050 * itself is permitted to receive raw traffic. 
3051 */ 3052 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 3053 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 3054 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3055 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3056 freemsg(mp); 3057 return; 3058 } 3059 } 3060 break; 3061 } 3062 3063 /* 3064 * The above input functions may have returned the pulled up message. 3065 * So ipha need to be reinitialized. 3066 */ 3067 ipha = (ipha_t *)mp->b_rptr; 3068 ira->ira_protocol = protocol = ipha->ipha_protocol; 3069 if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) { 3070 /* 3071 * No user-level listener for these packets packets. 3072 * Check for IPPROTO_ENCAP... 3073 */ 3074 if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { 3075 /* 3076 * Check policy here, 3077 * THEN ship off to ip_mroute_decap(). 3078 * 3079 * BTW, If I match a configured IP-in-IP 3080 * tunnel above, this path will not be reached, and 3081 * ip_mroute_decap will never be called. 3082 */ 3083 mp = ipsec_check_global_policy(mp, connp, 3084 ipha, NULL, ira, ns); 3085 if (mp != NULL) { 3086 ip_mroute_decap(mp, ira); 3087 } /* Else we already freed everything! */ 3088 } else { 3089 ip_proto_not_sup(mp, ira); 3090 } 3091 return; 3092 } 3093 3094 /* 3095 * Handle fanout to raw sockets. There 3096 * can be more than one stream bound to a particular 3097 * protocol. When this is the case, each one gets a copy 3098 * of any incoming packets. 3099 */ 3100 ASSERT(ira->ira_protocol == ipha->ipha_protocol); 3101 ip_fanout_proto_v4(mp, ipha, ira); 3102 return; 3103 3104 discard: 3105 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3106 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3107 freemsg(mp); 3108 #undef rptr 3109 } 3110