1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 * Copyright 2019 Joyent, Inc. 27 */ 28 /* Copyright (c) 1990 Mentat Inc. 
*/ 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/dlpi.h> 33 #include <sys/stropts.h> 34 #include <sys/sysmacros.h> 35 #include <sys/strsubr.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/zone.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/xti_inet.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/kobj.h> 47 #include <sys/modctl.h> 48 #include <sys/atomic.h> 49 #include <sys/policy.h> 50 #include <sys/priv.h> 51 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/kmem.h> 55 #include <sys/sdt.h> 56 #include <sys/socket.h> 57 #include <sys/vtrace.h> 58 #include <sys/isa_defs.h> 59 #include <sys/mac.h> 60 #include <sys/mac_client.h> 61 #include <net/if.h> 62 #include <net/if_arp.h> 63 #include <net/route.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <net/if_dl.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 76 #include <netinet/igmp_var.h> 77 #include <netinet/ip6.h> 78 #include <netinet/icmp6.h> 79 #include <netinet/sctp.h> 80 81 #include <inet/ip.h> 82 #include <inet/ip_impl.h> 83 #include <inet/ip6.h> 84 #include <inet/ip6_asp.h> 85 #include <inet/optcom.h> 86 #include <inet/tcp.h> 87 #include <inet/tcp_impl.h> 88 #include <inet/ip_multi.h> 89 #include <inet/ip_if.h> 90 #include <inet/ip_ire.h> 91 #include <inet/ip_ftable.h> 92 #include <inet/ip_rts.h> 93 #include <inet/ip_ndp.h> 94 #include <inet/ip_listutils.h> 95 #include <netinet/igmp.h> 96 #include <netinet/ip_mroute.h> 97 #include <inet/ipp_common.h> 98 99 #include <net/pfkeyv2.h> 100 #include <inet/sadb.h> 101 #include <inet/ipsec_impl.h> 102 #include <inet/ipdrop.h> 103 #include <inet/ip_netinfo.h> 104 #include <inet/ilb_ip.h> 105 #include <sys/squeue_impl.h> 106 
#include <sys/squeue.h> 107 108 #include <sys/ethernet.h> 109 #include <net/if_types.h> 110 #include <sys/cpuvar.h> 111 112 #include <ipp/ipp.h> 113 #include <ipp/ipp_impl.h> 114 #include <ipp/ipgpc/ipgpc.h> 115 116 #include <sys/pattr.h> 117 #include <inet/ipclassifier.h> 118 #include <inet/sctp_ip.h> 119 #include <inet/sctp/sctp_impl.h> 120 #include <inet/udp_impl.h> 121 #include <sys/sunddi.h> 122 123 #include <sys/tsol/label.h> 124 #include <sys/tsol/tnet.h> 125 126 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 127 128 #ifdef DEBUG 129 extern boolean_t skip_sctp_cksum; 130 #endif 131 132 static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *, 133 ip_recv_attr_t *); 134 135 static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *, 136 ip_recv_attr_t *); 137 static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, 138 ip_recv_attr_t *); 139 140 #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4) 141 142 /* 143 * Direct read side procedure capable of dealing with chains. GLDv3 based 144 * drivers call this function directly with mblk chains while STREAMS 145 * read side procedure ip_rput() calls this for single packet with ip_ring 146 * set to NULL to process one packet at a time. 147 * 148 * The ill will always be valid if this function is called directly from 149 * the driver. 150 * 151 * If this chain is part of a VLAN stream, then the VLAN tag is 152 * stripped from the MAC header before being delivered to this 153 * function. 154 * 155 * If the IP header in packet is not 32-bit aligned, every message in the 156 * chain will be aligned before further operations. This is required on SPARC 157 * platform. 
158 */ 159 void 160 ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 161 struct mac_header_info_s *mhip) 162 { 163 (void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL, 164 NULL); 165 } 166 167 /* 168 * ip_accept_tcp() - This function is called by the squeue when it retrieves 169 * a chain of packets in the poll mode. The packets have gone through the 170 * data link processing but not IP processing. For performance and latency 171 * reasons, the squeue wants to process the chain in line instead of feeding 172 * it back via ip_input path. 173 * 174 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v4 175 * will pass back any TCP packets matching the target sqp to 176 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by 177 * ip_input_v4 and ip_fanout_v4 as normal. 178 * The TCP packets that match the target squeue are returned to the caller 179 * as a b_next chain after each packet has been prepend with an mblk 180 * from ip_recv_attr_to_mblk. 181 */ 182 mblk_t * 183 ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 184 mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 185 { 186 return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp, 187 last, cnt)); 188 } 189 190 /* 191 * Used by ip_input and ip_accept_tcp 192 * The last three arguments are only used by ip_accept_tcp, and mhip is 193 * only used by ip_input. 
*/
/*
 * Walks the b_next chain in mp_chain, detaching each packet and passing it
 * to ill->ill_inputfn with a per-packet ip_recv_attr_t and a one-element
 * route cache that is shared across the chain.
 *
 * Returns the head of a b_next chain of packets handed back through
 * ira_target_sqp_mp, with *last set to its tail and *cnt to its length.
 * Returns NULL when no packet was handed back; in that case *last and
 * *cnt are not written.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/* Loop over b_next */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		/* Detach this packet; mp_chain now heads the remainder. */
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks.  We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		/* Reset the per-packet attributes before each dispatch. */
		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);

		/*
		 * Call one of:
		 *	ill_input_full_v4
		 *	ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted chain returned to the caller */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}

/*
 * This input function is used when
 *  - is_system_labeled()
 *  - CGTP filtering
 *  - DHCP unicast before we have an IP address configured
 *  - there is a listener for IPPROTO_RSVP
 *
 * It may set IRAF_SYSTEM_LABELED, IRAF_DHCP_UNICAST or IRAF_RSVP in
 * ira_flags and may override the nexthop, and then passes the packet on
 * to ill_input_short_v4. On each drop path in this function the mblk is
 * freed before returning.
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
		 */
		cgtp_flt_pkt =
		    ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid,
		    ill->ill_phyint->phyint_ifindex, mp);
		if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
			ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP
	 * server to unicast DHCP packets to a DHCP client using the
	 * IP address it is offering to the client.  This can be
	 * disabled through the "broadcast bit", but not all DHCP
	 * servers honor that bit.  Therefore, to interoperate with as
	 * many DHCP servers as possible, the DHCP client allows the
	 * server to unicast, but we treat those packets as broadcast
	 * here.  Note that we don't rewrite the packet itself since
	 * (a) that would mess up the checksums and (b) the DHCP
	 * client conn is bound to INADDR_ANY so ip_fanout_udp() will
	 * hand it the packet regardless.
	 */
	if (ill->ill_dhcpinit != 0 &&
	    ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION &&
	    ipha->ipha_protocol == IPPROTO_UDP) {
		udpha_t *udpha;

		ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira);
		if (ipha == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill);
			freemsg(mp);
			return;
		}
		/* Reload since pullupmsg() can change b_rptr. */
		udpha = (udpha_t *)&ipha[1];

		if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) {
			DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill,
			    mblk_t *, mp);
			/*
			 * This assumes that we deliver to all conns for
			 * multicast and broadcast packets.
			 */
			nexthop = INADDR_BROADCAST;
			ira->ira_flags |= IRAF_DHCP_UNICAST;
		}
	}

	/*
	 * If rsvpd is running, let RSVP daemon handle its processing
	 * and forwarding of RSVP multicast/unicast packets.
	 * If rsvpd is not running but mrouted is running, RSVP
	 * multicast packets are forwarded as multicast traffic
	 * and RSVP unicast packets are forwarded by unicast router.
	 * If neither rsvpd nor mrouted is running, RSVP multicast
	 * packets are not forwarded, but the unicast packets are
	 * forwarded like unicast traffic.
	 */
	if (ipha->ipha_protocol == IPPROTO_RSVP &&
	    ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
		/* RSVP packet and rsvpd running. Treat as ours */
		ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop)));
		/*
		 * We use a multicast address to get the packet to
		 * ire_recv_multicast_v4. There will not be a membership
		 * check since we set IRAF_RSVP
		 */
		nexthop = htonl(INADDR_UNSPEC_GROUP);
		ira->ira_flags |= IRAF_RSVP;
	}

	ill_input_short_v4(mp, ipha, &nexthop, ira, rtc);
}

/*
 * This is the tail-end of the full receive side packet handling.
 * It can be used directly when the configuration is simple.
 *
 * It checks the packet, runs the physical-in hooks, the observability
 * hook and the ILB check, processes any IP options, looks up the IRE for
 * the nexthop (through the route cache in rtc unless the packet is
 * labeled), and then calls that IRE's ire_recvfn.
 */
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
	/* rptr tracks ipha, so it follows every later reload of ipha. */
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If the packet originated from a same-machine sender or
	 * there is a good HW IP header checksum, we clear the need
	 * to look at the IP header checksum.
	 */
	if (((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum)) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if this machine is set up as an
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			/* Fragments not destined to a VIP skip ILB entirely. */
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	/* A non-zero difference means the header is not the simple one. */
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present!  Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef rptr

/*
 * ire_recvfn for IREs that need forwarding
 *
 * Takes a hold on the IRE's nce for the duration of the call; every
 * return path after the hold releases it with nce_refrele. On the drop
 * paths the mblk is either freed here or handed to an ICMP error routine.
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		/* Undo the count taken on this IRE; it is not used below. */
		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int	error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the smaller of the outgoing ill's MTU and a non-zero IRE MTU. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1114 */ 1115 void 1116 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1117 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1118 { 1119 ill_t *dst_ill = nce->nce_ill; 1120 uint32_t pkt_len; 1121 uint32_t sum; 1122 iaflags_t iraflags = ira->ira_flags; 1123 ip_stack_t *ipst = ill->ill_ipst; 1124 iaflags_t ixaflags; 1125 1126 if (ipha->ipha_ttl <= 1) { 1127 /* Perhaps the checksum was bad */ 1128 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1129 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1130 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1131 freemsg(mp); 1132 return; 1133 } 1134 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1135 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1136 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1137 return; 1138 } 1139 1140 /* 1141 * Count the forward as a hop and update the checksum 1142 * accordingly. 1143 */ 1144 ipha->ipha_ttl--; 1145 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1146 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1147 1148 /* Check if there are options to update */ 1149 if (iraflags & IRAF_IPV4_OPTIONS) { 1150 ASSERT(ipha->ipha_version_and_hdr_length != 1151 IP_SIMPLE_HDR_VERSION); 1152 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1153 1154 if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1155 /* ipIfStatsForwProhibits and ip_drop_input done */ 1156 return; 1157 } 1158 1159 ipha->ipha_hdr_checksum = 0; 1160 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1161 } 1162 1163 /* Initiate Write side IPPF processing before any fragmentation */ 1164 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1165 /* ip_process translates an IS_UNDER_IPMP */ 1166 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1167 if (mp == NULL) { 1168 /* ip_drop_packet and MIB done */ 1169 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1170 " during IPPF processing\n")); 1171 return; 1172 } 1173 } 1174 1175 pkt_len = ira->ira_pktlen; 1176 1177 
BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1178 1179 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1180 1181 if (pkt_len > mtu) { 1182 /* 1183 * It needs fragging on its way out. If we haven't 1184 * verified the header checksum yet we do it now since 1185 * are going to put a surely good checksum in the 1186 * outgoing header, we have to make sure that it 1187 * was good coming in. 1188 */ 1189 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1190 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1191 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1192 freemsg(mp); 1193 return; 1194 } 1195 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1196 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1197 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1198 if (iraflags & IRAF_SYSTEM_LABELED) { 1199 /* 1200 * Remove any CIPSO option added by 1201 * tsol_ip_forward, and make sure we report 1202 * a path MTU so that there 1203 * is room to add such a CIPSO option for future 1204 * packets. 1205 */ 1206 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1207 AF_INET); 1208 } 1209 1210 icmp_frag_needed(mp, mtu, ira); 1211 return; 1212 } 1213 1214 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1215 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1216 return; 1217 } 1218 1219 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1220 if (iraflags & IRAF_LOOPBACK_COPY) { 1221 /* 1222 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1223 * is don't care 1224 */ 1225 (void) ip_postfrag_loopcheck(mp, nce, 1226 ixaflags | IXAF_LOOPBACK_COPY, 1227 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1228 } else { 1229 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1230 GLOBAL_ZONEID, 0, NULL); 1231 } 1232 } 1233 1234 /* 1235 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1236 * which is what ire_route_recursive returns when there is no matching ire. 
1237 * Send ICMP unreachable unless blackhole. 1238 */ 1239 void 1240 ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1241 { 1242 ipha_t *ipha = (ipha_t *)iph_arg; 1243 ill_t *ill = ira->ira_ill; 1244 ip_stack_t *ipst = ill->ill_ipst; 1245 1246 /* Would we have forwarded this packet if we had a route? */ 1247 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1248 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1249 ip_drop_input("l2 multicast not forwarded", mp, ill); 1250 freemsg(mp); 1251 return; 1252 } 1253 1254 if (!(ill->ill_flags & ILLF_ROUTER)) { 1255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1256 ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1257 freemsg(mp); 1258 return; 1259 } 1260 /* 1261 * If we had a route this could have been forwarded. Count as such. 1262 * 1263 * ipIfStatsHCInForwDatagrams should only be increment if there 1264 * will be an attempt to forward the packet, which is why we 1265 * increment after the above condition has been checked. 1266 */ 1267 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1268 1269 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1270 1271 ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1272 ipst); 1273 1274 if (ire->ire_flags & RTF_BLACKHOLE) { 1275 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1276 freemsg(mp); 1277 } else { 1278 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1279 1280 if (ip_source_routed(ipha, ipst)) { 1281 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira); 1282 } else { 1283 icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira); 1284 } 1285 } 1286 } 1287 1288 /* 1289 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1290 * VRRP when in noaccept mode. 1291 * We silently drop the packet. ARP handles packets even if noaccept is set. 
1292 */ 1293 /* ARGSUSED */ 1294 void 1295 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1296 ip_recv_attr_t *ira) 1297 { 1298 ill_t *ill = ira->ira_ill; 1299 1300 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1301 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1302 freemsg(mp); 1303 } 1304 1305 /* 1306 * ire_recvfn for IRE_BROADCAST. 1307 */ 1308 void 1309 ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1310 ip_recv_attr_t *ira) 1311 { 1312 ipha_t *ipha = (ipha_t *)iph_arg; 1313 ill_t *ill = ira->ira_ill; 1314 ill_t *dst_ill = ire->ire_ill; 1315 ip_stack_t *ipst = ill->ill_ipst; 1316 ire_t *alt_ire; 1317 nce_t *nce; 1318 ipaddr_t ipha_dst; 1319 1320 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1321 1322 /* Tag for higher-level protocols */ 1323 ira->ira_flags |= IRAF_BROADCAST; 1324 1325 /* 1326 * Whether local or directed broadcast forwarding: don't allow 1327 * for TCP. 1328 */ 1329 if (ipha->ipha_protocol == IPPROTO_TCP) { 1330 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1331 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1332 freemsg(mp); 1333 return; 1334 } 1335 1336 /* 1337 * So that we don't end up with dups, only one ill an IPMP group is 1338 * nominated to receive broadcast traffic. 1339 * If we have no cast_ill we are liberal and accept everything. 1340 */ 1341 if (IS_UNDER_IPMP(ill)) { 1342 /* For an under ill_grp can change under lock */ 1343 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1344 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1345 ill->ill_grp->ig_cast_ill != NULL) { 1346 rw_exit(&ipst->ips_ill_g_lock); 1347 /* No MIB since this is normal operation */ 1348 ip_drop_input("not nom_cast", mp, ill); 1349 freemsg(mp); 1350 return; 1351 } 1352 rw_exit(&ipst->ips_ill_g_lock); 1353 1354 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1355 } 1356 1357 /* 1358 * After reassembly and IPsec we will need to duplicate the 1359 * broadcast packet for all matching zones on the ill. 
1360 */ 1361 ira->ira_zoneid = ALL_ZONES; 1362 1363 /* 1364 * Check for directed broadcast i.e. ire->ire_ill is different than 1365 * the incoming ill. 1366 * The same broadcast address can be assigned to multiple interfaces 1367 * so have to check explicitly for that case by looking up the alt_ire 1368 */ 1369 if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1370 /* Reassemble on the ill on which the packet arrived */ 1371 ip_input_local_v4(ire, mp, ipha, ira); 1372 /* Restore */ 1373 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1374 return; 1375 } 1376 1377 /* Is there an IRE_BROADCAST on the incoming ill? */ 1378 ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1379 ipha->ipha_dst); 1380 alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1381 ALL_ZONES, ira->ira_tsl, 1382 MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1383 if (alt_ire != NULL) { 1384 /* Not a directed broadcast */ 1385 /* 1386 * In the special case of multirouted broadcast 1387 * packets, we unconditionally need to "gateway" 1388 * them to the appropriate interface here so that reassembly 1389 * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1390 * have RTF_MULTIRT set so we look for such an IRE in the 1391 * bucket. 
1392 */ 1393 if (alt_ire->ire_flags & RTF_MULTIRT) { 1394 irb_t *irb; 1395 ire_t *ire1; 1396 1397 irb = ire->ire_bucket; 1398 irb_refhold(irb); 1399 for (ire1 = irb->irb_ire; ire1 != NULL; 1400 ire1 = ire1->ire_next) { 1401 if (IRE_IS_CONDEMNED(ire1)) 1402 continue; 1403 if (!(ire1->ire_type & IRE_BROADCAST) || 1404 (ire1->ire_flags & RTF_MULTIRT)) 1405 continue; 1406 ill = ire1->ire_ill; 1407 ill_refhold(ill); 1408 break; 1409 } 1410 irb_refrele(irb); 1411 if (ire1 != NULL) { 1412 ill_t *orig_ill = ira->ira_ill; 1413 1414 ire_refrele(alt_ire); 1415 /* Reassemble on the new ill */ 1416 ira->ira_ill = ill; 1417 ip_input_local_v4(ire, mp, ipha, ira); 1418 ill_refrele(ill); 1419 /* Restore */ 1420 ira->ira_ill = orig_ill; 1421 ira->ira_ruifindex = 1422 orig_ill->ill_phyint->phyint_ifindex; 1423 return; 1424 } 1425 } 1426 ire_refrele(alt_ire); 1427 /* Reassemble on the ill on which the packet arrived */ 1428 ip_input_local_v4(ire, mp, ipha, ira); 1429 goto done; 1430 } 1431 1432 /* 1433 * This is a directed broadcast 1434 * 1435 * If directed broadcast is allowed, then forward the packet out 1436 * the destination interface with IXAF_LOOPBACK_COPY set. That will 1437 * result in ip_input() receiving a copy of the packet on the 1438 * appropriate ill. (We could optimize this to avoid the extra trip 1439 * via ip_input(), but since directed broadcasts are normally disabled 1440 * it doesn't make sense to optimize it.) 
1441 */ 1442 if (!ipst->ips_ip_g_forward_directed_bcast || 1443 (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1444 ip_drop_input("directed broadcast not allowed", mp, ill); 1445 freemsg(mp); 1446 goto done; 1447 } 1448 if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1449 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1450 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1451 freemsg(mp); 1452 goto done; 1453 } 1454 1455 /* 1456 * Clear the indication that this may have hardware 1457 * checksum as we are not using it for forwarding. 1458 */ 1459 DB_CKSUMFLAGS(mp) = 0; 1460 1461 /* 1462 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 1463 */ 1464 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1465 ipha->ipha_hdr_checksum = 0; 1466 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1467 1468 /* 1469 * We use ip_forward_xmit to do any fragmentation. 1470 * and loopback copy on the outbound interface. 1471 * 1472 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1473 */ 1474 ira->ira_flags |= IRAF_LOOPBACK_COPY; 1475 1476 nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1477 if (nce == NULL) { 1478 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1479 ip_drop_output("No nce", mp, dst_ill); 1480 freemsg(mp); 1481 goto done; 1482 } 1483 1484 ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); 1485 nce_refrele(nce); 1486 done: 1487 /* Restore */ 1488 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1489 } 1490 1491 /* 1492 * ire_recvfn for IRE_MULTICAST. 
1493 */ 1494 void 1495 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1496 ip_recv_attr_t *ira) 1497 { 1498 ipha_t *ipha = (ipha_t *)iph_arg; 1499 ill_t *ill = ira->ira_ill; 1500 ip_stack_t *ipst = ill->ill_ipst; 1501 1502 ASSERT(ire->ire_ill == ira->ira_ill); 1503 1504 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1505 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1506 1507 /* RSVP hook */ 1508 if (ira->ira_flags & IRAF_RSVP) 1509 goto forus; 1510 1511 /* Tag for higher-level protocols */ 1512 ira->ira_flags |= IRAF_MULTICAST; 1513 1514 /* 1515 * So that we don't end up with dups, only one ill an IPMP group is 1516 * nominated to receive multicast traffic. 1517 * If we have no cast_ill we are liberal and accept everything. 1518 */ 1519 if (IS_UNDER_IPMP(ill)) { 1520 ip_stack_t *ipst = ill->ill_ipst; 1521 1522 /* For an under ill_grp can change under lock */ 1523 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1524 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1525 ill->ill_grp->ig_cast_ill != NULL) { 1526 rw_exit(&ipst->ips_ill_g_lock); 1527 ip_drop_input("not on cast ill", mp, ill); 1528 freemsg(mp); 1529 return; 1530 } 1531 rw_exit(&ipst->ips_ill_g_lock); 1532 /* 1533 * We switch to the upper ill so that mrouter and hasmembers 1534 * can operate on upper here and in ip_input_multicast. 1535 */ 1536 ill = ipmp_ill_hold_ipmp_ill(ill); 1537 if (ill != NULL) { 1538 ASSERT(ill != ira->ira_ill); 1539 ASSERT(ire->ire_ill == ira->ira_ill); 1540 ira->ira_ill = ill; 1541 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1542 } else { 1543 ill = ira->ira_ill; 1544 } 1545 } 1546 1547 /* 1548 * Check if we are a multicast router - send ip_mforward a copy of 1549 * the packet. 1550 * Due to mroute_decap tunnels we consider forwarding packets even if 1551 * mrouted has not joined the allmulti group on this interface. 
1552 */ 1553 if (ipst->ips_ip_g_mrouter) { 1554 int retval; 1555 1556 /* 1557 * Clear the indication that this may have hardware 1558 * checksum as we are not using it for forwarding. 1559 */ 1560 DB_CKSUMFLAGS(mp) = 0; 1561 1562 /* 1563 * ip_mforward helps us make these distinctions: If received 1564 * on tunnel and not IGMP, then drop. 1565 * If IGMP packet, then don't check membership 1566 * If received on a phyint and IGMP or PIM, then 1567 * don't check membership 1568 */ 1569 retval = ip_mforward(mp, ira); 1570 /* ip_mforward updates mib variables if needed */ 1571 1572 switch (retval) { 1573 case 0: 1574 /* 1575 * pkt is okay and arrived on phyint. 1576 * 1577 * If we are running as a multicast router 1578 * we need to see all IGMP and/or PIM packets. 1579 */ 1580 if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1581 (ipha->ipha_protocol == IPPROTO_PIM)) { 1582 goto forus; 1583 } 1584 break; 1585 case -1: 1586 /* pkt is mal-formed, toss it */ 1587 freemsg(mp); 1588 goto done; 1589 case 1: 1590 /* 1591 * pkt is okay and arrived on a tunnel 1592 * 1593 * If we are running a multicast router 1594 * we need to see all igmp packets. 1595 */ 1596 if (ipha->ipha_protocol == IPPROTO_IGMP) { 1597 goto forus; 1598 } 1599 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1600 freemsg(mp); 1601 goto done; 1602 } 1603 } 1604 1605 /* 1606 * Check if we have members on this ill. This is not necessary for 1607 * correctness because even if the NIC/GLD had a leaky filter, we 1608 * filter before passing to each conn_t. 1609 */ 1610 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1611 /* 1612 * Nobody interested 1613 * 1614 * This might just be caused by the fact that 1615 * multiple IP Multicast addresses map to the same 1616 * link layer multicast - no need to increment counter! 
1617 */ 1618 ip_drop_input("Multicast with no members", mp, ill); 1619 freemsg(mp); 1620 goto done; 1621 } 1622 forus: 1623 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1624 ntohl(ipha->ipha_dst))); 1625 1626 /* 1627 * After reassembly and IPsec we will need to duplicate the 1628 * multicast packet for all matching zones on the ill. 1629 */ 1630 ira->ira_zoneid = ALL_ZONES; 1631 1632 /* Reassemble on the ill on which the packet arrived */ 1633 ip_input_local_v4(ire, mp, ipha, ira); 1634 done: 1635 if (ill != ire->ire_ill) { 1636 ill_refrele(ill); 1637 ira->ira_ill = ire->ire_ill; 1638 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1639 } 1640 } 1641 1642 /* 1643 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1644 * Drop packets since we don't forward out multirt routes. 1645 */ 1646 /* ARGSUSED */ 1647 void 1648 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1649 { 1650 ill_t *ill = ira->ira_ill; 1651 1652 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1653 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1654 freemsg(mp); 1655 } 1656 1657 /* 1658 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1659 * has rewritten the packet to have a loopback destination address (We 1660 * filter out packet with a loopback destination from arriving over the wire). 1661 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1662 */ 1663 void 1664 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1665 { 1666 ipha_t *ipha = (ipha_t *)iph_arg; 1667 ill_t *ill = ira->ira_ill; 1668 ill_t *ire_ill = ire->ire_ill; 1669 1670 ira->ira_zoneid = GLOBAL_ZONEID; 1671 1672 /* Switch to the lo0 ill for further processing */ 1673 if (ire_ill != ill) { 1674 /* 1675 * Update ira_ill to be the ILL on which the IP address 1676 * is hosted. 
1677 * No need to hold the ill since we have a hold on the ire 1678 */ 1679 ASSERT(ira->ira_ill == ira->ira_rill); 1680 ira->ira_ill = ire_ill; 1681 1682 ip_input_local_v4(ire, mp, ipha, ira); 1683 1684 /* Restore */ 1685 ASSERT(ira->ira_ill == ire_ill); 1686 ira->ira_ill = ill; 1687 return; 1688 1689 } 1690 ip_input_local_v4(ire, mp, ipha, ira); 1691 } 1692 1693 /* 1694 * ire_recvfn for IRE_LOCAL. 1695 */ 1696 void 1697 ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1698 { 1699 ipha_t *ipha = (ipha_t *)iph_arg; 1700 ill_t *ill = ira->ira_ill; 1701 ill_t *ire_ill = ire->ire_ill; 1702 1703 /* Make a note for DAD that this address is in use */ 1704 ire->ire_last_used_time = LBOLT_FASTPATH; 1705 1706 /* Only target the IRE_LOCAL with the right zoneid. */ 1707 ira->ira_zoneid = ire->ire_zoneid; 1708 1709 /* 1710 * If the packet arrived on the wrong ill, we check that 1711 * this is ok. 1712 * If it is, then we ensure that we do the reassembly on 1713 * the ill on which the address is hosted. We keep ira_rill as 1714 * the one on which the packet arrived, so that IP_PKTINFO and 1715 * friends can report this. 1716 */ 1717 if (ire_ill != ill) { 1718 ire_t *new_ire; 1719 1720 new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1721 if (new_ire == NULL) { 1722 /* Drop packet */ 1723 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1724 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1725 freemsg(mp); 1726 return; 1727 } 1728 /* 1729 * Update ira_ill to be the ILL on which the IP address 1730 * is hosted. No need to hold the ill since we have a 1731 * hold on the ire. Note that we do the switch even if 1732 * new_ire == ire (for IPMP, ire would be the one corresponding 1733 * to the IPMP ill). 
1734 */ 1735 ASSERT(ira->ira_ill == ira->ira_rill); 1736 ira->ira_ill = new_ire->ire_ill; 1737 1738 /* ira_ruifindex tracks the upper for ira_rill */ 1739 if (IS_UNDER_IPMP(ill)) 1740 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1741 1742 ip_input_local_v4(new_ire, mp, ipha, ira); 1743 1744 /* Restore */ 1745 ASSERT(ira->ira_ill == new_ire->ire_ill); 1746 ira->ira_ill = ill; 1747 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1748 1749 if (new_ire != ire) 1750 ire_refrele(new_ire); 1751 return; 1752 } 1753 1754 ip_input_local_v4(ire, mp, ipha, ira); 1755 } 1756 1757 /* 1758 * Common function for packets arriving for the host. Handles 1759 * checksum verification, reassembly checks, etc. 1760 */ 1761 static void 1762 ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1763 { 1764 ill_t *ill = ira->ira_ill; 1765 iaflags_t iraflags = ira->ira_flags; 1766 1767 /* 1768 * Verify IP header checksum. If the packet was AH or ESP then 1769 * this flag has already been cleared. Likewise if the packet 1770 * had a hardware checksum. 1771 */ 1772 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1773 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1774 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1775 freemsg(mp); 1776 return; 1777 } 1778 1779 if (iraflags & IRAF_IPV4_OPTIONS) { 1780 if (!ip_input_local_options(mp, ipha, ira)) { 1781 /* Error has been sent and mp consumed */ 1782 return; 1783 } 1784 /* 1785 * Some old hardware does partial checksum by including the 1786 * whole IP header, so the partial checksum value might have 1787 * become invalid if any option in the packet have been 1788 * updated. Always clear partial checksum flag here. 1789 */ 1790 DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1791 } 1792 1793 /* 1794 * Is packet part of fragmented IP packet? 
1795 * We compare against defined values in network byte order 1796 */ 1797 if (ipha->ipha_fragment_offset_and_flags & 1798 (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1799 /* 1800 * Make sure we have ira_l2src before we loose the original 1801 * mblk 1802 */ 1803 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1804 ip_setl2src(mp, ira, ira->ira_rill); 1805 1806 mp = ip_input_fragment(mp, ipha, ira); 1807 if (mp == NULL) 1808 return; 1809 /* Completed reassembly */ 1810 ipha = (ipha_t *)mp->b_rptr; 1811 } 1812 1813 /* 1814 * For broadcast and multicast we need some extra work before 1815 * we call ip_fanout_v4(), since in the case of shared-IP zones 1816 * we need to pretend that a packet arrived for each zoneid. 1817 */ 1818 if (iraflags & IRAF_MULTIBROADCAST) { 1819 if (iraflags & IRAF_BROADCAST) 1820 ip_input_broadcast_v4(ire, mp, ipha, ira); 1821 else 1822 ip_input_multicast_v4(ire, mp, ipha, ira); 1823 return; 1824 } 1825 ip_fanout_v4(mp, ipha, ira); 1826 } 1827 1828 1829 /* 1830 * Handle multiple zones which match the same broadcast address 1831 * and ill by delivering a packet to each of them. 1832 * Walk the bucket and look for different ire_zoneid but otherwise 1833 * the same IRE (same ill/addr/mask/type). 1834 * Note that ire_add() tracks IREs that are identical in all 1835 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1836 * increasing ire_identical_cnt. Thus we don't need to be concerned 1837 * about those. 
1838 */ 1839 static void 1840 ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1841 { 1842 ill_t *ill = ira->ira_ill; 1843 ip_stack_t *ipst = ill->ill_ipst; 1844 netstack_t *ns = ipst->ips_netstack; 1845 irb_t *irb; 1846 ire_t *ire1; 1847 mblk_t *mp1; 1848 ipha_t *ipha1; 1849 uint_t ira_pktlen = ira->ira_pktlen; 1850 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1851 1852 irb = ire->ire_bucket; 1853 1854 /* 1855 * If we don't have more than one shared-IP zone, or if 1856 * there can't be more than one IRE_BROADCAST for this 1857 * IP address, then just set the zoneid and proceed. 1858 */ 1859 if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1860 ira->ira_zoneid = ire->ire_zoneid; 1861 1862 ip_fanout_v4(mp, ipha, ira); 1863 return; 1864 } 1865 irb_refhold(irb); 1866 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1867 /* We do the main IRE after the end of the loop */ 1868 if (ire1 == ire) 1869 continue; 1870 1871 /* 1872 * Only IREs for the same IP address should be in the same 1873 * bucket. 1874 * But could have IRE_HOSTs in the case of CGTP. 
1875 */ 1876 ASSERT(ire1->ire_addr == ire->ire_addr); 1877 if (!(ire1->ire_type & IRE_BROADCAST)) 1878 continue; 1879 1880 if (IRE_IS_CONDEMNED(ire1)) 1881 continue; 1882 1883 mp1 = copymsg(mp); 1884 if (mp1 == NULL) { 1885 /* Failed to deliver to one zone */ 1886 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1887 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1888 continue; 1889 } 1890 ira->ira_zoneid = ire1->ire_zoneid; 1891 ipha1 = (ipha_t *)mp1->b_rptr; 1892 ip_fanout_v4(mp1, ipha1, ira); 1893 /* 1894 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1895 * so we restore them for a potential next iteration 1896 */ 1897 ira->ira_pktlen = ira_pktlen; 1898 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1899 } 1900 irb_refrele(irb); 1901 /* Do the main ire */ 1902 ira->ira_zoneid = ire->ire_zoneid; 1903 ip_fanout_v4(mp, ipha, ira); 1904 } 1905 1906 /* 1907 * Handle multiple zones which want to receive the same multicast packets 1908 * on this ill by delivering a packet to each of them. 1909 * 1910 * Note that for packets delivered to transports we could instead do this 1911 * as part of the fanout code, but since we need to handle icmp_inbound 1912 * it is simpler to have multicast work the same as broadcast. 1913 * 1914 * The ip_fanout matching for multicast matches based on ilm independent of 1915 * zoneid since the zoneid restriction is applied when joining a multicast 1916 * group. 
1917 */ 1918 /* ARGSUSED */ 1919 static void 1920 ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1921 { 1922 ill_t *ill = ira->ira_ill; 1923 iaflags_t iraflags = ira->ira_flags; 1924 ip_stack_t *ipst = ill->ill_ipst; 1925 netstack_t *ns = ipst->ips_netstack; 1926 zoneid_t zoneid; 1927 mblk_t *mp1; 1928 ipha_t *ipha1; 1929 uint_t ira_pktlen = ira->ira_pktlen; 1930 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1931 1932 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1933 ASSERT(!IS_UNDER_IPMP(ill)); 1934 1935 /* 1936 * If we don't have more than one shared-IP zone, or if 1937 * there are no members in anything but the global zone, 1938 * then just set the zoneid and proceed. 1939 */ 1940 if (ns->netstack_numzones == 1 || 1941 !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1942 GLOBAL_ZONEID)) { 1943 ira->ira_zoneid = GLOBAL_ZONEID; 1944 1945 /* If sender didn't want this zone to receive it, drop */ 1946 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1947 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1948 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1949 freemsg(mp); 1950 return; 1951 } 1952 ip_fanout_v4(mp, ipha, ira); 1953 return; 1954 } 1955 1956 /* 1957 * Here we loop over all zoneids that have members in the group 1958 * and deliver a packet to ip_fanout for each zoneid. 1959 * 1960 * First find any members in the lowest numeric zoneid by looking for 1961 * first zoneid larger than -1 (ALL_ZONES). 1962 * We terminate the loop when we receive -1 (ALL_ZONES). 1963 */ 1964 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1965 for (; zoneid != ALL_ZONES; 1966 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1967 /* 1968 * Avoid an extra copymsg/freemsg by skipping global zone here 1969 * and doing that at the end. 
1970 */ 1971 if (zoneid == GLOBAL_ZONEID) 1972 continue; 1973 1974 ira->ira_zoneid = zoneid; 1975 1976 /* If sender didn't want this zone to receive it, skip */ 1977 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1978 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1979 continue; 1980 1981 mp1 = copymsg(mp); 1982 if (mp1 == NULL) { 1983 /* Failed to deliver to one zone */ 1984 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1985 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1986 continue; 1987 } 1988 ipha1 = (ipha_t *)mp1->b_rptr; 1989 ip_fanout_v4(mp1, ipha1, ira); 1990 /* 1991 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1992 * so we restore them for a potential next iteration 1993 */ 1994 ira->ira_pktlen = ira_pktlen; 1995 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1996 } 1997 1998 /* Do the main ire */ 1999 ira->ira_zoneid = GLOBAL_ZONEID; 2000 /* If sender didn't want this zone to receive it, drop */ 2001 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 2002 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 2003 ip_drop_input("Multicast but wrong zoneid", mp, ill); 2004 freemsg(mp); 2005 } else { 2006 ip_fanout_v4(mp, ipha, ira); 2007 } 2008 } 2009 2010 2011 /* 2012 * Determine the zoneid and IRAF_TX_* flags if trusted extensions 2013 * is in use. Updates ira_zoneid and ira_flags as a result. 2014 */ 2015 static void 2016 ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 2017 uint_t ip_hdr_length, ip_recv_attr_t *ira) 2018 { 2019 uint16_t *up; 2020 uint16_t lport; 2021 zoneid_t zoneid; 2022 2023 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 2024 2025 /* 2026 * If the packet is unlabeled we might allow read-down 2027 * for MAC_EXEMPT. Below we clear this if it is a multi-level 2028 * port (MLP). 2029 * Note that ira_tsl can be NULL here. 
2030 */ 2031 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2032 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2033 2034 if (ira->ira_zoneid != ALL_ZONES) 2035 return; 2036 2037 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2038 2039 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2040 switch (protocol) { 2041 case IPPROTO_TCP: 2042 case IPPROTO_SCTP: 2043 case IPPROTO_UDP: 2044 /* Caller ensures this */ 2045 ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2046 2047 /* 2048 * Only these transports support MLP. 2049 * We know their destination port numbers is in 2050 * the same place in the header. 2051 */ 2052 lport = up[1]; 2053 2054 /* 2055 * No need to handle exclusive-stack zones 2056 * since ALL_ZONES only applies to the shared IP instance. 2057 */ 2058 zoneid = tsol_mlp_findzone(protocol, lport); 2059 /* 2060 * If no shared MLP is found, tsol_mlp_findzone returns 2061 * ALL_ZONES. In that case, we assume it's SLP, and 2062 * search for the zone based on the packet label. 2063 * 2064 * If there is such a zone, we prefer to find a 2065 * connection in it. Otherwise, we look for a 2066 * MAC-exempt connection in any zone whose label 2067 * dominates the default label on the packet. 
2068 */ 2069 if (zoneid == ALL_ZONES) 2070 zoneid = tsol_attr_to_zoneid(ira); 2071 else 2072 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2073 break; 2074 default: 2075 /* Handle shared address for other protocols */ 2076 zoneid = tsol_attr_to_zoneid(ira); 2077 break; 2078 } 2079 ira->ira_zoneid = zoneid; 2080 } 2081 2082 /* 2083 * Increment checksum failure statistics 2084 */ 2085 static void 2086 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2087 { 2088 ip_stack_t *ipst = ill->ill_ipst; 2089 2090 switch (protocol) { 2091 case IPPROTO_TCP: 2092 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2093 2094 if (hck_flags & HCK_FULLCKSUM) 2095 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2096 else if (hck_flags & HCK_PARTIALCKSUM) 2097 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2098 else 2099 IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2100 break; 2101 case IPPROTO_UDP: 2102 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2103 if (hck_flags & HCK_FULLCKSUM) 2104 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); 2105 else if (hck_flags & HCK_PARTIALCKSUM) 2106 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2107 else 2108 IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2109 break; 2110 case IPPROTO_ICMP: 2111 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2112 break; 2113 default: 2114 ASSERT(0); 2115 break; 2116 } 2117 } 2118 2119 /* Calculate the IPv4 pseudo-header checksum */ 2120 uint32_t 2121 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2122 { 2123 uint_t ulp_len; 2124 uint32_t cksum; 2125 uint8_t protocol = ira->ira_protocol; 2126 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2127 2128 #define iphs ((uint16_t *)ipha) 2129 2130 switch (protocol) { 2131 case IPPROTO_TCP: 2132 ulp_len = ira->ira_pktlen - ip_hdr_length; 2133 2134 /* Protocol and length */ 2135 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2136 /* IP addresses */ 2137 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2138 break; 2139 2140 case IPPROTO_UDP: { 2141 udpha_t *udpha; 2142 2143 
udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2144 2145 /* Protocol and length */ 2146 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2147 /* IP addresses */ 2148 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2149 break; 2150 } 2151 2152 default: 2153 cksum = 0; 2154 break; 2155 } 2156 #undef iphs 2157 return (cksum); 2158 } 2159 2160 2161 /* 2162 * Software verification of the ULP checksums. 2163 * Returns B_TRUE if ok. 2164 * Increments statistics of failed. 2165 */ 2166 static boolean_t 2167 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2168 { 2169 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2170 uint32_t cksum; 2171 uint8_t protocol = ira->ira_protocol; 2172 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2173 2174 IP_STAT(ipst, ip_in_sw_cksum); 2175 2176 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2177 2178 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2179 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2180 if (cksum == 0) 2181 return (B_TRUE); 2182 2183 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2184 return (B_FALSE); 2185 } 2186 2187 /* 2188 * Verify the ULP checksums. 2189 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2190 * algorithm. 2191 * Increments statistics if failed. 
2192 */ 2193 static boolean_t 2194 ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, 2195 ip_recv_attr_t *ira) 2196 { 2197 ill_t *ill = ira->ira_rill; 2198 uint16_t hck_flags; 2199 uint32_t cksum; 2200 mblk_t *mp1; 2201 int32_t len; 2202 uint8_t protocol = ira->ira_protocol; 2203 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2204 2205 2206 switch (protocol) { 2207 case IPPROTO_TCP: 2208 break; 2209 2210 case IPPROTO_UDP: { 2211 udpha_t *udpha; 2212 2213 udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2214 if (udpha->uha_checksum == 0) { 2215 /* Packet doesn't have a UDP checksum */ 2216 return (B_TRUE); 2217 } 2218 break; 2219 } 2220 case IPPROTO_SCTP: { 2221 sctp_hdr_t *sctph; 2222 uint32_t pktsum; 2223 2224 sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length); 2225 #ifdef DEBUG 2226 if (skip_sctp_cksum) 2227 return (B_TRUE); 2228 #endif 2229 pktsum = sctph->sh_chksum; 2230 sctph->sh_chksum = 0; 2231 cksum = sctp_cksum(mp, ip_hdr_length); 2232 sctph->sh_chksum = pktsum; 2233 if (cksum == pktsum) 2234 return (B_TRUE); 2235 2236 /* 2237 * Defer until later whether a bad checksum is ok 2238 * in order to allow RAW sockets to use Adler checksum 2239 * with SCTP. 2240 */ 2241 ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 2242 return (B_TRUE); 2243 } 2244 2245 default: 2246 /* No ULP checksum to verify. */ 2247 return (B_TRUE); 2248 } 2249 2250 /* 2251 * Revert to software checksum calculation if the interface 2252 * isn't capable of checksum offload. 2253 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 2254 * Note: IRAF_NO_HW_CKSUM is not currently used. 2255 */ 2256 ASSERT(!IS_IPMP(ill)); 2257 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 2258 !dohwcksum) { 2259 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2260 } 2261 2262 hck_flags = DB_CKSUMFLAGS(mp); 2263 2264 /* 2265 * We apply this for all ULP protocols. Does the HW know to 2266 * not set the flags for SCTP and other protocols. 
2267 */ 2268 if (hck_flags & HCK_FULLCKSUM_OK) { 2269 /* 2270 * Hardware has already verified the checksum. 2271 */ 2272 return (B_TRUE); 2273 } 2274 2275 if (hck_flags & HCK_FULLCKSUM) { 2276 /* 2277 * Full checksum has been computed by the hardware 2278 * and has been attached. If the driver wants us to 2279 * verify the correctness of the attached value, in 2280 * order to protect against faulty hardware, compare 2281 * it against -0 (0xFFFF) to see if it's valid. 2282 */ 2283 cksum = DB_CKSUM16(mp); 2284 if (cksum == 0xFFFF) 2285 return (B_TRUE); 2286 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2287 return (B_FALSE); 2288 } 2289 2290 mp1 = mp->b_cont; 2291 if ((hck_flags & HCK_PARTIALCKSUM) && 2292 (mp1 == NULL || mp1->b_cont == NULL) && 2293 ip_hdr_length >= DB_CKSUMSTART(mp) && 2294 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 2295 uint32_t adj; 2296 uchar_t *cksum_start; 2297 2298 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2299 2300 cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp)); 2301 2302 /* 2303 * Partial checksum has been calculated by hardware 2304 * and attached to the packet; in addition, any 2305 * prepended extraneous data is even byte aligned, 2306 * and there are at most two mblks associated with 2307 * the packet. If any such data exists, we adjust 2308 * the checksum; also take care any postpended data. 
2309 */ 2310 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 2311 /* 2312 * One's complement subtract extraneous checksum 2313 */ 2314 cksum += DB_CKSUM16(mp); 2315 if (adj >= cksum) 2316 cksum = ~(adj - cksum) & 0xFFFF; 2317 else 2318 cksum -= adj; 2319 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2320 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2321 if (!(~cksum & 0xFFFF)) 2322 return (B_TRUE); 2323 2324 ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2325 return (B_FALSE); 2326 } 2327 return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2328 } 2329 2330 2331 /* 2332 * Handle fanout of received packets. 2333 * Unicast packets that are looped back (from ire_send_local_v4) and packets 2334 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 2335 * 2336 * IPQoS Notes 2337 * Before sending it to the client, invoke IPPF processing. Policy processing 2338 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 2339 */ 2340 void 2341 ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2342 { 2343 ill_t *ill = ira->ira_ill; 2344 iaflags_t iraflags = ira->ira_flags; 2345 ip_stack_t *ipst = ill->ill_ipst; 2346 uint8_t protocol = ipha->ipha_protocol; 2347 conn_t *connp; 2348 #define rptr ((uchar_t *)ipha) 2349 uint_t ip_hdr_length; 2350 uint_t min_ulp_header_length; 2351 int offset; 2352 ssize_t len; 2353 netstack_t *ns = ipst->ips_netstack; 2354 ipsec_stack_t *ipss = ns->netstack_ipsec; 2355 ill_t *rill = ira->ira_rill; 2356 2357 ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length)); 2358 2359 ip_hdr_length = ira->ira_ip_hdr_length; 2360 ira->ira_protocol = protocol; 2361 2362 /* 2363 * Time for IPP once we've done reassembly and IPsec. 2364 * We skip this for loopback packets since we don't do IPQoS 2365 * on loopback. 
2366 */ 2367 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2368 !(iraflags & IRAF_LOOPBACK) && 2369 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH)) { 2370 /* 2371 * Use the interface on which the packet arrived - not where 2372 * the IP address is hosted. 2373 */ 2374 /* ip_process translates an IS_UNDER_IPMP */ 2375 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2376 if (mp == NULL) { 2377 /* ip_drop_packet and MIB done */ 2378 return; 2379 } 2380 } 2381 2382 /* Determine the minimum required size of the upper-layer header */ 2383 /* Need to do this for at least the set of ULPs that TX handles. */ 2384 switch (protocol) { 2385 case IPPROTO_TCP: 2386 min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2387 break; 2388 case IPPROTO_SCTP: 2389 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2390 break; 2391 case IPPROTO_UDP: 2392 min_ulp_header_length = UDPH_SIZE; 2393 break; 2394 case IPPROTO_ICMP: 2395 min_ulp_header_length = ICMPH_SIZE; 2396 break; 2397 default: 2398 min_ulp_header_length = 0; 2399 break; 2400 } 2401 /* Make sure we have the min ULP header length */ 2402 len = mp->b_wptr - rptr; 2403 if (len < ip_hdr_length + min_ulp_header_length) { 2404 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) { 2405 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2406 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2407 freemsg(mp); 2408 return; 2409 } 2410 IP_STAT(ipst, ip_recv_pullup); 2411 ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2412 ira); 2413 if (ipha == NULL) 2414 goto discard; 2415 len = mp->b_wptr - rptr; 2416 } 2417 2418 /* 2419 * If trusted extensions then determine the zoneid and TX specific 2420 * ira_flags. 2421 */ 2422 if (iraflags & IRAF_SYSTEM_LABELED) { 2423 /* This can update ira->ira_flags and ira->ira_zoneid */ 2424 ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira); 2425 iraflags = ira->ira_flags; 2426 } 2427 2428 2429 /* Verify ULP checksum. 
Handles TCP, UDP, and SCTP */ 2430 if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2431 if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) { 2432 /* Bad checksum. Stats are already incremented */ 2433 ip_drop_input("Bad ULP checksum", mp, ill); 2434 freemsg(mp); 2435 return; 2436 } 2437 /* IRAF_SCTP_CSUM_ERR could have been set */ 2438 iraflags = ira->ira_flags; 2439 } 2440 switch (protocol) { 2441 case IPPROTO_TCP: 2442 /* For TCP, discard broadcast and multicast packets. */ 2443 if (iraflags & IRAF_MULTIBROADCAST) 2444 goto discard; 2445 2446 /* First mblk contains IP+TCP headers per above check */ 2447 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2448 2449 /* TCP options present? */ 2450 offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4; 2451 if (offset != 5) { 2452 if (offset < 5) 2453 goto discard; 2454 2455 /* 2456 * There must be TCP options. 2457 * Make sure we can grab them. 2458 */ 2459 offset <<= 2; 2460 offset += ip_hdr_length; 2461 if (len < offset) { 2462 if (ira->ira_pktlen < offset) { 2463 BUMP_MIB(ill->ill_ip_mib, 2464 ipIfStatsInTruncatedPkts); 2465 ip_drop_input( 2466 "ipIfStatsInTruncatedPkts", 2467 mp, ill); 2468 freemsg(mp); 2469 return; 2470 } 2471 IP_STAT(ipst, ip_recv_pullup); 2472 ipha = ip_pullup(mp, offset, ira); 2473 if (ipha == NULL) 2474 goto discard; 2475 len = mp->b_wptr - rptr; 2476 } 2477 } 2478 2479 /* 2480 * Pass up a squeue hint to tcp. 2481 * If ira_sqp is already set (this is loopback) we leave it 2482 * alone. 
2483 */ 2484 if (ira->ira_sqp == NULL) { 2485 ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2486 } 2487 2488 /* Look for AF_INET or AF_INET6 that matches */ 2489 connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length, 2490 ira, ipst); 2491 if (connp == NULL) { 2492 /* Send the TH_RST */ 2493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2494 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2495 return; 2496 } 2497 if (connp->conn_incoming_ifindex != 0 && 2498 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2499 CONN_DEC_REF(connp); 2500 2501 /* Send the TH_RST */ 2502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2503 tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2504 return; 2505 } 2506 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2507 (iraflags & IRAF_IPSEC_SECURE)) { 2508 mp = ipsec_check_inbound_policy(mp, connp, 2509 ipha, NULL, ira); 2510 if (mp == NULL) { 2511 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2512 /* Note that mp is NULL */ 2513 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2514 CONN_DEC_REF(connp); 2515 return; 2516 } 2517 } 2518 /* Found a client; up it goes */ 2519 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2520 ira->ira_ill = ira->ira_rill = NULL; 2521 if (!IPCL_IS_TCP(connp)) { 2522 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2523 (connp->conn_recv)(connp, mp, NULL, ira); 2524 CONN_DEC_REF(connp); 2525 ira->ira_ill = ill; 2526 ira->ira_rill = rill; 2527 return; 2528 } 2529 2530 /* 2531 * We do different processing whether called from 2532 * ip_accept_tcp and we match the target, don't match 2533 * the target, and when we are called by ip_input. 
2534 */ 2535 if (iraflags & IRAF_TARGET_SQP) { 2536 if (ira->ira_target_sqp == connp->conn_sqp) { 2537 mblk_t *attrmp; 2538 2539 attrmp = ip_recv_attr_to_mblk(ira); 2540 if (attrmp == NULL) { 2541 BUMP_MIB(ill->ill_ip_mib, 2542 ipIfStatsInDiscards); 2543 ip_drop_input("ipIfStatsInDiscards", 2544 mp, ill); 2545 freemsg(mp); 2546 CONN_DEC_REF(connp); 2547 } else { 2548 SET_SQUEUE(attrmp, connp->conn_recv, 2549 connp); 2550 attrmp->b_cont = mp; 2551 ASSERT(ira->ira_target_sqp_mp == NULL); 2552 ira->ira_target_sqp_mp = attrmp; 2553 /* 2554 * Conn ref release when drained from 2555 * the squeue. 2556 */ 2557 } 2558 } else { 2559 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2560 connp->conn_recv, connp, ira, SQ_FILL, 2561 SQTAG_IP_TCP_INPUT); 2562 } 2563 } else { 2564 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2565 connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT); 2566 } 2567 ira->ira_ill = ill; 2568 ira->ira_rill = rill; 2569 return; 2570 2571 case IPPROTO_SCTP: { 2572 sctp_hdr_t *sctph; 2573 in6_addr_t map_src, map_dst; 2574 uint32_t ports; /* Source and destination ports */ 2575 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2576 2577 /* For SCTP, discard broadcast and multicast packets. */ 2578 if (iraflags & IRAF_MULTIBROADCAST) 2579 goto discard; 2580 2581 /* 2582 * Since there is no SCTP h/w cksum support yet, just 2583 * clear the flag. 
2584 */ 2585 DB_CKSUMFLAGS(mp) = 0; 2586 2587 /* Length ensured above */ 2588 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2589 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2590 2591 /* get the ports */ 2592 ports = *(uint32_t *)&sctph->sh_sport; 2593 2594 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); 2595 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); 2596 if (iraflags & IRAF_SCTP_CSUM_ERR) { 2597 /* 2598 * No potential sctp checksum errors go to the Sun 2599 * sctp stack however they might be Adler-32 summed 2600 * packets a userland stack bound to a raw IP socket 2601 * could reasonably use. Note though that Adler-32 is 2602 * a long deprecated algorithm and customer sctp 2603 * networks should eventually migrate to CRC-32 at 2604 * which time this facility should be removed. 2605 */ 2606 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2607 return; 2608 } 2609 connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp, 2610 sctps, sctph); 2611 if (connp == NULL) { 2612 /* Check for raw socket or OOTB handling */ 2613 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2614 return; 2615 } 2616 if (connp->conn_incoming_ifindex != 0 && 2617 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2618 CONN_DEC_REF(connp); 2619 /* Check for raw socket or OOTB handling */ 2620 ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2621 return; 2622 } 2623 2624 /* Found a client; up it goes */ 2625 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2626 sctp_input(connp, ipha, NULL, mp, ira); 2627 /* sctp_input does a rele of the sctp_t */ 2628 return; 2629 } 2630 2631 case IPPROTO_UDP: 2632 /* First mblk contains IP+UDP headers as checked above */ 2633 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2634 2635 if (iraflags & IRAF_MULTIBROADCAST) { 2636 uint16_t *up; /* Pointer to ports in ULP header */ 2637 2638 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2639 ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira); 2640 return; 2641 } 2642 2643 /* Look for 
AF_INET or AF_INET6 that matches */ 2644 connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length, 2645 ira, ipst); 2646 if (connp == NULL) { 2647 no_udp_match: 2648 if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP]. 2649 connf_head != NULL) { 2650 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2651 ip_fanout_proto_v4(mp, ipha, ira); 2652 } else { 2653 ip_fanout_send_icmp_v4(mp, 2654 ICMP_DEST_UNREACHABLE, 2655 ICMP_PORT_UNREACHABLE, ira); 2656 } 2657 return; 2658 2659 } 2660 if (connp->conn_incoming_ifindex != 0 && 2661 connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2662 CONN_DEC_REF(connp); 2663 goto no_udp_match; 2664 } 2665 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld : 2666 !canputnext(connp->conn_rq)) { 2667 CONN_DEC_REF(connp); 2668 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2669 ip_drop_input("udpIfStatsInOverflows", mp, ill); 2670 freemsg(mp); 2671 return; 2672 } 2673 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2674 (iraflags & IRAF_IPSEC_SECURE)) { 2675 mp = ipsec_check_inbound_policy(mp, connp, 2676 ipha, NULL, ira); 2677 if (mp == NULL) { 2678 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2679 /* Note that mp is NULL */ 2680 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2681 CONN_DEC_REF(connp); 2682 return; 2683 } 2684 } 2685 /* 2686 * Remove 0-spi if it's 0, or move everything behind 2687 * the UDP header over it and forward to ESP via 2688 * ip_fanout_v4(). 2689 */ 2690 if (connp->conn_udp->udp_nat_t_endpoint) { 2691 if (iraflags & IRAF_IPSEC_SECURE) { 2692 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2693 DROPPER(ipss, ipds_esp_nat_t_ipsec), 2694 &ipss->ipsec_dropper); 2695 CONN_DEC_REF(connp); 2696 return; 2697 } 2698 2699 mp = zero_spi_check(mp, ira); 2700 if (mp == NULL) { 2701 /* 2702 * Packet was consumed - probably sent to 2703 * ip_fanout_v4. 2704 */ 2705 CONN_DEC_REF(connp); 2706 return; 2707 } 2708 /* Else continue like a normal UDP packet. 
*/ 2709 ipha = (ipha_t *)mp->b_rptr; 2710 protocol = ipha->ipha_protocol; 2711 ira->ira_protocol = protocol; 2712 } 2713 /* Found a client; up it goes */ 2714 IP_STAT(ipst, ip_udp_fannorm); 2715 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2716 ira->ira_ill = ira->ira_rill = NULL; 2717 (connp->conn_recv)(connp, mp, NULL, ira); 2718 CONN_DEC_REF(connp); 2719 ira->ira_ill = ill; 2720 ira->ira_rill = rill; 2721 return; 2722 default: 2723 break; 2724 } 2725 2726 /* 2727 * Clear hardware checksumming flag as it is currently only 2728 * used by TCP and UDP. 2729 */ 2730 DB_CKSUMFLAGS(mp) = 0; 2731 2732 switch (protocol) { 2733 case IPPROTO_ICMP: 2734 /* 2735 * We need to accomodate icmp messages coming in clear 2736 * until we get everything secure from the wire. If 2737 * icmp_accept_clear_messages is zero we check with 2738 * the global policy and act accordingly. If it is 2739 * non-zero, we accept the message without any checks. 2740 * But *this does not mean* that this will be delivered 2741 * to RAW socket clients. By accepting we might send 2742 * replies back, change our MTU value etc., 2743 * but delivery to the ULP/clients depends on their 2744 * policy dispositions. 2745 */ 2746 if (ipst->ips_icmp_accept_clear_messages == 0) { 2747 mp = ipsec_check_global_policy(mp, NULL, 2748 ipha, NULL, ira, ns); 2749 if (mp == NULL) 2750 return; 2751 } 2752 2753 /* 2754 * On a labeled system, we have to check whether the zone 2755 * itself is permitted to receive raw traffic. 2756 */ 2757 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2758 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2759 BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); 2760 ip_drop_input("tsol_can_accept_raw", mp, ill); 2761 freemsg(mp); 2762 return; 2763 } 2764 } 2765 2766 /* 2767 * ICMP header checksum, including checksum field, 2768 * should be zero. 
2769 */ 2770 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2771 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2772 ip_drop_input("icmpInCksumErrs", mp, ill); 2773 freemsg(mp); 2774 return; 2775 } 2776 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2777 mp = icmp_inbound_v4(mp, ira); 2778 if (mp == NULL) { 2779 /* No need to pass to RAW sockets */ 2780 return; 2781 } 2782 break; 2783 2784 case IPPROTO_IGMP: 2785 /* 2786 * If we are not willing to accept IGMP packets in clear, 2787 * then check with global policy. 2788 */ 2789 if (ipst->ips_igmp_accept_clear_messages == 0) { 2790 mp = ipsec_check_global_policy(mp, NULL, 2791 ipha, NULL, ira, ns); 2792 if (mp == NULL) 2793 return; 2794 } 2795 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2796 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2797 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2798 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2799 freemsg(mp); 2800 return; 2801 } 2802 /* 2803 * Validate checksum 2804 */ 2805 if (IP_CSUM(mp, ip_hdr_length, 0)) { 2806 ++ipst->ips_igmpstat.igps_rcv_badsum; 2807 ip_drop_input("igps_rcv_badsum", mp, ill); 2808 freemsg(mp); 2809 return; 2810 } 2811 2812 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2813 mp = igmp_input(mp, ira); 2814 if (mp == NULL) { 2815 /* Bad packet - discarded by igmp_input */ 2816 return; 2817 } 2818 break; 2819 case IPPROTO_PIM: 2820 /* 2821 * If we are not willing to accept PIM packets in clear, 2822 * then check with global policy. 
2823 */ 2824 if (ipst->ips_pim_accept_clear_messages == 0) { 2825 mp = ipsec_check_global_policy(mp, NULL, 2826 ipha, NULL, ira, ns); 2827 if (mp == NULL) 2828 return; 2829 } 2830 if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2831 !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2832 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2833 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2834 freemsg(mp); 2835 return; 2836 } 2837 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2838 2839 /* Checksum is verified in pim_input */ 2840 mp = pim_input(mp, ira); 2841 if (mp == NULL) { 2842 /* Bad packet - discarded by pim_input */ 2843 return; 2844 } 2845 break; 2846 case IPPROTO_AH: 2847 case IPPROTO_ESP: { 2848 /* 2849 * Fast path for AH/ESP. 2850 */ 2851 netstack_t *ns = ipst->ips_netstack; 2852 ipsec_stack_t *ipss = ns->netstack_ipsec; 2853 2854 IP_STAT(ipst, ipsec_proto_ahesp); 2855 2856 if (!ipsec_loaded(ipss)) { 2857 ip_proto_not_sup(mp, ira); 2858 return; 2859 } 2860 2861 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2862 /* select inbound SA and have IPsec process the pkt */ 2863 if (protocol == IPPROTO_ESP) { 2864 esph_t *esph; 2865 boolean_t esp_in_udp_sa; 2866 boolean_t esp_in_udp_packet; 2867 2868 mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2869 if (mp == NULL) 2870 return; 2871 2872 ASSERT(esph != NULL); 2873 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2874 ASSERT(ira->ira_ipsec_esp_sa != NULL); 2875 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2876 2877 esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags & 2878 IPSA_F_NATT) != 0); 2879 esp_in_udp_packet = 2880 (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0; 2881 2882 /* 2883 * The following is a fancy, but quick, way of saying: 2884 * ESP-in-UDP SA and Raw ESP packet --> drop 2885 * OR 2886 * ESP SA and ESP-in-UDP packet --> drop 2887 */ 2888 if (esp_in_udp_sa != esp_in_udp_packet) { 2889 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2890 ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2891 DROPPER(ipss, 
ipds_esp_no_sa), 2892 &ipss->ipsec_dropper); 2893 return; 2894 } 2895 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2896 ira); 2897 } else { 2898 ah_t *ah; 2899 2900 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2901 if (mp == NULL) 2902 return; 2903 2904 ASSERT(ah != NULL); 2905 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2906 ASSERT(ira->ira_ipsec_ah_sa != NULL); 2907 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2908 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2909 ira); 2910 } 2911 2912 if (mp == NULL) { 2913 /* 2914 * Either it failed or is pending. In the former case 2915 * ipIfStatsInDiscards was increased. 2916 */ 2917 return; 2918 } 2919 /* we're done with IPsec processing, send it up */ 2920 ip_input_post_ipsec(mp, ira); 2921 return; 2922 } 2923 case IPPROTO_ENCAP: { 2924 ipha_t *inner_ipha; 2925 2926 /* 2927 * Handle self-encapsulated packets (IP-in-IP where 2928 * the inner addresses == the outer addresses). 2929 */ 2930 if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) > 2931 mp->b_wptr) { 2932 if (ira->ira_pktlen < 2933 ip_hdr_length + sizeof (ipha_t)) { 2934 BUMP_MIB(ill->ill_ip_mib, 2935 ipIfStatsInTruncatedPkts); 2936 ip_drop_input("ipIfStatsInTruncatedPkts", 2937 mp, ill); 2938 freemsg(mp); 2939 return; 2940 } 2941 ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length + 2942 sizeof (ipha_t) - mp->b_rptr, ira); 2943 if (ipha == NULL) { 2944 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2945 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2946 freemsg(mp); 2947 return; 2948 } 2949 } 2950 inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length); 2951 /* 2952 * Check the sanity of the inner IP header. 
2953 */ 2954 if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) { 2955 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2956 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2957 freemsg(mp); 2958 return; 2959 } 2960 if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) { 2961 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2962 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2963 freemsg(mp); 2964 return; 2965 } 2966 if (inner_ipha->ipha_src != ipha->ipha_src || 2967 inner_ipha->ipha_dst != ipha->ipha_dst) { 2968 /* We fallthru to iptun fanout below */ 2969 goto iptun; 2970 } 2971 2972 /* 2973 * Self-encapsulated tunnel packet. Remove 2974 * the outer IP header and fanout again. 2975 * We also need to make sure that the inner 2976 * header is pulled up until options. 2977 */ 2978 mp->b_rptr = (uchar_t *)inner_ipha; 2979 ipha = inner_ipha; 2980 ip_hdr_length = IPH_HDR_LENGTH(ipha); 2981 if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) { 2982 if (ira->ira_pktlen < 2983 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) { 2984 BUMP_MIB(ill->ill_ip_mib, 2985 ipIfStatsInTruncatedPkts); 2986 ip_drop_input("ipIfStatsInTruncatedPkts", 2987 mp, ill); 2988 freemsg(mp); 2989 return; 2990 } 2991 ipha = ip_pullup(mp, 2992 (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira); 2993 if (ipha == NULL) { 2994 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2995 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2996 freemsg(mp); 2997 return; 2998 } 2999 } 3000 if (ip_hdr_length > sizeof (ipha_t)) { 3001 /* We got options on the inner packet. */ 3002 ipaddr_t dst = ipha->ipha_dst; 3003 int error = 0; 3004 3005 dst = ip_input_options(ipha, dst, mp, ira, &error); 3006 if (error != 0) { 3007 /* 3008 * An ICMP error has been sent and the packet 3009 * has been dropped. 3010 */ 3011 return; 3012 } 3013 if (dst != ipha->ipha_dst) { 3014 /* 3015 * Someone put a source-route in 3016 * the inside header of a self- 3017 * encapsulated packet. 
Drop it 3018 * with extreme prejudice and let 3019 * the sender know. 3020 */ 3021 ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", 3022 mp, ill); 3023 icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, 3024 ira); 3025 return; 3026 } 3027 } 3028 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 3029 /* 3030 * This means that somebody is sending 3031 * Self-encapsualted packets without AH/ESP. 3032 * 3033 * Send this packet to find a tunnel endpoint. 3034 * if I can't find one, an ICMP 3035 * PROTOCOL_UNREACHABLE will get sent. 3036 */ 3037 protocol = ipha->ipha_protocol; 3038 ira->ira_protocol = protocol; 3039 goto iptun; 3040 } 3041 3042 /* Update based on removed IP header */ 3043 ira->ira_ip_hdr_length = ip_hdr_length; 3044 ira->ira_pktlen = ntohs(ipha->ipha_length); 3045 3046 if (ira->ira_flags & IRAF_IPSEC_DECAPS) { 3047 /* 3048 * This packet is self-encapsulated multiple 3049 * times. We don't want to recurse infinitely. 3050 * To keep it simple, drop the packet. 3051 */ 3052 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3053 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3054 freemsg(mp); 3055 return; 3056 } 3057 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3058 ira->ira_flags |= IRAF_IPSEC_DECAPS; 3059 3060 ip_input_post_ipsec(mp, ira); 3061 return; 3062 } 3063 3064 iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */ 3065 case IPPROTO_IPV6: 3066 /* iptun will verify trusted label */ 3067 connp = ipcl_classify_v4(mp, protocol, ip_hdr_length, 3068 ira, ipst); 3069 if (connp != NULL) { 3070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3071 ira->ira_ill = ira->ira_rill = NULL; 3072 (connp->conn_recv)(connp, mp, NULL, ira); 3073 CONN_DEC_REF(connp); 3074 ira->ira_ill = ill; 3075 ira->ira_rill = rill; 3076 return; 3077 } 3078 /* FALLTHRU */ 3079 default: 3080 /* 3081 * On a labeled system, we have to check whether the zone 3082 * itself is permitted to receive raw traffic. 
3083 */ 3084 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 3085 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 3086 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3087 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3088 freemsg(mp); 3089 return; 3090 } 3091 } 3092 break; 3093 } 3094 3095 /* 3096 * The above input functions may have returned the pulled up message. 3097 * So ipha need to be reinitialized. 3098 */ 3099 ipha = (ipha_t *)mp->b_rptr; 3100 ira->ira_protocol = protocol = ipha->ipha_protocol; 3101 if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) { 3102 /* 3103 * No user-level listener for these packets packets. 3104 * Check for IPPROTO_ENCAP... 3105 */ 3106 if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { 3107 /* 3108 * Check policy here, 3109 * THEN ship off to ip_mroute_decap(). 3110 * 3111 * BTW, If I match a configured IP-in-IP 3112 * tunnel above, this path will not be reached, and 3113 * ip_mroute_decap will never be called. 3114 */ 3115 mp = ipsec_check_global_policy(mp, connp, 3116 ipha, NULL, ira, ns); 3117 if (mp != NULL) { 3118 ip_mroute_decap(mp, ira); 3119 } /* Else we already freed everything! */ 3120 } else { 3121 ip_proto_not_sup(mp, ira); 3122 } 3123 return; 3124 } 3125 3126 /* 3127 * Handle fanout to raw sockets. There 3128 * can be more than one stream bound to a particular 3129 * protocol. When this is the case, each one gets a copy 3130 * of any incoming packets. 3131 */ 3132 ASSERT(ira->ira_protocol == ipha->ipha_protocol); 3133 ip_fanout_proto_v4(mp, ipha, ira); 3134 return; 3135 3136 discard: 3137 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3138 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3139 freemsg(mp); 3140 #undef rptr 3141 } 3142