1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26 * Copyright 2019 Joyent, Inc. 27 * Copyright 2024 Oxide Computer Company 28 */ 29 /* Copyright (c) 1990 Mentat Inc. 
*/ 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsubr.h> 37 #include <sys/strlog.h> 38 #include <sys/strsun.h> 39 #include <sys/zone.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/xti_inet.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/kobj.h> 48 #include <sys/modctl.h> 49 #include <sys/atomic.h> 50 #include <sys/policy.h> 51 #include <sys/priv.h> 52 53 #include <sys/systm.h> 54 #include <sys/param.h> 55 #include <sys/kmem.h> 56 #include <sys/sdt.h> 57 #include <sys/socket.h> 58 #include <sys/vtrace.h> 59 #include <sys/isa_defs.h> 60 #include <sys/mac.h> 61 #include <sys/mac_client.h> 62 #include <net/if.h> 63 #include <net/if_arp.h> 64 #include <net/route.h> 65 #include <sys/sockio.h> 66 #include <netinet/in.h> 67 #include <net/if_dl.h> 68 69 #include <inet/common.h> 70 #include <inet/mi.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 #include <inet/snmpcom.h> 75 #include <inet/kstatcom.h> 76 77 #include <netinet/igmp_var.h> 78 #include <netinet/ip6.h> 79 #include <netinet/icmp6.h> 80 #include <netinet/sctp.h> 81 82 #include <inet/ip.h> 83 #include <inet/ip_impl.h> 84 #include <inet/ip6.h> 85 #include <inet/ip6_asp.h> 86 #include <inet/optcom.h> 87 #include <inet/tcp.h> 88 #include <inet/tcp_impl.h> 89 #include <inet/ip_multi.h> 90 #include <inet/ip_if.h> 91 #include <inet/ip_ire.h> 92 #include <inet/ip_ftable.h> 93 #include <inet/ip_rts.h> 94 #include <inet/ip_ndp.h> 95 #include <inet/ip_listutils.h> 96 #include <netinet/igmp.h> 97 #include <netinet/ip_mroute.h> 98 #include <inet/ipp_common.h> 99 100 #include <net/pfkeyv2.h> 101 #include <inet/sadb.h> 102 #include <inet/ipsec_impl.h> 103 #include <inet/ipdrop.h> 104 #include <inet/ip_netinfo.h> 105 #include <inet/ilb_ip.h> 106 #include <sys/squeue_impl.h> 107 
#include <sys/squeue.h> 108 109 #include <sys/ethernet.h> 110 #include <net/if_types.h> 111 #include <sys/cpuvar.h> 112 113 #include <ipp/ipp.h> 114 #include <ipp/ipp_impl.h> 115 #include <ipp/ipgpc/ipgpc.h> 116 117 #include <sys/pattr.h> 118 #include <inet/ipclassifier.h> 119 #include <inet/sctp_ip.h> 120 #include <inet/sctp/sctp_impl.h> 121 #include <inet/udp_impl.h> 122 #include <sys/sunddi.h> 123 124 #include <sys/tsol/label.h> 125 #include <sys/tsol/tnet.h> 126 127 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 128 129 #ifdef DEBUG 130 extern boolean_t skip_sctp_cksum; 131 #endif 132 133 static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *, 134 ip_recv_attr_t *); 135 136 static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *, 137 ip_recv_attr_t *); 138 static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, 139 ip_recv_attr_t *); 140 141 #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4) 142 143 /* 144 * Direct read side procedure capable of dealing with chains. GLDv3 based 145 * drivers call this function directly with mblk chains while STREAMS 146 * read side procedure ip_rput() calls this for single packet with ip_ring 147 * set to NULL to process one packet at a time. 148 * 149 * The ill will always be valid if this function is called directly from 150 * the driver. 151 * 152 * If this chain is part of a VLAN stream, then the VLAN tag is 153 * stripped from the MAC header before being delivered to this 154 * function. 155 * 156 * If the IP header in packet is not 32-bit aligned, every message in the 157 * chain will be aligned before further operations. This is required on SPARC 158 * platform. 
 */
void
ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip)
{
	/* Thin wrapper: no target squeue, so no accepted-chain outputs. */
	(void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL,
	    NULL);
}

/*
 * ip_accept_tcp() - This function is called by the squeue when it retrieves
 * a chain of packets in the poll mode. The packets have gone through the
 * data link processing but not IP processing. For performance and latency
 * reasons, the squeue wants to process the chain in line instead of feeding
 * it back via ip_input path.
 *
 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v4
 * will pass back any TCP packets matching the target sqp to
 * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by
 * ip_input_v4 and ip_fanout_v4 as normal.
 * The TCP packets that match the target squeue are returned to the caller
 * as a b_next chain after each packet has been prepended with an mblk
 * from ip_recv_attr_to_mblk.
 */
mblk_t *
ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
    mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
{
	return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp,
	    last, cnt));
}

/*
 * Used by ip_input and ip_accept_tcp
 * The last three arguments are only used by ip_accept_tcp, and mhip is
 * only used by ip_input.
 */
mblk_t *
ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
    struct mac_header_info_s *mhip, squeue_t *target_sqp,
    mblk_t **last, uint_t *cnt)
{
	mblk_t		*mp;
	ipha_t		*ipha;
	ip_recv_attr_t	iras;	/* Receive attributes */
	rtc_t		rtc;
	iaflags_t	chain_flags = 0;	/* Fixed for chain */
	mblk_t		*ahead = NULL;	/* Accepted head */
	mblk_t		*atail = NULL;	/* Accepted tail */
	uint_t		acnt = 0;	/* Accepted count */

	ASSERT(mp_chain != NULL);
	ASSERT(ill != NULL);

	/* These ones do not change as we loop over packets */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;
	iras.ira_sqp = NULL;
	iras.ira_ring = ip_ring;
	/* For ECMP and outbound transmit ring selection */
	iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);

	iras.ira_target_sqp = target_sqp;
	iras.ira_target_sqp_mp = NULL;
	if (target_sqp != NULL)
		chain_flags |= IRAF_TARGET_SQP;

	/*
	 * We try to have a mhip pointer when possible, but
	 * it might be NULL in some cases. In those cases we
	 * have to assume unicast.
	 */
	iras.ira_mhip = mhip;
	iras.ira_flags = 0;
	if (mhip != NULL) {
		switch (mhip->mhi_dsttype) {
		case MAC_ADDRTYPE_MULTICAST :
			chain_flags |= IRAF_L2DST_MULTICAST;
			break;
		case MAC_ADDRTYPE_BROADCAST :
			chain_flags |= IRAF_L2DST_BROADCAST;
			break;
		}
	}

	/*
	 * Initialize the one-element route cache.
	 *
	 * We do ire caching from one iteration to
	 * another. In the event the packet chain contains
	 * all packets from the same dst, this caching saves
	 * an ire_route_recursive for each of the succeeding
	 * packets in a packet chain.
	 */
	rtc.rtc_ire = NULL;
	rtc.rtc_ipaddr = INADDR_ANY;

	/* Loop over b_next */
	for (mp = mp_chain; mp != NULL; mp = mp_chain) {
		mp_chain = mp->b_next;
		mp->b_next = NULL;

		ASSERT(DB_TYPE(mp) == M_DATA);


		/*
		 * if db_ref > 1 then copymsg and free original. Packet
		 * may be changed and we do not want the other entity
		 * who has a reference to this message to trip over the
		 * changes. This is a blind change because trying to
		 * catch all places that might change the packet is too
		 * difficult.
		 *
		 * This corresponds to the fast path case, where we have
		 * a chain of M_DATA mblks. We check the db_ref count
		 * of only the 1st data block in the mblk chain. There
		 * doesn't seem to be a reason why a device driver would
		 * send up data with varying db_ref counts in the mblk
		 * chain. In any case the Fast path is a private
		 * interface, and our drivers don't do such a thing.
		 * Given the above assumption, there is no need to walk
		 * down the entire mblk chain (which could have a
		 * potential performance problem)
		 *
		 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
		 * to here because of exclusive ip stacks and vnics.
		 * Packets transmitted from exclusive stack over vnic
		 * can have db_ref > 1 and when it gets looped back to
		 * another vnic in a different zone, you have ip_input()
		 * getting dblks with db_ref > 1. So if someone
		 * complains of TCP performance under this scenario,
		 * take a serious look here on the impact of copymsg().
		 */
		if (DB_REF(mp) > 1) {
			if ((mp = ip_fix_dbref(mp, &iras)) == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
		}

		/*
		 * IP header ptr not aligned?
		 * OR IP header not complete in first mblk
		 */
		ipha = (ipha_t *)mp->b_rptr;
		if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) {
			mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH,
			    &iras);
			if (mp == NULL) {
				/* mhip might point into 1st packet in chain */
				iras.ira_mhip = NULL;
				continue;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/* Protect against a mix of Ethertypes and IP versions */
		if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
			ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Check for Martian addrs; we have to explicitly
		 * test for zero dst since this is also used as
		 * an indication that the rtc is not used.
		 */
		if (ipha->ipha_dst == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			/* mhip might point into 1st packet in the chain. */
			iras.ira_mhip = NULL;
			continue;
		}

		/*
		 * Keep L2SRC from a previous packet in chain since mhip
		 * might point into an earlier packet in the chain.
		 * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast
		 * source check in forwarding path.
		 */
		chain_flags |= (iras.ira_flags &
		    (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC));

		/* Reset the per-packet attributes for this iteration. */
		iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM |
		    IRAF_VERIFY_ULP_CKSUM | chain_flags;
		iras.ira_free_flags = 0;
		iras.ira_cred = NULL;
		iras.ira_cpid = NOPID;
		iras.ira_tsl = NULL;
		iras.ira_zoneid = ALL_ZONES;	/* Default for forwarding */

		/*
		 * We must count all incoming packets, even if they end
		 * up being dropped later on. Defer counting bytes until
		 * we have the whole IP header in first mblk.
		 */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);

		iras.ira_pktlen = ntohs(ipha->ipha_length);
		UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
		    iras.ira_pktlen);
		iras.ira_ttl = ipha->ipha_ttl;

		/*
		 * Call one of:
		 * ill_input_full_v4
		 * ill_input_short_v4
		 * The former is used in unusual cases. See ill_set_inputfn().
		 */
		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);

		/* Any references to clean up? No hold on ira_ill */
		if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
			ira_cleanup(&iras, B_FALSE);

		if (iras.ira_target_sqp_mp != NULL) {
			/* Better be called from ip_accept_tcp */
			ASSERT(target_sqp != NULL);

			/* Found one packet to accept */
			mp = iras.ira_target_sqp_mp;
			iras.ira_target_sqp_mp = NULL;
			ASSERT(ip_recv_attr_is_mblk(mp));

			/* Append to the accepted b_next chain. */
			if (atail != NULL)
				atail->b_next = mp;
			else
				ahead = mp;
			atail = mp;
			acnt++;
			mp = NULL;
		}
		/* mhip might point into 1st packet in the chain. */
		iras.ira_mhip = NULL;
	}
	/* Any remaining references to the route cache? */
	if (rtc.rtc_ire != NULL) {
		ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
		ire_refrele(rtc.rtc_ire);
	}

	if (ahead != NULL) {
		/* Better be called from ip_accept_tcp */
		ASSERT(target_sqp != NULL);
		*last = atail;
		*cnt = acnt;
		return (ahead);
	}

	return (NULL);
}

/*
 * This input function is used when
 * - is_system_labeled()
 * - CGTP filtering
 * - DHCP unicast before we have an IP address configured
 * - there is a listener for IPPROTO_RSVP
 */
void
ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	int		cgtp_flt_pkt;

	ASSERT(ira->ira_tsl == NULL);

	/*
	 * Attach any necessary label information to
	 * this packet
	 */
	if (is_system_labeled()) {
		ira->ira_flags |= IRAF_SYSTEM_LABELED;

		/*
		 * This updates ira_cred, ira_tsl and ira_free_flags based
		 * on the label.
		 */
		if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/* Note that ira_tsl can be NULL here. */

		/* tsol_get_pkt_label sometimes does pullupmsg */
		ipha = (ipha_t *)mp->b_rptr;
	}

	/*
	 * Invoke the CGTP (multirouting) filtering module to process
	 * the incoming packet. Packets identified as duplicates
	 * must be discarded. Filtering is active only if the
	 * ip_cgtp_filter ndd variable is non-zero.
	 */
	cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP;
	if (ipst->ips_ip_cgtp_filter &&
	    ipst->ips_ip_cgtp_filter_ops != NULL) {
		netstackid_t stackid;

		stackid = ipst->ips_netstack->netstack_stackid;
		/*
		 * CGTP and IPMP are mutually exclusive so
		 * phyint_ifindex is fine here.
476 */ 477 cgtp_flt_pkt = 478 ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid, 479 ill->ill_phyint->phyint_ifindex, mp); 480 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 481 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 482 freemsg(mp); 483 return; 484 } 485 } 486 487 /* 488 * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP 489 * server to unicast DHCP packets to a DHCP client using the 490 * IP address it is offering to the client. This can be 491 * disabled through the "broadcast bit", but not all DHCP 492 * servers honor that bit. Therefore, to interoperate with as 493 * many DHCP servers as possible, the DHCP client allows the 494 * server to unicast, but we treat those packets as broadcast 495 * here. Note that we don't rewrite the packet itself since 496 * (a) that would mess up the checksums and (b) the DHCP 497 * client conn is bound to INADDR_ANY so ip_fanout_udp() will 498 * hand it the packet regardless. 499 */ 500 if (ill->ill_dhcpinit != 0 && 501 ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION && 502 ipha->ipha_protocol == IPPROTO_UDP) { 503 udpha_t *udpha; 504 505 ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira); 506 if (ipha == NULL) { 507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 508 ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill); 509 freemsg(mp); 510 return; 511 } 512 /* Reload since pullupmsg() can change b_rptr. */ 513 udpha = (udpha_t *)&ipha[1]; 514 515 if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) { 516 DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill, 517 mblk_t *, mp); 518 /* 519 * This assumes that we deliver to all conns for 520 * multicast and broadcast packets. 521 */ 522 nexthop = INADDR_BROADCAST; 523 ira->ira_flags |= IRAF_DHCP_UNICAST; 524 } 525 } 526 527 /* 528 * If rsvpd is running, let RSVP daemon handle its processing 529 * and forwarding of RSVP multicast/unicast packets. 
530 * If rsvpd is not running but mrouted is running, RSVP 531 * multicast packets are forwarded as multicast traffic 532 * and RSVP unicast packets are forwarded by unicast router. 533 * If neither rsvpd nor mrouted is running, RSVP multicast 534 * packets are not forwarded, but the unicast packets are 535 * forwarded like unicast traffic. 536 */ 537 if (ipha->ipha_protocol == IPPROTO_RSVP && 538 ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) { 539 /* RSVP packet and rsvpd running. Treat as ours */ 540 ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop))); 541 /* 542 * We use a multicast address to get the packet to 543 * ire_recv_multicast_v4. There will not be a membership 544 * check since we set IRAF_RSVP 545 */ 546 nexthop = htonl(INADDR_UNSPEC_GROUP); 547 ira->ira_flags |= IRAF_RSVP; 548 } 549 550 ill_input_short_v4(mp, ipha, &nexthop, ira, rtc); 551 } 552 553 /* 554 * This is the tail-end of the full receive side packet handling. 555 * It can be used directly when the configuration is simple. 556 */ 557 void 558 ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg, 559 ip_recv_attr_t *ira, rtc_t *rtc) 560 { 561 ire_t *ire; 562 uint_t opt_len; 563 ill_t *ill = ira->ira_ill; 564 ip_stack_t *ipst = ill->ill_ipst; 565 uint_t pkt_len; 566 ssize_t len; 567 ipha_t *ipha = (ipha_t *)iph_arg; 568 ipaddr_t nexthop = *(ipaddr_t *)nexthop_arg; 569 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 570 uint_t irr_flags; 571 #define rptr ((uchar_t *)ipha) 572 573 ASSERT(DB_TYPE(mp) == M_DATA); 574 575 /* 576 * The following test for loopback is faster than 577 * IP_LOOPBACK_ADDR(), because it avoids any bitwise 578 * operations. 
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If the packet originated from a same-machine sender or
	 * there is a good HW IP header checksum, we clear the need
	 * to look at the IP header checksum.
	 */
	if (((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum)) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if the machine is set up as
	 * L3 loadbalancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t lb_dst;
		int lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present! Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 * ire_recv_local_v4 - for IRE_LOCAL
	 * ire_recv_loopback_v4 - for IRE_LOOPBACK
	 * ire_recv_multirt_v4 - if RTF_MULTIRT
	 * ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 * ire_recv_multicast_v4 - for IRE_MULTICAST
	 * ire_recv_broadcast_v4 - for IRE_BROADCAST
	 * ire_recv_noaccept_v4 - for ire_noaccept ones
	 * ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef	rptr

/*
 * ire_recvfn for IREs that need forwarding
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	/* Never forward packets that arrived as L2 multicast/broadcast. */
	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but that some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the smaller of the interface MTU and any per-route MTU. */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1116 */ 1117 void 1118 ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1119 ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1120 { 1121 ill_t *dst_ill = nce->nce_ill; 1122 uint32_t pkt_len; 1123 uint32_t sum; 1124 iaflags_t iraflags = ira->ira_flags; 1125 ip_stack_t *ipst = ill->ill_ipst; 1126 iaflags_t ixaflags; 1127 1128 if (ipha->ipha_ttl <= 1) { 1129 /* Perhaps the checksum was bad */ 1130 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1132 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1133 freemsg(mp); 1134 return; 1135 } 1136 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1137 ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1138 icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1139 return; 1140 } 1141 1142 /* 1143 * Count the forward as a hop and update the checksum 1144 * accordingly. 1145 */ 1146 ipha->ipha_ttl--; 1147 sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1148 ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1149 1150 /* Check if there are options to update */ 1151 if (iraflags & IRAF_IPV4_OPTIONS) { 1152 ASSERT(ipha->ipha_version_and_hdr_length != 1153 IP_SIMPLE_HDR_VERSION); 1154 ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1155 1156 if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1157 /* ipIfStatsForwProhibits and ip_drop_input done */ 1158 return; 1159 } 1160 1161 ipha->ipha_hdr_checksum = 0; 1162 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1163 } 1164 1165 /* Initiate Write side IPPF processing before any fragmentation */ 1166 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1167 /* ip_process translates an IS_UNDER_IPMP */ 1168 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1169 if (mp == NULL) { 1170 /* ip_drop_packet and MIB done */ 1171 ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1172 " during IPPF processing\n")); 1173 return; 1174 } 1175 } 1176 1177 pkt_len = ira->ira_pktlen; 1178 1179 
BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1180 1181 ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1182 1183 if (pkt_len > mtu) { 1184 /* 1185 * It needs fragging on its way out. If we haven't 1186 * verified the header checksum yet we do it now since 1187 * are going to put a surely good checksum in the 1188 * outgoing header, we have to make sure that it 1189 * was good coming in. 1190 */ 1191 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1192 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1193 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1194 freemsg(mp); 1195 return; 1196 } 1197 if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1198 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1199 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1200 if (iraflags & IRAF_SYSTEM_LABELED) { 1201 /* 1202 * Remove any CIPSO option added by 1203 * tsol_ip_forward, and make sure we report 1204 * a path MTU so that there 1205 * is room to add such a CIPSO option for future 1206 * packets. 1207 */ 1208 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1209 AF_INET); 1210 } 1211 1212 icmp_frag_needed(mp, mtu, ira); 1213 return; 1214 } 1215 1216 (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1217 ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1218 return; 1219 } 1220 1221 ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1222 if (iraflags & IRAF_LOOPBACK_COPY) { 1223 /* 1224 * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1225 * is don't care 1226 */ 1227 (void) ip_postfrag_loopcheck(mp, nce, 1228 ixaflags | IXAF_LOOPBACK_COPY, 1229 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1230 } else { 1231 (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1232 GLOBAL_ZONEID, 0, NULL); 1233 } 1234 } 1235 1236 /* 1237 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1238 * which is what ire_route_recursive returns when there is no matching ire. 
/*
 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
 * which is what ire_route_recursive returns when there is no matching ire.
 * Send ICMP unreachable unless blackhole.
 *
 * Consumes mp on all paths (freed, or handed to icmp_unreachable).
 */
void
ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * Would we have forwarded this packet if we had a route?
	 * We never forward packets that arrived as link-layer
	 * multicast/broadcast, so drop those silently here.
	 */
	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	/* Not acting as a router on this interface: no forwarding at all */
	if (!(ill->ill_flags & ILLF_ROUTER)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}
	/*
	 * If we had a route this could have been forwarded. Count as such.
	 *
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above conditions have been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);

	/* Notify routing sockets (e.g. routed/in.routed) of the miss */
	ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
	    ipst);

	if (ire->ire_flags & RTF_BLACKHOLE) {
		/* Blackhole: drop silently, no ICMP error */
		ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
		freemsg(mp);
	} else {
		ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);

		if (ip_source_routed(ipha, ipst)) {
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
		} else {
			icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira);
		}
	}
}
1294 */ 1295 /* ARGSUSED */ 1296 void 1297 ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1298 ip_recv_attr_t *ira) 1299 { 1300 ill_t *ill = ira->ira_ill; 1301 1302 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1303 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1304 freemsg(mp); 1305 } 1306 1307 /* 1308 * ire_recvfn for IRE_BROADCAST. 1309 */ 1310 void 1311 ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1312 ip_recv_attr_t *ira) 1313 { 1314 ipha_t *ipha = (ipha_t *)iph_arg; 1315 ill_t *ill = ira->ira_ill; 1316 ill_t *dst_ill = ire->ire_ill; 1317 ip_stack_t *ipst = ill->ill_ipst; 1318 ire_t *alt_ire; 1319 nce_t *nce; 1320 ipaddr_t ipha_dst; 1321 1322 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1323 1324 /* Tag for higher-level protocols */ 1325 ira->ira_flags |= IRAF_BROADCAST; 1326 1327 /* 1328 * Whether local or directed broadcast forwarding: don't allow 1329 * for TCP. 1330 */ 1331 if (ipha->ipha_protocol == IPPROTO_TCP) { 1332 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1333 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1334 freemsg(mp); 1335 return; 1336 } 1337 1338 /* 1339 * So that we don't end up with dups, only one ill an IPMP group is 1340 * nominated to receive broadcast traffic. 1341 * If we have no cast_ill we are liberal and accept everything. 1342 */ 1343 if (IS_UNDER_IPMP(ill)) { 1344 /* For an under ill_grp can change under lock */ 1345 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1346 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1347 ill->ill_grp->ig_cast_ill != NULL) { 1348 rw_exit(&ipst->ips_ill_g_lock); 1349 /* No MIB since this is normal operation */ 1350 ip_drop_input("not nom_cast", mp, ill); 1351 freemsg(mp); 1352 return; 1353 } 1354 rw_exit(&ipst->ips_ill_g_lock); 1355 1356 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1357 } 1358 1359 /* 1360 * After reassembly and IPsec we will need to duplicate the 1361 * broadcast packet for all matching zones on the ill. 
1362 */ 1363 ira->ira_zoneid = ALL_ZONES; 1364 1365 /* 1366 * Check for directed broadcast i.e. ire->ire_ill is different than 1367 * the incoming ill. 1368 * The same broadcast address can be assigned to multiple interfaces 1369 * so have to check explicitly for that case by looking up the alt_ire 1370 */ 1371 if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1372 /* Reassemble on the ill on which the packet arrived */ 1373 ip_input_local_v4(ire, mp, ipha, ira); 1374 /* Restore */ 1375 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1376 return; 1377 } 1378 1379 /* Is there an IRE_BROADCAST on the incoming ill? */ 1380 ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1381 ipha->ipha_dst); 1382 alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1383 ALL_ZONES, ira->ira_tsl, 1384 MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1385 if (alt_ire != NULL) { 1386 /* Not a directed broadcast */ 1387 /* 1388 * In the special case of multirouted broadcast 1389 * packets, we unconditionally need to "gateway" 1390 * them to the appropriate interface here so that reassembly 1391 * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1392 * have RTF_MULTIRT set so we look for such an IRE in the 1393 * bucket. 
1394 */ 1395 if (alt_ire->ire_flags & RTF_MULTIRT) { 1396 irb_t *irb; 1397 ire_t *ire1; 1398 1399 irb = ire->ire_bucket; 1400 irb_refhold(irb); 1401 for (ire1 = irb->irb_ire; ire1 != NULL; 1402 ire1 = ire1->ire_next) { 1403 if (IRE_IS_CONDEMNED(ire1)) 1404 continue; 1405 if (!(ire1->ire_type & IRE_BROADCAST) || 1406 (ire1->ire_flags & RTF_MULTIRT)) 1407 continue; 1408 ill = ire1->ire_ill; 1409 ill_refhold(ill); 1410 break; 1411 } 1412 irb_refrele(irb); 1413 if (ire1 != NULL) { 1414 ill_t *orig_ill = ira->ira_ill; 1415 1416 ire_refrele(alt_ire); 1417 /* Reassemble on the new ill */ 1418 ira->ira_ill = ill; 1419 ip_input_local_v4(ire, mp, ipha, ira); 1420 ill_refrele(ill); 1421 /* Restore */ 1422 ira->ira_ill = orig_ill; 1423 ira->ira_ruifindex = 1424 orig_ill->ill_phyint->phyint_ifindex; 1425 return; 1426 } 1427 } 1428 ire_refrele(alt_ire); 1429 /* Reassemble on the ill on which the packet arrived */ 1430 ip_input_local_v4(ire, mp, ipha, ira); 1431 goto done; 1432 } 1433 1434 /* 1435 * This is a directed broadcast 1436 * 1437 * If directed broadcast is allowed, then forward the packet out 1438 * the destination interface with IXAF_LOOPBACK_COPY set. That will 1439 * result in ip_input() receiving a copy of the packet on the 1440 * appropriate ill. (We could optimize this to avoid the extra trip 1441 * via ip_input(), but since directed broadcasts are normally disabled 1442 * it doesn't make sense to optimize it.) 
1443 */ 1444 if (!ipst->ips_ip_g_forward_directed_bcast || 1445 (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1446 ip_drop_input("directed broadcast not allowed", mp, ill); 1447 freemsg(mp); 1448 goto done; 1449 } 1450 if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1451 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1452 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1453 freemsg(mp); 1454 goto done; 1455 } 1456 1457 /* 1458 * Clear the indication that this may have hardware 1459 * checksum as we are not using it for forwarding. 1460 */ 1461 DB_CKSUMFLAGS(mp) = 0; 1462 1463 /* 1464 * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 1465 */ 1466 ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1467 ipha->ipha_hdr_checksum = 0; 1468 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1469 1470 /* 1471 * We use ip_forward_xmit to do any fragmentation. 1472 * and loopback copy on the outbound interface. 1473 * 1474 * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1475 */ 1476 ira->ira_flags |= IRAF_LOOPBACK_COPY; 1477 1478 nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1479 if (nce == NULL) { 1480 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1481 ip_drop_output("No nce", mp, dst_ill); 1482 freemsg(mp); 1483 goto done; 1484 } 1485 1486 ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); 1487 nce_refrele(nce); 1488 done: 1489 /* Restore */ 1490 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1491 } 1492 1493 /* 1494 * ire_recvfn for IRE_MULTICAST. 
1495 */ 1496 void 1497 ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1498 ip_recv_attr_t *ira) 1499 { 1500 ipha_t *ipha = (ipha_t *)iph_arg; 1501 ill_t *ill = ira->ira_ill; 1502 ip_stack_t *ipst = ill->ill_ipst; 1503 1504 ASSERT(ire->ire_ill == ira->ira_ill); 1505 1506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1507 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1508 1509 /* RSVP hook */ 1510 if (ira->ira_flags & IRAF_RSVP) 1511 goto forus; 1512 1513 /* Tag for higher-level protocols */ 1514 ira->ira_flags |= IRAF_MULTICAST; 1515 1516 /* 1517 * So that we don't end up with dups, only one ill an IPMP group is 1518 * nominated to receive multicast traffic. 1519 * If we have no cast_ill we are liberal and accept everything. 1520 */ 1521 if (IS_UNDER_IPMP(ill)) { 1522 ip_stack_t *ipst = ill->ill_ipst; 1523 1524 /* For an under ill_grp can change under lock */ 1525 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1526 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1527 ill->ill_grp->ig_cast_ill != NULL) { 1528 rw_exit(&ipst->ips_ill_g_lock); 1529 ip_drop_input("not on cast ill", mp, ill); 1530 freemsg(mp); 1531 return; 1532 } 1533 rw_exit(&ipst->ips_ill_g_lock); 1534 /* 1535 * We switch to the upper ill so that mrouter and hasmembers 1536 * can operate on upper here and in ip_input_multicast. 1537 */ 1538 ill = ipmp_ill_hold_ipmp_ill(ill); 1539 if (ill != NULL) { 1540 ASSERT(ill != ira->ira_ill); 1541 ASSERT(ire->ire_ill == ira->ira_ill); 1542 ira->ira_ill = ill; 1543 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1544 } else { 1545 ill = ira->ira_ill; 1546 } 1547 } 1548 1549 /* 1550 * Check if we are a multicast router - send ip_mforward a copy of 1551 * the packet. 1552 * Due to mroute_decap tunnels we consider forwarding packets even if 1553 * mrouted has not joined the allmulti group on this interface. 
1554 */ 1555 if (ipst->ips_ip_g_mrouter) { 1556 int retval; 1557 1558 /* 1559 * Clear the indication that this may have hardware 1560 * checksum as we are not using it for forwarding. 1561 */ 1562 DB_CKSUMFLAGS(mp) = 0; 1563 1564 /* 1565 * ip_mforward helps us make these distinctions: If received 1566 * on tunnel and not IGMP, then drop. 1567 * If IGMP packet, then don't check membership 1568 * If received on a phyint and IGMP or PIM, then 1569 * don't check membership 1570 */ 1571 retval = ip_mforward(mp, ira); 1572 /* ip_mforward updates mib variables if needed */ 1573 1574 switch (retval) { 1575 case 0: 1576 /* 1577 * pkt is okay and arrived on phyint. 1578 * 1579 * If we are running as a multicast router 1580 * we need to see all IGMP and/or PIM packets. 1581 */ 1582 if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1583 (ipha->ipha_protocol == IPPROTO_PIM)) { 1584 goto forus; 1585 } 1586 break; 1587 case -1: 1588 /* pkt is mal-formed, toss it */ 1589 freemsg(mp); 1590 goto done; 1591 case 1: 1592 /* 1593 * pkt is okay and arrived on a tunnel 1594 * 1595 * If we are running a multicast router 1596 * we need to see all igmp packets. 1597 */ 1598 if (ipha->ipha_protocol == IPPROTO_IGMP) { 1599 goto forus; 1600 } 1601 ip_drop_input("Multicast on tunnel ignored", mp, ill); 1602 freemsg(mp); 1603 goto done; 1604 } 1605 } 1606 1607 /* 1608 * Check if we have members on this ill. This is not necessary for 1609 * correctness because even if the NIC/GLD had a leaky filter, we 1610 * filter before passing to each conn_t. 1611 */ 1612 if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1613 /* 1614 * Nobody interested 1615 * 1616 * This might just be caused by the fact that 1617 * multiple IP Multicast addresses map to the same 1618 * link layer multicast - no need to increment counter! 
1619 */ 1620 ip_drop_input("Multicast with no members", mp, ill); 1621 freemsg(mp); 1622 goto done; 1623 } 1624 forus: 1625 ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1626 ntohl(ipha->ipha_dst))); 1627 1628 /* 1629 * After reassembly and IPsec we will need to duplicate the 1630 * multicast packet for all matching zones on the ill. 1631 */ 1632 ira->ira_zoneid = ALL_ZONES; 1633 1634 /* Reassemble on the ill on which the packet arrived */ 1635 ip_input_local_v4(ire, mp, ipha, ira); 1636 done: 1637 if (ill != ire->ire_ill) { 1638 ill_refrele(ill); 1639 ira->ira_ill = ire->ire_ill; 1640 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1641 } 1642 } 1643 1644 /* 1645 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1646 * Drop packets since we don't forward out multirt routes. 1647 */ 1648 /* ARGSUSED */ 1649 void 1650 ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1651 { 1652 ill_t *ill = ira->ira_ill; 1653 1654 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1655 ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1656 freemsg(mp); 1657 } 1658 1659 /* 1660 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1661 * has rewritten the packet to have a loopback destination address (We 1662 * filter out packet with a loopback destination from arriving over the wire). 1663 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1664 */ 1665 void 1666 ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1667 { 1668 ipha_t *ipha = (ipha_t *)iph_arg; 1669 ill_t *ill = ira->ira_ill; 1670 ill_t *ire_ill = ire->ire_ill; 1671 1672 ira->ira_zoneid = GLOBAL_ZONEID; 1673 1674 /* Switch to the lo0 ill for further processing */ 1675 if (ire_ill != ill) { 1676 /* 1677 * Update ira_ill to be the ILL on which the IP address 1678 * is hosted. 
1679 * No need to hold the ill since we have a hold on the ire 1680 */ 1681 ASSERT(ira->ira_ill == ira->ira_rill); 1682 ira->ira_ill = ire_ill; 1683 1684 ip_input_local_v4(ire, mp, ipha, ira); 1685 1686 /* Restore */ 1687 ASSERT(ira->ira_ill == ire_ill); 1688 ira->ira_ill = ill; 1689 return; 1690 1691 } 1692 ip_input_local_v4(ire, mp, ipha, ira); 1693 } 1694 1695 /* 1696 * ire_recvfn for IRE_LOCAL. 1697 */ 1698 void 1699 ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1700 { 1701 ipha_t *ipha = (ipha_t *)iph_arg; 1702 ill_t *ill = ira->ira_ill; 1703 ill_t *ire_ill = ire->ire_ill; 1704 1705 /* Make a note for DAD that this address is in use */ 1706 ire->ire_last_used_time = LBOLT_FASTPATH; 1707 1708 /* Only target the IRE_LOCAL with the right zoneid. */ 1709 ira->ira_zoneid = ire->ire_zoneid; 1710 1711 /* 1712 * If the packet arrived on the wrong ill, we check that 1713 * this is ok. 1714 * If it is, then we ensure that we do the reassembly on 1715 * the ill on which the address is hosted. We keep ira_rill as 1716 * the one on which the packet arrived, so that IP_PKTINFO and 1717 * friends can report this. 1718 */ 1719 if (ire_ill != ill) { 1720 ire_t *new_ire; 1721 1722 new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1723 if (new_ire == NULL) { 1724 /* Drop packet */ 1725 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1726 ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1727 freemsg(mp); 1728 return; 1729 } 1730 /* 1731 * Update ira_ill to be the ILL on which the IP address 1732 * is hosted. No need to hold the ill since we have a 1733 * hold on the ire. Note that we do the switch even if 1734 * new_ire == ire (for IPMP, ire would be the one corresponding 1735 * to the IPMP ill). 
1736 */ 1737 ASSERT(ira->ira_ill == ira->ira_rill); 1738 ira->ira_ill = new_ire->ire_ill; 1739 1740 /* ira_ruifindex tracks the upper for ira_rill */ 1741 if (IS_UNDER_IPMP(ill)) 1742 ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1743 1744 ip_input_local_v4(new_ire, mp, ipha, ira); 1745 1746 /* Restore */ 1747 ASSERT(ira->ira_ill == new_ire->ire_ill); 1748 ira->ira_ill = ill; 1749 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1750 1751 if (new_ire != ire) 1752 ire_refrele(new_ire); 1753 return; 1754 } 1755 1756 ip_input_local_v4(ire, mp, ipha, ira); 1757 } 1758 1759 /* 1760 * Common function for packets arriving for the host. Handles 1761 * checksum verification, reassembly checks, etc. 1762 */ 1763 static void 1764 ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1765 { 1766 ill_t *ill = ira->ira_ill; 1767 iaflags_t iraflags = ira->ira_flags; 1768 1769 /* 1770 * Verify IP header checksum. If the packet was AH or ESP then 1771 * this flag has already been cleared. Likewise if the packet 1772 * had a hardware checksum. 1773 */ 1774 if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1775 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1776 ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1777 freemsg(mp); 1778 return; 1779 } 1780 1781 if (iraflags & IRAF_IPV4_OPTIONS) { 1782 if (!ip_input_local_options(mp, ipha, ira)) { 1783 /* Error has been sent and mp consumed */ 1784 return; 1785 } 1786 /* 1787 * Some old hardware does partial checksum by including the 1788 * whole IP header, so the partial checksum value might have 1789 * become invalid if any option in the packet have been 1790 * updated. Always clear partial checksum flag here. 1791 */ 1792 DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1793 } 1794 1795 /* 1796 * Is packet part of fragmented IP packet? 
1797 * We compare against defined values in network byte order 1798 */ 1799 if (ipha->ipha_fragment_offset_and_flags & 1800 (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1801 /* 1802 * Make sure we have ira_l2src before we loose the original 1803 * mblk 1804 */ 1805 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1806 ip_setl2src(mp, ira, ira->ira_rill); 1807 1808 mp = ip_input_fragment(mp, ipha, ira); 1809 if (mp == NULL) 1810 return; 1811 /* Completed reassembly */ 1812 ipha = (ipha_t *)mp->b_rptr; 1813 } 1814 1815 /* 1816 * For broadcast and multicast we need some extra work before 1817 * we call ip_fanout_v4(), since in the case of shared-IP zones 1818 * we need to pretend that a packet arrived for each zoneid. 1819 */ 1820 if (iraflags & IRAF_MULTIBROADCAST) { 1821 if (iraflags & IRAF_BROADCAST) 1822 ip_input_broadcast_v4(ire, mp, ipha, ira); 1823 else 1824 ip_input_multicast_v4(ire, mp, ipha, ira); 1825 return; 1826 } 1827 ip_fanout_v4(mp, ipha, ira); 1828 } 1829 1830 1831 /* 1832 * Handle multiple zones which match the same broadcast address 1833 * and ill by delivering a packet to each of them. 1834 * Walk the bucket and look for different ire_zoneid but otherwise 1835 * the same IRE (same ill/addr/mask/type). 1836 * Note that ire_add() tracks IREs that are identical in all 1837 * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1838 * increasing ire_identical_cnt. Thus we don't need to be concerned 1839 * about those. 
1840 */ 1841 static void 1842 ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1843 { 1844 ill_t *ill = ira->ira_ill; 1845 ip_stack_t *ipst = ill->ill_ipst; 1846 netstack_t *ns = ipst->ips_netstack; 1847 irb_t *irb; 1848 ire_t *ire1; 1849 mblk_t *mp1; 1850 ipha_t *ipha1; 1851 uint_t ira_pktlen = ira->ira_pktlen; 1852 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1853 1854 irb = ire->ire_bucket; 1855 1856 /* 1857 * If we don't have more than one shared-IP zone, or if 1858 * there can't be more than one IRE_BROADCAST for this 1859 * IP address, then just set the zoneid and proceed. 1860 */ 1861 if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1862 ira->ira_zoneid = ire->ire_zoneid; 1863 1864 ip_fanout_v4(mp, ipha, ira); 1865 return; 1866 } 1867 irb_refhold(irb); 1868 for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1869 /* We do the main IRE after the end of the loop */ 1870 if (ire1 == ire) 1871 continue; 1872 1873 /* 1874 * Only IREs for the same IP address should be in the same 1875 * bucket. 1876 * But could have IRE_HOSTs in the case of CGTP. 
1877 */ 1878 ASSERT(ire1->ire_addr == ire->ire_addr); 1879 if (!(ire1->ire_type & IRE_BROADCAST)) 1880 continue; 1881 1882 if (IRE_IS_CONDEMNED(ire1)) 1883 continue; 1884 1885 mp1 = copymsg(mp); 1886 if (mp1 == NULL) { 1887 /* Failed to deliver to one zone */ 1888 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1889 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1890 continue; 1891 } 1892 ira->ira_zoneid = ire1->ire_zoneid; 1893 ipha1 = (ipha_t *)mp1->b_rptr; 1894 ip_fanout_v4(mp1, ipha1, ira); 1895 /* 1896 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1897 * so we restore them for a potential next iteration 1898 */ 1899 ira->ira_pktlen = ira_pktlen; 1900 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1901 } 1902 irb_refrele(irb); 1903 /* Do the main ire */ 1904 ira->ira_zoneid = ire->ire_zoneid; 1905 ip_fanout_v4(mp, ipha, ira); 1906 } 1907 1908 /* 1909 * Handle multiple zones which want to receive the same multicast packets 1910 * on this ill by delivering a packet to each of them. 1911 * 1912 * Note that for packets delivered to transports we could instead do this 1913 * as part of the fanout code, but since we need to handle icmp_inbound 1914 * it is simpler to have multicast work the same as broadcast. 1915 * 1916 * The ip_fanout matching for multicast matches based on ilm independent of 1917 * zoneid since the zoneid restriction is applied when joining a multicast 1918 * group. 
1919 */ 1920 /* ARGSUSED */ 1921 static void 1922 ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1923 { 1924 ill_t *ill = ira->ira_ill; 1925 iaflags_t iraflags = ira->ira_flags; 1926 ip_stack_t *ipst = ill->ill_ipst; 1927 netstack_t *ns = ipst->ips_netstack; 1928 zoneid_t zoneid; 1929 mblk_t *mp1; 1930 ipha_t *ipha1; 1931 uint_t ira_pktlen = ira->ira_pktlen; 1932 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1933 1934 /* ire_recv_multicast has switched to the upper ill for IPMP */ 1935 ASSERT(!IS_UNDER_IPMP(ill)); 1936 1937 /* 1938 * If we don't have more than one shared-IP zone, or if 1939 * there are no members in anything but the global zone, 1940 * then just set the zoneid and proceed. 1941 */ 1942 if (ns->netstack_numzones == 1 || 1943 !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1944 GLOBAL_ZONEID)) { 1945 ira->ira_zoneid = GLOBAL_ZONEID; 1946 1947 /* If sender didn't want this zone to receive it, drop */ 1948 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1949 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1950 ip_drop_input("Multicast but wrong zoneid", mp, ill); 1951 freemsg(mp); 1952 return; 1953 } 1954 ip_fanout_v4(mp, ipha, ira); 1955 return; 1956 } 1957 1958 /* 1959 * Here we loop over all zoneids that have members in the group 1960 * and deliver a packet to ip_fanout for each zoneid. 1961 * 1962 * First find any members in the lowest numeric zoneid by looking for 1963 * first zoneid larger than -1 (ALL_ZONES). 1964 * We terminate the loop when we receive -1 (ALL_ZONES). 1965 */ 1966 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1967 for (; zoneid != ALL_ZONES; 1968 zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1969 /* 1970 * Avoid an extra copymsg/freemsg by skipping global zone here 1971 * and doing that at the end. 
1972 */ 1973 if (zoneid == GLOBAL_ZONEID) 1974 continue; 1975 1976 ira->ira_zoneid = zoneid; 1977 1978 /* If sender didn't want this zone to receive it, skip */ 1979 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1980 ira->ira_no_loop_zoneid == ira->ira_zoneid) 1981 continue; 1982 1983 mp1 = copymsg(mp); 1984 if (mp1 == NULL) { 1985 /* Failed to deliver to one zone */ 1986 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1987 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1988 continue; 1989 } 1990 ipha1 = (ipha_t *)mp1->b_rptr; 1991 ip_fanout_v4(mp1, ipha1, ira); 1992 /* 1993 * IPsec might have modified ira_pktlen and ira_ip_hdr_length 1994 * so we restore them for a potential next iteration 1995 */ 1996 ira->ira_pktlen = ira_pktlen; 1997 ira->ira_ip_hdr_length = ira_ip_hdr_length; 1998 } 1999 2000 /* Do the main ire */ 2001 ira->ira_zoneid = GLOBAL_ZONEID; 2002 /* If sender didn't want this zone to receive it, drop */ 2003 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 2004 ira->ira_no_loop_zoneid == ira->ira_zoneid) { 2005 ip_drop_input("Multicast but wrong zoneid", mp, ill); 2006 freemsg(mp); 2007 } else { 2008 ip_fanout_v4(mp, ipha, ira); 2009 } 2010 } 2011 2012 2013 /* 2014 * Determine the zoneid and IRAF_TX_* flags if trusted extensions 2015 * is in use. Updates ira_zoneid and ira_flags as a result. 2016 */ 2017 static void 2018 ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 2019 uint_t ip_hdr_length, ip_recv_attr_t *ira) 2020 { 2021 uint16_t *up; 2022 uint16_t lport; 2023 zoneid_t zoneid; 2024 2025 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 2026 2027 /* 2028 * If the packet is unlabeled we might allow read-down 2029 * for MAC_EXEMPT. Below we clear this if it is a multi-level 2030 * port (MLP). 2031 * Note that ira_tsl can be NULL here. 
2032 */ 2033 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2034 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2035 2036 if (ira->ira_zoneid != ALL_ZONES) 2037 return; 2038 2039 ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2040 2041 up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2042 switch (protocol) { 2043 case IPPROTO_TCP: 2044 case IPPROTO_SCTP: 2045 case IPPROTO_UDP: 2046 /* Caller ensures this */ 2047 ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2048 2049 /* 2050 * Only these transports support MLP. 2051 * We know their destination port numbers is in 2052 * the same place in the header. 2053 */ 2054 lport = up[1]; 2055 2056 /* 2057 * No need to handle exclusive-stack zones 2058 * since ALL_ZONES only applies to the shared IP instance. 2059 */ 2060 zoneid = tsol_mlp_findzone(protocol, lport); 2061 /* 2062 * If no shared MLP is found, tsol_mlp_findzone returns 2063 * ALL_ZONES. In that case, we assume it's SLP, and 2064 * search for the zone based on the packet label. 2065 * 2066 * If there is such a zone, we prefer to find a 2067 * connection in it. Otherwise, we look for a 2068 * MAC-exempt connection in any zone whose label 2069 * dominates the default label on the packet. 
2070 */ 2071 if (zoneid == ALL_ZONES) 2072 zoneid = tsol_attr_to_zoneid(ira); 2073 else 2074 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2075 break; 2076 default: 2077 /* Handle shared address for other protocols */ 2078 zoneid = tsol_attr_to_zoneid(ira); 2079 break; 2080 } 2081 ira->ira_zoneid = zoneid; 2082 } 2083 2084 /* 2085 * Increment checksum failure statistics 2086 */ 2087 static void 2088 ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2089 { 2090 ip_stack_t *ipst = ill->ill_ipst; 2091 2092 switch (protocol) { 2093 case IPPROTO_TCP: 2094 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2095 2096 if (hck_flags & HCK_FULLCKSUM) 2097 IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2098 else if (hck_flags & HCK_PARTIALCKSUM) 2099 IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2100 else 2101 IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2102 break; 2103 case IPPROTO_UDP: 2104 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2105 if (hck_flags & HCK_FULLCKSUM) 2106 IP_STAT(ipst, ip_udp_in_full_hw_cksum_err); 2107 else if (hck_flags & HCK_PARTIALCKSUM) 2108 IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2109 else 2110 IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2111 break; 2112 case IPPROTO_ICMP: 2113 BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2114 break; 2115 default: 2116 ASSERT(0); 2117 break; 2118 } 2119 } 2120 2121 /* Calculate the IPv4 pseudo-header checksum */ 2122 uint32_t 2123 ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2124 { 2125 uint_t ulp_len; 2126 uint32_t cksum; 2127 uint8_t protocol = ira->ira_protocol; 2128 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2129 2130 #define iphs ((uint16_t *)ipha) 2131 2132 switch (protocol) { 2133 case IPPROTO_TCP: 2134 ulp_len = ira->ira_pktlen - ip_hdr_length; 2135 2136 /* Protocol and length */ 2137 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2138 /* IP addresses */ 2139 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2140 break; 2141 2142 case IPPROTO_UDP: { 2143 udpha_t *udpha; 2144 2145 
udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2146 2147 /* Protocol and length */ 2148 cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2149 /* IP addresses */ 2150 cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2151 break; 2152 } 2153 2154 default: 2155 cksum = 0; 2156 break; 2157 } 2158 #undef iphs 2159 return (cksum); 2160 } 2161 2162 2163 /* 2164 * Software verification of the ULP checksums. 2165 * Returns B_TRUE if ok. 2166 * Increments statistics of failed. 2167 */ 2168 static boolean_t 2169 ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2170 { 2171 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2172 uint32_t cksum; 2173 uint8_t protocol = ira->ira_protocol; 2174 uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2175 2176 IP_STAT(ipst, ip_in_sw_cksum); 2177 2178 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2179 2180 cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2181 cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2182 if (cksum == 0) 2183 return (B_TRUE); 2184 2185 ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2186 return (B_FALSE); 2187 } 2188 2189 /* 2190 * Verify the ULP checksums. 2191 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2192 * algorithm. 2193 * Increments statistics if failed. 
 */
static boolean_t
ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha,
    ip_recv_attr_t *ira)
{
	/* Use the underlying ill for hardware-offload capability checks */
	ill_t		*ill = ira->ira_rill;
	uint16_t	hck_flags;
	uint32_t	cksum;
	mblk_t		*mp1;
	int32_t		len;
	uint8_t		protocol = ira->ira_protocol;
	uint16_t	ip_hdr_length = ira->ira_ip_hdr_length;


	switch (protocol) {
	case IPPROTO_TCP:
		break;

	case IPPROTO_UDP: {
		udpha_t	*udpha;

		udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length);
		if (udpha->uha_checksum == 0) {
			/* Packet doesn't have a UDP checksum */
			return (B_TRUE);
		}
		break;
	}
	case IPPROTO_SCTP: {
		sctp_hdr_t	*sctph;
		uint32_t	pktsum;

		sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length);
#ifdef	DEBUG
		if (skip_sctp_cksum)
			return (B_TRUE);
#endif
		/*
		 * Zero the checksum field while computing, then restore
		 * the wire value so the packet is unchanged on return.
		 */
		pktsum = sctph->sh_chksum;
		sctph->sh_chksum = 0;
		cksum = sctp_cksum(mp, ip_hdr_length);
		sctph->sh_chksum = pktsum;
		if (cksum == pktsum)
			return (B_TRUE);

		/*
		 * Defer until later whether a bad checksum is ok
		 * in order to allow RAW sockets to use Adler checksum
		 * with SCTP.
		 */
		ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
		return (B_TRUE);
	}

	default:
		/* No ULP checksum to verify. */
		return (B_TRUE);
	}

	/*
	 * Revert to software checksum calculation if the interface
	 * isn't capable of checksum offload.
	 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
	 * Note: IRAF_NO_HW_CKSUM is not currently used.
	 */
	ASSERT(!IS_IPMP(ill));
	if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
	    !dohwcksum) {
		return (ip_input_sw_cksum_v4(mp, ipha, ira));
	}

	hck_flags = DB_CKSUMFLAGS(mp);

	/*
	 * We apply this for all ULP protocols. Does the HW know to
	 * not set the flags for SCTP and other protocols.
	 */
	if (hck_flags & HCK_FULLCKSUM_OK) {
		/*
		 * Hardware has already verified the checksum.
		 */
		return (B_TRUE);
	}

	if (hck_flags & HCK_FULLCKSUM) {
		/*
		 * Full checksum has been computed by the hardware
		 * and has been attached.  If the driver wants us to
		 * verify the correctness of the attached value, in
		 * order to protect against faulty hardware, compare
		 * it against -0 (0xFFFF) to see if it's valid.
		 */
		cksum = DB_CKSUM16(mp);
		if (cksum == 0xFFFF)
			return (B_TRUE);
		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}

	mp1 = mp->b_cont;
	/*
	 * The partial-checksum path is only usable when the packet has
	 * at most two mblks and any data preceding the ULP header that
	 * the hardware summed (the gap between DB_CKSUMSTART and the
	 * ULP header) is an even number of bytes.
	 */
	if ((hck_flags & HCK_PARTIALCKSUM) &&
	    (mp1 == NULL || mp1->b_cont == NULL) &&
	    ip_hdr_length >= DB_CKSUMSTART(mp) &&
	    ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
		uint32_t	adj;
		uchar_t		*cksum_start;

		cksum = ip_input_cksum_pseudo_v4(ipha, ira);

		cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp));

		/*
		 * Partial checksum has been calculated by hardware
		 * and attached to the packet; in addition, any
		 * prepended extraneous data is even byte aligned,
		 * and there are at most two mblks associated with
		 * the packet.  If any such data exists, we adjust
		 * the checksum; also take care any postpended data.
		 */
		IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
		/*
		 * One's complement subtract extraneous checksum
		 */
		cksum += DB_CKSUM16(mp);
		if (adj >= cksum)
			cksum = ~(adj - cksum) & 0xFFFF;
		else
			cksum -= adj;
		/* Fold the carries twice; result of 0xFFFF means valid */
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
		if (!(~cksum & 0xFFFF))
			return (B_TRUE);

		ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill);
		return (B_FALSE);
	}
	/* Hardware result unusable for this packet; do it in software */
	return (ip_input_sw_cksum_v4(mp, ipha, ira));
}


/*
 * Handle fanout of received packets.
 * Unicast packets that are looped back (from ire_send_local_v4) and packets
 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
 *
 * IPQoS Notes
 * Before sending it to the client, invoke IPPF processing. Policy processing
 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
 */
void
ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	iaflags_t	iraflags = ira->ira_flags;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint8_t		protocol = ipha->ipha_protocol;
	conn_t		*connp;
#define	rptr	((uchar_t *)ipha)
	uint_t		ip_hdr_length;
	uint_t		min_ulp_header_length;
	int		offset;
	ssize_t		len;
	netstack_t	*ns = ipst->ips_netstack;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	ill_t		*rill = ira->ira_rill;

	ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length));

	ip_hdr_length = ira->ira_ip_hdr_length;
	ira->ira_protocol = protocol;

	/*
	 * Time for IPP once we've done reassembly and IPsec.
	 * We skip this for loopback packets since we don't do IPQoS
	 * on loopback.
 */
	if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
	    !(iraflags & IRAF_LOOPBACK) &&
	    (protocol != IPPROTO_ESP && protocol != IPPROTO_AH)) {
		/*
		 * Use the interface on which the packet arrived - not where
		 * the IP address is hosted.
		 */
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			return;
		}
	}

	/* Determine the minimum required size of the upper-layer header */
	/* Need to do this for at least the set of ULPs that TX handles. */
	switch (protocol) {
	case IPPROTO_TCP:
		min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
		break;
	case IPPROTO_SCTP:
		min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
		break;
	case IPPROTO_UDP:
		min_ulp_header_length = UDPH_SIZE;
		break;
	case IPPROTO_ICMP:
		min_ulp_header_length = ICMPH_SIZE;
		break;
	default:
		min_ulp_header_length = 0;
		break;
	}
	/* Make sure we have the min ULP header length */
	len = mp->b_wptr - rptr;
	if (len < ip_hdr_length + min_ulp_header_length) {
		if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) {
			/* The packet itself is too short - drop it */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return;
		}
		/* Headers straddle mblks; pull them into the first one */
		IP_STAT(ipst, ip_recv_pullup);
		ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
		    ira);
		if (ipha == NULL)
			goto discard;
		len = mp->b_wptr - rptr;
	}

	/*
	 * If trusted extensions then determine the zoneid and TX specific
	 * ira_flags.
	 */
	if (iraflags & IRAF_SYSTEM_LABELED) {
		/* This can update ira->ira_flags and ira->ira_zoneid */
		ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira);
		iraflags = ira->ira_flags;
	}


	/* Verify ULP checksum. Handles TCP, UDP, and SCTP */
	if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
		if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) {
			/* Bad checksum. Stats are already incremented */
			ip_drop_input("Bad ULP checksum", mp, ill);
			freemsg(mp);
			return;
		}
		/* IRAF_SCTP_CSUM_ERR could have been set */
		iraflags = ira->ira_flags;
	}
	switch (protocol) {
	case IPPROTO_TCP:
		/* For TCP, discard broadcast and multicast packets. */
		if (iraflags & IRAF_MULTIBROADCAST)
			goto discard;

		/* First mblk contains IP+TCP headers per above check */
		ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);

		/*
		 * TCP options present?  Byte 12 of the TCP header holds the
		 * data offset in its high nibble (in 32-bit words); 5 means
		 * no options.
		 */
		offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4;
		if (offset != 5) {
			/* An offset below 5 is a malformed TCP header */
			if (offset < 5)
				goto discard;

			/*
			 * There must be TCP options.
			 * Make sure we can grab them.
			 */
			offset <<= 2;
			offset += ip_hdr_length;
			if (len < offset) {
				if (ira->ira_pktlen < offset) {
					BUMP_MIB(ill->ill_ip_mib,
					    ipIfStatsInTruncatedPkts);
					ip_drop_input(
					    "ipIfStatsInTruncatedPkts",
					    mp, ill);
					freemsg(mp);
					return;
				}
				IP_STAT(ipst, ip_recv_pullup);
				ipha = ip_pullup(mp, offset, ira);
				if (ipha == NULL)
					goto discard;
				len = mp->b_wptr - rptr;
			}
		}

		/*
		 * Pass up a squeue hint to tcp.
		 * If ira_sqp is already set (this is loopback) we leave it
		 * alone.
		 */
		if (ira->ira_sqp == NULL) {
			ira->ira_sqp = ip_squeue_get(ira->ira_ring);
		}

		/* Look for AF_INET or AF_INET6 that matches */
		connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length,
		    ira, ipst);
		if (connp == NULL) {
			/* Send the TH_RST */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
			tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
			return;
		}
		/* Enforce a configured minimum TTL (anti-spoofing knob) */
		if (connp->conn_min_ttl != 0 &&
		    connp->conn_min_ttl > ira->ira_ttl) {
			CONN_DEC_REF(connp);
			goto discard;
		}
		/* Conn is bound to a specific interface; enforce it */
		if (connp->conn_incoming_ifindex != 0 &&
		    connp->conn_incoming_ifindex != ira->ira_ruifindex) {
			CONN_DEC_REF(connp);

			/* Send the TH_RST */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
			tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) ||
		    (iraflags & IRAF_IPSEC_SECURE)) {
			mp = ipsec_check_inbound_policy(mp, connp,
			    ipha, NULL, ira);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				/* Note that mp is NULL */
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Found a client; up it goes */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		/*
		 * Clear the ill pointers while the packet is out of our
		 * hands; they are restored before returning.
		 */
		ira->ira_ill = ira->ira_rill = NULL;
		if (!IPCL_IS_TCP(connp)) {
			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
			(connp->conn_recv)(connp, mp, NULL, ira);
			CONN_DEC_REF(connp);
			ira->ira_ill = ill;
			ira->ira_rill = rill;
			return;
		}

		/*
		 * We do different processing whether called from
		 * ip_accept_tcp and we match the target, don't match
		 * the target, and when we are called by ip_input.
		 */
		if (iraflags & IRAF_TARGET_SQP) {
			if (ira->ira_target_sqp == connp->conn_sqp) {
				mblk_t *attrmp;

				attrmp = ip_recv_attr_to_mblk(ira);
				if (attrmp == NULL) {
					BUMP_MIB(ill->ill_ip_mib,
					    ipIfStatsInDiscards);
					ip_drop_input("ipIfStatsInDiscards",
					    mp, ill);
					freemsg(mp);
					CONN_DEC_REF(connp);
				} else {
					SET_SQUEUE(attrmp, connp->conn_recv,
					    connp);
					attrmp->b_cont = mp;
					ASSERT(ira->ira_target_sqp_mp == NULL);
					ira->ira_target_sqp_mp = attrmp;
					/*
					 * Conn ref release when drained from
					 * the squeue.
					 */
				}
			} else {
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    connp->conn_recv, connp, ira, SQ_FILL,
				    SQTAG_IP_TCP_INPUT);
			}
		} else {
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
			    connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT);
		}
		ira->ira_ill = ill;
		ira->ira_rill = rill;
		return;

	case IPPROTO_SCTP: {
		sctp_hdr_t	*sctph;
		in6_addr_t	map_src, map_dst;
		uint32_t	ports;	/* Source and destination ports */
		sctp_stack_t	*sctps = ipst->ips_netstack->netstack_sctp;

		/* For SCTP, discard broadcast and multicast packets. */
		if (iraflags & IRAF_MULTIBROADCAST)
			goto discard;

		/*
		 * Since there is no SCTP h/w cksum support yet, just
		 * clear the flag.
		 */
		DB_CKSUMFLAGS(mp) = 0;

		/* Length ensured above */
		ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
		sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);

		/* get the ports (adjacent 16-bit fields read as one word) */
		ports = *(uint32_t *)&sctph->sh_sport;

		/* SCTP fanout works on v4-mapped IPv6 addresses */
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		if (iraflags & IRAF_SCTP_CSUM_ERR) {
			/*
			 * No potential sctp checksum errors go to the Sun
			 * sctp stack however they might be Adler-32 summed
			 * packets a userland stack bound to a raw IP socket
			 * could reasonably use. Note though that Adler-32 is
			 * a long deprecated algorithm and customer sctp
			 * networks should eventually migrate to CRC-32 at
			 * which time this facility should be removed.
			 */
			ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
			return;
		}
		connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp,
		    sctps, sctph);
		if (connp == NULL) {
			/* Check for raw socket or OOTB handling */
			ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
			return;
		}
		if (connp->conn_incoming_ifindex != 0 &&
		    connp->conn_incoming_ifindex != ira->ira_ruifindex) {
			CONN_DEC_REF(connp);
			/* Check for raw socket or OOTB handling */
			ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira);
			return;
		}

		/* Found a client; up it goes */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		sctp_input(connp, ipha, NULL, mp, ira);
		/* sctp_input does a rele of the sctp_t */
		return;
	}

	case IPPROTO_UDP:
		/* First mblk contains IP+UDP headers as checked above */
		ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);

		if (iraflags & IRAF_MULTIBROADCAST) {
			uint16_t *up;	/* Pointer to ports in ULP header */

			up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length);
			ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira);
			return;
		}

		/* Look for AF_INET or AF_INET6 that matches */
		connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length,
		    ira, ipst);
		if (connp == NULL) {
no_udp_match:
			/*
			 * No bound UDP conn: hand off to raw-socket fanout
			 * if anyone listens on the protocol, otherwise send
			 * ICMP port unreachable.
			 */
			if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP].
			    connf_head != NULL) {
				ASSERT(ira->ira_protocol == IPPROTO_UDP);
				ip_fanout_proto_v4(mp, ipha, ira);
			} else {
				ip_fanout_send_icmp_v4(mp,
				    ICMP_DEST_UNREACHABLE,
				    ICMP_PORT_UNREACHABLE, ira);
			}
			return;

		}
		if (connp->conn_incoming_ifindex != 0 &&
		    connp->conn_incoming_ifindex != ira->ira_ruifindex) {
			CONN_DEC_REF(connp);
			goto no_udp_match;
		}
		if (connp->conn_min_ttl != 0 &&
		    connp->conn_min_ttl > ira->ira_ttl) {
			CONN_DEC_REF(connp);
			goto discard;
		}
		/* Flow-controlled receiver: count overflow and drop */
		if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
		    !canputnext(connp->conn_rq)) {
			CONN_DEC_REF(connp);
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
			ip_drop_input("udpIfStatsInOverflows", mp, ill);
			freemsg(mp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) ||
		    (iraflags & IRAF_IPSEC_SECURE)) {
			mp = ipsec_check_inbound_policy(mp, connp,
			    ipha, NULL, ira);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				/* Note that mp is NULL */
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				CONN_DEC_REF(connp);
				return;
			}
		}
		/*
		 * Remove 0-spi if it's 0, or move everything behind
		 * the UDP header over it and forward to ESP via
		 * ip_fanout_v4().
		 */
		if (connp->conn_udp->udp_nat_t_endpoint) {
			if (iraflags & IRAF_IPSEC_SECURE) {
				ip_drop_packet(mp, B_TRUE, ira->ira_ill,
				    DROPPER(ipss, ipds_esp_nat_t_ipsec),
				    &ipss->ipsec_dropper);
				CONN_DEC_REF(connp);
				return;
			}

			mp = zero_spi_check(mp, ira);
			if (mp == NULL) {
				/*
				 * Packet was consumed - probably sent to
				 * ip_fanout_v4.
				 */
				CONN_DEC_REF(connp);
				return;
			}
			/* Else continue like a normal UDP packet. */
			ipha = (ipha_t *)mp->b_rptr;
			protocol = ipha->ipha_protocol;
			ira->ira_protocol = protocol;
		}
		/* Found a client; up it goes */
		IP_STAT(ipst, ip_udp_fannorm);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		ira->ira_ill = ira->ira_rill = NULL;
		(connp->conn_recv)(connp, mp, NULL, ira);
		CONN_DEC_REF(connp);
		ira->ira_ill = ill;
		ira->ira_rill = rill;
		return;
	default:
		break;
	}

	/*
	 * Clear hardware checksumming flag as it is currently only
	 * used by TCP and UDP.
	 */
	DB_CKSUMFLAGS(mp) = 0;

	switch (protocol) {
	case IPPROTO_ICMP:
		/*
		 * We need to accommodate icmp messages coming in clear
		 * until we get everything secure from the wire. If
		 * icmp_accept_clear_messages is zero we check with
		 * the global policy and act accordingly. If it is
		 * non-zero, we accept the message without any checks.
		 * But *this does not mean* that this will be delivered
		 * to RAW socket clients. By accepting we might send
		 * replies back, change our MTU value etc.,
		 * but delivery to the ULP/clients depends on their
		 * policy dispositions.
		 */
		if (ipst->ips_icmp_accept_clear_messages == 0) {
			mp = ipsec_check_global_policy(mp, NULL,
			    ipha, NULL, ira, ns);
			if (mp == NULL)
				return;
		}

		/*
		 * On a labeled system, we have to check whether the zone
		 * itself is permitted to receive raw traffic.
		 */
		if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
			if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
				BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors);
				ip_drop_input("tsol_can_accept_raw", mp, ill);
				freemsg(mp);
				return;
			}
		}

		/*
		 * ICMP header checksum, including checksum field,
		 * should be zero.
		 */
		if (IP_CSUM(mp, ip_hdr_length, 0)) {
			BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs);
			ip_drop_input("icmpInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		mp = icmp_inbound_v4(mp, ira);
		if (mp == NULL) {
			/* No need to pass to RAW sockets */
			return;
		}
		break;

	case IPPROTO_IGMP:
		/*
		 * If we are not willing to accept IGMP packets in clear,
		 * then check with global policy.
		 */
		if (ipst->ips_igmp_accept_clear_messages == 0) {
			mp = ipsec_check_global_policy(mp, NULL,
			    ipha, NULL, ira, ns);
			if (mp == NULL)
				return;
		}
		if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_can_accept_raw(mp, ira, B_TRUE)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		/*
		 * Validate checksum
		 */
		if (IP_CSUM(mp, ip_hdr_length, 0)) {
			++ipst->ips_igmpstat.igps_rcv_badsum;
			ip_drop_input("igps_rcv_badsum", mp, ill);
			freemsg(mp);
			return;
		}

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		mp = igmp_input(mp, ira);
		if (mp == NULL) {
			/* Bad packet - discarded by igmp_input */
			return;
		}
		break;
	case IPPROTO_PIM:
		/*
		 * If we are not willing to accept PIM packets in clear,
		 * then check with global policy.
		 */
		if (ipst->ips_pim_accept_clear_messages == 0) {
			mp = ipsec_check_global_policy(mp, NULL,
			    ipha, NULL, ira, ns);
			if (mp == NULL)
				return;
		}
		if ((ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_can_accept_raw(mp, ira, B_TRUE)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Checksum is verified in pim_input */
		mp = pim_input(mp, ira);
		if (mp == NULL) {
			/* Bad packet - discarded by pim_input */
			return;
		}
		break;
	case IPPROTO_AH:
	case IPPROTO_ESP: {
		/*
		 * Fast path for AH/ESP.
		 */
		netstack_t	*ns = ipst->ips_netstack;
		ipsec_stack_t	*ipss = ns->netstack_ipsec;

		IP_STAT(ipst, ipsec_proto_ahesp);

		if (!ipsec_loaded(ipss)) {
			ip_proto_not_sup(mp, ira);
			return;
		}

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		/* select inbound SA and have IPsec process the pkt */
		if (protocol == IPPROTO_ESP) {
			esph_t *esph;
			boolean_t esp_in_udp_sa;
			boolean_t esp_in_udp_packet;

			mp = ipsec_inbound_esp_sa(mp, ira, &esph);
			if (mp == NULL)
				return;

			ASSERT(esph != NULL);
			ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
			ASSERT(ira->ira_ipsec_esp_sa != NULL);
			ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);

			esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags &
			    IPSA_F_NATT) != 0);
			esp_in_udp_packet =
			    (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0;

			/*
			 * The following is a fancy, but quick, way of saying:
			 * ESP-in-UDP SA and Raw ESP packet --> drop
			 * OR
			 * ESP SA and ESP-in-UDP packet --> drop
			 */
			if (esp_in_udp_sa != esp_in_udp_packet) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_packet(mp, B_TRUE, ira->ira_ill,
				    DROPPER(ipss, ipds_esp_no_sa),
				    &ipss->ipsec_dropper);
				return;
			}
			mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
			    ira);
		} else {
			ah_t *ah;

			mp = ipsec_inbound_ah_sa(mp, ira, &ah);
			if (mp == NULL)
				return;

			ASSERT(ah != NULL);
			ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
			ASSERT(ira->ira_ipsec_ah_sa != NULL);
			ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
			mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
			    ira);
		}

		if (mp == NULL) {
			/*
			 * Either it failed or is pending. In the former case
			 * ipIfStatsInDiscards was increased.
			 */
			return;
		}
		/* we're done with IPsec processing, send it up */
		ip_input_post_ipsec(mp, ira);
		return;
	}
	case IPPROTO_ENCAP: {
		ipha_t		*inner_ipha;

		/*
		 * Handle self-encapsulated packets (IP-in-IP where
		 * the inner addresses == the outer addresses).
		 */
		if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) >
		    mp->b_wptr) {
			if (ira->ira_pktlen <
			    ip_hdr_length + sizeof (ipha_t)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInTruncatedPkts);
				ip_drop_input("ipIfStatsInTruncatedPkts",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length +
			    sizeof (ipha_t) - mp->b_rptr, ira);
			if (ipha == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
		}
		inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length);
		/*
		 * Check the sanity of the inner IP header.
		 */
		if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		if (inner_ipha->ipha_src != ipha->ipha_src ||
		    inner_ipha->ipha_dst != ipha->ipha_dst) {
			/* We fallthru to iptun fanout below */
			goto iptun;
		}

		/*
		 * Self-encapsulated tunnel packet. Remove
		 * the outer IP header and fanout again.
		 * We also need to make sure that the inner
		 * header is pulled up until options.
		 */
		mp->b_rptr = (uchar_t *)inner_ipha;
		ipha = inner_ipha;
		ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) {
			if (ira->ira_pktlen <
			    (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInTruncatedPkts);
				ip_drop_input("ipIfStatsInTruncatedPkts",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = ip_pullup(mp,
			    (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira);
			if (ipha == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
		}
		if (ip_hdr_length > sizeof (ipha_t)) {
			/* We got options on the inner packet. */
			ipaddr_t	dst = ipha->ipha_dst;
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			if (error != 0) {
				/*
				 * An ICMP error has been sent and the packet
				 * has been dropped.
				 */
				return;
			}
			if (dst != ipha->ipha_dst) {
				/*
				 * Someone put a source-route in
				 * the inside header of a self-
				 * encapsulated packet. Drop it
				 * with extreme prejudice and let
				 * the sender know.
				 */
				ip_drop_input("ICMP_SOURCE_ROUTE_FAILED",
				    mp, ill);
				icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED,
				    ira);
				return;
			}
		}
		if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
			/*
			 * This means that somebody is sending
			 * Self-encapsulated packets without AH/ESP.
			 *
			 * Send this packet to find a tunnel endpoint.
			 * if I can't find one, an ICMP
			 * PROTOCOL_UNREACHABLE will get sent.
			 */
			protocol = ipha->ipha_protocol;
			ira->ira_protocol = protocol;
			goto iptun;
		}

		/* Update based on removed IP header */
		ira->ira_ip_hdr_length = ip_hdr_length;
		ira->ira_pktlen = ntohs(ipha->ipha_length);

		if (ira->ira_flags & IRAF_IPSEC_DECAPS) {
			/*
			 * This packet is self-encapsulated multiple
			 * times. We don't want to recurse infinitely.
			 * To keep it simple, drop the packet.
			 */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return;
		}
		ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
		ira->ira_flags |= IRAF_IPSEC_DECAPS;

		ip_input_post_ipsec(mp, ira);
		return;
	}

iptun:	/* IPPROTO_ENCAPS that is not self-encapsulated */
	case IPPROTO_IPV6:
		/* iptun will verify trusted label */
		connp = ipcl_classify_v4(mp, protocol, ip_hdr_length,
		    ira, ipst);
		if (connp != NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
			ira->ira_ill = ira->ira_rill = NULL;
			(connp->conn_recv)(connp, mp, NULL, ira);
			CONN_DEC_REF(connp);
			ira->ira_ill = ill;
			ira->ira_rill = rill;
			return;
		}
		/* FALLTHRU */
	default:
		/*
		 * On a labeled system, we have to check whether the zone
		 * itself is permitted to receive raw traffic.
		 */
		if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
			if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				freemsg(mp);
				return;
			}
		}
		break;
	}

	/*
	 * The above input functions may have returned the pulled up message.
	 * So ipha need to be reinitialized.
	 */
	ipha = (ipha_t *)mp->b_rptr;
	ira->ira_protocol = protocol = ipha->ipha_protocol;
	if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) {
		/*
		 * No user-level listener for these packets.
		 * Check for IPPROTO_ENCAP...
		 */
		if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) {
			/*
			 * Check policy here,
			 * THEN ship off to ip_mroute_decap().
			 *
			 * BTW,  If I match a configured IP-in-IP
			 * tunnel above, this path will not be reached, and
			 * ip_mroute_decap will never be called.
			 */
			mp = ipsec_check_global_policy(mp, connp,
			    ipha, NULL, ira, ns);
			if (mp != NULL) {
				ip_mroute_decap(mp, ira);
			} /* Else we already freed everything! */
		} else {
			ip_proto_not_sup(mp, ira);
		}
		return;
	}

	/*
	 * Handle fanout to raw sockets.  There
	 * can be more than one stream bound to a particular
	 * protocol.  When this is the case, each one gets a copy
	 * of any incoming packets.
	 */
	ASSERT(ira->ira_protocol == ipha->ipha_protocol);
	ip_fanout_proto_v4(mp, ipha, ira);
	return;

discard:
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	ip_drop_input("ipIfStatsInDiscards", mp, ill);
	freemsg(mp);
#undef rptr
}