1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/udp_impl.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue_impl.h> 102 #include <sys/squeue.h> 103 104 #include <sys/tsol/label.h> 105 #include 
<sys/tsol/tnet.h> 106 107 #include <rpc/pmap_prot.h> 108 109 /* Temporary; for CR 6451644 work-around */ 110 #include <sys/ethernet.h> 111 112 extern int ip_squeue_flag; 113 114 /* 115 * Naming conventions: 116 * These rules should be judiciously applied 117 * if there is a need to identify something as IPv6 versus IPv4 118 * IPv6 functions will end with _v6 in the ip module. 119 * IPv6 functions will end with _ipv6 in the transport modules. 120 * IPv6 macros: 121 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 122 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 123 * And then there are ..V4_PART_OF_V6. 124 * The intent is that macros in the ip module end with _V6. 125 * IPv6 global variables will start with ipv6_ 126 * IPv6 structures will start with ipv6 127 * IPv6 defined constants should start with IPV6_ 128 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 129 */ 130 131 /* 132 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 133 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 134 * from IANA. This mechanism will remain in effect until an official 135 * number is obtained. 
 */
uchar_t ip6opt_ls;

/*
 * Well-known IPv6 address constants.  Each in6_addr_t is initialized as
 * four 32-bit words; where a word is neither all-zeros nor all-ones the
 * literal is given twice, byte-swapped under _BIG_ENDIAN, so that the
 * bytes end up in the same order in memory on either kind of host.
 */

/* ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
/* :: */
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 (the low 24 bits are left zero here) */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * True when mp is non-NULL and its first block holds at least
 * 2 * IPV6_ADDR_LEN bytes between b_rptr and b_wptr.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/*
 * mblk validation status codes.
 * NOTE(review): their users are outside the visible part of this file.
 */
#define	IP6_MBLK_OK		0
#define	IP6_MBLK_HDR_ERR	1
#define	IP6_MBLK_LEN_ERR	2

/* Forward declarations for functions private to this file. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *, ip_stack_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* 
(re)xmit_interval in milliseconds */ 230 4 /* max # of requests to buffer */ 231 /* anything else filled in by the code */ 232 }; 233 234 /* 235 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 236 * The message has already been checksummed and if needed, 237 * a copy has been made to be sent any interested ICMP client (conn) 238 * Note that this is different than icmp_inbound() which does the fanout 239 * to conn's as well as local processing of the ICMP packets. 240 * 241 * All error messages are passed to the matching transport stream. 242 * 243 * Zones notes: 244 * The packet is only processed in the context of the specified zone: typically 245 * only this zone will reply to an echo request. This means that the caller must 246 * call icmp_inbound_v6() for each relevant zone. 247 */ 248 static void 249 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 250 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 251 { 252 icmp6_t *icmp6; 253 ip6_t *ip6h; 254 boolean_t interested; 255 ip6i_t *ip6i; 256 in6_addr_t origsrc; 257 ire_t *ire; 258 mblk_t *first_mp; 259 ipsec_in_t *ii; 260 ip_stack_t *ipst = ill->ill_ipst; 261 262 ASSERT(ill != NULL); 263 first_mp = mp; 264 if (mctl_present) { 265 mp = first_mp->b_cont; 266 ASSERT(mp != NULL); 267 268 ii = (ipsec_in_t *)first_mp->b_rptr; 269 ASSERT(ii->ipsec_in_type == IPSEC_IN); 270 } 271 272 ip6h = (ip6_t *)mp->b_rptr; 273 274 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 275 276 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 277 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 278 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 279 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 280 freemsg(first_mp); 281 return; 282 } 283 ip6h = (ip6_t *)mp->b_rptr; 284 } 285 if (ipst->ips_icmp_accept_clear_messages == 0) { 286 first_mp = ipsec_check_global_policy(first_mp, NULL, 287 NULL, ip6h, mctl_present, ipst->ips_netstack); 288 if (first_mp == NULL) 289 
return; 290 } 291 292 /* 293 * On a labeled system, we have to check whether the zone itself is 294 * permitted to receive raw traffic. 295 */ 296 if (is_system_labeled()) { 297 if (zoneid == ALL_ZONES) 298 zoneid = tsol_packet_to_zoneid(mp); 299 if (!tsol_can_accept_raw(mp, B_FALSE)) { 300 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 301 zoneid)); 302 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 303 freemsg(first_mp); 304 return; 305 } 306 } 307 308 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 309 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 310 icmp6->icmp6_code)); 311 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 312 313 /* Initiate IPPF processing here */ 314 if (IP6_IN_IPP(flags, ipst)) { 315 316 /* 317 * If the ifindex changes due to SIOCSLIFINDEX 318 * packet may return to IP on the wrong ill. 319 */ 320 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 321 if (mp == NULL) { 322 if (mctl_present) { 323 freeb(first_mp); 324 } 325 return; 326 } 327 } 328 329 switch (icmp6->icmp6_type) { 330 case ICMP6_DST_UNREACH: 331 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 332 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 334 break; 335 336 case ICMP6_TIME_EXCEEDED: 337 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 338 break; 339 340 case ICMP6_PARAM_PROB: 341 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 342 break; 343 344 case ICMP6_PACKET_TOO_BIG: 345 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 346 zoneid); 347 return; 348 case ICMP6_ECHO_REQUEST: 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 350 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 351 !ipst->ips_ipv6_resp_echo_mcast) 352 break; 353 354 /* 355 * We must have exclusive use of the mblk to convert it to 356 * a response. 357 * If not, we copy it. 
358 */ 359 if (mp->b_datap->db_ref > 1) { 360 mblk_t *mp1; 361 362 mp1 = copymsg(mp); 363 freemsg(mp); 364 if (mp1 == NULL) { 365 BUMP_MIB(ill->ill_icmp6_mib, 366 ipv6IfIcmpInErrors); 367 if (mctl_present) 368 freeb(first_mp); 369 return; 370 } 371 mp = mp1; 372 ip6h = (ip6_t *)mp->b_rptr; 373 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 374 if (mctl_present) 375 first_mp->b_cont = mp; 376 else 377 first_mp = mp; 378 } 379 380 /* 381 * Turn the echo into an echo reply. 382 * Remove any extension headers (do not reverse a source route) 383 * and clear the flow id (keep traffic class for now). 384 */ 385 if (hdr_length != IPV6_HDR_LEN) { 386 int i; 387 388 for (i = 0; i < IPV6_HDR_LEN; i++) 389 mp->b_rptr[hdr_length - i - 1] = 390 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 391 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 392 ip6h = (ip6_t *)mp->b_rptr; 393 ip6h->ip6_nxt = IPPROTO_ICMPV6; 394 hdr_length = IPV6_HDR_LEN; 395 } 396 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 397 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 398 399 ip6h->ip6_plen = 400 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 401 origsrc = ip6h->ip6_src; 402 /* 403 * Reverse the source and destination addresses. 404 * If the return address is a multicast, zero out the source 405 * (ip_wput_v6 will set an address). 406 */ 407 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 408 ip6h->ip6_src = ipv6_all_zeros; 409 ip6h->ip6_dst = origsrc; 410 } else { 411 ip6h->ip6_src = ip6h->ip6_dst; 412 ip6h->ip6_dst = origsrc; 413 } 414 415 /* set the hop limit */ 416 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 417 418 /* 419 * Prepare for checksum by putting icmp length in the icmp 420 * checksum field. The checksum is calculated in ip_wput_v6. 421 */ 422 icmp6->icmp6_cksum = ip6h->ip6_plen; 423 /* 424 * ICMP echo replies should go out on the same interface 425 * the request came on as probes used by in.mpathd for 426 * detecting NIC failures are ECHO packets. 
We turn-off load 427 * spreading by allocating a ip6i and setting ip6i_attach_if 428 * to B_TRUE which is handled both by ip_wput_v6 and 429 * ip_newroute_v6. If we don't turnoff load spreading, 430 * the packets might get dropped if there are no 431 * non-FAILED/INACTIVE interfaces for it to go out on and 432 * in.mpathd would wrongly detect a failure or mis-detect 433 * a NIC failure as a link failure. As load spreading can 434 * happen only if ill_group is not NULL, we do only for 435 * that case and this does not affect the normal case. 436 * 437 * We force this only on echo packets that came from on-link 438 * hosts. We restrict this to link-local addresses which 439 * is used by in.mpathd for probing. In the IPv6 case, 440 * default routes typically have an ire_ipif pointer and 441 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 442 * might work. As a default route out of this interface 443 * may not be present, enforcing this packet to go out in 444 * this case may not work. 445 */ 446 if (ill->ill_group != NULL && 447 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 448 /* 449 * If we are sending replies to ourselves, don't 450 * set ATTACH_IF as we may not be able to find 451 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 452 * causes ip_wput_v6 to look for an IRE_LOCAL on 453 * "ill" which it may not find and will try to 454 * create an IRE_CACHE for our local address. Once 455 * we do this, we will try to forward all packets 456 * meant to our LOCAL address. 
457 */ 458 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 459 NULL, ipst); 460 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 461 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 462 if (mp == NULL) { 463 BUMP_MIB(ill->ill_icmp6_mib, 464 ipv6IfIcmpInErrors); 465 if (ire != NULL) 466 ire_refrele(ire); 467 if (mctl_present) 468 freeb(first_mp); 469 return; 470 } else if (mctl_present) { 471 first_mp->b_cont = mp; 472 } else { 473 first_mp = mp; 474 } 475 ip6i = (ip6i_t *)mp->b_rptr; 476 ip6i->ip6i_flags = IP6I_ATTACH_IF; 477 ip6i->ip6i_ifindex = 478 ill->ill_phyint->phyint_ifindex; 479 } 480 if (ire != NULL) 481 ire_refrele(ire); 482 } 483 484 if (!mctl_present) { 485 /* 486 * This packet should go out the same way as it 487 * came in i.e in clear. To make sure that global 488 * policy will not be applied to this in ip_wput, 489 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 490 */ 491 ASSERT(first_mp == mp); 492 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 493 if (first_mp == NULL) { 494 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 495 freemsg(mp); 496 return; 497 } 498 ii = (ipsec_in_t *)first_mp->b_rptr; 499 500 /* This is not a secure packet */ 501 ii->ipsec_in_secure = B_FALSE; 502 first_mp->b_cont = mp; 503 } 504 ii->ipsec_in_zoneid = zoneid; 505 ASSERT(zoneid != ALL_ZONES); 506 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 508 return; 509 } 510 put(WR(q), first_mp); 511 return; 512 513 case ICMP6_ECHO_REPLY: 514 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 515 break; 516 517 case ND_ROUTER_SOLICIT: 518 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 519 break; 520 521 case ND_ROUTER_ADVERT: 522 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 523 break; 524 525 case ND_NEIGHBOR_SOLICIT: 526 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 527 if (mctl_present) 528 freeb(first_mp); 529 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 530 ndp_input(ill, mp, dl_mp); 531 return; 532 533 case ND_NEIGHBOR_ADVERT: 534 BUMP_MIB(ill->ill_icmp6_mib, 535 ipv6IfIcmpInNeighborAdvertisements); 536 if (mctl_present) 537 freeb(first_mp); 538 /* XXX may wish to pass first_mp up to ndp_input someday. */ 539 ndp_input(ill, mp, dl_mp); 540 return; 541 542 case ND_REDIRECT: { 543 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 544 545 if (ipst->ips_ipv6_ignore_redirect) 546 break; 547 548 /* 549 * As there is no upper client to deliver, we don't 550 * need the first_mp any more. 551 */ 552 if (mctl_present) 553 freeb(first_mp); 554 if (!pullupmsg(mp, -1)) { 555 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 556 break; 557 } 558 icmp_redirect_v6(q, mp, ill); 559 return; 560 } 561 562 /* 563 * The next three icmp messages will be handled by MLD. 564 * Pass all valid MLD packets up to any process(es) 565 * listening on a raw ICMP socket. MLD messages are 566 * freed by mld_input function. 567 */ 568 case MLD_LISTENER_QUERY: 569 case MLD_LISTENER_REPORT: 570 case MLD_LISTENER_REDUCTION: 571 if (mctl_present) 572 freeb(first_mp); 573 mld_input(q, mp, ill); 574 return; 575 default: 576 break; 577 } 578 if (interested) { 579 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 580 mctl_present, zoneid); 581 } else { 582 freemsg(first_mp); 583 } 584 } 585 586 /* 587 * Process received IPv6 ICMP Packet too big. 588 * After updating any IRE it does the fanout to any matching transport streams. 589 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
590 */ 591 /* ARGSUSED */ 592 static void 593 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 594 boolean_t mctl_present, zoneid_t zoneid) 595 { 596 ip6_t *ip6h; 597 ip6_t *inner_ip6h; 598 icmp6_t *icmp6; 599 uint16_t hdr_length; 600 uint32_t mtu; 601 ire_t *ire, *first_ire; 602 mblk_t *first_mp; 603 ip_stack_t *ipst = ill->ill_ipst; 604 605 first_mp = mp; 606 if (mctl_present) 607 mp = first_mp->b_cont; 608 /* 609 * We must have exclusive use of the mblk to update the MTU 610 * in the packet. 611 * If not, we copy it. 612 * 613 * If there's an M_CTL present, we know that allocated first_mp 614 * earlier in this function, so we know first_mp has refcnt of one. 615 */ 616 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 617 if (mp->b_datap->db_ref > 1) { 618 mblk_t *mp1; 619 620 mp1 = copymsg(mp); 621 freemsg(mp); 622 if (mp1 == NULL) { 623 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 624 if (mctl_present) 625 freeb(first_mp); 626 return; 627 } 628 mp = mp1; 629 if (mctl_present) 630 first_mp->b_cont = mp; 631 else 632 first_mp = mp; 633 } 634 ip6h = (ip6_t *)mp->b_rptr; 635 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 636 hdr_length = ip_hdr_length_v6(mp, ip6h); 637 else 638 hdr_length = IPV6_HDR_LEN; 639 640 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 641 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 642 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 643 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 644 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 646 freemsg(first_mp); 647 return; 648 } 649 ip6h = (ip6_t *)mp->b_rptr; 650 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 651 inner_ip6h = (ip6_t *)&icmp6[1]; 652 } 653 654 /* 655 * For link local destinations matching simply on IRE type is not 656 * sufficient. Same link local addresses for different ILL's is 657 * possible. 
658 */ 659 660 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 661 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 662 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 663 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 664 665 if (first_ire == NULL) { 666 if (ip_debug > 2) { 667 /* ip1dbg */ 668 pr_addr_dbg("icmp_inbound_too_big_v6:" 669 "no ire for dst %s\n", AF_INET6, 670 &inner_ip6h->ip6_dst); 671 } 672 freemsg(first_mp); 673 return; 674 } 675 676 mtu = ntohl(icmp6->icmp6_mtu); 677 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 678 for (ire = first_ire; ire != NULL && 679 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 680 ire = ire->ire_next) { 681 mutex_enter(&ire->ire_lock); 682 if (mtu < IPV6_MIN_MTU) { 683 ip1dbg(("Received mtu less than IPv6 " 684 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 685 mtu = IPV6_MIN_MTU; 686 /* 687 * If an mtu less than IPv6 min mtu is received, 688 * we must include a fragment header in 689 * subsequent packets. 690 */ 691 ire->ire_frag_flag |= IPH_FRAG_HDR; 692 } 693 ip1dbg(("Received mtu from router: %d\n", mtu)); 694 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 695 /* Record the new max frag size for the ULP. */ 696 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 697 /* 698 * If we need a fragment header in every packet 699 * (above case or multirouting), make sure the 700 * ULP takes it into account when computing the 701 * payload size. 
702 */ 703 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 704 sizeof (ip6_frag_t)); 705 } else { 706 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 707 } 708 mutex_exit(&ire->ire_lock); 709 } 710 rw_exit(&first_ire->ire_bucket->irb_lock); 711 ire_refrele(first_ire); 712 } else { 713 irb_t *irb = NULL; 714 /* 715 * for non-link local destinations we match only on the IRE type 716 */ 717 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 718 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 719 ipst); 720 if (ire == NULL) { 721 if (ip_debug > 2) { 722 /* ip1dbg */ 723 pr_addr_dbg("icmp_inbound_too_big_v6:" 724 "no ire for dst %s\n", 725 AF_INET6, &inner_ip6h->ip6_dst); 726 } 727 freemsg(first_mp); 728 return; 729 } 730 irb = ire->ire_bucket; 731 ire_refrele(ire); 732 rw_enter(&irb->irb_lock, RW_READER); 733 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 734 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 735 &inner_ip6h->ip6_dst)) { 736 mtu = ntohl(icmp6->icmp6_mtu); 737 mutex_enter(&ire->ire_lock); 738 if (mtu < IPV6_MIN_MTU) { 739 ip1dbg(("Received mtu less than IPv6" 740 "min mtu %d: %d\n", 741 IPV6_MIN_MTU, mtu)); 742 mtu = IPV6_MIN_MTU; 743 /* 744 * If an mtu less than IPv6 min mtu is 745 * received, we must include a fragment 746 * header in subsequent packets. 747 */ 748 ire->ire_frag_flag |= IPH_FRAG_HDR; 749 } 750 751 ip1dbg(("Received mtu from router: %d\n", mtu)); 752 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 753 /* Record the new max frag size for the ULP. */ 754 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 755 /* 756 * If we need a fragment header in 757 * every packet (above case or 758 * multirouting), make sure the ULP 759 * takes it into account when computing 760 * the payload size. 
					 */
					icmp6->icmp6_mtu =
					    htonl(ire->ire_max_frag -
					    sizeof (ip6_frag_t));
				} else {
					icmp6->icmp6_mtu =
					    htonl(ire->ire_max_frag);
				}
				mutex_exit(&ire->ire_lock);
			}
		}
		rw_exit(&irb->irb_lock);
	}
	/* Hand the (possibly MTU-adjusted) error up to the transports. */
	icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill,
	    mctl_present, zoneid);
}

/*
 * Fanout received ICMPv6 error packets to the transports.
 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
 *
 * Arguments:
 *	q		passed through to the fanout routines.
 *	mp		the message; an IPSEC_IN M_CTL followed by the packet
 *			when mctl_present is set, otherwise the packet itself.
 *	ip6h		outer IPv6 header of the ICMPv6 message.
 *	icmp6		ICMPv6 header; the packet in error follows it.
 *	ill		receiving interface.
 *	zoneid		passed through to the fanout routines.
 *
 * The packet in error is examined to find the upper-layer protocol that
 * originally sent it, and the message is delivered there with the message
 * type set to M_CTL.  Paths that reach drop_pkt free the message and count
 * it as ipv6IfIcmpInErrors.
 */
void
icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
    icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid)
{
	uint16_t	*up;	/* Pointer to ports in ULP header */
	uint32_t	ports;	/* reversed ports for fanout */
	ip6_t		rip6h;	/* With reversed addresses */
	uint16_t	hdr_length;
	uint8_t		*nexthdrp;
	uint8_t		nexthdr;
	mblk_t		*first_mp;
	ipsec_in_t	*ii;
	tcpha_t		*tcpha;
	conn_t		*connp;
	ip_stack_t	*ipst = ill->ill_ipst;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);

		ii = (ipsec_in_t *)first_mp->b_rptr;
		ASSERT(ii->ipsec_in_type == IPSEC_IN);
	} else {
		ii = NULL;
	}

	/* Offset of the ICMPv6 header from the outer IPv6 header. */
	hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h);
	ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN);

	/*
	 * Need to pullup everything in order to use
	 * ip_hdr_length_nexthdr_v6()
	 */
	if (mp->b_cont != NULL) {
		if (!pullupmsg(mp, -1)) {
			ip1dbg(("icmp_inbound_error_fanout_v6: "
			    "pullupmsg failed\n"));
			goto drop_pkt;
		}
		/* The data may have moved; recompute the header pointers. */
		ip6h = (ip6_t *)mp->b_rptr;
		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
	}

	/* From here on ip6h refers to the embedded packet, not the outer one. */
	ip6h = (ip6_t *)&icmp6[1];	/* Packet in error */
	if ((uchar_t *)&ip6h[1] > mp->b_wptr)
		goto drop_pkt;

	/*
	 * hdr_length is reused for the embedded packet: it is passed by
	 * address together with the embedded header (presumably receiving
	 * the length of its IPv6 header chain -- see
	 * ip_hdr_length_nexthdr_v6()).
	 */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
		goto drop_pkt;
	nexthdr = *nexthdrp;

	/* Set message type, must be done after pullups */
	mp->b_datap->db_type = M_CTL;

	/* Try to pass the ICMP message to clients who need it */
	switch (nexthdr) {
	case IPPROTO_UDP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * UDP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Too short: leave the switch and drop the packet. */
			break;
		}
		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IPCL_UDP_MATCH_V6
		 * and we only set the src and dst addresses and nexthdr.
		 */
		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		/* Swap the two 16-bit port fields into 'ports'. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];

		ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	case IPPROTO_TCP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the TCP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Too short: leave the switch and drop the packet. */
			break;
		}

		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and
		 * we only set the src and dst addresses and nexthdr.
		 */

		tcpha = (tcpha_t *)((char *)ip6h + hdr_length);
		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
		if (connp == NULL) {
			goto drop_pkt;
		}

		/* Deliver to TCP on the connection's squeue. */
		SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp,
		    SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR);
		return;

	}
	case IPPROTO_SCTP:
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the SCTP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Too short: leave the switch and drop the packet. */
			break;
		}

		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		/* Swap the two 16-bit port fields into 'ports'. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];
		ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0,
		    mctl_present, IP6_NO_IPPOLICY, zoneid);
		return;
	case IPPROTO_ESP:
	case IPPROTO_AH: {
		int ipsec_rc;
		ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

		/*
		 * We need a IPSEC_IN in the front to fanout to AH/ESP.
		 * We will re-use the IPSEC_IN if it is already present as
		 * AH/ESP will not affect any fields in the IPSEC_IN for
		 * ICMP errors. If there is no IPSEC_IN, allocate a new
		 * one and attach it in the front.
		 */
		if (ii != NULL) {
			/*
			 * ip_fanout_proto_again converts the ICMP errors
			 * that come back from AH/ESP to M_DATA so that
			 * if it is non-AH/ESP and we do a pullupmsg in
			 * this function, it would work. Convert it back
			 * to M_CTL before we send up as this is a ICMP
			 * error. This could have been generated locally or
			 * by some router. Validate the inner IPSEC
			 * headers.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(ill != NULL);
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
			first_mp->b_cont->b_datap->db_type = M_CTL;
		} else {
			/*
			 * IPSEC_IN is not present. We attach a ipsec_in
			 * message and send up to IPSEC for validating
			 * and removing the IPSEC headers. Clear
			 * ipsec_in_secure so that when we return
			 * from IPSEC, we don't mistakenly think that this
			 * is a secure packet came from the network.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(first_mp == mp);
			first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
			ASSERT(ill != NULL);
			if (first_mp == NULL) {
				freemsg(mp);
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				return;
			}
			ii = (ipsec_in_t *)first_mp->b_rptr;

			/* This is not a secure packet */
			ii->ipsec_in_secure = B_FALSE;
			first_mp->b_cont = mp;
			mp->b_datap->db_type = M_CTL;
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
		}

		/* Without IPsec loaded, treat AH/ESP as an unsupported protocol. */
		if (!ipsec_loaded(ipss)) {
			ip_proto_not_sup(q, first_mp, 0, zoneid, ipst);
			return;
		}

		if (nexthdr == IPPROTO_ESP)
			ipsec_rc = ipsecesp_icmp_error(first_mp);
		else
			ipsec_rc = ipsecah_icmp_error(first_mp);
		/*
		 * NOTE(review): nothing is freed here on failure, so the
		 * IPsec routine presumably consumed first_mp -- confirm.
		 */
		if (ipsec_rc == IPSEC_STATUS_FAILED)
			return;

		ip_fanout_proto_again(first_mp, ill, ill, NULL);
		return;
	}
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/* The encapsulated IPv4 or IPv6 header must be fully present. */
		if ((uint8_t *)ip6h + hdr_length +
		    (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) :
		    sizeof (ip6_t)) > mp->b_wptr) {
			goto drop_pkt;
		}

		/*
		 * Ordinary tunnel: IPv4-in-IPv6, or IPv6-in-IPv6 where the
		 * encapsulated header's addresses differ from those of the
		 * header carrying it.
		 */
		if (nexthdr == IPPROTO_ENCAP ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src,
		    &ip6h->ip6_src) ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst,
		    &ip6h->ip6_dst)) {
			/*
			 * For tunnels that have used IPsec protection,
			 * we need to adjust the MTU to take into account
			 * the IPsec overhead.
			 */
			if (ii != NULL)
				icmp6->icmp6_mtu = htonl(
				    ntohl(icmp6->icmp6_mtu) -
				    ipsec_in_extra_length(first_mp));
		} else {
			/*
			 * Self-encapsulated case. As in the ipv4 case,
			 * we need to strip the 2nd IP header. Since mp
			 * is already pulled-up, we can simply bcopy
			 * the 3rd header + data over the 2nd header.
			 */
			uint16_t unused_len;
			ip6_t *inner_ip6h = (ip6_t *)
			    ((uchar_t *)ip6h + hdr_length);

			/*
			 * Make sure we don't do recursion more than once.
			 */
			if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h,
			    &unused_len, &nexthdrp) ||
			    *nexthdrp == IPPROTO_IPV6) {
				goto drop_pkt;
			}

			/*
			 * We are about to modify the packet. Make a copy if
			 * someone else has a reference to it.
			 */
			if (DB_REF(mp) > 1) {
				mblk_t	*mp1;
				uint16_t icmp6_offset;

				mp1 = copymsg(mp);
				if (mp1 == NULL) {
					goto drop_pkt;
				}
				/* Remember where icmp6 was before mp is freed. */
				icmp6_offset = (uint16_t)
				    ((uchar_t *)icmp6 - mp->b_rptr);
				freemsg(mp);
				mp = mp1;

				/* Re-derive every pointer into the new copy. */
				icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset);
				ip6h = (ip6_t *)&icmp6[1];
				inner_ip6h = (ip6_t *)
				    ((uchar_t *)ip6h + hdr_length);

				if (mctl_present)
					first_mp->b_cont = mp;
				else
					first_mp = mp;
			}

			/*
			 * Need to set db_type back to M_DATA before
			 * refeeding mp into this function.
			 */
			DB_TYPE(mp) = M_DATA;

			/*
			 * Copy the 3rd header + remaining data on top
			 * of the 2nd header.
			 *
			 * NOTE(review): source and destination overlap
			 * whenever more than hdr_length bytes are moved;
			 * this relies on bcopy() coping with a downward
			 * overlapping copy -- confirm.
			 */
			bcopy(inner_ip6h, ip6h,
			    mp->b_wptr - (uchar_t *)inner_ip6h);

			/*
			 * Subtract length of the 2nd header.
			 */
			mp->b_wptr -= hdr_length;

			/*
			 * Now recurse, and see what I _really_ should be
			 * doing here.
			 */
			icmp_inbound_error_fanout_v6(q, first_mp,
			    (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present,
			    zoneid);
			return;
		}
		/* FALLTHRU */
	default:
		/*
		 * The rip6h header is only used for the lookup and we
		 * only set the src and dst addresses and nexthdr.
		 */
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	/*
	 * Reached by the "break" in the UDP/TCP/SCTP cases when the embedded
	 * transport header is too short, and by every goto drop_pkt.
	 */
	/* NOTREACHED */
drop_pkt:
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
	freemsg(first_mp);
}

/*
 * Process received IPv6 ICMP Redirect messages.
1110 */ 1111 /* ARGSUSED */ 1112 static void 1113 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1114 { 1115 ip6_t *ip6h; 1116 uint16_t hdr_length; 1117 nd_redirect_t *rd; 1118 ire_t *ire; 1119 ire_t *prev_ire; 1120 ire_t *redir_ire; 1121 in6_addr_t *src, *dst, *gateway; 1122 nd_opt_hdr_t *opt; 1123 nce_t *nce; 1124 int nce_flags = 0; 1125 int err = 0; 1126 boolean_t redirect_to_router = B_FALSE; 1127 int len; 1128 int optlen; 1129 iulp_t ulp_info = { 0 }; 1130 ill_t *prev_ire_ill; 1131 ipif_t *ipif; 1132 ip_stack_t *ipst = ill->ill_ipst; 1133 1134 ip6h = (ip6_t *)mp->b_rptr; 1135 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1136 hdr_length = ip_hdr_length_v6(mp, ip6h); 1137 else 1138 hdr_length = IPV6_HDR_LEN; 1139 1140 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1141 len = mp->b_wptr - mp->b_rptr - hdr_length; 1142 src = &ip6h->ip6_src; 1143 dst = &rd->nd_rd_dst; 1144 gateway = &rd->nd_rd_target; 1145 1146 /* Verify if it is a valid redirect */ 1147 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1148 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1149 (rd->nd_rd_code != 0) || 1150 (len < sizeof (nd_redirect_t)) || 1151 (IN6_IS_ADDR_V4MAPPED(dst)) || 1152 (IN6_IS_ADDR_MULTICAST(dst))) { 1153 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1154 freemsg(mp); 1155 return; 1156 } 1157 1158 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1159 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1160 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1161 freemsg(mp); 1162 return; 1163 } 1164 1165 if (len > sizeof (nd_redirect_t)) { 1166 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1167 len - sizeof (nd_redirect_t))) { 1168 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1169 freemsg(mp); 1170 return; 1171 } 1172 } 1173 1174 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1175 redirect_to_router = B_TRUE; 1176 nce_flags |= NCE_F_ISROUTER; 1177 } 1178 1179 /* ipif will be refreleased afterwards */ 1180 ipif = ipif_get_next_ipif(NULL, ill); 1181 if (ipif == NULL) { 1182 freemsg(mp); 1183 return; 1184 
} 1185 1186 /* 1187 * Verify that the IP source address of the redirect is 1188 * the same as the current first-hop router for the specified 1189 * ICMP destination address. 1190 * Also, Make sure we had a route for the dest in question and 1191 * that route was pointing to the old gateway (the source of the 1192 * redirect packet.) 1193 */ 1194 1195 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1196 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1197 MATCH_IRE_DEFAULT, ipst); 1198 1199 /* 1200 * Check that 1201 * the redirect was not from ourselves 1202 * old gateway is still directly reachable 1203 */ 1204 if (prev_ire == NULL || 1205 prev_ire->ire_type == IRE_LOCAL) { 1206 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1207 ipif_refrele(ipif); 1208 goto fail_redirect; 1209 } 1210 prev_ire_ill = ire_to_ill(prev_ire); 1211 ASSERT(prev_ire_ill != NULL); 1212 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1213 nce_flags |= NCE_F_NONUD; 1214 1215 /* 1216 * Should we use the old ULP info to create the new gateway? From 1217 * a user's perspective, we should inherit the info so that it 1218 * is a "smooth" transition. If we do not do that, then new 1219 * connections going thru the new gateway will have no route metrics, 1220 * which is counter-intuitive to user. From a network point of 1221 * view, this may or may not make sense even though the new gateway 1222 * is still directly connected to us so the route metrics should not 1223 * change much. 1224 * 1225 * But if the old ire_uinfo is not initialized, we do another 1226 * recursive lookup on the dest using the new gateway. There may 1227 * be a route to that. If so, use it to initialize the redirect 1228 * route. 1229 */ 1230 if (prev_ire->ire_uinfo.iulp_set) { 1231 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1232 } else if (redirect_to_router) { 1233 /* 1234 * Only do the following if the redirection is really to 1235 * a router. 
1236 */ 1237 ire_t *tmp_ire; 1238 ire_t *sire; 1239 1240 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1241 ALL_ZONES, 0, NULL, 1242 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1243 ipst); 1244 if (sire != NULL) { 1245 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1246 ASSERT(tmp_ire != NULL); 1247 ire_refrele(tmp_ire); 1248 ire_refrele(sire); 1249 } else if (tmp_ire != NULL) { 1250 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1251 sizeof (iulp_t)); 1252 ire_refrele(tmp_ire); 1253 } 1254 } 1255 1256 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1257 opt = (nd_opt_hdr_t *)&rd[1]; 1258 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1259 if (opt != NULL) { 1260 err = ndp_lookup_then_add_v6(ill, 1261 (uchar_t *)&opt[1], /* Link layer address */ 1262 gateway, 1263 &ipv6_all_ones, /* prefix mask */ 1264 &ipv6_all_zeros, /* Mapping mask */ 1265 0, 1266 nce_flags, 1267 ND_STALE, 1268 &nce); 1269 switch (err) { 1270 case 0: 1271 NCE_REFRELE(nce); 1272 break; 1273 case EEXIST: 1274 /* 1275 * Check to see if link layer address has changed and 1276 * process the nce_state accordingly. 1277 */ 1278 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1279 NCE_REFRELE(nce); 1280 break; 1281 default: 1282 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1283 err)); 1284 ipif_refrele(ipif); 1285 goto fail_redirect; 1286 } 1287 } 1288 if (redirect_to_router) { 1289 /* icmp_redirect_ok_v6() must have already verified this */ 1290 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1291 1292 /* 1293 * Create a Route Association. This will allow us to remember 1294 * a router told us to use the particular gateway. 
1295 */ 1296 ire = ire_create_v6( 1297 dst, 1298 &ipv6_all_ones, /* mask */ 1299 &prev_ire->ire_src_addr_v6, /* source addr */ 1300 gateway, /* gateway addr */ 1301 &prev_ire->ire_max_frag, /* max frag */ 1302 NULL, /* no src nce */ 1303 NULL, /* no rfq */ 1304 NULL, /* no stq */ 1305 IRE_HOST, 1306 prev_ire->ire_ipif, 1307 NULL, 1308 0, 1309 0, 1310 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1311 &ulp_info, 1312 NULL, 1313 NULL, 1314 ipst); 1315 } else { 1316 queue_t *stq; 1317 1318 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1319 ? ipif->ipif_rq : ipif->ipif_wq; 1320 1321 /* 1322 * Just create an on link entry, i.e. interface route. 1323 */ 1324 ire = ire_create_v6( 1325 dst, /* gateway == dst */ 1326 &ipv6_all_ones, /* mask */ 1327 &prev_ire->ire_src_addr_v6, /* source addr */ 1328 &ipv6_all_zeros, /* gateway addr */ 1329 &prev_ire->ire_max_frag, /* max frag */ 1330 NULL, /* no src nce */ 1331 NULL, /* ire rfq */ 1332 stq, /* ire stq */ 1333 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1334 prev_ire->ire_ipif, 1335 &ipv6_all_ones, 1336 0, 1337 0, 1338 (RTF_DYNAMIC | RTF_HOST), 1339 &ulp_info, 1340 NULL, 1341 NULL, 1342 ipst); 1343 } 1344 1345 /* Release reference from earlier ipif_get_next_ipif() */ 1346 ipif_refrele(ipif); 1347 1348 if (ire == NULL) 1349 goto fail_redirect; 1350 1351 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1352 1353 /* tell routing sockets that we received a redirect */ 1354 ip_rts_change_v6(RTM_REDIRECT, 1355 &rd->nd_rd_dst, 1356 &rd->nd_rd_target, 1357 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1358 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1359 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1360 1361 /* 1362 * Delete any existing IRE_HOST type ires for this destination. 1363 * This together with the added IRE has the effect of 1364 * modifying an existing redirect. 
1365 */ 1366 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1367 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1368 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1369 ipst); 1370 1371 ire_refrele(ire); /* Held in ire_add_v6 */ 1372 1373 if (redir_ire != NULL) { 1374 if (redir_ire->ire_flags & RTF_DYNAMIC) 1375 ire_delete(redir_ire); 1376 ire_refrele(redir_ire); 1377 } 1378 } 1379 1380 if (prev_ire->ire_type == IRE_CACHE) 1381 ire_delete(prev_ire); 1382 ire_refrele(prev_ire); 1383 prev_ire = NULL; 1384 1385 fail_redirect: 1386 if (prev_ire != NULL) 1387 ire_refrele(prev_ire); 1388 freemsg(mp); 1389 } 1390 1391 static ill_t * 1392 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1393 { 1394 ill_t *ill; 1395 1396 ASSERT(WR(q) == q); 1397 1398 if (q->q_next != NULL) { 1399 ill = (ill_t *)q->q_ptr; 1400 if (ILL_CAN_LOOKUP(ill)) 1401 ill_refhold(ill); 1402 else 1403 ill = NULL; 1404 } else { 1405 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1406 NULL, NULL, NULL, NULL, NULL, ipst); 1407 } 1408 if (ill == NULL) 1409 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1410 return (ill); 1411 } 1412 1413 /* 1414 * Assigns an appropriate source address to the packet. 1415 * If origdst is one of our IP addresses that use it as the source. 1416 * If the queue is an ill queue then select a source from that ill. 1417 * Otherwise pick a source based on a route lookup back to the origsrc. 1418 * 1419 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1420 */ 1421 static in6_addr_t * 1422 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1423 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1424 { 1425 ill_t *ill; 1426 ire_t *ire; 1427 ipif_t *ipif; 1428 1429 ASSERT(!(wq->q_flag & QREADR)); 1430 if (wq->q_next != NULL) { 1431 ill = (ill_t *)wq->q_ptr; 1432 } else { 1433 ill = NULL; 1434 } 1435 1436 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1437 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1438 ipst); 1439 if (ire != NULL) { 1440 /* Destined to one of our addresses */ 1441 *src = *origdst; 1442 ire_refrele(ire); 1443 return (src); 1444 } 1445 if (ire != NULL) { 1446 ire_refrele(ire); 1447 ire = NULL; 1448 } 1449 if (ill == NULL) { 1450 /* What is the route back to the original source? */ 1451 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1452 NULL, NULL, zoneid, NULL, 1453 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1454 if (ire == NULL) { 1455 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1456 return (NULL); 1457 } 1458 /* 1459 * Does not matter whether we use ire_stq or ire_ipif here. 1460 * Just pick an ill for ICMP replies. 1461 */ 1462 ASSERT(ire->ire_ipif != NULL); 1463 ill = ire->ire_ipif->ipif_ill; 1464 ire_refrele(ire); 1465 } 1466 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1467 IPV6_PREFER_SRC_DEFAULT, zoneid); 1468 if (ipif != NULL) { 1469 *src = ipif->ipif_v6src_addr; 1470 ipif_refrele(ipif); 1471 return (src); 1472 } 1473 /* 1474 * Unusual case - can't find a usable source address to reach the 1475 * original source. Use what in the route to the source. 
1476 */ 1477 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1478 NULL, NULL, zoneid, NULL, 1479 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1480 if (ire == NULL) { 1481 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1482 return (NULL); 1483 } 1484 ASSERT(ire != NULL); 1485 *src = ire->ire_src_addr_v6; 1486 ire_refrele(ire); 1487 return (src); 1488 } 1489 1490 /* 1491 * Build and ship an IPv6 ICMP message using the packet data in mp, 1492 * and the ICMP header pointed to by "stuff". (May be called as 1493 * writer.) 1494 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1495 * verify that an icmp error packet can be sent. 1496 * 1497 * If q is an ill write side queue (which is the case when packets 1498 * arrive from ip_rput) then ip_wput code will ensure that packets to 1499 * link-local destinations are sent out that ill. 1500 * 1501 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1502 * source address (see above function). 1503 */ 1504 static void 1505 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1506 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1507 ip_stack_t *ipst) 1508 { 1509 ip6_t *ip6h; 1510 in6_addr_t v6dst; 1511 size_t len_needed; 1512 size_t msg_len; 1513 mblk_t *mp1; 1514 icmp6_t *icmp6; 1515 ill_t *ill; 1516 in6_addr_t v6src; 1517 mblk_t *ipsec_mp; 1518 ipsec_out_t *io; 1519 1520 ill = ip_queue_to_ill_v6(q, ipst); 1521 if (ill == NULL) { 1522 freemsg(mp); 1523 return; 1524 } 1525 1526 if (mctl_present) { 1527 /* 1528 * If it is : 1529 * 1530 * 1) a IPSEC_OUT, then this is caused by outbound 1531 * datagram originating on this host. IPSEC processing 1532 * may or may not have been done. Refer to comments above 1533 * icmp_inbound_error_fanout for details. 1534 * 1535 * 2) a IPSEC_IN if we are generating a icmp_message 1536 * for an incoming datagram destined for us i.e called 1537 * from ip_fanout_send_icmp. 
1538 */ 1539 ipsec_info_t *in; 1540 1541 ipsec_mp = mp; 1542 mp = ipsec_mp->b_cont; 1543 1544 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1545 ip6h = (ip6_t *)mp->b_rptr; 1546 1547 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1548 in->ipsec_info_type == IPSEC_IN); 1549 1550 if (in->ipsec_info_type == IPSEC_IN) { 1551 /* 1552 * Convert the IPSEC_IN to IPSEC_OUT. 1553 */ 1554 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1555 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1556 ill_refrele(ill); 1557 return; 1558 } 1559 } else { 1560 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1561 io = (ipsec_out_t *)in; 1562 /* 1563 * Clear out ipsec_out_proc_begin, so we do a fresh 1564 * ire lookup. 1565 */ 1566 io->ipsec_out_proc_begin = B_FALSE; 1567 } 1568 } else { 1569 /* 1570 * This is in clear. The icmp message we are building 1571 * here should go out in clear. 1572 */ 1573 ipsec_in_t *ii; 1574 ASSERT(mp->b_datap->db_type == M_DATA); 1575 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1576 if (ipsec_mp == NULL) { 1577 freemsg(mp); 1578 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1579 ill_refrele(ill); 1580 return; 1581 } 1582 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1583 1584 /* This is not a secure packet */ 1585 ii->ipsec_in_secure = B_FALSE; 1586 /* 1587 * For trusted extensions using a shared IP address we can 1588 * send using any zoneid. 1589 */ 1590 if (zoneid == ALL_ZONES) 1591 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1592 else 1593 ii->ipsec_in_zoneid = zoneid; 1594 ipsec_mp->b_cont = mp; 1595 ip6h = (ip6_t *)mp->b_rptr; 1596 /* 1597 * Convert the IPSEC_IN to IPSEC_OUT. 
1598 */ 1599 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1600 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1601 ill_refrele(ill); 1602 return; 1603 } 1604 } 1605 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1606 1607 if (v6src_ptr != NULL) { 1608 v6src = *v6src_ptr; 1609 } else { 1610 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1611 &v6src, zoneid, ipst) == NULL) { 1612 freemsg(ipsec_mp); 1613 ill_refrele(ill); 1614 return; 1615 } 1616 } 1617 v6dst = ip6h->ip6_src; 1618 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1619 msg_len = msgdsize(mp); 1620 if (msg_len > len_needed) { 1621 if (!adjmsg(mp, len_needed - msg_len)) { 1622 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1623 freemsg(ipsec_mp); 1624 ill_refrele(ill); 1625 return; 1626 } 1627 msg_len = len_needed; 1628 } 1629 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1630 if (mp1 == NULL) { 1631 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1632 freemsg(ipsec_mp); 1633 ill_refrele(ill); 1634 return; 1635 } 1636 ill_refrele(ill); 1637 mp1->b_cont = mp; 1638 mp = mp1; 1639 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1640 io->ipsec_out_type == IPSEC_OUT); 1641 ipsec_mp->b_cont = mp; 1642 1643 /* 1644 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1645 * node generates be accepted in peace by all on-host destinations. 1646 * If we do NOT assume that all on-host destinations trust 1647 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1648 * (Look for ipsec_out_icmp_loopback). 
1649 */ 1650 io->ipsec_out_icmp_loopback = B_TRUE; 1651 1652 ip6h = (ip6_t *)mp->b_rptr; 1653 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1654 1655 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1656 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1657 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1658 ip6h->ip6_dst = v6dst; 1659 ip6h->ip6_src = v6src; 1660 msg_len += IPV6_HDR_LEN + len; 1661 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1662 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1663 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1664 } 1665 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1666 icmp6 = (icmp6_t *)&ip6h[1]; 1667 bcopy(stuff, (char *)icmp6, len); 1668 /* 1669 * Prepare for checksum by putting icmp length in the icmp 1670 * checksum field. The checksum is calculated in ip_wput_v6. 1671 */ 1672 icmp6->icmp6_cksum = ip6h->ip6_plen; 1673 if (icmp6->icmp6_type == ND_REDIRECT) { 1674 ip6h->ip6_hops = IPV6_MAX_HOPS; 1675 } 1676 /* Send to V6 writeside put routine */ 1677 put(q, ipsec_mp); 1678 } 1679 1680 /* 1681 * Update the output mib when ICMPv6 packets are sent. 
1682 */ 1683 static void 1684 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1685 { 1686 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1687 1688 switch (icmp6->icmp6_type) { 1689 case ICMP6_DST_UNREACH: 1690 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1691 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1692 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1693 break; 1694 1695 case ICMP6_TIME_EXCEEDED: 1696 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1697 break; 1698 1699 case ICMP6_PARAM_PROB: 1700 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1701 break; 1702 1703 case ICMP6_PACKET_TOO_BIG: 1704 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1705 break; 1706 1707 case ICMP6_ECHO_REQUEST: 1708 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1709 break; 1710 1711 case ICMP6_ECHO_REPLY: 1712 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1713 break; 1714 1715 case ND_ROUTER_SOLICIT: 1716 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1717 break; 1718 1719 case ND_ROUTER_ADVERT: 1720 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1721 break; 1722 1723 case ND_NEIGHBOR_SOLICIT: 1724 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1725 break; 1726 1727 case ND_NEIGHBOR_ADVERT: 1728 BUMP_MIB(ill->ill_icmp6_mib, 1729 ipv6IfIcmpOutNeighborAdvertisements); 1730 break; 1731 1732 case ND_REDIRECT: 1733 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1734 break; 1735 1736 case MLD_LISTENER_QUERY: 1737 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1738 break; 1739 1740 case MLD_LISTENER_REPORT: 1741 case MLD_V2_LISTENER_REPORT: 1742 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1743 break; 1744 1745 case MLD_LISTENER_REDUCTION: 1746 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1747 break; 1748 } 1749 } 1750 1751 /* 1752 * Check if it is ok to send an ICMPv6 error packet in 1753 * response to the IP packet in 
mp. 1754 * Free the message and return null if no 1755 * ICMP error packet should be sent. 1756 */ 1757 static mblk_t * 1758 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1759 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1760 { 1761 ip6_t *ip6h; 1762 1763 if (!mp) 1764 return (NULL); 1765 1766 ip6h = (ip6_t *)mp->b_rptr; 1767 1768 /* Check if source address uniquely identifies the host */ 1769 1770 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1771 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1772 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1773 freemsg(mp); 1774 return (NULL); 1775 } 1776 1777 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1778 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1779 icmp6_t *icmp6; 1780 1781 if (mp->b_wptr - mp->b_rptr < len_needed) { 1782 if (!pullupmsg(mp, len_needed)) { 1783 ill_t *ill; 1784 1785 ill = ip_queue_to_ill_v6(q, ipst); 1786 if (ill == NULL) { 1787 BUMP_MIB(&ipst->ips_icmp6_mib, 1788 ipv6IfIcmpInErrors); 1789 } else { 1790 BUMP_MIB(ill->ill_icmp6_mib, 1791 ipv6IfIcmpInErrors); 1792 ill_refrele(ill); 1793 } 1794 freemsg(mp); 1795 return (NULL); 1796 } 1797 ip6h = (ip6_t *)mp->b_rptr; 1798 } 1799 icmp6 = (icmp6_t *)&ip6h[1]; 1800 /* Explicitly do not generate errors in response to redirects */ 1801 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1802 icmp6->icmp6_type == ND_REDIRECT) { 1803 freemsg(mp); 1804 return (NULL); 1805 } 1806 } 1807 /* 1808 * Check that the destination is not multicast and that the packet 1809 * was not sent on link layer broadcast or multicast. (Exception 1810 * is Packet too big message as per the draft - when mcast_ok is set.) 1811 */ 1812 if (!mcast_ok && 1813 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1814 freemsg(mp); 1815 return (NULL); 1816 } 1817 if (icmp_err_rate_limit(ipst)) { 1818 /* 1819 * Only send ICMP error packets every so often. 1820 * This should be done on a per port/source basis, 1821 * but for now this will suffice. 
1822 */ 1823 freemsg(mp); 1824 return (NULL); 1825 } 1826 return (mp); 1827 } 1828 1829 /* 1830 * Generate an ICMPv6 redirect message. 1831 * Include target link layer address option if it exits. 1832 * Always include redirect header. 1833 */ 1834 static void 1835 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1836 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1837 { 1838 nd_redirect_t *rd; 1839 nd_opt_rd_hdr_t *rdh; 1840 uchar_t *buf; 1841 nce_t *nce = NULL; 1842 nd_opt_hdr_t *opt; 1843 int len; 1844 int ll_opt_len = 0; 1845 int max_redir_hdr_data_len; 1846 int pkt_len; 1847 in6_addr_t *srcp; 1848 ip_stack_t *ipst = ill->ill_ipst; 1849 1850 /* 1851 * We are called from ip_rput where we could 1852 * not have attached an IPSEC_IN. 1853 */ 1854 ASSERT(mp->b_datap->db_type == M_DATA); 1855 1856 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1857 if (mp == NULL) 1858 return; 1859 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1860 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1861 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1862 ill->ill_phys_addr_length + 7)/8 * 8; 1863 } 1864 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1865 ASSERT(len % 4 == 0); 1866 buf = kmem_alloc(len, KM_NOSLEEP); 1867 if (buf == NULL) { 1868 if (nce != NULL) 1869 NCE_REFRELE(nce); 1870 freemsg(mp); 1871 return; 1872 } 1873 1874 rd = (nd_redirect_t *)buf; 1875 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1876 rd->nd_rd_code = 0; 1877 rd->nd_rd_reserved = 0; 1878 rd->nd_rd_target = *targetp; 1879 rd->nd_rd_dst = *dest; 1880 1881 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1882 if (nce != NULL && ll_opt_len != 0) { 1883 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1884 opt->nd_opt_len = ll_opt_len/8; 1885 bcopy((char *)nce->nce_res_mp->b_rptr + 1886 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1887 ill->ill_phys_addr_length); 1888 } 1889 if (nce != NULL) 1890 NCE_REFRELE(nce); 1891 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1892 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1893 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1894 max_redir_hdr_data_len = 1895 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1896 pkt_len = msgdsize(mp); 1897 /* Make sure mp is 8 byte aligned */ 1898 if (pkt_len > max_redir_hdr_data_len) { 1899 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1900 sizeof (nd_opt_rd_hdr_t))/8; 1901 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1902 } else { 1903 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1904 (void) adjmsg(mp, -(pkt_len % 8)); 1905 } 1906 rdh->nd_opt_rh_reserved1 = 0; 1907 rdh->nd_opt_rh_reserved2 = 0; 1908 /* ipif_v6src_addr contains the link-local source address */ 1909 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1910 if (ill->ill_group != NULL) { 1911 /* 1912 * The receiver of the redirect will verify whether it 1913 * had a route through us (srcp that we will use in 1914 * the redirect) or not. As we load spread even link-locals, 1915 * we don't know which source address the receiver of 1916 * redirect has in its route for communicating with us. 1917 * Thus we randomly choose a source here and finally we 1918 * should get to the right one and it will eventually 1919 * accept the redirect from us. We can't call 1920 * ip_lookup_scope_v6 because we don't have the right 1921 * link-local address here. Thus we randomly choose one. 
1922 */ 1923 int cnt = ill->ill_group->illgrp_ill_count; 1924 1925 ill = ill->ill_group->illgrp_ill; 1926 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1927 while (cnt--) 1928 ill = ill->ill_group_next; 1929 srcp = &ill->ill_ipif->ipif_v6src_addr; 1930 } else { 1931 srcp = &ill->ill_ipif->ipif_v6src_addr; 1932 } 1933 rw_exit(&ipst->ips_ill_g_lock); 1934 /* Redirects sent by router, and router is global zone */ 1935 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1936 kmem_free(buf, len); 1937 } 1938 1939 1940 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1941 void 1942 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1943 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1944 ip_stack_t *ipst) 1945 { 1946 icmp6_t icmp6; 1947 boolean_t mctl_present; 1948 mblk_t *first_mp; 1949 1950 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1951 1952 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1953 if (mp == NULL) { 1954 if (mctl_present) 1955 freeb(first_mp); 1956 return; 1957 } 1958 bzero(&icmp6, sizeof (icmp6_t)); 1959 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1960 icmp6.icmp6_code = code; 1961 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1962 zoneid, ipst); 1963 } 1964 1965 /* 1966 * Generate an ICMP unreachable message. 
1967 */ 1968 void 1969 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1970 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1971 ip_stack_t *ipst) 1972 { 1973 icmp6_t icmp6; 1974 boolean_t mctl_present; 1975 mblk_t *first_mp; 1976 1977 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1978 1979 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1980 if (mp == NULL) { 1981 if (mctl_present) 1982 freeb(first_mp); 1983 return; 1984 } 1985 bzero(&icmp6, sizeof (icmp6_t)); 1986 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1987 icmp6.icmp6_code = code; 1988 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1989 zoneid, ipst); 1990 } 1991 1992 /* 1993 * Generate an ICMP pkt too big message. 1994 */ 1995 static void 1996 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1997 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1998 { 1999 icmp6_t icmp6; 2000 mblk_t *first_mp; 2001 boolean_t mctl_present; 2002 2003 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2004 2005 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2006 if (mp == NULL) { 2007 if (mctl_present) 2008 freeb(first_mp); 2009 return; 2010 } 2011 bzero(&icmp6, sizeof (icmp6_t)); 2012 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2013 icmp6.icmp6_code = 0; 2014 icmp6.icmp6_mtu = htonl(mtu); 2015 2016 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2017 zoneid, ipst); 2018 } 2019 2020 /* 2021 * Generate an ICMP parameter problem message. (May be called as writer.) 2022 * 'offset' is the offset from the beginning of the packet in error. 
2023 */ 2024 static void 2025 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2026 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2027 ip_stack_t *ipst) 2028 { 2029 icmp6_t icmp6; 2030 boolean_t mctl_present; 2031 mblk_t *first_mp; 2032 2033 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2034 2035 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2036 if (mp == NULL) { 2037 if (mctl_present) 2038 freeb(first_mp); 2039 return; 2040 } 2041 bzero((char *)&icmp6, sizeof (icmp6_t)); 2042 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2043 icmp6.icmp6_code = code; 2044 icmp6.icmp6_pptr = htonl(offset); 2045 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2046 zoneid, ipst); 2047 } 2048 2049 /* 2050 * This code will need to take into account the possibility of binding 2051 * to a link local address on a multi-homed host, in which case the 2052 * outgoing interface (from the conn) will need to be used when getting 2053 * an ire for the dst. Going through proper outgoing interface and 2054 * choosing the source address corresponding to the outgoing interface 2055 * is necessary when the destination address is a link-local address and 2056 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2057 * This can happen when active connection is setup; thus ipp pointer 2058 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2059 * pointer is passed as ipp pointer. 
2060 */ 2061 mblk_t * 2062 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2063 { 2064 ssize_t len; 2065 int protocol; 2066 struct T_bind_req *tbr; 2067 sin6_t *sin6; 2068 ipa6_conn_t *ac6; 2069 in6_addr_t *v6srcp; 2070 in6_addr_t *v6dstp; 2071 uint16_t lport; 2072 uint16_t fport; 2073 uchar_t *ucp; 2074 mblk_t *mp1; 2075 boolean_t ire_requested; 2076 boolean_t ipsec_policy_set; 2077 int error = 0; 2078 boolean_t local_bind; 2079 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2080 ipa6_conn_x_t *acx6; 2081 boolean_t verify_dst; 2082 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2083 2084 ASSERT(connp->conn_af_isv6); 2085 len = mp->b_wptr - mp->b_rptr; 2086 if (len < (sizeof (*tbr) + 1)) { 2087 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2088 "ip_bind_v6: bogus msg, len %ld", len); 2089 goto bad_addr; 2090 } 2091 /* Back up and extract the protocol identifier. */ 2092 mp->b_wptr--; 2093 tbr = (struct T_bind_req *)mp->b_rptr; 2094 /* Reset the message type in preparation for shipping it back. */ 2095 mp->b_datap->db_type = M_PCPROTO; 2096 2097 protocol = *mp->b_wptr & 0xFF; 2098 connp->conn_ulp = (uint8_t)protocol; 2099 2100 /* 2101 * Check for a zero length address. This is from a protocol that 2102 * wants to register to receive all packets of its type. 2103 */ 2104 if (tbr->ADDR_length == 0) { 2105 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2106 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2107 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2108 NULL) { 2109 /* 2110 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2111 * Do not allow others to bind to these. 2112 */ 2113 goto bad_addr; 2114 } 2115 2116 /* 2117 * 2118 * The udp module never sends down a zero-length address, 2119 * and allowing this on a labeled system will break MLP 2120 * functionality. 
2121 */ 2122 if (is_system_labeled() && protocol == IPPROTO_UDP) 2123 goto bad_addr; 2124 2125 /* Allow ipsec plumbing */ 2126 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2127 protocol != IPPROTO_ESP) 2128 goto bad_addr; 2129 2130 connp->conn_srcv6 = ipv6_all_zeros; 2131 ipcl_proto_insert_v6(connp, protocol); 2132 2133 tbr->PRIM_type = T_BIND_ACK; 2134 return (mp); 2135 } 2136 2137 /* Extract the address pointer from the message. */ 2138 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2139 tbr->ADDR_length); 2140 if (ucp == NULL) { 2141 ip1dbg(("ip_bind_v6: no address\n")); 2142 goto bad_addr; 2143 } 2144 if (!OK_32PTR(ucp)) { 2145 ip1dbg(("ip_bind_v6: unaligned address\n")); 2146 goto bad_addr; 2147 } 2148 mp1 = mp->b_cont; /* trailing mp if any */ 2149 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2150 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2151 2152 switch (tbr->ADDR_length) { 2153 default: 2154 ip1dbg(("ip_bind_v6: bad address length %d\n", 2155 (int)tbr->ADDR_length)); 2156 goto bad_addr; 2157 2158 case IPV6_ADDR_LEN: 2159 /* Verification of local address only */ 2160 v6srcp = (in6_addr_t *)ucp; 2161 lport = 0; 2162 local_bind = B_TRUE; 2163 break; 2164 2165 case sizeof (sin6_t): 2166 sin6 = (sin6_t *)ucp; 2167 v6srcp = &sin6->sin6_addr; 2168 lport = sin6->sin6_port; 2169 local_bind = B_TRUE; 2170 break; 2171 2172 case sizeof (ipa6_conn_t): 2173 /* 2174 * Verify that both the source and destination addresses 2175 * are valid. 2176 * Note that we allow connect to broadcast and multicast 2177 * addresses when ire_requested is set. Thus the ULP 2178 * has to check for IRE_BROADCAST and multicast. 2179 */ 2180 ac6 = (ipa6_conn_t *)ucp; 2181 v6srcp = &ac6->ac6_laddr; 2182 v6dstp = &ac6->ac6_faddr; 2183 fport = ac6->ac6_fport; 2184 /* For raw socket, the local port is not set. */ 2185 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2186 connp->conn_lport; 2187 local_bind = B_FALSE; 2188 /* Always verify destination reachability. */ 2189 verify_dst = B_TRUE; 2190 break; 2191 2192 case sizeof (ipa6_conn_x_t): 2193 /* 2194 * Verify that the source address is valid. 2195 * Note that we allow connect to broadcast and multicast 2196 * addresses when ire_requested is set. Thus the ULP 2197 * has to check for IRE_BROADCAST and multicast. 2198 */ 2199 acx6 = (ipa6_conn_x_t *)ucp; 2200 ac6 = &acx6->ac6x_conn; 2201 v6srcp = &ac6->ac6_laddr; 2202 v6dstp = &ac6->ac6_faddr; 2203 fport = ac6->ac6_fport; 2204 lport = ac6->ac6_lport; 2205 local_bind = B_FALSE; 2206 /* 2207 * Client that passed ipa6_conn_x_t to us specifies whether to 2208 * verify destination reachability. 2209 */ 2210 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2211 break; 2212 } 2213 if (local_bind) { 2214 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2215 /* Bind to IPv4 address */ 2216 ipaddr_t v4src; 2217 2218 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2219 2220 error = ip_bind_laddr(connp, mp, v4src, lport, 2221 ire_requested, ipsec_policy_set, 2222 tbr->ADDR_length != IPV6_ADDR_LEN); 2223 if (error != 0) 2224 goto bad_addr; 2225 connp->conn_pkt_isv6 = B_FALSE; 2226 } else { 2227 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2228 error = 0; 2229 goto bad_addr; 2230 } 2231 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2232 ire_requested, ipsec_policy_set, 2233 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2234 if (error != 0) 2235 goto bad_addr; 2236 connp->conn_pkt_isv6 = B_TRUE; 2237 } 2238 } else { 2239 /* 2240 * Bind to local and remote address. Local might be 2241 * unspecified in which case it will be extracted from 2242 * ire_src_addr_v6 2243 */ 2244 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2245 /* Connect to IPv4 address */ 2246 ipaddr_t v4src; 2247 ipaddr_t v4dst; 2248 2249 /* Is the source unspecified or mapped? 
*/ 2250 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2251 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2252 ip1dbg(("ip_bind_v6: " 2253 "dst is mapped, but not the src\n")); 2254 goto bad_addr; 2255 } 2256 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2257 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2258 2259 /* 2260 * XXX Fix needed. Need to pass ipsec_policy_set 2261 * instead of B_FALSE. 2262 */ 2263 2264 /* Always verify destination reachability. */ 2265 error = ip_bind_connected(connp, mp, &v4src, lport, 2266 v4dst, fport, ire_requested, ipsec_policy_set, 2267 B_TRUE, B_TRUE); 2268 if (error != 0) 2269 goto bad_addr; 2270 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2271 connp->conn_pkt_isv6 = B_FALSE; 2272 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2273 ip1dbg(("ip_bind_v6: " 2274 "src is mapped, but not the dst\n")); 2275 goto bad_addr; 2276 } else { 2277 error = ip_bind_connected_v6(connp, mp, v6srcp, 2278 lport, v6dstp, ipp, fport, ire_requested, 2279 ipsec_policy_set, B_TRUE, verify_dst); 2280 if (error != 0) 2281 goto bad_addr; 2282 connp->conn_pkt_isv6 = B_TRUE; 2283 } 2284 } 2285 2286 /* Update conn_send and pktversion if v4/v6 changed */ 2287 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2288 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2289 } 2290 /* 2291 * Pass the IPSEC headers size in ire_ipsec_overhead. 2292 * We can't do this in ip_bind_insert_ire because the policy 2293 * may not have been inherited at that point in time and hence 2294 * conn_out_enforce_policy may not be set. 2295 */ 2296 mp1 = mp->b_cont; 2297 if (ire_requested && connp->conn_out_enforce_policy && 2298 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2299 ire_t *ire = (ire_t *)mp1->b_rptr; 2300 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2301 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2302 } 2303 2304 /* Send it home. 
*/ 2305 mp->b_datap->db_type = M_PCPROTO; 2306 tbr->PRIM_type = T_BIND_ACK; 2307 return (mp); 2308 2309 bad_addr: 2310 if (error == EINPROGRESS) 2311 return (NULL); 2312 if (error > 0) 2313 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2314 else 2315 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2316 return (mp); 2317 } 2318 2319 /* 2320 * Here address is verified to be a valid local address. 2321 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2322 * address is also considered a valid local address. 2323 * In the case of a multicast address, however, the 2324 * upper protocol is expected to reset the src address 2325 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2326 * no packets are emitted with multicast address as 2327 * source address. 2328 * The addresses valid for bind are: 2329 * (1) - in6addr_any 2330 * (2) - IP address of an UP interface 2331 * (3) - IP address of a DOWN interface 2332 * (4) - a multicast address. In this case 2333 * the conn will only receive packets destined to 2334 * the specified multicast address. Note: the 2335 * application still has to issue an 2336 * IPV6_JOIN_GROUP socket option. 2337 * 2338 * In all the above cases, the bound address must be valid in the current zone. 2339 * When the address is loopback or multicast, there might be many matching IREs 2340 * so bind has to look up based on the zone. 2341 */ 2342 static int 2343 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2344 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2345 boolean_t fanout_insert) 2346 { 2347 int error = 0; 2348 ire_t *src_ire = NULL; 2349 ipif_t *ipif = NULL; 2350 mblk_t *policy_mp; 2351 zoneid_t zoneid; 2352 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2353 2354 if (ipsec_policy_set) 2355 policy_mp = mp->b_cont; 2356 2357 /* 2358 * If it was previously connected, conn_fully_bound would have 2359 * been set. 
2360 */ 2361 connp->conn_fully_bound = B_FALSE; 2362 2363 zoneid = connp->conn_zoneid; 2364 2365 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2366 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2367 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2368 /* 2369 * If an address other than in6addr_any is requested, 2370 * we verify that it is a valid address for bind 2371 * Note: Following code is in if-else-if form for 2372 * readability compared to a condition check. 2373 */ 2374 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2375 if (IRE_IS_LOCAL(src_ire)) { 2376 /* 2377 * (2) Bind to address of local UP interface 2378 */ 2379 ipif = src_ire->ire_ipif; 2380 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2381 ipif_t *multi_ipif = NULL; 2382 ire_t *save_ire; 2383 /* 2384 * (4) bind to multicast address. 2385 * Fake out the IRE returned to upper 2386 * layer to be a broadcast IRE in 2387 * ip_bind_insert_ire_v6(). 2388 * Pass other information that matches 2389 * the ipif (e.g. the source address). 
2390 * conn_multicast_ill is only used for 2391 * IPv6 packets 2392 */ 2393 mutex_enter(&connp->conn_lock); 2394 if (connp->conn_multicast_ill != NULL) { 2395 (void) ipif_lookup_zoneid( 2396 connp->conn_multicast_ill, zoneid, 0, 2397 &multi_ipif); 2398 } else { 2399 /* 2400 * Look for default like 2401 * ip_wput_v6 2402 */ 2403 multi_ipif = ipif_lookup_group_v6( 2404 &ipv6_unspecified_group, zoneid, ipst); 2405 } 2406 mutex_exit(&connp->conn_lock); 2407 save_ire = src_ire; 2408 src_ire = NULL; 2409 if (multi_ipif == NULL || !ire_requested || 2410 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2411 src_ire = save_ire; 2412 error = EADDRNOTAVAIL; 2413 } else { 2414 ASSERT(src_ire != NULL); 2415 if (save_ire != NULL) 2416 ire_refrele(save_ire); 2417 } 2418 if (multi_ipif != NULL) 2419 ipif_refrele(multi_ipif); 2420 } else { 2421 *mp->b_wptr++ = (char)connp->conn_ulp; 2422 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2423 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2424 ipst); 2425 if (ipif == NULL) { 2426 if (error == EINPROGRESS) { 2427 if (src_ire != NULL) 2428 ire_refrele(src_ire); 2429 return (error); 2430 } 2431 /* 2432 * Not a valid address for bind 2433 */ 2434 error = EADDRNOTAVAIL; 2435 } else { 2436 ipif_refrele(ipif); 2437 } 2438 /* 2439 * Just to keep it consistent with the processing in 2440 * ip_bind_v6(). 2441 */ 2442 mp->b_wptr--; 2443 } 2444 2445 if (error != 0) { 2446 /* Red Alert! Attempting to be a bogon! */ 2447 if (ip_debug > 2) { 2448 /* ip1dbg */ 2449 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2450 " address %s\n", AF_INET6, v6src); 2451 } 2452 goto bad_addr; 2453 } 2454 } 2455 2456 /* 2457 * Allow setting new policies. For example, disconnects come 2458 * down as ipa_t bind. As we would have set conn_policy_cached 2459 * to B_TRUE before, we should set it to B_FALSE, so that policy 2460 * can change after the disconnect. 
2461 */ 2462 connp->conn_policy_cached = B_FALSE; 2463 2464 /* If not fanout_insert this was just an address verification */ 2465 if (fanout_insert) { 2466 /* 2467 * The addresses have been verified. Time to insert in 2468 * the correct fanout list. 2469 */ 2470 connp->conn_srcv6 = *v6src; 2471 connp->conn_remv6 = ipv6_all_zeros; 2472 connp->conn_lport = lport; 2473 connp->conn_fport = 0; 2474 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2475 } 2476 if (error == 0) { 2477 if (ire_requested) { 2478 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2479 ipst)) { 2480 error = -1; 2481 goto bad_addr; 2482 } 2483 } else if (ipsec_policy_set) { 2484 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2485 error = -1; 2486 goto bad_addr; 2487 } 2488 } 2489 } 2490 bad_addr: 2491 if (error != 0) { 2492 if (connp->conn_anon_port) { 2493 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2494 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2495 B_FALSE); 2496 } 2497 connp->conn_mlp_type = mlptSingle; 2498 } 2499 2500 if (src_ire != NULL) 2501 ire_refrele(src_ire); 2502 2503 if (ipsec_policy_set) { 2504 ASSERT(policy_mp != NULL); 2505 freeb(policy_mp); 2506 /* 2507 * As of now assume that nothing else accompanies 2508 * IPSEC_POLICY_SET. 
2509 */ 2510 mp->b_cont = NULL; 2511 } 2512 return (error); 2513 } 2514 2515 /* ARGSUSED */ 2516 static void 2517 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2518 void *dummy_arg) 2519 { 2520 conn_t *connp = NULL; 2521 t_scalar_t prim; 2522 2523 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2524 2525 if (CONN_Q(q)) 2526 connp = Q_TO_CONN(q); 2527 ASSERT(connp != NULL); 2528 2529 prim = ((union T_primitives *)mp->b_rptr)->type; 2530 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2531 2532 if (IPCL_IS_TCP(connp)) { 2533 /* Pass sticky_ipp for scope_id and pktinfo */ 2534 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2535 } else { 2536 /* For UDP and ICMP */ 2537 mp = ip_bind_v6(q, mp, connp, NULL); 2538 } 2539 if (mp != NULL) { 2540 if (IPCL_IS_TCP(connp)) { 2541 CONN_INC_REF(connp); 2542 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2543 ip_resume_tcp_bind, connp, SQ_FILL, 2544 SQTAG_TCP_RPUTOTHER); 2545 } else if (IPCL_IS_UDP(connp)) { 2546 udp_resume_bind(connp, mp); 2547 } else { 2548 ASSERT(IPCL_IS_RAWIP(connp)); 2549 rawip_resume_bind(connp, mp); 2550 } 2551 } 2552 } 2553 2554 /* 2555 * Verify that both the source and destination addresses 2556 * are valid. If verify_dst, then destination address must also be reachable, 2557 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2558 * It takes ip6_pkt_t * as one of the arguments to determine correct 2559 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2560 * destination address. Note that parameter ipp is only useful for TCP connect 2561 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2562 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2563 * 2564 */ 2565 static int 2566 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2567 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2568 boolean_t ire_requested, boolean_t ipsec_policy_set, 2569 boolean_t fanout_insert, boolean_t verify_dst) 2570 { 2571 ire_t *src_ire; 2572 ire_t *dst_ire; 2573 int error = 0; 2574 int protocol; 2575 mblk_t *policy_mp; 2576 ire_t *sire = NULL; 2577 ire_t *md_dst_ire = NULL; 2578 ill_t *md_ill = NULL; 2579 ill_t *dst_ill = NULL; 2580 ipif_t *src_ipif = NULL; 2581 zoneid_t zoneid; 2582 boolean_t ill_held = B_FALSE; 2583 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2584 2585 src_ire = dst_ire = NULL; 2586 /* 2587 * NOTE: The protocol is beyond the wptr because that's how 2588 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2589 */ 2590 protocol = *mp->b_wptr & 0xFF; 2591 2592 /* 2593 * If we never got a disconnect before, clear it now. 2594 */ 2595 connp->conn_fully_bound = B_FALSE; 2596 2597 if (ipsec_policy_set) { 2598 policy_mp = mp->b_cont; 2599 } 2600 2601 zoneid = connp->conn_zoneid; 2602 2603 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2604 ipif_t *ipif; 2605 2606 /* 2607 * Use an "emulated" IRE_BROADCAST to tell the transport it 2608 * is a multicast. 2609 * Pass other information that matches 2610 * the ipif (e.g. the source address). 
2611 * 2612 * conn_multicast_ill is only used for IPv6 packets 2613 */ 2614 mutex_enter(&connp->conn_lock); 2615 if (connp->conn_multicast_ill != NULL) { 2616 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2617 zoneid, 0, &ipif); 2618 } else { 2619 /* Look for default like ip_wput_v6 */ 2620 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2621 } 2622 mutex_exit(&connp->conn_lock); 2623 if (ipif == NULL || !ire_requested || 2624 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2625 if (ipif != NULL) 2626 ipif_refrele(ipif); 2627 if (ip_debug > 2) { 2628 /* ip1dbg */ 2629 pr_addr_dbg("ip_bind_connected_v6: bad " 2630 "connected multicast %s\n", AF_INET6, 2631 v6dst); 2632 } 2633 error = ENETUNREACH; 2634 goto bad_addr; 2635 } 2636 if (ipif != NULL) 2637 ipif_refrele(ipif); 2638 } else { 2639 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2640 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2641 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2642 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2643 ipst); 2644 /* 2645 * We also prevent ire's with src address INADDR_ANY to 2646 * be used, which are created temporarily for 2647 * sending out packets from endpoints that have 2648 * conn_unspec_src set. 2649 */ 2650 if (dst_ire == NULL || 2651 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2652 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2653 /* 2654 * When verifying destination reachability, we always 2655 * complain. 2656 * 2657 * When not verifying destination reachability but we 2658 * found an IRE, i.e. the destination is reachable, 2659 * then the other tests still apply and we complain. 
2660 */ 2661 if (verify_dst || (dst_ire != NULL)) { 2662 if (ip_debug > 2) { 2663 /* ip1dbg */ 2664 pr_addr_dbg("ip_bind_connected_v6: bad" 2665 " connected dst %s\n", AF_INET6, 2666 v6dst); 2667 } 2668 if (dst_ire == NULL || 2669 !(dst_ire->ire_type & IRE_HOST)) { 2670 error = ENETUNREACH; 2671 } else { 2672 error = EHOSTUNREACH; 2673 } 2674 goto bad_addr; 2675 } 2676 } 2677 } 2678 2679 /* 2680 * We now know that routing will allow us to reach the destination. 2681 * Check whether Trusted Solaris policy allows communication with this 2682 * host, and pretend that the destination is unreachable if not. 2683 * 2684 * This is never a problem for TCP, since that transport is known to 2685 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2686 * handling. If the remote is unreachable, it will be detected at that 2687 * point, so there's no reason to check it here. 2688 * 2689 * Note that for sendto (and other datagram-oriented friends), this 2690 * check is done as part of the data path label computation instead. 2691 * The check here is just to make non-TCP connect() report the right 2692 * error. 2693 */ 2694 if (dst_ire != NULL && is_system_labeled() && 2695 !IPCL_IS_TCP(connp) && 2696 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2697 connp->conn_mac_exempt, ipst) != 0) { 2698 error = EHOSTUNREACH; 2699 if (ip_debug > 2) { 2700 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2701 AF_INET6, v6dst); 2702 } 2703 goto bad_addr; 2704 } 2705 2706 /* 2707 * If the app does a connect(), it means that it will most likely 2708 * send more than 1 packet to the destination. It makes sense 2709 * to clear the temporary flag. 
2710 */ 2711 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2712 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2713 irb_t *irb = dst_ire->ire_bucket; 2714 2715 rw_enter(&irb->irb_lock, RW_WRITER); 2716 /* 2717 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2718 * the lock in order to guarantee irb_tmp_ire_cnt. 2719 */ 2720 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2721 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2722 irb->irb_tmp_ire_cnt--; 2723 } 2724 rw_exit(&irb->irb_lock); 2725 } 2726 2727 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2728 2729 /* 2730 * See if we should notify ULP about MDT; we do this whether or not 2731 * ire_requested is TRUE, in order to handle active connects; MDT 2732 * eligibility tests for passive connects are handled separately 2733 * through tcp_adapt_ire(). We do this before the source address 2734 * selection, because dst_ire may change after a call to 2735 * ipif_select_source_v6(). This is a best-effort check, as the 2736 * packet for this connection may not actually go through 2737 * dst_ire->ire_stq, and the exact IRE can only be known after 2738 * calling ip_newroute_v6(). This is why we further check on the 2739 * IRE during Multidata packet transmission in tcp_multisend(). 
2740 */ 2741 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2742 dst_ire != NULL && 2743 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2744 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2745 ILL_MDT_CAPABLE(md_ill)) { 2746 md_dst_ire = dst_ire; 2747 IRE_REFHOLD(md_dst_ire); 2748 } 2749 2750 if (dst_ire != NULL && 2751 dst_ire->ire_type == IRE_LOCAL && 2752 dst_ire->ire_zoneid != zoneid && 2753 dst_ire->ire_zoneid != ALL_ZONES) { 2754 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2755 zoneid, 0, NULL, 2756 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2757 MATCH_IRE_RJ_BHOLE, ipst); 2758 if (src_ire == NULL) { 2759 error = EHOSTUNREACH; 2760 goto bad_addr; 2761 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2762 if (!(src_ire->ire_type & IRE_HOST)) 2763 error = ENETUNREACH; 2764 else 2765 error = EHOSTUNREACH; 2766 goto bad_addr; 2767 } 2768 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2769 src_ipif = src_ire->ire_ipif; 2770 ipif_refhold(src_ipif); 2771 *v6src = src_ipif->ipif_v6lcl_addr; 2772 } 2773 ire_refrele(src_ire); 2774 src_ire = NULL; 2775 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2776 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2777 *v6src = sire->ire_src_addr_v6; 2778 ire_refrele(dst_ire); 2779 dst_ire = sire; 2780 sire = NULL; 2781 } else if (dst_ire->ire_type == IRE_CACHE && 2782 (dst_ire->ire_flags & RTF_SETSRC)) { 2783 ASSERT(dst_ire->ire_zoneid == zoneid || 2784 dst_ire->ire_zoneid == ALL_ZONES); 2785 *v6src = dst_ire->ire_src_addr_v6; 2786 } else { 2787 /* 2788 * Pick a source address so that a proper inbound load 2789 * spreading would happen. Use dst_ill specified by the 2790 * app. when socket option or scopeid is set. 
2791 */ 2792 int err; 2793 2794 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2795 uint_t if_index; 2796 2797 /* 2798 * Scope id or IPV6_PKTINFO 2799 */ 2800 2801 if_index = ipp->ipp_ifindex; 2802 dst_ill = ill_lookup_on_ifindex( 2803 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2804 ipst); 2805 if (dst_ill == NULL) { 2806 ip1dbg(("ip_bind_connected_v6:" 2807 " bad ifindex %d\n", if_index)); 2808 error = EADDRNOTAVAIL; 2809 goto bad_addr; 2810 } 2811 ill_held = B_TRUE; 2812 } else if (connp->conn_outgoing_ill != NULL) { 2813 /* 2814 * For IPV6_BOUND_IF socket option, 2815 * conn_outgoing_ill should be set 2816 * already in TCP or UDP/ICMP. 2817 */ 2818 dst_ill = conn_get_held_ill(connp, 2819 &connp->conn_outgoing_ill, &err); 2820 if (err == ILL_LOOKUP_FAILED) { 2821 ip1dbg(("ip_bind_connected_v6:" 2822 "no ill for bound_if\n")); 2823 error = EADDRNOTAVAIL; 2824 goto bad_addr; 2825 } 2826 ill_held = B_TRUE; 2827 } else if (dst_ire->ire_stq != NULL) { 2828 /* No need to hold ill here */ 2829 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2830 } else { 2831 /* No need to hold ill here */ 2832 dst_ill = dst_ire->ire_ipif->ipif_ill; 2833 } 2834 if (!ip6_asp_can_lookup(ipst)) { 2835 *mp->b_wptr++ = (char)protocol; 2836 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2837 ip_bind_connected_resume_v6); 2838 error = EINPROGRESS; 2839 goto refrele_and_quit; 2840 } 2841 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2842 RESTRICT_TO_NONE, connp->conn_src_preferences, 2843 zoneid); 2844 ip6_asp_table_refrele(ipst); 2845 if (src_ipif == NULL) { 2846 pr_addr_dbg("ip_bind_connected_v6: " 2847 "no usable source address for " 2848 "connection to %s\n", AF_INET6, v6dst); 2849 error = EADDRNOTAVAIL; 2850 goto bad_addr; 2851 } 2852 *v6src = src_ipif->ipif_v6lcl_addr; 2853 } 2854 } 2855 2856 /* 2857 * We do ire_route_lookup_v6() here (and not an interface lookup) 2858 * as we assert that v6src should only come from an 2859 * UP interface for hard binding. 
2860 */ 2861 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2862 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2863 2864 /* src_ire must be a local|loopback */ 2865 if (!IRE_IS_LOCAL(src_ire)) { 2866 if (ip_debug > 2) { 2867 /* ip1dbg */ 2868 pr_addr_dbg("ip_bind_connected_v6: bad " 2869 "connected src %s\n", AF_INET6, v6src); 2870 } 2871 error = EADDRNOTAVAIL; 2872 goto bad_addr; 2873 } 2874 2875 /* 2876 * If the source address is a loopback address, the 2877 * destination had best be local or multicast. 2878 * The transports that can't handle multicast will reject 2879 * those addresses. 2880 */ 2881 if (src_ire->ire_type == IRE_LOOPBACK && 2882 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2883 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2884 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2885 error = -1; 2886 goto bad_addr; 2887 } 2888 /* 2889 * Allow setting new policies. For example, disconnects come 2890 * down as ipa_t bind. As we would have set conn_policy_cached 2891 * to B_TRUE before, we should set it to B_FALSE, so that policy 2892 * can change after the disconnect. 2893 */ 2894 connp->conn_policy_cached = B_FALSE; 2895 2896 /* 2897 * The addresses have been verified. Initialize the conn 2898 * before calling the policy as they expect the conns 2899 * initialized. 2900 */ 2901 connp->conn_srcv6 = *v6src; 2902 connp->conn_remv6 = *v6dst; 2903 connp->conn_lport = lport; 2904 connp->conn_fport = fport; 2905 2906 ASSERT(!(ipsec_policy_set && ire_requested)); 2907 if (ire_requested) { 2908 iulp_t *ulp_info = NULL; 2909 2910 /* 2911 * Note that sire will not be NULL if this is an off-link 2912 * connection and there is not cache for that dest yet. 2913 * 2914 * XXX Because of an existing bug, if there are multiple 2915 * default routes, the IRE returned now may not be the actual 2916 * default route used (default routes are chosen in a 2917 * round robin fashion). 
So if the metrics for different 2918 * default routes are different, we may return the wrong 2919 * metrics. This will not be a problem if the existing 2920 * bug is fixed. 2921 */ 2922 if (sire != NULL) 2923 ulp_info = &(sire->ire_uinfo); 2924 2925 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2926 ipst)) { 2927 error = -1; 2928 goto bad_addr; 2929 } 2930 } else if (ipsec_policy_set) { 2931 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2932 error = -1; 2933 goto bad_addr; 2934 } 2935 } 2936 2937 /* 2938 * Cache IPsec policy in this conn. If we have per-socket policy, 2939 * we'll cache that. If we don't, we'll inherit global policy. 2940 * 2941 * We can't insert until the conn reflects the policy. Note that 2942 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2943 * connections where we don't have a policy. This is to prevent 2944 * global policy lookups in the inbound path. 2945 * 2946 * If we insert before we set conn_policy_cached, 2947 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2948 * because global policy cound be non-empty. We normally call 2949 * ipsec_check_policy() for conn_policy_cached connections only if 2950 * conn_in_enforce_policy is set. But in this case, 2951 * conn_policy_cached can get set anytime since we made the 2952 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2953 * is called, which will make the above assumption false. Thus, we 2954 * need to insert after we set conn_policy_cached. 2955 */ 2956 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2957 goto bad_addr; 2958 2959 /* If not fanout_insert this was just an address verification */ 2960 if (fanout_insert) { 2961 /* 2962 * The addresses have been verified. Time to insert in 2963 * the correct fanout list. 2964 */ 2965 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2966 connp->conn_ports, 2967 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2968 } 2969 if (error == 0) { 2970 connp->conn_fully_bound = B_TRUE; 2971 /* 2972 * Our initial checks for MDT have passed; the IRE is not 2973 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2974 * be supporting MDT. Pass the IRE, IPC and ILL into 2975 * ip_mdinfo_return(), which performs further checks 2976 * against them and upon success, returns the MDT info 2977 * mblk which we will attach to the bind acknowledgment. 2978 */ 2979 if (md_dst_ire != NULL) { 2980 mblk_t *mdinfo_mp; 2981 2982 ASSERT(md_ill != NULL); 2983 ASSERT(md_ill->ill_mdt_capab != NULL); 2984 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2985 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2986 linkb(mp, mdinfo_mp); 2987 } 2988 } 2989 bad_addr: 2990 if (ipsec_policy_set) { 2991 ASSERT(policy_mp != NULL); 2992 freeb(policy_mp); 2993 /* 2994 * As of now assume that nothing else accompanies 2995 * IPSEC_POLICY_SET. 2996 */ 2997 mp->b_cont = NULL; 2998 } 2999 refrele_and_quit: 3000 if (src_ire != NULL) 3001 IRE_REFRELE(src_ire); 3002 if (dst_ire != NULL) 3003 IRE_REFRELE(dst_ire); 3004 if (sire != NULL) 3005 IRE_REFRELE(sire); 3006 if (src_ipif != NULL) 3007 ipif_refrele(src_ipif); 3008 if (md_dst_ire != NULL) 3009 IRE_REFRELE(md_dst_ire); 3010 if (ill_held && dst_ill != NULL) 3011 ill_refrele(dst_ill); 3012 return (error); 3013 } 3014 3015 /* 3016 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3017 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3018 */ 3019 /* ARGSUSED4 */ 3020 static boolean_t 3021 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3022 iulp_t *ulp_info, ip_stack_t *ipst) 3023 { 3024 mblk_t *mp1; 3025 ire_t *ret_ire; 3026 3027 mp1 = mp->b_cont; 3028 ASSERT(mp1 != NULL); 3029 3030 if (ire != NULL) { 3031 /* 3032 * mp1 initialized above to IRE_DB_REQ_TYPE 3033 * appended mblk. Its <upper protocol>'s 3034 * job to make sure there is room. 
3035 */ 3036 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3037 return (B_FALSE); 3038 3039 mp1->b_datap->db_type = IRE_DB_TYPE; 3040 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3041 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3042 ret_ire = (ire_t *)mp1->b_rptr; 3043 if (IN6_IS_ADDR_MULTICAST(dst) || 3044 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3045 ret_ire->ire_type = IRE_BROADCAST; 3046 ret_ire->ire_addr_v6 = *dst; 3047 } 3048 if (ulp_info != NULL) { 3049 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3050 sizeof (iulp_t)); 3051 } 3052 ret_ire->ire_mp = mp1; 3053 } else { 3054 /* 3055 * No IRE was found. Remove IRE mblk. 3056 */ 3057 mp->b_cont = mp1->b_cont; 3058 freeb(mp1); 3059 } 3060 return (B_TRUE); 3061 } 3062 3063 /* 3064 * Add an ip6i_t header to the front of the mblk. 3065 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3066 * Returns NULL if allocation fails (and frees original message). 3067 * Used in outgoing path when going through ip_newroute_*v6(). 3068 * Used in incoming path to pass ifindex to transports. 
3069 */ 3070 mblk_t * 3071 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3072 { 3073 mblk_t *mp1; 3074 ip6i_t *ip6i; 3075 ip6_t *ip6h; 3076 3077 ip6h = (ip6_t *)mp->b_rptr; 3078 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3079 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3080 mp->b_datap->db_ref > 1) { 3081 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3082 if (mp1 == NULL) { 3083 freemsg(mp); 3084 return (NULL); 3085 } 3086 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3087 mp1->b_cont = mp; 3088 mp = mp1; 3089 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3090 } 3091 mp->b_rptr = (uchar_t *)ip6i; 3092 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3093 ip6i->ip6i_nxt = IPPROTO_RAW; 3094 if (ill != NULL) { 3095 ip6i->ip6i_flags = IP6I_IFINDEX; 3096 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3097 } else { 3098 ip6i->ip6i_flags = 0; 3099 } 3100 ip6i->ip6i_nexthop = *dst; 3101 return (mp); 3102 } 3103 3104 /* 3105 * Handle protocols with which IP is less intimate. There 3106 * can be more than one stream bound to a particular 3107 * protocol. When this is the case, normally each one gets a copy 3108 * of any incoming packets. 3109 * However, if the packet was tunneled and not multicast we only send to it 3110 * the first match. 3111 * 3112 * Zones notes: 3113 * Packets will be distributed to streams in all zones. This is really only 3114 * useful for ICMPv6 as only applications in the global zone can create raw 3115 * sockets for other protocols. 
3116 */ 3117 static void 3118 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3119 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3120 boolean_t mctl_present, zoneid_t zoneid) 3121 { 3122 queue_t *rq; 3123 mblk_t *mp1, *first_mp1; 3124 in6_addr_t dst = ip6h->ip6_dst; 3125 in6_addr_t src = ip6h->ip6_src; 3126 boolean_t one_only; 3127 mblk_t *first_mp = mp; 3128 boolean_t secure, shared_addr; 3129 conn_t *connp, *first_connp, *next_connp; 3130 connf_t *connfp; 3131 ip_stack_t *ipst = inill->ill_ipst; 3132 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3133 3134 if (mctl_present) { 3135 mp = first_mp->b_cont; 3136 secure = ipsec_in_is_secure(first_mp); 3137 ASSERT(mp != NULL); 3138 } else { 3139 secure = B_FALSE; 3140 } 3141 3142 /* 3143 * If the packet was tunneled and not multicast we only send to it 3144 * the first match. 3145 */ 3146 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3147 !IN6_IS_ADDR_MULTICAST(&dst)); 3148 3149 shared_addr = (zoneid == ALL_ZONES); 3150 if (shared_addr) { 3151 /* 3152 * We don't allow multilevel ports for raw IP, so no need to 3153 * check for that here. 3154 */ 3155 zoneid = tsol_packet_to_zoneid(mp); 3156 } 3157 3158 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3159 mutex_enter(&connfp->connf_lock); 3160 connp = connfp->connf_head; 3161 for (connp = connfp->connf_head; connp != NULL; 3162 connp = connp->conn_next) { 3163 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3164 zoneid) && 3165 (!is_system_labeled() || 3166 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3167 connp))) 3168 break; 3169 } 3170 3171 if (connp == NULL || connp->conn_upq == NULL) { 3172 /* 3173 * No one bound to this port. Is 3174 * there a client that wants all 3175 * unclaimed datagrams? 
3176 */ 3177 mutex_exit(&connfp->connf_lock); 3178 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3179 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3180 nexthdr_offset, mctl_present, zoneid, ipst)) { 3181 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3182 } 3183 3184 return; 3185 } 3186 3187 CONN_INC_REF(connp); 3188 first_connp = connp; 3189 3190 /* 3191 * XXX: Fix the multiple protocol listeners case. We should not 3192 * be walking the conn->next list here. 3193 */ 3194 if (one_only) { 3195 /* 3196 * Only send message to one tunnel driver by immediately 3197 * terminating the loop. 3198 */ 3199 connp = NULL; 3200 } else { 3201 connp = connp->conn_next; 3202 3203 } 3204 for (;;) { 3205 while (connp != NULL) { 3206 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3207 flags, zoneid) && 3208 (!is_system_labeled() || 3209 tsol_receive_local(mp, &dst, IPV6_VERSION, 3210 shared_addr, connp))) 3211 break; 3212 connp = connp->conn_next; 3213 } 3214 3215 /* 3216 * Just copy the data part alone. The mctl part is 3217 * needed just for verifying policy and it is never 3218 * sent up. 3219 */ 3220 if (connp == NULL || connp->conn_upq == NULL || 3221 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3222 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3223 /* 3224 * No more intested clients or memory 3225 * allocation failed 3226 */ 3227 connp = first_connp; 3228 break; 3229 } 3230 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3231 CONN_INC_REF(connp); 3232 mutex_exit(&connfp->connf_lock); 3233 rq = connp->conn_rq; 3234 /* 3235 * For link-local always add ifindex so that transport can set 3236 * sin6_scope_id. Avoid it for ICMP error fanout. 
3237 */ 3238 if ((connp->conn_ip_recvpktinfo || 3239 IN6_IS_ADDR_LINKLOCAL(&src)) && 3240 (flags & IP_FF_IPINFO)) { 3241 /* Add header */ 3242 mp1 = ip_add_info_v6(mp1, inill, &dst); 3243 } 3244 if (mp1 == NULL) { 3245 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3246 } else if (!canputnext(rq)) { 3247 if (flags & IP_FF_RAWIP) { 3248 BUMP_MIB(ill->ill_ip_mib, 3249 rawipIfStatsInOverflows); 3250 } else { 3251 BUMP_MIB(ill->ill_icmp6_mib, 3252 ipv6IfIcmpInOverflows); 3253 } 3254 3255 freemsg(mp1); 3256 } else { 3257 /* 3258 * Don't enforce here if we're a tunnel - let "tun" do 3259 * it instead. 3260 */ 3261 if (!IPCL_IS_IPTUN(connp) && 3262 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3263 secure)) { 3264 first_mp1 = ipsec_check_inbound_policy( 3265 first_mp1, connp, NULL, ip6h, mctl_present); 3266 } 3267 if (first_mp1 != NULL) { 3268 if (mctl_present) 3269 freeb(first_mp1); 3270 BUMP_MIB(ill->ill_ip_mib, 3271 ipIfStatsHCInDelivers); 3272 (connp->conn_recv)(connp, mp1, NULL); 3273 } 3274 } 3275 mutex_enter(&connfp->connf_lock); 3276 /* Follow the next pointer before releasing the conn. */ 3277 next_connp = connp->conn_next; 3278 CONN_DEC_REF(connp); 3279 connp = next_connp; 3280 } 3281 3282 /* Last one. Send it upstream. */ 3283 mutex_exit(&connfp->connf_lock); 3284 3285 /* Initiate IPPF processing */ 3286 if (IP6_IN_IPP(flags, ipst)) { 3287 uint_t ifindex; 3288 3289 mutex_enter(&ill->ill_lock); 3290 ifindex = ill->ill_phyint->phyint_ifindex; 3291 mutex_exit(&ill->ill_lock); 3292 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3293 if (mp == NULL) { 3294 CONN_DEC_REF(connp); 3295 if (mctl_present) 3296 freeb(first_mp); 3297 return; 3298 } 3299 } 3300 3301 /* 3302 * For link-local always add ifindex so that transport can set 3303 * sin6_scope_id. Avoid it for ICMP error fanout. 
3304 */ 3305 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3306 (flags & IP_FF_IPINFO)) { 3307 /* Add header */ 3308 mp = ip_add_info_v6(mp, inill, &dst); 3309 if (mp == NULL) { 3310 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3311 CONN_DEC_REF(connp); 3312 if (mctl_present) 3313 freeb(first_mp); 3314 return; 3315 } else if (mctl_present) { 3316 first_mp->b_cont = mp; 3317 } else { 3318 first_mp = mp; 3319 } 3320 } 3321 3322 rq = connp->conn_rq; 3323 if (!canputnext(rq)) { 3324 if (flags & IP_FF_RAWIP) { 3325 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3326 } else { 3327 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3328 } 3329 3330 freemsg(first_mp); 3331 } else { 3332 if (IPCL_IS_IPTUN(connp)) { 3333 /* 3334 * Tunneled packet. We enforce policy in the tunnel 3335 * module itself. 3336 * 3337 * Send the WHOLE packet up (incl. IPSEC_IN) without 3338 * a policy check. 3339 */ 3340 putnext(rq, first_mp); 3341 CONN_DEC_REF(connp); 3342 return; 3343 } 3344 /* 3345 * Don't enforce here if we're a tunnel - let "tun" do 3346 * it instead. 3347 */ 3348 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3349 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3350 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3351 NULL, ip6h, mctl_present); 3352 if (first_mp == NULL) { 3353 CONN_DEC_REF(connp); 3354 return; 3355 } 3356 } 3357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3358 (connp->conn_recv)(connp, mp, NULL); 3359 if (mctl_present) 3360 freeb(first_mp); 3361 } 3362 CONN_DEC_REF(connp); 3363 } 3364 3365 /* 3366 * Send an ICMP error after patching up the packet appropriately. Returns 3367 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3368 */ 3369 int 3370 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3371 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3372 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3373 { 3374 ip6_t *ip6h; 3375 mblk_t *first_mp; 3376 boolean_t secure; 3377 unsigned char db_type; 3378 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3379 3380 first_mp = mp; 3381 if (mctl_present) { 3382 mp = mp->b_cont; 3383 secure = ipsec_in_is_secure(first_mp); 3384 ASSERT(mp != NULL); 3385 } else { 3386 /* 3387 * If this is an ICMP error being reported - which goes 3388 * up as M_CTLs, we need to convert them to M_DATA till 3389 * we finish checking with global policy because 3390 * ipsec_check_global_policy() assumes M_DATA as clear 3391 * and M_CTL as secure. 3392 */ 3393 db_type = mp->b_datap->db_type; 3394 mp->b_datap->db_type = M_DATA; 3395 secure = B_FALSE; 3396 } 3397 /* 3398 * We are generating an icmp error for some inbound packet. 3399 * Called from all ip_fanout_(udp, tcp, proto) functions. 3400 * Before we generate an error, check with global policy 3401 * to see whether this is allowed to enter the system. As 3402 * there is no "conn", we are checking with global policy. 
3403 */ 3404 ip6h = (ip6_t *)mp->b_rptr; 3405 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3406 first_mp = ipsec_check_global_policy(first_mp, NULL, 3407 NULL, ip6h, mctl_present, ipst->ips_netstack); 3408 if (first_mp == NULL) 3409 return (0); 3410 } 3411 3412 if (!mctl_present) 3413 mp->b_datap->db_type = db_type; 3414 3415 if (flags & IP_FF_SEND_ICMP) { 3416 if (flags & IP_FF_HDR_COMPLETE) { 3417 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3418 freemsg(first_mp); 3419 return (1); 3420 } 3421 } 3422 switch (icmp_type) { 3423 case ICMP6_DST_UNREACH: 3424 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3425 B_FALSE, B_FALSE, zoneid, ipst); 3426 break; 3427 case ICMP6_PARAM_PROB: 3428 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3429 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3430 break; 3431 default: 3432 #ifdef DEBUG 3433 panic("ip_fanout_send_icmp_v6: wrong type"); 3434 /*NOTREACHED*/ 3435 #else 3436 freemsg(first_mp); 3437 break; 3438 #endif 3439 } 3440 } else { 3441 freemsg(first_mp); 3442 return (0); 3443 } 3444 3445 return (1); 3446 } 3447 3448 3449 /* 3450 * Fanout for TCP packets 3451 * The caller puts <fport, lport> in the ports parameter. 
 */
/*
 * Outline of ip_fanout_tcp_v6():
 *
 *  1. Classify the packet with ipcl_classify_v6().  With no conn, or a conn
 *     that conn_wantpacket_v6() rejects, hand the packet to
 *     tcp_xmit_listeners_reset() and return.
 *  2. For a pure SYN (SYN set; ACK, RST and URG clear) arriving on an
 *     IPCL_TCP conn, pick a squeue, mark the dblk STRUIO_EAGER and stash the
 *     squeue pointer and the ifindex in the dblk's checksum fields.
 *  3. For a bound TCP conn without such a SYN: drop RST/URG segments, reset
 *     on ACK, drop anything else.
 *  4. Run the inbound IPsec policy check, IPPF processing and (for non-SYN
 *     packets) the optional ip_add_info_v6() header, then deliver either
 *     through the conn's squeue (TCP) or directly through conn_recv
 *     (raw socket bound to IPPROTO_TCP).
 *
 * mp may be preceded by an M_CTL (mctl_present); first_mp names the head of
 * the message and mp the data part.
 */
static void
ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill,
    uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid)
{
	mblk_t *first_mp;
	boolean_t secure;
	conn_t *connp;
	tcph_t *tcph;
	boolean_t syn_present = B_FALSE;
	ip_stack_t *ipst = inill->ill_ipst;
	ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst);

	if (connp == NULL ||
	    !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) {
		/*
		 * No hard-bound match. Send Reset.
		 */
		dblk_t *dp = mp->b_datap;
		uint32_t ill_index;

		ASSERT((dp->db_struioflag & STRUIO_IP) == 0);

		/*
		 * Initiate IPPf processing, if needed.
		 *
		 * NOTE(review): every other IPPF call site in this file uses
		 * IP6_IN_IPP(flags, ipst); here the IP6_NO_IPPOLICY bit is
		 * tested without negation, which looks inverted.  Confirm
		 * against the IP6_IN_IPP definition before changing it.
		 */
		if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
		    (flags & IP6_NO_IPPOLICY)) {
			ill_index = ill->ill_phyint->phyint_ifindex;
			ip_process(IPP_LOCAL_IN, &first_mp, ill_index);
			if (first_mp == NULL) {
				if (connp != NULL)
					CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid,
		    ipst->ips_netstack->netstack_tcp, connp);
		if (connp != NULL)
			CONN_DEC_REF(connp);
		return;
	}

	/* The TCP header starts hdr_len bytes into the data mblk. */
	tcph = (tcph_t *)&mp->b_rptr[hdr_len];
	if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) {
		if (connp->conn_flags & IPCL_TCP) {
			squeue_t *sqp;

			/*
			 * For fused tcp loopback, assign the eager's
			 * squeue to be that of the active connect's.
			 */
			if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion &&
			    !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) &&
			    !secure &&
			    !IP6_IN_IPP(flags, ipst)) {
				ASSERT(Q_TO_CONN(q) != NULL);
				sqp = Q_TO_CONN(q)->conn_sqp;
			} else {
				sqp = IP_SQUEUE_GET(lbolt);
			}

			/* Tag the SYN and carry the chosen squeue with it. */
			mp->b_datap->db_struioflag |= STRUIO_EAGER;
			DB_CKSUMSTART(mp) = (intptr_t)sqp;

			/*
			 * db_cksumstuff is unused in the incoming
			 * path; Thus store the ifindex here. It will
			 * be cleared in tcp_conn_create_v6().
			 */
			DB_CKSUMSTUFF(mp) =
			    (intptr_t)ill->ill_phyint->phyint_ifindex;
			syn_present = B_TRUE;
		}
	}

	if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) {
		/*
		 * NOTE(review): this local shadows the `flags' parameter for
		 * the rest of this block; here it holds the TCP header flag
		 * byte, not the IP_FF_* fanout flags.
		 */
		uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF;
		if ((flags & TH_RST) || (flags & TH_URG)) {
			/* Silently drop RST/URG segments. */
			CONN_DEC_REF(connp);
			freemsg(first_mp);
			return;
		}
		if (flags & TH_ACK) {
			/* Answer an ACK with a reset. */
			tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid,
			    ipst->ips_netstack->netstack_tcp, connp);
			CONN_DEC_REF(connp);
			return;
		}

		/* Anything else is dropped as well. */
		CONN_DEC_REF(connp);
		freemsg(first_mp);
		return;
	}

	if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    NULL, ip6h, mctl_present);
		if (first_mp == NULL) {
			CONN_DEC_REF(connp);
			return;
		}
		if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) {
			/*
			 * Only a SYN can get here for a bound TCP conn; the
			 * non-SYN case returned above.  Flag the message
			 * with STRUIO_POLICY.
			 */
			ASSERT(syn_present);
			if (mctl_present) {
				ASSERT(first_mp != mp);
				first_mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			} else {
				ASSERT(first_mp == mp);
				mp->b_datap->db_struioflag &=
				    ~STRUIO_EAGER;
				mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			}
		} else {
			/*
			 * Discard first_mp early since we're dealing with a
			 * fully-connected conn_t and tcp doesn't do policy in
			 * this case. Also, if someone is bound to IPPROTO_TCP
			 * over raw IP, they don't expect to see a M_CTL.
			 */
			if (mctl_present) {
				freeb(first_mp);
				mctl_present = B_FALSE;
			}
			first_mp = mp;
		}
	}

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags, ipst)) {
		uint_t ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		} else if (mctl_present) {
			/*
			 * ip_process() was handed &mp and might have left a
			 * new mp there; re-link it behind the M_CTL.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/*
	 * For link-local always add ifindex so that TCP can bind to that
	 * interface. Avoid it for ICMP error fanout.
	 */
	if (!syn_present && ((connp->conn_ip_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) &&
	    (flags & IP_FF_IPINFO))) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
	if (IPCL_IS_TCP(connp)) {
		/* No CONN_DEC_REF on this path, unlike the raw case below. */
		SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv,
		    connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
	} else {
		/* SOCK_RAW, IPPROTO_TCP case */
		(connp->conn_recv)(connp, first_mp, NULL);
		CONN_DEC_REF(connp);
	}
}

/*
 * Fanout for UDP packets.
 * The caller puts <fport, lport> in the ports parameter.
 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
 *
 * If SO_REUSEADDR is set all multicast and broadcast packets
 * will be delivered to all streams bound to the same port.
 *
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 *
 * Implementation notes:
 *  - mp may be preceded by an M_CTL (mctl_present); first_mp names the head
 *    of the message and mp the data part.
 *  - A non-multicast destination is delivered to the first matching conn
 *    only.  For a multicast destination the first match is remembered in
 *    first_conn, every further match receives a dupmsg()/ip_copymsg() copy,
 *    and the remembered conn receives the original message last.
 *  - connf_lock is dropped around each delivery; a CONN_INC_REF reference is
 *    held on the conn across that window.
 *  - With no match the packet goes to ip_fanout_proto_v6() if anything is on
 *    the IPPROTO_UDP protocol fanout list, otherwise an ICMPv6 port
 *    unreachable is requested through ip_fanout_send_icmp_v6().
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t dstport, srcport;
	in6_addr_t dst;
	mblk_t *first_mp;
	boolean_t secure;
	conn_t *connp;
	connf_t *connfp;
	conn_t *first_conn;
	conn_t *next_conn;
	mblk_t *mp1, *first_mp1;
	in6_addr_t src;
	boolean_t shared_addr;
	ip_stack_t *ipst = inill->ill_ipst;
	ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/* Extract ports in net byte order */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		/*
		 * No need to handle exclusive-stack zones since ALL_ZONES
		 * only applies to the shared stack.
		 */
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && IPCL_ZONE_MATCH(connp, zoneid) &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		/* Flow-controlled receiver: drop (no MIB counter here). */
		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags, ipst)) {
			uint_t ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ip_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IPINFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		(connp->conn_recv)(connp, mp, NULL);

		IP6_STAT(ipst, ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data part went up; release the M_CTL. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/*
	 * Multicast destination: find the first interested conn.  Unlike
	 * the unicast search above there is no IPCL_ZONE_MATCH() here.
	 */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ip_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IPINFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			/* Only the copied M_CTL (if any) is left to free. */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

			/* Send it upstream */
			(connp->conn_recv)(connp, mp1, NULL);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ipst, ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags, ipst)) {
		uint_t ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ip_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
		/* Data only; any M_CTL is released by the freeb() below. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		(connp->conn_recv)(connp, mp, NULL);
	}
	IP6_STAT(ipst, ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 */
	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid, ipst)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 *	- Set extension header pointers to appropriate locations
 *	- Determine IPv6 header length and return it
 *	- Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  A fragment
 * header is recorded in ipp_fraghdr (first one only) and then stepped
 * over like the other extension headers.  This routine deals with
 * malformed packets of various sorts in which case the returned length
 * is up to the malformed part.
 *
 * Only the bytes between the fixed header and mp->b_wptr are examined.
 * nexthdrp may be NULL, in which case the last nexthdr value is not
 * returned.
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* Length field counts 8-byte units past the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			tmpfraghdr = (ip6_frag_t *)whereptr;
			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
				ipp->ipp_fields |= IPPF_FRAGHDR;
				ipp->ipp_fraghdr = tmpfraghdr;
				ipp->ipp_fraghdrlen = ehdrlen;
			}
			break;
		case IPPROTO_NONE:
		default:
			/* Not an extension header handled here: stop. */
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in the parts of an IPv6 header that may still be missing: an
 * unspecified source address is replaced with the address of the IRE
 * returned by ire_lookup_local_v6(), and ip6_vcf and ip6_hops are set to
 * IPV6_DEFAULT_VERS_AND_FLOW and the stack's ips_ipv6_def_hops.
 *
 * Returns 0 on success, or 1 (leaving the header untouched) if a source
 * address is needed and no local IRE can be found.
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst)
{
	ire_t *ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid, ipst);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		/* Drop the hold on the looked-up IRE. */
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4133 */ 4134 boolean_t 4135 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4136 uint8_t **nexthdrpp) 4137 { 4138 uint16_t length; 4139 uint_t ehdrlen; 4140 uint8_t *nexthdrp; 4141 uint8_t *whereptr; 4142 uint8_t *endptr; 4143 ip6_dest_t *desthdr; 4144 ip6_rthdr_t *rthdr; 4145 ip6_frag_t *fraghdr; 4146 4147 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4148 length = IPV6_HDR_LEN; 4149 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4150 endptr = mp->b_wptr; 4151 4152 nexthdrp = &ip6h->ip6_nxt; 4153 while (whereptr < endptr) { 4154 /* Is there enough left for len + nexthdr? */ 4155 if (whereptr + MIN_EHDR_LEN > endptr) 4156 break; 4157 4158 switch (*nexthdrp) { 4159 case IPPROTO_HOPOPTS: 4160 case IPPROTO_DSTOPTS: 4161 /* Assumes the headers are identical for hbh and dst */ 4162 desthdr = (ip6_dest_t *)whereptr; 4163 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4164 if ((uchar_t *)desthdr + ehdrlen > endptr) 4165 return (B_FALSE); 4166 nexthdrp = &desthdr->ip6d_nxt; 4167 break; 4168 case IPPROTO_ROUTING: 4169 rthdr = (ip6_rthdr_t *)whereptr; 4170 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4171 if ((uchar_t *)rthdr + ehdrlen > endptr) 4172 return (B_FALSE); 4173 nexthdrp = &rthdr->ip6r_nxt; 4174 break; 4175 case IPPROTO_FRAGMENT: 4176 fraghdr = (ip6_frag_t *)whereptr; 4177 ehdrlen = sizeof (ip6_frag_t); 4178 if ((uchar_t *)&fraghdr[1] > endptr) 4179 return (B_FALSE); 4180 nexthdrp = &fraghdr->ip6f_nxt; 4181 break; 4182 case IPPROTO_NONE: 4183 /* No next header means we're finished */ 4184 default: 4185 *hdr_length_ptr = length; 4186 *nexthdrpp = nexthdrp; 4187 return (B_TRUE); 4188 } 4189 length += ehdrlen; 4190 whereptr += ehdrlen; 4191 *hdr_length_ptr = length; 4192 *nexthdrpp = nexthdrp; 4193 } 4194 switch (*nexthdrp) { 4195 case IPPROTO_HOPOPTS: 4196 case IPPROTO_DSTOPTS: 4197 case IPPROTO_ROUTING: 4198 case IPPROTO_FRAGMENT: 4199 /* 4200 * If any know extension headers are still to be processed, 4201 * the 
packet's malformed (or at least all the IP header(s) are 4202 * not in the same mblk - and that should never happen. 4203 */ 4204 return (B_FALSE); 4205 4206 default: 4207 /* 4208 * If we get here, we know that all of the IP headers were in 4209 * the same mblk, even if the ULP header is in the next mblk. 4210 */ 4211 *hdr_length_ptr = length; 4212 *nexthdrpp = nexthdrp; 4213 return (B_TRUE); 4214 } 4215 } 4216 4217 /* 4218 * Return the length of the IPv6 related headers (including extension headers) 4219 * Returns a length even if the packet is malformed. 4220 */ 4221 int 4222 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4223 { 4224 uint16_t hdr_len; 4225 uint8_t *nexthdrp; 4226 4227 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4228 return (hdr_len); 4229 } 4230 4231 /* 4232 * Select an ill for the packet by considering load spreading across 4233 * a different ill in the group if dst_ill is part of some group. 4234 */ 4235 static ill_t * 4236 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4237 { 4238 ill_t *ill; 4239 4240 /* 4241 * We schedule irrespective of whether the source address is 4242 * INADDR_UNSPECIED or not. 4243 */ 4244 ill = illgrp_scheduler(dst_ill); 4245 if (ill == NULL) 4246 return (NULL); 4247 4248 /* 4249 * For groups with names ip_sioctl_groupname ensures that all 4250 * ills are of same type. For groups without names, ifgrp_insert 4251 * ensures this. 4252 */ 4253 ASSERT(dst_ill->ill_type == ill->ill_type); 4254 4255 return (ill); 4256 } 4257 4258 /* 4259 * IPv6 - 4260 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4261 * to send out a packet to a destination address for which we do not have 4262 * specific routing information. 4263 * 4264 * Handle non-multicast packets. If ill is non-NULL the match is done 4265 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. Its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4304 */ 4305 /* ARGSUSED */ 4306 void 4307 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4308 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4309 { 4310 in6_addr_t v6gw; 4311 in6_addr_t dst; 4312 ire_t *ire = NULL; 4313 ipif_t *src_ipif = NULL; 4314 ill_t *dst_ill = NULL; 4315 ire_t *sire = NULL; 4316 ire_t *save_ire; 4317 ip6_t *ip6h; 4318 int err = 0; 4319 mblk_t *first_mp; 4320 ipsec_out_t *io; 4321 ill_t *attach_ill = NULL; 4322 ushort_t ire_marks = 0; 4323 int match_flags; 4324 boolean_t ip6i_present; 4325 ire_t *first_sire = NULL; 4326 mblk_t *copy_mp = NULL; 4327 mblk_t *xmit_mp = NULL; 4328 in6_addr_t save_dst; 4329 uint32_t multirt_flags = 4330 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4331 boolean_t multirt_is_resolvable; 4332 boolean_t multirt_resolve_next; 4333 boolean_t need_rele = B_FALSE; 4334 boolean_t do_attach_ill = B_FALSE; 4335 boolean_t ip6_asp_table_held = B_FALSE; 4336 tsol_ire_gw_secattr_t *attrp = NULL; 4337 tsol_gcgrp_t *gcgrp = NULL; 4338 tsol_gcgrp_addr_t ga; 4339 4340 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4341 4342 first_mp = mp; 4343 if (mp->b_datap->db_type == M_CTL) { 4344 mp = mp->b_cont; 4345 io = (ipsec_out_t *)first_mp->b_rptr; 4346 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4347 } else { 4348 io = NULL; 4349 } 4350 4351 /* 4352 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4353 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4354 * could be NULL. 4355 * 4356 * This information can appear either in an ip6i_t or an IPSEC_OUT 4357 * message. 4358 */ 4359 ip6h = (ip6_t *)mp->b_rptr; 4360 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4361 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4362 if (!ip6i_present || 4363 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4364 attach_ill = ip_grab_attach_ill(ill, first_mp, 4365 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4366 io->ipsec_out_ill_index), B_TRUE, ipst); 4367 /* Failure case frees things for us. */ 4368 if (attach_ill == NULL) 4369 return; 4370 4371 /* 4372 * Check if we need an ire that will not be 4373 * looked up by anybody else i.e. HIDDEN. 4374 */ 4375 if (ill_is_probeonly(attach_ill)) 4376 ire_marks = IRE_MARK_HIDDEN; 4377 } 4378 } 4379 4380 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4381 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4382 goto icmp_err_ret; 4383 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4384 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4385 goto icmp_err_ret; 4386 } 4387 4388 /* 4389 * If this IRE is created for forwarding or it is not for 4390 * TCP traffic, mark it as temporary. 4391 * 4392 * Is it sufficient just to check the next header?? 4393 */ 4394 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4395 ire_marks |= IRE_MARK_TEMPORARY; 4396 4397 /* 4398 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4399 * chain until it gets the most specific information available. 4400 * For example, we know that there is no IRE_CACHE for this dest, 4401 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4402 * ire_ftable_lookup_v6 will look up the gateway, etc. 4403 */ 4404 4405 if (ill == NULL) { 4406 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4407 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4408 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4409 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4410 match_flags, ipst); 4411 /* 4412 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4413 * in a NULL ill, but the packet could be a neighbor 4414 * solicitation/advertisment and could have a valid attach_ill. 4415 */ 4416 if (attach_ill != NULL) 4417 ill_refrele(attach_ill); 4418 } else { 4419 if (attach_ill != NULL) { 4420 /* 4421 * attach_ill is set only for communicating with 4422 * on-link hosts. 
So, don't look for DEFAULT. 4423 * ip_wput_v6 passes the right ill in this case and 4424 * hence we can assert. 4425 */ 4426 ASSERT(ill == attach_ill); 4427 ill_refrele(attach_ill); 4428 do_attach_ill = B_TRUE; 4429 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4430 } else { 4431 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4432 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4433 } 4434 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4435 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4436 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4437 } 4438 4439 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4440 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4441 4442 /* 4443 * We enter a loop that will be run only once in most cases. 4444 * The loop is re-entered in the case where the destination 4445 * can be reached through multiple RTF_MULTIRT-flagged routes. 4446 * The intention is to compute multiple routes to a single 4447 * destination in a single ip_newroute_v6 call. 4448 * The information is contained in sire->ire_flags. 4449 */ 4450 do { 4451 multirt_resolve_next = B_FALSE; 4452 4453 if (dst_ill != NULL) { 4454 ill_refrele(dst_ill); 4455 dst_ill = NULL; 4456 } 4457 if (src_ipif != NULL) { 4458 ipif_refrele(src_ipif); 4459 src_ipif = NULL; 4460 } 4461 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4462 ip3dbg(("ip_newroute_v6: starting new resolution " 4463 "with first_mp %p, tag %d\n", 4464 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4465 4466 /* 4467 * We check if there are trailing unresolved routes for 4468 * the destination contained in sire. 
4469 */ 4470 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4471 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4472 4473 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4474 "ire %p, sire %p\n", 4475 multirt_is_resolvable, (void *)ire, (void *)sire)); 4476 4477 if (!multirt_is_resolvable) { 4478 /* 4479 * No more multirt routes to resolve; give up 4480 * (all routes resolved or no more resolvable 4481 * routes). 4482 */ 4483 if (ire != NULL) { 4484 ire_refrele(ire); 4485 ire = NULL; 4486 } 4487 } else { 4488 ASSERT(sire != NULL); 4489 ASSERT(ire != NULL); 4490 /* 4491 * We simply use first_sire as a flag that 4492 * indicates if a resolvable multirt route has 4493 * already been found during the preceding 4494 * loops. If it is not the case, we may have 4495 * to send an ICMP error to report that the 4496 * destination is unreachable. We do not 4497 * IRE_REFHOLD first_sire. 4498 */ 4499 if (first_sire == NULL) { 4500 first_sire = sire; 4501 } 4502 } 4503 } 4504 if ((ire == NULL) || (ire == sire)) { 4505 /* 4506 * either ire == NULL (the destination cannot be 4507 * resolved) or ire == sire (the gateway cannot be 4508 * resolved). At this point, there are no more routes 4509 * to resolve for the destination, thus we exit. 4510 */ 4511 if (ip_debug > 3) { 4512 /* ip2dbg */ 4513 pr_addr_dbg("ip_newroute_v6: " 4514 "can't resolve %s\n", AF_INET6, v6dstp); 4515 } 4516 ip3dbg(("ip_newroute_v6: " 4517 "ire %p, sire %p, first_sire %p\n", 4518 (void *)ire, (void *)sire, (void *)first_sire)); 4519 4520 if (sire != NULL) { 4521 ire_refrele(sire); 4522 sire = NULL; 4523 } 4524 4525 if (first_sire != NULL) { 4526 /* 4527 * At least one multirt route has been found 4528 * in the same ip_newroute() call; there is no 4529 * need to report an ICMP error. 4530 * first_sire was not IRE_REFHOLDed. 
4531 */ 4532 MULTIRT_DEBUG_UNTAG(first_mp); 4533 freemsg(first_mp); 4534 return; 4535 } 4536 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4537 RTA_DST, ipst); 4538 goto icmp_err_ret; 4539 } 4540 4541 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4542 4543 /* 4544 * Verify that the returned IRE does not have either the 4545 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4546 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4547 */ 4548 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4549 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4550 goto icmp_err_ret; 4551 4552 /* 4553 * Increment the ire_ob_pkt_count field for ire if it is an 4554 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4555 * increment the same for the parent IRE, sire, if it is some 4556 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4557 */ 4558 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4559 UPDATE_OB_PKT_COUNT(ire); 4560 ire->ire_last_used_time = lbolt; 4561 } 4562 4563 if (sire != NULL) { 4564 mutex_enter(&sire->ire_lock); 4565 v6gw = sire->ire_gateway_addr_v6; 4566 mutex_exit(&sire->ire_lock); 4567 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4568 IRE_INTERFACE)) == 0); 4569 UPDATE_OB_PKT_COUNT(sire); 4570 sire->ire_last_used_time = lbolt; 4571 } else { 4572 v6gw = ipv6_all_zeros; 4573 } 4574 4575 /* 4576 * We have a route to reach the destination. 4577 * 4578 * 1) If the interface is part of ill group, try to get a new 4579 * ill taking load spreading into account. 4580 * 4581 * 2) After selecting the ill, get a source address that might 4582 * create good inbound load spreading and that matches the 4583 * right scope. ipif_select_source_v6 does this for us. 4584 * 4585 * If the application specified the ill (ifindex), we still 4586 * load spread. Only if the packets needs to go out specifically 4587 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4588 * IPV6_BOUND_PIF we don't try to use a different ill for load 4589 * spreading. 4590 */ 4591 if (!do_attach_ill) { 4592 /* 4593 * If the interface belongs to an interface group, 4594 * make sure the next possible interface in the group 4595 * is used. This encourages load spreading among 4596 * peers in an interface group. However, in the case 4597 * of multirouting, load spreading is not used, as we 4598 * actually want to replicate outgoing packets through 4599 * particular interfaces. 4600 * 4601 * Note: While we pick a dst_ill we are really only 4602 * interested in the ill for load spreading. 4603 * The source ipif is determined by source address 4604 * selection below. 4605 */ 4606 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4607 dst_ill = ire->ire_ipif->ipif_ill; 4608 /* For uniformity do a refhold */ 4609 ill_refhold(dst_ill); 4610 } else { 4611 /* 4612 * If we are here trying to create an IRE_CACHE 4613 * for an offlink destination and have the 4614 * IRE_CACHE for the next hop and the latter is 4615 * using virtual IP source address selection i.e 4616 * it's ire->ire_ipif is pointing to a virtual 4617 * network interface (vni) then 4618 * ip_newroute_get_dst_ll() will return the vni 4619 * interface as the dst_ill. Since the vni is 4620 * virtual i.e not associated with any physical 4621 * interface, it cannot be the dst_ill, hence 4622 * in such a case call ip_newroute_get_dst_ll() 4623 * with the stq_ill instead of the ire_ipif ILL. 4624 * The function returns a refheld ill. 
4625 */ 4626 if ((ire->ire_type == IRE_CACHE) && 4627 IS_VNI(ire->ire_ipif->ipif_ill)) 4628 dst_ill = ip_newroute_get_dst_ill_v6( 4629 ire->ire_stq->q_ptr); 4630 else 4631 dst_ill = ip_newroute_get_dst_ill_v6( 4632 ire->ire_ipif->ipif_ill); 4633 } 4634 if (dst_ill == NULL) { 4635 if (ip_debug > 2) { 4636 pr_addr_dbg("ip_newroute_v6 : no dst " 4637 "ill for dst %s\n", 4638 AF_INET6, v6dstp); 4639 } 4640 goto icmp_err_ret; 4641 } else if (dst_ill->ill_group == NULL && ill != NULL && 4642 dst_ill != ill) { 4643 /* 4644 * If "ill" is not part of any group, we should 4645 * have found a route matching "ill" as we 4646 * called ire_ftable_lookup_v6 with 4647 * MATCH_IRE_ILL_GROUP. 4648 * Rather than asserting when there is a 4649 * mismatch, we just drop the packet. 4650 */ 4651 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4652 "dst_ill %s ill %s\n", 4653 dst_ill->ill_name, 4654 ill->ill_name)); 4655 goto icmp_err_ret; 4656 } 4657 } else { 4658 dst_ill = ire->ire_ipif->ipif_ill; 4659 /* For uniformity do refhold */ 4660 ill_refhold(dst_ill); 4661 /* 4662 * We should have found a route matching ill as we 4663 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4664 * Rather than asserting, while there is a mismatch, 4665 * we just drop the packet. 4666 */ 4667 if (dst_ill != ill) { 4668 ip0dbg(("ip_newroute_v6: Packet dropped as " 4669 "IP6I_ATTACH_IF ill is %s, " 4670 "ire->ire_ipif->ipif_ill is %s\n", 4671 ill->ill_name, 4672 dst_ill->ill_name)); 4673 goto icmp_err_ret; 4674 } 4675 } 4676 /* 4677 * Pick a source address which matches the scope of the 4678 * destination address. 4679 * For RTF_SETSRC routes, the source address is imposed by the 4680 * parent ire (sire). 4681 */ 4682 ASSERT(src_ipif == NULL); 4683 if (ire->ire_type == IRE_IF_RESOLVER && 4684 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4685 ip6_asp_can_lookup(ipst)) { 4686 /* 4687 * The ire cache entry we're adding is for the 4688 * gateway itself. 
The source address in this case 4689 * is relative to the gateway's address. 4690 */ 4691 ip6_asp_table_held = B_TRUE; 4692 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4693 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4694 if (src_ipif != NULL) 4695 ire_marks |= IRE_MARK_USESRC_CHECK; 4696 } else { 4697 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4698 /* 4699 * Check that the ipif matching the requested 4700 * source address still exists. 4701 */ 4702 src_ipif = ipif_lookup_addr_v6( 4703 &sire->ire_src_addr_v6, NULL, zoneid, 4704 NULL, NULL, NULL, NULL, ipst); 4705 } 4706 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4707 uint_t restrict_ill = RESTRICT_TO_NONE; 4708 4709 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4710 & IP6I_ATTACH_IF) 4711 restrict_ill = RESTRICT_TO_ILL; 4712 ip6_asp_table_held = B_TRUE; 4713 src_ipif = ipif_select_source_v6(dst_ill, 4714 v6dstp, restrict_ill, 4715 IPV6_PREFER_SRC_DEFAULT, zoneid); 4716 if (src_ipif != NULL) 4717 ire_marks |= IRE_MARK_USESRC_CHECK; 4718 } 4719 } 4720 4721 if (src_ipif == NULL) { 4722 if (ip_debug > 2) { 4723 /* ip1dbg */ 4724 pr_addr_dbg("ip_newroute_v6: no src for " 4725 "dst %s\n, ", AF_INET6, v6dstp); 4726 printf("ip_newroute_v6: interface name %s\n", 4727 dst_ill->ill_name); 4728 } 4729 goto icmp_err_ret; 4730 } 4731 4732 if (ip_debug > 3) { 4733 /* ip2dbg */ 4734 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4735 AF_INET6, &v6gw); 4736 } 4737 ip2dbg(("\tire type %s (%d)\n", 4738 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4739 4740 /* 4741 * At this point in ip_newroute_v6(), ire is either the 4742 * IRE_CACHE of the next-hop gateway for an off-subnet 4743 * destination or an IRE_INTERFACE type that should be used 4744 * to resolve an on-subnet destination or an on-subnet 4745 * next-hop gateway. 4746 * 4747 * In the IRE_CACHE case, we have the following : 4748 * 4749 * 1) src_ipif - used for getting a source address. 
4750 * 4751 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4752 * means packets using this IRE_CACHE will go out on dst_ill. 4753 * 4754 * 3) The IRE sire will point to the prefix that is the longest 4755 * matching route for the destination. These prefix types 4756 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4757 * 4758 * The newly created IRE_CACHE entry for the off-subnet 4759 * destination is tied to both the prefix route and the 4760 * interface route used to resolve the next-hop gateway 4761 * via the ire_phandle and ire_ihandle fields, respectively. 4762 * 4763 * In the IRE_INTERFACE case, we have the following : 4764 * 4765 * 1) src_ipif - used for getting a source address. 4766 * 4767 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4768 * means packets using the IRE_CACHE that we will build 4769 * here will go out on dst_ill. 4770 * 4771 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4772 * to be created will only be tied to the IRE_INTERFACE that 4773 * was derived from the ire_ihandle field. 4774 * 4775 * If sire is non-NULL, it means the destination is off-link 4776 * and we will first create the IRE_CACHE for the gateway. 4777 * Next time through ip_newroute_v6, we will create the 4778 * IRE_CACHE for the final destination as described above. 4779 */ 4780 save_ire = ire; 4781 switch (ire->ire_type) { 4782 case IRE_CACHE: { 4783 ire_t *ipif_ire; 4784 4785 ASSERT(sire != NULL); 4786 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4787 mutex_enter(&ire->ire_lock); 4788 v6gw = ire->ire_gateway_addr_v6; 4789 mutex_exit(&ire->ire_lock); 4790 } 4791 /* 4792 * We need 3 ire's to create a new cache ire for an 4793 * off-link destination from the cache ire of the 4794 * gateway. 4795 * 4796 * 1. The prefix ire 'sire' 4797 * 2. The cache ire of the gateway 'ire' 4798 * 3. The interface ire 'ipif_ire' 4799 * 4800 * We have (1) and (2). We lookup (3) below. 
4801 * 4802 * If there is no interface route to the gateway, 4803 * it is a race condition, where we found the cache 4804 * but the inteface route has been deleted. 4805 */ 4806 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4807 if (ipif_ire == NULL) { 4808 ip1dbg(("ip_newroute_v6:" 4809 "ire_ihandle_lookup_offlink_v6 failed\n")); 4810 goto icmp_err_ret; 4811 } 4812 /* 4813 * Assume DL_UNITDATA_REQ is same for all physical 4814 * interfaces in the ifgrp. If it isn't, this code will 4815 * have to be seriously rewhacked to allow the 4816 * fastpath probing (such that I cache the link 4817 * header in the IRE_CACHE) to work over ifgrps. 4818 * We have what we need to build an IRE_CACHE. 4819 */ 4820 /* 4821 * Note: the new ire inherits RTF_SETSRC 4822 * and RTF_MULTIRT to propagate these flags from prefix 4823 * to cache. 4824 */ 4825 4826 /* 4827 * Check cached gateway IRE for any security 4828 * attributes; if found, associate the gateway 4829 * credentials group to the destination IRE. 
4830 */ 4831 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4832 mutex_enter(&attrp->igsa_lock); 4833 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4834 GCGRP_REFHOLD(gcgrp); 4835 mutex_exit(&attrp->igsa_lock); 4836 } 4837 4838 ire = ire_create_v6( 4839 v6dstp, /* dest address */ 4840 &ipv6_all_ones, /* mask */ 4841 &src_ipif->ipif_v6src_addr, /* source address */ 4842 &v6gw, /* gateway address */ 4843 &save_ire->ire_max_frag, 4844 NULL, /* src nce */ 4845 dst_ill->ill_rq, /* recv-from queue */ 4846 dst_ill->ill_wq, /* send-to queue */ 4847 IRE_CACHE, 4848 src_ipif, 4849 &sire->ire_mask_v6, /* Parent mask */ 4850 sire->ire_phandle, /* Parent handle */ 4851 ipif_ire->ire_ihandle, /* Interface handle */ 4852 sire->ire_flags & /* flags if any */ 4853 (RTF_SETSRC | RTF_MULTIRT), 4854 &(sire->ire_uinfo), 4855 NULL, 4856 gcgrp, 4857 ipst); 4858 4859 if (ire == NULL) { 4860 if (gcgrp != NULL) { 4861 GCGRP_REFRELE(gcgrp); 4862 gcgrp = NULL; 4863 } 4864 ire_refrele(save_ire); 4865 ire_refrele(ipif_ire); 4866 break; 4867 } 4868 4869 /* reference now held by IRE */ 4870 gcgrp = NULL; 4871 4872 ire->ire_marks |= ire_marks; 4873 4874 /* 4875 * Prevent sire and ipif_ire from getting deleted. The 4876 * newly created ire is tied to both of them via the 4877 * phandle and ihandle respectively. 4878 */ 4879 IRB_REFHOLD(sire->ire_bucket); 4880 /* Has it been removed already ? */ 4881 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4882 IRB_REFRELE(sire->ire_bucket); 4883 ire_refrele(ipif_ire); 4884 ire_refrele(save_ire); 4885 break; 4886 } 4887 4888 IRB_REFHOLD(ipif_ire->ire_bucket); 4889 /* Has it been removed already ? 
*/ 4890 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4891 IRB_REFRELE(ipif_ire->ire_bucket); 4892 IRB_REFRELE(sire->ire_bucket); 4893 ire_refrele(ipif_ire); 4894 ire_refrele(save_ire); 4895 break; 4896 } 4897 4898 xmit_mp = first_mp; 4899 if (ire->ire_flags & RTF_MULTIRT) { 4900 copy_mp = copymsg(first_mp); 4901 if (copy_mp != NULL) { 4902 xmit_mp = copy_mp; 4903 MULTIRT_DEBUG_TAG(first_mp); 4904 } 4905 } 4906 ire_add_then_send(q, ire, xmit_mp); 4907 if (ip6_asp_table_held) { 4908 ip6_asp_table_refrele(ipst); 4909 ip6_asp_table_held = B_FALSE; 4910 } 4911 ire_refrele(save_ire); 4912 4913 /* Assert that sire is not deleted yet. */ 4914 ASSERT(sire->ire_ptpn != NULL); 4915 IRB_REFRELE(sire->ire_bucket); 4916 4917 /* Assert that ipif_ire is not deleted yet. */ 4918 ASSERT(ipif_ire->ire_ptpn != NULL); 4919 IRB_REFRELE(ipif_ire->ire_bucket); 4920 ire_refrele(ipif_ire); 4921 4922 if (copy_mp != NULL) { 4923 /* 4924 * Search for the next unresolved 4925 * multirt route. 4926 */ 4927 copy_mp = NULL; 4928 ipif_ire = NULL; 4929 ire = NULL; 4930 /* re-enter the loop */ 4931 multirt_resolve_next = B_TRUE; 4932 continue; 4933 } 4934 ire_refrele(sire); 4935 ill_refrele(dst_ill); 4936 ipif_refrele(src_ipif); 4937 return; 4938 } 4939 case IRE_IF_NORESOLVER: 4940 /* 4941 * We have what we need to build an IRE_CACHE. 4942 * 4943 * handle the Gated case, where we create 4944 * a NORESOLVER route for loopback. 4945 */ 4946 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4947 break; 4948 /* 4949 * TSol note: We are creating the ire cache for the 4950 * destination 'dst'. If 'dst' is offlink, going 4951 * through the first hop 'gw', the security attributes 4952 * of 'dst' must be set to point to the gateway 4953 * credentials of gateway 'gw'. If 'dst' is onlink, it 4954 * is possible that 'dst' is a potential gateway that is 4955 * referenced by some route that has some security 4956 * attributes. 
Thus in the former case, we need to do a 4957 * gcgrp_lookup of 'gw' while in the latter case we 4958 * need to do gcgrp_lookup of 'dst' itself. 4959 */ 4960 ga.ga_af = AF_INET6; 4961 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4962 ga.ga_addr = v6gw; 4963 else 4964 ga.ga_addr = *v6dstp; 4965 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4966 4967 /* 4968 * Note: the new ire inherits sire flags RTF_SETSRC 4969 * and RTF_MULTIRT to propagate those rules from prefix 4970 * to cache. 4971 */ 4972 ire = ire_create_v6( 4973 v6dstp, /* dest address */ 4974 &ipv6_all_ones, /* mask */ 4975 &src_ipif->ipif_v6src_addr, /* source address */ 4976 &v6gw, /* gateway address */ 4977 &save_ire->ire_max_frag, 4978 NULL, /* no src nce */ 4979 dst_ill->ill_rq, /* recv-from queue */ 4980 dst_ill->ill_wq, /* send-to queue */ 4981 IRE_CACHE, 4982 src_ipif, 4983 &save_ire->ire_mask_v6, /* Parent mask */ 4984 (sire != NULL) ? /* Parent handle */ 4985 sire->ire_phandle : 0, 4986 save_ire->ire_ihandle, /* Interface handle */ 4987 (sire != NULL) ? /* flags if any */ 4988 sire->ire_flags & 4989 (RTF_SETSRC | RTF_MULTIRT) : 0, 4990 &(save_ire->ire_uinfo), 4991 NULL, 4992 gcgrp, 4993 ipst); 4994 4995 if (ire == NULL) { 4996 if (gcgrp != NULL) { 4997 GCGRP_REFRELE(gcgrp); 4998 gcgrp = NULL; 4999 } 5000 ire_refrele(save_ire); 5001 break; 5002 } 5003 5004 /* reference now held by IRE */ 5005 gcgrp = NULL; 5006 5007 ire->ire_marks |= ire_marks; 5008 5009 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5010 dst = v6gw; 5011 else 5012 dst = *v6dstp; 5013 err = ndp_noresolver(dst_ill, &dst); 5014 if (err != 0) { 5015 ire_refrele(save_ire); 5016 break; 5017 } 5018 5019 /* Prevent save_ire from getting deleted */ 5020 IRB_REFHOLD(save_ire->ire_bucket); 5021 /* Has it been removed already ? 
*/ 5022 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5023 IRB_REFRELE(save_ire->ire_bucket); 5024 ire_refrele(save_ire); 5025 break; 5026 } 5027 5028 xmit_mp = first_mp; 5029 /* 5030 * In case of MULTIRT, a copy of the current packet 5031 * to send is made to further re-enter the 5032 * loop and attempt another route resolution 5033 */ 5034 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5035 copy_mp = copymsg(first_mp); 5036 if (copy_mp != NULL) { 5037 xmit_mp = copy_mp; 5038 MULTIRT_DEBUG_TAG(first_mp); 5039 } 5040 } 5041 ire_add_then_send(q, ire, xmit_mp); 5042 if (ip6_asp_table_held) { 5043 ip6_asp_table_refrele(ipst); 5044 ip6_asp_table_held = B_FALSE; 5045 } 5046 5047 /* Assert that it is not deleted yet. */ 5048 ASSERT(save_ire->ire_ptpn != NULL); 5049 IRB_REFRELE(save_ire->ire_bucket); 5050 ire_refrele(save_ire); 5051 5052 if (copy_mp != NULL) { 5053 /* 5054 * If we found a (no)resolver, we ignore any 5055 * trailing top priority IRE_CACHE in 5056 * further loops. This ensures that we do not 5057 * omit any (no)resolver despite the priority 5058 * in this call. 5059 * IRE_CACHE, if any, will be processed 5060 * by another thread entering ip_newroute(), 5061 * (on resolver response, for example). 5062 * We use this to force multiple parallel 5063 * resolution as soon as a packet needs to be 5064 * sent. The result is, after one packet 5065 * emission all reachable routes are generally 5066 * resolved. 5067 * Otherwise, complete resolution of MULTIRT 5068 * routes would require several emissions as 5069 * side effect. 5070 */ 5071 multirt_flags &= ~MULTIRT_CACHEGW; 5072 5073 /* 5074 * Search for the next unresolved multirt 5075 * route. 
5076 */ 5077 copy_mp = NULL; 5078 save_ire = NULL; 5079 ire = NULL; 5080 /* re-enter the loop */ 5081 multirt_resolve_next = B_TRUE; 5082 continue; 5083 } 5084 5085 /* Don't need sire anymore */ 5086 if (sire != NULL) 5087 ire_refrele(sire); 5088 ill_refrele(dst_ill); 5089 ipif_refrele(src_ipif); 5090 return; 5091 5092 case IRE_IF_RESOLVER: 5093 /* 5094 * We can't build an IRE_CACHE yet, but at least we 5095 * found a resolver that can help. 5096 */ 5097 dst = *v6dstp; 5098 5099 /* 5100 * To be at this point in the code with a non-zero gw 5101 * means that dst is reachable through a gateway that 5102 * we have never resolved. By changing dst to the gw 5103 * addr we resolve the gateway first. When 5104 * ire_add_then_send() tries to put the IP dg to dst, 5105 * it will reenter ip_newroute() at which time we will 5106 * find the IRE_CACHE for the gw and create another 5107 * IRE_CACHE above (for dst itself). 5108 */ 5109 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5110 save_dst = dst; 5111 dst = v6gw; 5112 v6gw = ipv6_all_zeros; 5113 } 5114 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5115 /* 5116 * Ask the external resolver to do its thing. 
5117 * Make an mblk chain in the following form: 5118 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5119 */ 5120 mblk_t *ire_mp; 5121 mblk_t *areq_mp; 5122 areq_t *areq; 5123 in6_addr_t *addrp; 5124 5125 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5126 if (ip6_asp_table_held) { 5127 ip6_asp_table_refrele(ipst); 5128 ip6_asp_table_held = B_FALSE; 5129 } 5130 ire = ire_create_mp_v6( 5131 &dst, /* dest address */ 5132 &ipv6_all_ones, /* mask */ 5133 &src_ipif->ipif_v6src_addr, 5134 /* source address */ 5135 &v6gw, /* gateway address */ 5136 NULL, /* no src nce */ 5137 dst_ill->ill_rq, /* recv-from queue */ 5138 dst_ill->ill_wq, /* send-to queue */ 5139 IRE_CACHE, 5140 src_ipif, 5141 &save_ire->ire_mask_v6, /* Parent mask */ 5142 0, 5143 save_ire->ire_ihandle, 5144 /* Interface handle */ 5145 0, /* flags if any */ 5146 &(save_ire->ire_uinfo), 5147 NULL, 5148 NULL, 5149 ipst); 5150 5151 ire_refrele(save_ire); 5152 if (ire == NULL) { 5153 ip1dbg(("ip_newroute_v6:" 5154 "ire is NULL\n")); 5155 break; 5156 } 5157 5158 if ((sire != NULL) && 5159 (sire->ire_flags & RTF_MULTIRT)) { 5160 /* 5161 * processing a copy of the packet to 5162 * send for further resolution loops 5163 */ 5164 copy_mp = copymsg(first_mp); 5165 if (copy_mp != NULL) 5166 MULTIRT_DEBUG_TAG(copy_mp); 5167 } 5168 ire->ire_marks |= ire_marks; 5169 ire_mp = ire->ire_mp; 5170 /* 5171 * Now create or find an nce for this interface. 5172 * The hw addr will need to to be set from 5173 * the reply to the AR_ENTRY_QUERY that 5174 * we're about to send. This will be done in 5175 * ire_add_v6(). 5176 */ 5177 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5178 switch (err) { 5179 case 0: 5180 /* 5181 * New cache entry created. 5182 * Break, then ask the external 5183 * resolver. 5184 */ 5185 break; 5186 case EINPROGRESS: 5187 /* 5188 * Resolution in progress; 5189 * packet has been queued by 5190 * ndp_resolver(). 5191 */ 5192 ire_delete(ire); 5193 ire = NULL; 5194 /* 5195 * Check if another multirt 5196 * route must be resolved. 
5197 */ 5198 if (copy_mp != NULL) { 5199 /* 5200 * If we found a resolver, we 5201 * ignore any trailing top 5202 * priority IRE_CACHE in 5203 * further loops. The reason is 5204 * the same as for noresolver. 5205 */ 5206 multirt_flags &= 5207 ~MULTIRT_CACHEGW; 5208 /* 5209 * Search for the next 5210 * unresolved multirt route. 5211 */ 5212 first_mp = copy_mp; 5213 copy_mp = NULL; 5214 mp = first_mp; 5215 if (mp->b_datap->db_type == 5216 M_CTL) { 5217 mp = mp->b_cont; 5218 } 5219 ASSERT(sire != NULL); 5220 dst = save_dst; 5221 /* 5222 * re-enter the loop 5223 */ 5224 multirt_resolve_next = 5225 B_TRUE; 5226 continue; 5227 } 5228 5229 if (sire != NULL) 5230 ire_refrele(sire); 5231 ill_refrele(dst_ill); 5232 ipif_refrele(src_ipif); 5233 return; 5234 default: 5235 /* 5236 * Transient error; packet will be 5237 * freed. 5238 */ 5239 ire_delete(ire); 5240 ire = NULL; 5241 break; 5242 } 5243 if (err != 0) 5244 break; 5245 /* 5246 * Now set up the AR_ENTRY_QUERY and send it. 5247 */ 5248 areq_mp = ill_arp_alloc(dst_ill, 5249 (uchar_t *)&ipv6_areq_template, 5250 (caddr_t)&dst); 5251 if (areq_mp == NULL) { 5252 ip1dbg(("ip_newroute_v6:" 5253 "areq_mp is NULL\n")); 5254 freemsg(ire_mp); 5255 break; 5256 } 5257 areq = (areq_t *)areq_mp->b_rptr; 5258 addrp = (in6_addr_t *)((char *)areq + 5259 areq->areq_target_addr_offset); 5260 *addrp = dst; 5261 addrp = (in6_addr_t *)((char *)areq + 5262 areq->areq_sender_addr_offset); 5263 *addrp = src_ipif->ipif_v6src_addr; 5264 /* 5265 * link the chain, then send up to the resolver. 5266 */ 5267 linkb(areq_mp, ire_mp); 5268 linkb(areq_mp, mp); 5269 ip1dbg(("ip_newroute_v6:" 5270 "putnext to resolver\n")); 5271 putnext(dst_ill->ill_rq, areq_mp); 5272 /* 5273 * Check if another multirt route 5274 * must be resolved. 5275 */ 5276 ire = NULL; 5277 if (copy_mp != NULL) { 5278 /* 5279 * If we find a resolver, we ignore any 5280 * trailing top priority IRE_CACHE in 5281 * further loops. The reason is the 5282 * same as for noresolver. 
5283 */ 5284 multirt_flags &= ~MULTIRT_CACHEGW; 5285 /* 5286 * Search for the next unresolved 5287 * multirt route. 5288 */ 5289 first_mp = copy_mp; 5290 copy_mp = NULL; 5291 mp = first_mp; 5292 if (mp->b_datap->db_type == M_CTL) { 5293 mp = mp->b_cont; 5294 } 5295 ASSERT(sire != NULL); 5296 dst = save_dst; 5297 /* 5298 * re-enter the loop 5299 */ 5300 multirt_resolve_next = B_TRUE; 5301 continue; 5302 } 5303 5304 if (sire != NULL) 5305 ire_refrele(sire); 5306 ill_refrele(dst_ill); 5307 ipif_refrele(src_ipif); 5308 return; 5309 } 5310 /* 5311 * Non-external resolver case. 5312 * 5313 * TSol note: Please see the note above the 5314 * IRE_IF_NORESOLVER case. 5315 */ 5316 ga.ga_af = AF_INET6; 5317 ga.ga_addr = dst; 5318 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5319 5320 ire = ire_create_v6( 5321 &dst, /* dest address */ 5322 &ipv6_all_ones, /* mask */ 5323 &src_ipif->ipif_v6src_addr, /* source address */ 5324 &v6gw, /* gateway address */ 5325 &save_ire->ire_max_frag, 5326 NULL, /* no src nce */ 5327 dst_ill->ill_rq, /* recv-from queue */ 5328 dst_ill->ill_wq, /* send-to queue */ 5329 IRE_CACHE, 5330 src_ipif, 5331 &save_ire->ire_mask_v6, /* Parent mask */ 5332 0, 5333 save_ire->ire_ihandle, /* Interface handle */ 5334 0, /* flags if any */ 5335 &(save_ire->ire_uinfo), 5336 NULL, 5337 gcgrp, 5338 ipst); 5339 5340 if (ire == NULL) { 5341 if (gcgrp != NULL) { 5342 GCGRP_REFRELE(gcgrp); 5343 gcgrp = NULL; 5344 } 5345 ire_refrele(save_ire); 5346 break; 5347 } 5348 5349 /* reference now held by IRE */ 5350 gcgrp = NULL; 5351 5352 if ((sire != NULL) && 5353 (sire->ire_flags & RTF_MULTIRT)) { 5354 copy_mp = copymsg(first_mp); 5355 if (copy_mp != NULL) 5356 MULTIRT_DEBUG_TAG(copy_mp); 5357 } 5358 5359 ire->ire_marks |= ire_marks; 5360 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5361 switch (err) { 5362 case 0: 5363 /* Prevent save_ire from getting deleted */ 5364 IRB_REFHOLD(save_ire->ire_bucket); 5365 /* Has it been removed already ? 
*/ 5366 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5367 IRB_REFRELE(save_ire->ire_bucket); 5368 ire_refrele(save_ire); 5369 break; 5370 } 5371 5372 /* 5373 * We have a resolved cache entry, 5374 * add in the IRE. 5375 */ 5376 ire_add_then_send(q, ire, first_mp); 5377 if (ip6_asp_table_held) { 5378 ip6_asp_table_refrele(ipst); 5379 ip6_asp_table_held = B_FALSE; 5380 } 5381 5382 /* Assert that it is not deleted yet. */ 5383 ASSERT(save_ire->ire_ptpn != NULL); 5384 IRB_REFRELE(save_ire->ire_bucket); 5385 ire_refrele(save_ire); 5386 /* 5387 * Check if another multirt route 5388 * must be resolved. 5389 */ 5390 ire = NULL; 5391 if (copy_mp != NULL) { 5392 /* 5393 * If we find a resolver, we ignore any 5394 * trailing top priority IRE_CACHE in 5395 * further loops. The reason is the 5396 * same as for noresolver. 5397 */ 5398 multirt_flags &= ~MULTIRT_CACHEGW; 5399 /* 5400 * Search for the next unresolved 5401 * multirt route. 5402 */ 5403 first_mp = copy_mp; 5404 copy_mp = NULL; 5405 mp = first_mp; 5406 if (mp->b_datap->db_type == M_CTL) { 5407 mp = mp->b_cont; 5408 } 5409 ASSERT(sire != NULL); 5410 dst = save_dst; 5411 /* 5412 * re-enter the loop 5413 */ 5414 multirt_resolve_next = B_TRUE; 5415 continue; 5416 } 5417 5418 if (sire != NULL) 5419 ire_refrele(sire); 5420 ill_refrele(dst_ill); 5421 ipif_refrele(src_ipif); 5422 return; 5423 5424 case EINPROGRESS: 5425 /* 5426 * mp was consumed - presumably queued. 5427 * No need for ire, presumably resolution is 5428 * in progress, and ire will be added when the 5429 * address is resolved. 5430 */ 5431 if (ip6_asp_table_held) { 5432 ip6_asp_table_refrele(ipst); 5433 ip6_asp_table_held = B_FALSE; 5434 } 5435 ASSERT(ire->ire_nce == NULL); 5436 ire_delete(ire); 5437 ire_refrele(save_ire); 5438 /* 5439 * Check if another multirt route 5440 * must be resolved. 
5441 */ 5442 ire = NULL; 5443 if (copy_mp != NULL) { 5444 /* 5445 * If we find a resolver, we ignore any 5446 * trailing top priority IRE_CACHE in 5447 * further loops. The reason is the 5448 * same as for noresolver. 5449 */ 5450 multirt_flags &= ~MULTIRT_CACHEGW; 5451 /* 5452 * Search for the next unresolved 5453 * multirt route. 5454 */ 5455 first_mp = copy_mp; 5456 copy_mp = NULL; 5457 mp = first_mp; 5458 if (mp->b_datap->db_type == M_CTL) { 5459 mp = mp->b_cont; 5460 } 5461 ASSERT(sire != NULL); 5462 dst = save_dst; 5463 /* 5464 * re-enter the loop 5465 */ 5466 multirt_resolve_next = B_TRUE; 5467 continue; 5468 } 5469 if (sire != NULL) 5470 ire_refrele(sire); 5471 ill_refrele(dst_ill); 5472 ipif_refrele(src_ipif); 5473 return; 5474 default: 5475 /* Some transient error */ 5476 ASSERT(ire->ire_nce == NULL); 5477 ire_refrele(save_ire); 5478 break; 5479 } 5480 break; 5481 default: 5482 break; 5483 } 5484 if (ip6_asp_table_held) { 5485 ip6_asp_table_refrele(ipst); 5486 ip6_asp_table_held = B_FALSE; 5487 } 5488 } while (multirt_resolve_next); 5489 5490 err_ret: 5491 ip1dbg(("ip_newroute_v6: dropped\n")); 5492 if (src_ipif != NULL) 5493 ipif_refrele(src_ipif); 5494 if (dst_ill != NULL) { 5495 need_rele = B_TRUE; 5496 ill = dst_ill; 5497 } 5498 if (ill != NULL) { 5499 if (mp->b_prev != NULL) { 5500 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5501 } else { 5502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5503 } 5504 5505 if (need_rele) 5506 ill_refrele(ill); 5507 } else { 5508 if (mp->b_prev != NULL) { 5509 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5510 } else { 5511 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5512 } 5513 } 5514 /* Did this packet originate externally? 
*/ 5515 if (mp->b_prev) { 5516 mp->b_next = NULL; 5517 mp->b_prev = NULL; 5518 } 5519 if (copy_mp != NULL) { 5520 MULTIRT_DEBUG_UNTAG(copy_mp); 5521 freemsg(copy_mp); 5522 } 5523 MULTIRT_DEBUG_UNTAG(first_mp); 5524 freemsg(first_mp); 5525 if (ire != NULL) 5526 ire_refrele(ire); 5527 if (sire != NULL) 5528 ire_refrele(sire); 5529 return; 5530 5531 icmp_err_ret: 5532 if (ip6_asp_table_held) 5533 ip6_asp_table_refrele(ipst); 5534 if (src_ipif != NULL) 5535 ipif_refrele(src_ipif); 5536 if (dst_ill != NULL) { 5537 need_rele = B_TRUE; 5538 ill = dst_ill; 5539 } 5540 ip1dbg(("ip_newroute_v6: no route\n")); 5541 if (sire != NULL) 5542 ire_refrele(sire); 5543 /* 5544 * We need to set sire to NULL to avoid double freeing if we 5545 * ever goto err_ret from below. 5546 */ 5547 sire = NULL; 5548 ip6h = (ip6_t *)mp->b_rptr; 5549 /* Skip ip6i_t header if present */ 5550 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5551 /* Make sure the IPv6 header is present */ 5552 if ((mp->b_wptr - (uchar_t *)ip6h) < 5553 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5554 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5555 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5556 goto err_ret; 5557 } 5558 } 5559 mp->b_rptr += sizeof (ip6i_t); 5560 ip6h = (ip6_t *)mp->b_rptr; 5561 } 5562 /* Did this packet originate externally? 
*/ 5563 if (mp->b_prev) { 5564 if (ill != NULL) { 5565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5566 } else { 5567 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5568 } 5569 mp->b_next = NULL; 5570 mp->b_prev = NULL; 5571 q = WR(q); 5572 } else { 5573 if (ill != NULL) { 5574 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5575 } else { 5576 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5577 } 5578 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5579 /* Failed */ 5580 if (copy_mp != NULL) { 5581 MULTIRT_DEBUG_UNTAG(copy_mp); 5582 freemsg(copy_mp); 5583 } 5584 MULTIRT_DEBUG_UNTAG(first_mp); 5585 freemsg(first_mp); 5586 if (ire != NULL) 5587 ire_refrele(ire); 5588 if (need_rele) 5589 ill_refrele(ill); 5590 return; 5591 } 5592 } 5593 5594 if (need_rele) 5595 ill_refrele(ill); 5596 5597 /* 5598 * At this point we will have ire only if RTF_BLACKHOLE 5599 * or RTF_REJECT flags are set on the IRE. It will not 5600 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5601 */ 5602 if (ire != NULL) { 5603 if (ire->ire_flags & RTF_BLACKHOLE) { 5604 ire_refrele(ire); 5605 if (copy_mp != NULL) { 5606 MULTIRT_DEBUG_UNTAG(copy_mp); 5607 freemsg(copy_mp); 5608 } 5609 MULTIRT_DEBUG_UNTAG(first_mp); 5610 freemsg(first_mp); 5611 return; 5612 } 5613 ire_refrele(ire); 5614 } 5615 if (ip_debug > 3) { 5616 /* ip2dbg */ 5617 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5618 AF_INET6, v6dstp); 5619 } 5620 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5621 B_FALSE, B_FALSE, zoneid, ipst); 5622 } 5623 5624 /* 5625 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5626 * we need to send out a packet to a destination address for which we do not 5627 * have specific routing information. It is only used for multicast packets. 5628 * 5629 * If unspec_src we allow creating an IRE with source address zero. 5630 * ire_send_v6() will delete it after the packet is sent. 
5631 */ 5632 void 5633 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5634 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5635 { 5636 ire_t *ire = NULL; 5637 ipif_t *src_ipif = NULL; 5638 int err = 0; 5639 ill_t *dst_ill = NULL; 5640 ire_t *save_ire; 5641 ushort_t ire_marks = 0; 5642 ipsec_out_t *io; 5643 ill_t *attach_ill = NULL; 5644 ill_t *ill; 5645 ip6_t *ip6h; 5646 mblk_t *first_mp; 5647 boolean_t ip6i_present; 5648 ire_t *fire = NULL; 5649 mblk_t *copy_mp = NULL; 5650 boolean_t multirt_resolve_next; 5651 in6_addr_t *v6dstp = &v6dst; 5652 boolean_t ipif_held = B_FALSE; 5653 boolean_t ill_held = B_FALSE; 5654 boolean_t ip6_asp_table_held = B_FALSE; 5655 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5656 5657 /* 5658 * This loop is run only once in most cases. 5659 * We loop to resolve further routes only when the destination 5660 * can be reached through multiple RTF_MULTIRT-flagged ires. 5661 */ 5662 do { 5663 multirt_resolve_next = B_FALSE; 5664 if (dst_ill != NULL) { 5665 ill_refrele(dst_ill); 5666 dst_ill = NULL; 5667 } 5668 5669 if (src_ipif != NULL) { 5670 ipif_refrele(src_ipif); 5671 src_ipif = NULL; 5672 } 5673 ASSERT(ipif != NULL); 5674 ill = ipif->ipif_ill; 5675 5676 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5677 if (ip_debug > 2) { 5678 /* ip1dbg */ 5679 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5680 AF_INET6, v6dstp); 5681 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5682 ill->ill_name, ipif->ipif_isv6); 5683 } 5684 5685 first_mp = mp; 5686 if (mp->b_datap->db_type == M_CTL) { 5687 mp = mp->b_cont; 5688 io = (ipsec_out_t *)first_mp->b_rptr; 5689 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5690 } else { 5691 io = NULL; 5692 } 5693 5694 /* 5695 * If the interface is a pt-pt interface we look for an 5696 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5697 * local_address and the pt-pt destination address. 5698 * Otherwise we just match the local address. 
5699 */ 5700 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5701 goto err_ret; 5702 } 5703 /* 5704 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5705 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5706 * as it could be NULL. 5707 * 5708 * This information can appear either in an ip6i_t or an 5709 * IPSEC_OUT message. 5710 */ 5711 ip6h = (ip6_t *)mp->b_rptr; 5712 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5713 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5714 if (!ip6i_present || 5715 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5716 attach_ill = ip_grab_attach_ill(ill, first_mp, 5717 (ip6i_present ? 5718 ((ip6i_t *)ip6h)->ip6i_ifindex : 5719 io->ipsec_out_ill_index), B_TRUE, ipst); 5720 /* Failure case frees things for us. */ 5721 if (attach_ill == NULL) 5722 return; 5723 5724 /* 5725 * Check if we need an ire that will not be 5726 * looked up by anybody else i.e. HIDDEN. 5727 */ 5728 if (ill_is_probeonly(attach_ill)) 5729 ire_marks = IRE_MARK_HIDDEN; 5730 } 5731 } 5732 5733 /* 5734 * We check if an IRE_OFFSUBNET for the addr that goes through 5735 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5736 * RTF_MULTIRT flags must be honored. 5737 */ 5738 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5739 ip2dbg(("ip_newroute_ipif_v6: " 5740 "ipif_lookup_multi_ire_v6(" 5741 "ipif %p, dst %08x) = fire %p\n", 5742 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5743 (void *)fire)); 5744 5745 /* 5746 * If the application specified the ill (ifindex), we still 5747 * load spread. Only if the packets needs to go out specifically 5748 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5749 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5750 * multirouting, then we don't try to use a different ill for 5751 * load spreading. 
5752 */ 5753 if (attach_ill == NULL) { 5754 /* 5755 * If the interface belongs to an interface group, 5756 * make sure the next possible interface in the group 5757 * is used. This encourages load spreading among peers 5758 * in an interface group. 5759 * 5760 * Note: While we pick a dst_ill we are really only 5761 * interested in the ill for load spreading. The source 5762 * ipif is determined by source address selection below. 5763 */ 5764 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5765 dst_ill = ipif->ipif_ill; 5766 /* For uniformity do a refhold */ 5767 ill_refhold(dst_ill); 5768 } else { 5769 /* refheld by ip_newroute_get_dst_ill_v6 */ 5770 dst_ill = 5771 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5772 } 5773 if (dst_ill == NULL) { 5774 if (ip_debug > 2) { 5775 pr_addr_dbg("ip_newroute_ipif_v6: " 5776 "no dst ill for dst %s\n", 5777 AF_INET6, v6dstp); 5778 } 5779 goto err_ret; 5780 } 5781 } else { 5782 dst_ill = ipif->ipif_ill; 5783 /* 5784 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5785 * and IPV6_BOUND_PIF case. 5786 */ 5787 ASSERT(dst_ill == attach_ill); 5788 /* attach_ill is already refheld */ 5789 } 5790 /* 5791 * Pick a source address which matches the scope of the 5792 * destination address. 5793 * For RTF_SETSRC routes, the source address is imposed by the 5794 * parent ire (fire). 5795 */ 5796 ASSERT(src_ipif == NULL); 5797 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5798 /* 5799 * Check that the ipif matching the requested source 5800 * address still exists. 
5801 */ 5802 src_ipif = 5803 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5804 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5805 } 5806 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5807 uint_t restrict_ill = RESTRICT_TO_NONE; 5808 5809 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 5810 & IP6I_ATTACH_IF) 5811 restrict_ill = RESTRICT_TO_ILL; 5812 ip6_asp_table_held = B_TRUE; 5813 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5814 restrict_ill, IPV6_PREFER_SRC_DEFAULT, zoneid); 5815 } 5816 5817 if (src_ipif == NULL) { 5818 if (!unspec_src) { 5819 if (ip_debug > 2) { 5820 /* ip1dbg */ 5821 pr_addr_dbg("ip_newroute_ipif_v6: " 5822 "no src for dst %s\n,", 5823 AF_INET6, v6dstp); 5824 printf(" through interface %s\n", 5825 dst_ill->ill_name); 5826 } 5827 goto err_ret; 5828 } 5829 src_ipif = ipif; 5830 ipif_refhold(src_ipif); 5831 } 5832 ire = ipif_to_ire_v6(ipif); 5833 if (ire == NULL) { 5834 if (ip_debug > 2) { 5835 /* ip1dbg */ 5836 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5837 AF_INET6, &ipif->ipif_v6lcl_addr); 5838 printf("ip_newroute_ipif_v6: " 5839 "if %s\n", dst_ill->ill_name); 5840 } 5841 goto err_ret; 5842 } 5843 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5844 goto err_ret; 5845 5846 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5847 5848 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5849 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5850 if (ip_debug > 2) { 5851 /* ip1dbg */ 5852 pr_addr_dbg(" address %s\n", 5853 AF_INET6, &ire->ire_src_addr_v6); 5854 } 5855 save_ire = ire; 5856 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5857 (void *)ire, (void *)ipif)); 5858 5859 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5860 /* 5861 * an IRE_OFFSUBET was looked up 5862 * on that interface. 5863 * this ire has RTF_MULTIRT flag, 5864 * so the resolution loop 5865 * will be re-entered to resolve 5866 * additional routes on other 5867 * interfaces. 
For that purpose, 5868 * a copy of the packet is 5869 * made at this point. 5870 */ 5871 fire->ire_last_used_time = lbolt; 5872 copy_mp = copymsg(first_mp); 5873 if (copy_mp) { 5874 MULTIRT_DEBUG_TAG(copy_mp); 5875 } 5876 } 5877 5878 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5879 switch (ire->ire_type) { 5880 case IRE_IF_NORESOLVER: { 5881 /* 5882 * We have what we need to build an IRE_CACHE. 5883 * 5884 * handle the Gated case, where we create 5885 * a NORESOLVER route for loopback. 5886 */ 5887 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5888 break; 5889 /* 5890 * The newly created ire will inherit the flags of the 5891 * parent ire, if any. 5892 */ 5893 ire = ire_create_v6( 5894 v6dstp, /* dest address */ 5895 &ipv6_all_ones, /* mask */ 5896 &src_ipif->ipif_v6src_addr, /* source address */ 5897 NULL, /* gateway address */ 5898 &save_ire->ire_max_frag, 5899 NULL, /* no src nce */ 5900 dst_ill->ill_rq, /* recv-from queue */ 5901 dst_ill->ill_wq, /* send-to queue */ 5902 IRE_CACHE, 5903 src_ipif, 5904 NULL, 5905 (fire != NULL) ? /* Parent handle */ 5906 fire->ire_phandle : 0, 5907 save_ire->ire_ihandle, /* Interface handle */ 5908 (fire != NULL) ? 5909 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5910 0, 5911 &ire_uinfo_null, 5912 NULL, 5913 NULL, 5914 ipst); 5915 5916 if (ire == NULL) { 5917 ire_refrele(save_ire); 5918 break; 5919 } 5920 5921 ire->ire_marks |= ire_marks; 5922 5923 err = ndp_noresolver(dst_ill, v6dstp); 5924 if (err != 0) { 5925 ire_refrele(save_ire); 5926 break; 5927 } 5928 5929 /* Prevent save_ire from getting deleted */ 5930 IRB_REFHOLD(save_ire->ire_bucket); 5931 /* Has it been removed already ? 
*/ 5932 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5933 IRB_REFRELE(save_ire->ire_bucket); 5934 ire_refrele(save_ire); 5935 break; 5936 } 5937 5938 ire_add_then_send(q, ire, first_mp); 5939 if (ip6_asp_table_held) { 5940 ip6_asp_table_refrele(ipst); 5941 ip6_asp_table_held = B_FALSE; 5942 } 5943 5944 /* Assert that it is not deleted yet. */ 5945 ASSERT(save_ire->ire_ptpn != NULL); 5946 IRB_REFRELE(save_ire->ire_bucket); 5947 ire_refrele(save_ire); 5948 if (fire != NULL) { 5949 ire_refrele(fire); 5950 fire = NULL; 5951 } 5952 5953 /* 5954 * The resolution loop is re-entered if we 5955 * actually are in a multirouting case. 5956 */ 5957 if (copy_mp != NULL) { 5958 boolean_t need_resolve = 5959 ire_multirt_need_resolve_v6(v6dstp, 5960 MBLK_GETLABEL(copy_mp), ipst); 5961 if (!need_resolve) { 5962 MULTIRT_DEBUG_UNTAG(copy_mp); 5963 freemsg(copy_mp); 5964 copy_mp = NULL; 5965 } else { 5966 /* 5967 * ipif_lookup_group_v6() calls 5968 * ire_lookup_multi_v6() that uses 5969 * ire_ftable_lookup_v6() to find 5970 * an IRE_INTERFACE for the group. 5971 * In the multirt case, 5972 * ire_lookup_multi_v6() then invokes 5973 * ire_multirt_lookup_v6() to find 5974 * the next resolvable ire. 5975 * As a result, we obtain a new 5976 * interface, derived from the 5977 * next ire. 
5978 */ 5979 if (ipif_held) { 5980 ipif_refrele(ipif); 5981 ipif_held = B_FALSE; 5982 } 5983 ipif = ipif_lookup_group_v6(v6dstp, 5984 zoneid, ipst); 5985 ip2dbg(("ip_newroute_ipif: " 5986 "multirt dst %08x, ipif %p\n", 5987 ntohl(V4_PART_OF_V6((*v6dstp))), 5988 (void *)ipif)); 5989 if (ipif != NULL) { 5990 ipif_held = B_TRUE; 5991 mp = copy_mp; 5992 copy_mp = NULL; 5993 multirt_resolve_next = 5994 B_TRUE; 5995 continue; 5996 } else { 5997 freemsg(copy_mp); 5998 } 5999 } 6000 } 6001 ill_refrele(dst_ill); 6002 if (ipif_held) { 6003 ipif_refrele(ipif); 6004 ipif_held = B_FALSE; 6005 } 6006 if (src_ipif != NULL) 6007 ipif_refrele(src_ipif); 6008 return; 6009 } 6010 case IRE_IF_RESOLVER: { 6011 6012 ASSERT(dst_ill->ill_isv6); 6013 6014 /* 6015 * We obtain a partial IRE_CACHE which we will pass 6016 * along with the resolver query. When the response 6017 * comes back it will be there ready for us to add. 6018 */ 6019 /* 6020 * the newly created ire will inherit the flags of the 6021 * parent ire, if any. 6022 */ 6023 ire = ire_create_v6( 6024 v6dstp, /* dest address */ 6025 &ipv6_all_ones, /* mask */ 6026 &src_ipif->ipif_v6src_addr, /* source address */ 6027 NULL, /* gateway address */ 6028 &save_ire->ire_max_frag, 6029 NULL, /* src nce */ 6030 dst_ill->ill_rq, /* recv-from queue */ 6031 dst_ill->ill_wq, /* send-to queue */ 6032 IRE_CACHE, 6033 src_ipif, 6034 NULL, 6035 (fire != NULL) ? /* Parent handle */ 6036 fire->ire_phandle : 0, 6037 save_ire->ire_ihandle, /* Interface handle */ 6038 (fire != NULL) ? 
6039 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6040 0, 6041 &ire_uinfo_null, 6042 NULL, 6043 NULL, 6044 ipst); 6045 6046 if (ire == NULL) { 6047 ire_refrele(save_ire); 6048 break; 6049 } 6050 6051 ire->ire_marks |= ire_marks; 6052 6053 /* Resolve and add ire to the ctable */ 6054 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6055 switch (err) { 6056 case 0: 6057 /* Prevent save_ire from getting deleted */ 6058 IRB_REFHOLD(save_ire->ire_bucket); 6059 /* Has it been removed already ? */ 6060 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6061 IRB_REFRELE(save_ire->ire_bucket); 6062 ire_refrele(save_ire); 6063 break; 6064 } 6065 /* 6066 * We have a resolved cache entry, 6067 * add in the IRE. 6068 */ 6069 ire_add_then_send(q, ire, first_mp); 6070 if (ip6_asp_table_held) { 6071 ip6_asp_table_refrele(ipst); 6072 ip6_asp_table_held = B_FALSE; 6073 } 6074 6075 /* Assert that it is not deleted yet. */ 6076 ASSERT(save_ire->ire_ptpn != NULL); 6077 IRB_REFRELE(save_ire->ire_bucket); 6078 ire_refrele(save_ire); 6079 if (fire != NULL) { 6080 ire_refrele(fire); 6081 fire = NULL; 6082 } 6083 6084 /* 6085 * The resolution loop is re-entered if we 6086 * actually are in a multirouting case. 6087 */ 6088 if (copy_mp != NULL) { 6089 boolean_t need_resolve = 6090 ire_multirt_need_resolve_v6(v6dstp, 6091 MBLK_GETLABEL(copy_mp), ipst); 6092 if (!need_resolve) { 6093 MULTIRT_DEBUG_UNTAG(copy_mp); 6094 freemsg(copy_mp); 6095 copy_mp = NULL; 6096 } else { 6097 /* 6098 * ipif_lookup_group_v6() calls 6099 * ire_lookup_multi_v6() that 6100 * uses ire_ftable_lookup_v6() 6101 * to find an IRE_INTERFACE for 6102 * the group. In the multirt 6103 * case, ire_lookup_multi_v6() 6104 * then invokes 6105 * ire_multirt_lookup_v6() to 6106 * find the next resolvable ire. 6107 * As a result, we obtain a new 6108 * interface, derived from the 6109 * next ire. 
6110 */ 6111 if (ipif_held) { 6112 ipif_refrele(ipif); 6113 ipif_held = B_FALSE; 6114 } 6115 ipif = ipif_lookup_group_v6( 6116 v6dstp, zoneid, ipst); 6117 ip2dbg(("ip_newroute_ipif: " 6118 "multirt dst %08x, " 6119 "ipif %p\n", 6120 ntohl(V4_PART_OF_V6( 6121 (*v6dstp))), 6122 (void *)ipif)); 6123 if (ipif != NULL) { 6124 ipif_held = B_TRUE; 6125 mp = copy_mp; 6126 copy_mp = NULL; 6127 multirt_resolve_next = 6128 B_TRUE; 6129 continue; 6130 } else { 6131 freemsg(copy_mp); 6132 } 6133 } 6134 } 6135 ill_refrele(dst_ill); 6136 if (ipif_held) { 6137 ipif_refrele(ipif); 6138 ipif_held = B_FALSE; 6139 } 6140 if (src_ipif != NULL) 6141 ipif_refrele(src_ipif); 6142 return; 6143 6144 case EINPROGRESS: 6145 /* 6146 * mp was consumed - presumably queued. 6147 * No need for ire, presumably resolution is 6148 * in progress, and ire will be added when the 6149 * address is resolved. 6150 */ 6151 if (ip6_asp_table_held) { 6152 ip6_asp_table_refrele(ipst); 6153 ip6_asp_table_held = B_FALSE; 6154 } 6155 ire_delete(ire); 6156 ire_refrele(save_ire); 6157 if (fire != NULL) { 6158 ire_refrele(fire); 6159 fire = NULL; 6160 } 6161 6162 /* 6163 * The resolution loop is re-entered if we 6164 * actually are in a multirouting case. 6165 */ 6166 if (copy_mp != NULL) { 6167 boolean_t need_resolve = 6168 ire_multirt_need_resolve_v6(v6dstp, 6169 MBLK_GETLABEL(copy_mp), ipst); 6170 if (!need_resolve) { 6171 MULTIRT_DEBUG_UNTAG(copy_mp); 6172 freemsg(copy_mp); 6173 copy_mp = NULL; 6174 } else { 6175 /* 6176 * ipif_lookup_group_v6() calls 6177 * ire_lookup_multi_v6() that 6178 * uses ire_ftable_lookup_v6() 6179 * to find an IRE_INTERFACE for 6180 * the group. In the multirt 6181 * case, ire_lookup_multi_v6() 6182 * then invokes 6183 * ire_multirt_lookup_v6() to 6184 * find the next resolvable ire. 6185 * As a result, we obtain a new 6186 * interface, derived from the 6187 * next ire. 
6188 */ 6189 if (ipif_held) { 6190 ipif_refrele(ipif); 6191 ipif_held = B_FALSE; 6192 } 6193 ipif = ipif_lookup_group_v6( 6194 v6dstp, zoneid, ipst); 6195 ip2dbg(("ip_newroute_ipif: " 6196 "multirt dst %08x, " 6197 "ipif %p\n", 6198 ntohl(V4_PART_OF_V6( 6199 (*v6dstp))), 6200 (void *)ipif)); 6201 if (ipif != NULL) { 6202 ipif_held = B_TRUE; 6203 mp = copy_mp; 6204 copy_mp = NULL; 6205 multirt_resolve_next = 6206 B_TRUE; 6207 continue; 6208 } else { 6209 freemsg(copy_mp); 6210 } 6211 } 6212 } 6213 ill_refrele(dst_ill); 6214 if (ipif_held) { 6215 ipif_refrele(ipif); 6216 ipif_held = B_FALSE; 6217 } 6218 if (src_ipif != NULL) 6219 ipif_refrele(src_ipif); 6220 return; 6221 default: 6222 /* Some transient error */ 6223 ire_refrele(save_ire); 6224 break; 6225 } 6226 break; 6227 } 6228 default: 6229 break; 6230 } 6231 if (ip6_asp_table_held) { 6232 ip6_asp_table_refrele(ipst); 6233 ip6_asp_table_held = B_FALSE; 6234 } 6235 } while (multirt_resolve_next); 6236 6237 err_ret: 6238 if (ip6_asp_table_held) 6239 ip6_asp_table_refrele(ipst); 6240 if (ire != NULL) 6241 ire_refrele(ire); 6242 if (fire != NULL) 6243 ire_refrele(fire); 6244 if (ipif != NULL && ipif_held) 6245 ipif_refrele(ipif); 6246 if (src_ipif != NULL) 6247 ipif_refrele(src_ipif); 6248 /* Multicast - no point in trying to generate ICMP error */ 6249 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6250 if (dst_ill != NULL) { 6251 ill = dst_ill; 6252 ill_held = B_TRUE; 6253 } 6254 if (mp->b_prev || mp->b_next) { 6255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6256 } else { 6257 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6258 } 6259 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6260 mp->b_next = NULL; 6261 mp->b_prev = NULL; 6262 freemsg(first_mp); 6263 if (ill_held) 6264 ill_refrele(ill); 6265 } 6266 6267 /* 6268 * Parse and process any hop-by-hop or destination options. 
6269 * 6270 * Assumes that q is an ill read queue so that ICMP errors for link-local 6271 * destinations are sent out the correct interface. 6272 * 6273 * Returns -1 if there was an error and mp has been consumed. 6274 * Returns 0 if no special action is needed. 6275 * Returns 1 if the packet contained a router alert option for this node 6276 * which is verified to be "interesting/known" for our implementation. 6277 * 6278 * XXX Note: In future as more hbh or dest options are defined, 6279 * it may be better to have different routines for hbh and dest 6280 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6281 * may have same value in different namespaces. Or is it same namespace ?? 6282 * Current code checks for each opt_type (other than pads) if it is in 6283 * the expected nexthdr (hbh or dest) 6284 */ 6285 static int 6286 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6287 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6288 { 6289 uint8_t opt_type; 6290 uint_t optused; 6291 int ret = 0; 6292 mblk_t *first_mp; 6293 const char *errtype; 6294 zoneid_t zoneid; 6295 ill_t *ill = q->q_ptr; 6296 ipif_t *ipif; 6297 6298 first_mp = mp; 6299 if (mp->b_datap->db_type == M_CTL) { 6300 mp = mp->b_cont; 6301 } 6302 6303 while (optlen != 0) { 6304 opt_type = *optptr; 6305 if (opt_type == IP6OPT_PAD1) { 6306 optused = 1; 6307 } else { 6308 if (optlen < 2) 6309 goto bad_opt; 6310 errtype = "malformed"; 6311 if (opt_type == ip6opt_ls) { 6312 optused = 2 + optptr[1]; 6313 if (optused > optlen) 6314 goto bad_opt; 6315 } else switch (opt_type) { 6316 case IP6OPT_PADN: 6317 /* 6318 * Note:We don't verify that (N-2) pad octets 6319 * are zero as required by spec. Adhere to 6320 * "be liberal in what you accept..." 
part of 6321 * implementation philosophy (RFC791,RFC1122) 6322 */ 6323 optused = 2 + optptr[1]; 6324 if (optused > optlen) 6325 goto bad_opt; 6326 break; 6327 6328 case IP6OPT_JUMBO: 6329 if (hdr_type != IPPROTO_HOPOPTS) 6330 goto opt_error; 6331 goto opt_error; /* XXX Not implemented! */ 6332 6333 case IP6OPT_ROUTER_ALERT: { 6334 struct ip6_opt_router *or; 6335 6336 if (hdr_type != IPPROTO_HOPOPTS) 6337 goto opt_error; 6338 optused = 2 + optptr[1]; 6339 if (optused > optlen) 6340 goto bad_opt; 6341 or = (struct ip6_opt_router *)optptr; 6342 /* Check total length and alignment */ 6343 if (optused != sizeof (*or) || 6344 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6345 goto opt_error; 6346 /* Check value */ 6347 switch (*((uint16_t *)or->ip6or_value)) { 6348 case IP6_ALERT_MLD: 6349 case IP6_ALERT_RSVP: 6350 ret = 1; 6351 } 6352 break; 6353 } 6354 case IP6OPT_HOME_ADDRESS: { 6355 /* 6356 * Minimal support for the home address option 6357 * (which is required by all IPv6 nodes). 6358 * Implement by just swapping the home address 6359 * and source address. 6360 * XXX Note: this has IPsec implications since 6361 * AH needs to take this into account. 6362 * Also, when IPsec is used we need to ensure 6363 * that this is only processed once 6364 * in the received packet (to avoid swapping 6365 * back and forth). 6366 * NOTE:This option processing is considered 6367 * to be unsafe and prone to a denial of 6368 * service attack. 6369 * The current processing is not safe even with 6370 * IPsec secured IP packets. Since the home 6371 * address option processing requirement still 6372 * is in the IETF draft and in the process of 6373 * being redefined for its usage, it has been 6374 * decided to turn off the option by default. 6375 * If this section of code needs to be executed, 6376 * ndd variable ip6_ignore_home_address_opt 6377 * should be set to 0 at the user's own risk. 
6378 */ 6379 struct ip6_opt_home_address *oh; 6380 in6_addr_t tmp; 6381 6382 if (ipst->ips_ipv6_ignore_home_address_opt) 6383 goto opt_error; 6384 6385 if (hdr_type != IPPROTO_DSTOPTS) 6386 goto opt_error; 6387 optused = 2 + optptr[1]; 6388 if (optused > optlen) 6389 goto bad_opt; 6390 6391 /* 6392 * We did this dest. opt the first time 6393 * around (i.e. before AH processing). 6394 * If we've done AH... stop now. 6395 */ 6396 if (first_mp != mp) { 6397 ipsec_in_t *ii; 6398 6399 ii = (ipsec_in_t *)first_mp->b_rptr; 6400 if (ii->ipsec_in_ah_sa != NULL) 6401 break; 6402 } 6403 6404 oh = (struct ip6_opt_home_address *)optptr; 6405 /* Check total length and alignment */ 6406 if (optused < sizeof (*oh) || 6407 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6408 goto opt_error; 6409 /* Swap ip6_src and the home address */ 6410 tmp = ip6h->ip6_src; 6411 /* XXX Note: only 8 byte alignment option */ 6412 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6413 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6414 break; 6415 } 6416 6417 case IP6OPT_TUNNEL_LIMIT: 6418 if (hdr_type != IPPROTO_DSTOPTS) { 6419 goto opt_error; 6420 } 6421 optused = 2 + optptr[1]; 6422 if (optused > optlen) { 6423 goto bad_opt; 6424 } 6425 if (optused != 3) { 6426 goto opt_error; 6427 } 6428 break; 6429 6430 default: 6431 errtype = "unknown"; 6432 /* FALLTHROUGH */ 6433 opt_error: 6434 /* Determine which zone should send error */ 6435 zoneid = ipif_lookup_addr_zoneid_v6( 6436 &ip6h->ip6_dst, ill, ipst); 6437 switch (IP6OPT_TYPE(opt_type)) { 6438 case IP6OPT_TYPE_SKIP: 6439 optused = 2 + optptr[1]; 6440 if (optused > optlen) 6441 goto bad_opt; 6442 ip1dbg(("ip_process_options_v6: %s " 6443 "opt 0x%x skipped\n", 6444 errtype, opt_type)); 6445 break; 6446 case IP6OPT_TYPE_DISCARD: 6447 ip1dbg(("ip_process_options_v6: %s " 6448 "opt 0x%x; packet dropped\n", 6449 errtype, opt_type)); 6450 freemsg(first_mp); 6451 return (-1); 6452 case IP6OPT_TYPE_ICMP: 6453 if (zoneid == ALL_ZONES) { 6454 freemsg(first_mp); 6455 return 
(-1); 6456 } 6457 icmp_param_problem_v6(WR(q), first_mp, 6458 ICMP6_PARAMPROB_OPTION, 6459 (uint32_t)(optptr - 6460 (uint8_t *)ip6h), 6461 B_FALSE, B_FALSE, zoneid, ipst); 6462 return (-1); 6463 case IP6OPT_TYPE_FORCEICMP: 6464 /* 6465 * If we don't have a zone and the dst 6466 * addr is multicast, then pick a zone 6467 * based on the inbound interface. 6468 */ 6469 if (zoneid == ALL_ZONES && 6470 IN6_IS_ADDR_MULTICAST( 6471 &ip6h->ip6_dst)) { 6472 ipif = ipif_select_source_v6( 6473 ill, &ip6h->ip6_src, 6474 RESTRICT_TO_GROUP, 6475 IPV6_PREFER_SRC_DEFAULT, 6476 ALL_ZONES); 6477 if (ipif != NULL) { 6478 zoneid = 6479 ipif->ipif_zoneid; 6480 ipif_refrele(ipif); 6481 } 6482 } 6483 if (zoneid == ALL_ZONES) { 6484 freemsg(first_mp); 6485 return (-1); 6486 } 6487 icmp_param_problem_v6(WR(q), first_mp, 6488 ICMP6_PARAMPROB_OPTION, 6489 (uint32_t)(optptr - 6490 (uint8_t *)ip6h), 6491 B_FALSE, B_TRUE, zoneid, ipst); 6492 return (-1); 6493 default: 6494 ASSERT(0); 6495 } 6496 } 6497 } 6498 optlen -= optused; 6499 optptr += optused; 6500 } 6501 return (ret); 6502 6503 bad_opt: 6504 /* Determine which zone should send error */ 6505 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6506 if (zoneid == ALL_ZONES) { 6507 freemsg(first_mp); 6508 } else { 6509 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6510 (uint32_t)(optptr - (uint8_t *)ip6h), 6511 B_FALSE, B_FALSE, zoneid, ipst); 6512 } 6513 return (-1); 6514 } 6515 6516 /* 6517 * Process a routing header that is not yet empty. 6518 * Only handles type 0 routing headers. 
6519 */ 6520 static void 6521 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6522 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6523 { 6524 ip6_rthdr0_t *rthdr; 6525 uint_t ehdrlen; 6526 uint_t numaddr; 6527 in6_addr_t *addrptr; 6528 in6_addr_t tmp; 6529 ip_stack_t *ipst = ill->ill_ipst; 6530 6531 ASSERT(rth->ip6r_segleft != 0); 6532 6533 if (!ipst->ips_ipv6_forward_src_routed) { 6534 /* XXX Check for source routed out same interface? */ 6535 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6537 freemsg(hada_mp); 6538 freemsg(mp); 6539 return; 6540 } 6541 6542 if (rth->ip6r_type != 0) { 6543 if (hada_mp != NULL) 6544 goto hada_drop; 6545 /* Sent by forwarding path, and router is global zone */ 6546 icmp_param_problem_v6(WR(q), mp, 6547 ICMP6_PARAMPROB_HEADER, 6548 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6549 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6550 return; 6551 } 6552 rthdr = (ip6_rthdr0_t *)rth; 6553 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6554 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6555 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6556 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6557 if (rthdr->ip6r0_len & 0x1) { 6558 /* An odd length is impossible */ 6559 if (hada_mp != NULL) 6560 goto hada_drop; 6561 /* Sent by forwarding path, and router is global zone */ 6562 icmp_param_problem_v6(WR(q), mp, 6563 ICMP6_PARAMPROB_HEADER, 6564 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6565 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6566 return; 6567 } 6568 numaddr = rthdr->ip6r0_len / 2; 6569 if (rthdr->ip6r0_segleft > numaddr) { 6570 /* segleft exceeds number of addresses in routing header */ 6571 if (hada_mp != NULL) 6572 goto hada_drop; 6573 /* Sent by forwarding path, and router is global zone */ 6574 icmp_param_problem_v6(WR(q), mp, 6575 ICMP6_PARAMPROB_HEADER, 6576 (uint32_t)((uchar_t 
*)&rthdr->ip6r0_segleft - 6577 (uchar_t *)ip6h), 6578 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6579 return; 6580 } 6581 addrptr += (numaddr - rthdr->ip6r0_segleft); 6582 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6583 IN6_IS_ADDR_MULTICAST(addrptr)) { 6584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6585 freemsg(hada_mp); 6586 freemsg(mp); 6587 return; 6588 } 6589 /* Swap */ 6590 tmp = *addrptr; 6591 *addrptr = ip6h->ip6_dst; 6592 ip6h->ip6_dst = tmp; 6593 rthdr->ip6r0_segleft--; 6594 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6595 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6596 if (hada_mp != NULL) 6597 goto hada_drop; 6598 /* Sent by forwarding path, and router is global zone */ 6599 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6600 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6601 return; 6602 } 6603 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6604 ip6h = (ip6_t *)mp->b_rptr; 6605 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6606 } else { 6607 freemsg(mp); 6608 } 6609 return; 6610 hada_drop: 6611 /* IPsec kstats: bean counter? */ 6612 freemsg(hada_mp); 6613 freemsg(mp); 6614 } 6615 6616 /* 6617 * Read side put procedure for IPv6 module. 6618 */ 6619 void 6620 ip_rput_v6(queue_t *q, mblk_t *mp) 6621 { 6622 mblk_t *first_mp; 6623 mblk_t *hada_mp = NULL; 6624 ip6_t *ip6h; 6625 boolean_t ll_multicast = B_FALSE; 6626 boolean_t mctl_present = B_FALSE; 6627 ill_t *ill; 6628 struct iocblk *iocp; 6629 uint_t flags = 0; 6630 mblk_t *dl_mp; 6631 ip_stack_t *ipst; 6632 int check; 6633 6634 ill = (ill_t *)q->q_ptr; 6635 ipst = ill->ill_ipst; 6636 if (ill->ill_state_flags & ILL_CONDEMNED) { 6637 union DL_primitives *dl; 6638 6639 dl = (union DL_primitives *)mp->b_rptr; 6640 /* 6641 * Things are opening or closing - only accept DLPI 6642 * ack messages. If the stream is closing and ip_wsrv 6643 * has completed, ip_close is out of the qwait, but has 6644 * not yet completed qprocsoff. 
Don't proceed any further 6645 * because the ill has been cleaned up and things hanging 6646 * off the ill have been freed. 6647 */ 6648 if ((mp->b_datap->db_type != M_PCPROTO) || 6649 (dl->dl_primitive == DL_UNITDATA_IND)) { 6650 inet_freemsg(mp); 6651 return; 6652 } 6653 } 6654 6655 dl_mp = NULL; 6656 switch (mp->b_datap->db_type) { 6657 case M_DATA: { 6658 int hlen; 6659 uchar_t *ucp; 6660 struct ether_header *eh; 6661 dl_unitdata_ind_t *dui; 6662 6663 /* 6664 * This is a work-around for CR 6451644, a bug in Nemo. It 6665 * should be removed when that problem is fixed. 6666 */ 6667 if (ill->ill_mactype == DL_ETHER && 6668 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6669 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6670 ucp[-2] == (IP6_DL_SAP >> 8)) { 6671 if (hlen >= sizeof (struct ether_vlan_header) && 6672 ucp[-5] == 0 && ucp[-6] == 0x81) 6673 ucp -= sizeof (struct ether_vlan_header); 6674 else 6675 ucp -= sizeof (struct ether_header); 6676 /* 6677 * If it's a group address, then fabricate a 6678 * DL_UNITDATA_IND message. 
6679 */ 6680 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6681 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6682 BPRI_HI)) != NULL) { 6683 eh = (struct ether_header *)ucp; 6684 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6685 DB_TYPE(dl_mp) = M_PROTO; 6686 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6687 dui->dl_primitive = DL_UNITDATA_IND; 6688 dui->dl_dest_addr_length = 8; 6689 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6690 dui->dl_src_addr_length = 8; 6691 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6692 8; 6693 dui->dl_group_address = 1; 6694 ucp = (uchar_t *)(dui + 1); 6695 if (ill->ill_sap_length > 0) 6696 ucp += ill->ill_sap_length; 6697 bcopy(&eh->ether_dhost, ucp, 6); 6698 bcopy(&eh->ether_shost, ucp + 8, 6); 6699 ucp = (uchar_t *)(dui + 1); 6700 if (ill->ill_sap_length < 0) 6701 ucp += 8 + ill->ill_sap_length; 6702 bcopy(&eh->ether_type, ucp, 2); 6703 bcopy(&eh->ether_type, ucp + 8, 2); 6704 } 6705 } 6706 break; 6707 } 6708 6709 case M_PROTO: 6710 case M_PCPROTO: 6711 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6712 DL_UNITDATA_IND) { 6713 /* Go handle anything other than data elsewhere. */ 6714 ip_rput_dlpi(q, mp); 6715 return; 6716 } 6717 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6718 6719 /* Save the DLPI header. 
*/ 6720 dl_mp = mp; 6721 mp = mp->b_cont; 6722 dl_mp->b_cont = NULL; 6723 break; 6724 case M_BREAK: 6725 panic("ip_rput_v6: got an M_BREAK"); 6726 /*NOTREACHED*/ 6727 case M_IOCACK: 6728 iocp = (struct iocblk *)mp->b_rptr; 6729 switch (iocp->ioc_cmd) { 6730 case DL_IOC_HDR_INFO: 6731 ill = (ill_t *)q->q_ptr; 6732 ill_fastpath_ack(ill, mp); 6733 return; 6734 6735 case SIOCGTUNPARAM: 6736 case OSIOCGTUNPARAM: 6737 ip_rput_other(NULL, q, mp, NULL); 6738 return; 6739 6740 case SIOCSTUNPARAM: 6741 case OSIOCSTUNPARAM: 6742 /* Go through qwriter */ 6743 break; 6744 default: 6745 putnext(q, mp); 6746 return; 6747 } 6748 /* FALLTHRU */ 6749 case M_ERROR: 6750 case M_HANGUP: 6751 mutex_enter(&ill->ill_lock); 6752 if (ill->ill_state_flags & ILL_CONDEMNED) { 6753 mutex_exit(&ill->ill_lock); 6754 freemsg(mp); 6755 return; 6756 } 6757 ill_refhold_locked(ill); 6758 mutex_exit(&ill->ill_lock); 6759 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6760 return; 6761 case M_CTL: 6762 if ((MBLKL(mp) > sizeof (int)) && 6763 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6764 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6765 mctl_present = B_TRUE; 6766 break; 6767 } 6768 putnext(q, mp); 6769 return; 6770 case M_IOCNAK: 6771 iocp = (struct iocblk *)mp->b_rptr; 6772 switch (iocp->ioc_cmd) { 6773 case DL_IOC_HDR_INFO: 6774 case SIOCGTUNPARAM: 6775 case OSIOCGTUNPARAM: 6776 ip_rput_other(NULL, q, mp, NULL); 6777 return; 6778 6779 case SIOCSTUNPARAM: 6780 case OSIOCSTUNPARAM: 6781 mutex_enter(&ill->ill_lock); 6782 if (ill->ill_state_flags & ILL_CONDEMNED) { 6783 mutex_exit(&ill->ill_lock); 6784 freemsg(mp); 6785 return; 6786 } 6787 ill_refhold_locked(ill); 6788 mutex_exit(&ill->ill_lock); 6789 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6790 return; 6791 default: 6792 break; 6793 } 6794 /* FALLTHRU */ 6795 default: 6796 putnext(q, mp); 6797 return; 6798 } 6799 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6800 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6801 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6802 /* 6803 * if db_ref > 1 then copymsg and free original. Packet may be 6804 * changed and do not want other entity who has a reference to this 6805 * message to trip over the changes. This is a blind change because 6806 * trying to catch all places that might change packet is too 6807 * difficult (since it may be a module above this one). 6808 */ 6809 if (mp->b_datap->db_ref > 1) { 6810 mblk_t *mp1; 6811 6812 mp1 = copymsg(mp); 6813 freemsg(mp); 6814 if (mp1 == NULL) { 6815 first_mp = NULL; 6816 goto discard; 6817 } 6818 mp = mp1; 6819 } 6820 first_mp = mp; 6821 if (mctl_present) { 6822 hada_mp = first_mp; 6823 mp = first_mp->b_cont; 6824 } 6825 6826 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6827 freemsg(mp); 6828 return; 6829 } 6830 6831 ip6h = (ip6_t *)mp->b_rptr; 6832 6833 /* 6834 * ip:::receive must see ipv6 packets with a full header, 6835 * and so is placed after the IP6_MBLK_HDR_ERR check. 6836 */ 6837 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6838 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6839 int, 0); 6840 6841 if (check != IP6_MBLK_OK) { 6842 freemsg(mp); 6843 return; 6844 } 6845 6846 DTRACE_PROBE4(ip6__physical__in__start, 6847 ill_t *, ill, ill_t *, NULL, 6848 ip6_t *, ip6h, mblk_t *, first_mp); 6849 6850 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6851 ipst->ips_ipv6firewall_physical_in, 6852 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6853 6854 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6855 6856 if (first_mp == NULL) 6857 return; 6858 6859 /* 6860 * Attach any necessary label information to this packet. 6861 */ 6862 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6863 if (ip6opt_ls != 0) 6864 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6865 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6866 goto discard; 6867 } 6868 6869 /* IP observability hook. 
*/ 6870 if (ipst->ips_ipobs_enabled) { 6871 zoneid_t dzone; 6872 6873 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6874 ALL_ZONES); 6875 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6876 IPV6_VERSION, 0, ipst); 6877 } 6878 6879 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6880 IPV6_DEFAULT_VERS_AND_FLOW) { 6881 /* 6882 * It may be a bit too expensive to do this mapped address 6883 * check here, but in the interest of robustness, it seems 6884 * like the correct place. 6885 * TODO: Avoid this check for e.g. connected TCP sockets 6886 */ 6887 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6888 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6889 goto discard; 6890 } 6891 6892 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6893 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6894 goto discard; 6895 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6896 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6897 goto discard; 6898 } 6899 6900 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6901 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6902 } else { 6903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6904 goto discard; 6905 } 6906 freemsg(dl_mp); 6907 return; 6908 6909 discard: 6910 if (dl_mp != NULL) 6911 freeb(dl_mp); 6912 freemsg(first_mp); 6913 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6914 } 6915 6916 /* 6917 * Walk through the IPv6 packet in mp and see if there's an AH header 6918 * in it. See if the AH header needs to get done before other headers in 6919 * the packet. (Worker function for ipsec_early_ah_v6().) 
6920 */ 6921 #define IPSEC_HDR_DONT_PROCESS 0 6922 #define IPSEC_HDR_PROCESS 1 6923 #define IPSEC_MEMORY_ERROR 2 6924 static int 6925 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6926 { 6927 uint_t length; 6928 uint_t ehdrlen; 6929 uint8_t *whereptr; 6930 uint8_t *endptr; 6931 uint8_t *nexthdrp; 6932 ip6_dest_t *desthdr; 6933 ip6_rthdr_t *rthdr; 6934 ip6_t *ip6h; 6935 6936 /* 6937 * For now just pullup everything. In general, the less pullups, 6938 * the better, but there's so much squirrelling through anyway, 6939 * it's just easier this way. 6940 */ 6941 if (!pullupmsg(mp, -1)) { 6942 return (IPSEC_MEMORY_ERROR); 6943 } 6944 6945 ip6h = (ip6_t *)mp->b_rptr; 6946 length = IPV6_HDR_LEN; 6947 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6948 endptr = mp->b_wptr; 6949 6950 /* 6951 * We can't just use the argument nexthdr in the place 6952 * of nexthdrp becaue we don't dereference nexthdrp 6953 * till we confirm whether it is a valid address. 6954 */ 6955 nexthdrp = &ip6h->ip6_nxt; 6956 while (whereptr < endptr) { 6957 /* Is there enough left for len + nexthdr? */ 6958 if (whereptr + MIN_EHDR_LEN > endptr) 6959 return (IPSEC_MEMORY_ERROR); 6960 6961 switch (*nexthdrp) { 6962 case IPPROTO_HOPOPTS: 6963 case IPPROTO_DSTOPTS: 6964 /* Assumes the headers are identical for hbh and dst */ 6965 desthdr = (ip6_dest_t *)whereptr; 6966 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6967 if ((uchar_t *)desthdr + ehdrlen > endptr) 6968 return (IPSEC_MEMORY_ERROR); 6969 /* 6970 * Return DONT_PROCESS because the destination 6971 * options header may be for each hop in a 6972 * routing-header, and we only want AH if we're 6973 * finished with routing headers. 6974 */ 6975 if (*nexthdrp == IPPROTO_DSTOPTS) 6976 return (IPSEC_HDR_DONT_PROCESS); 6977 nexthdrp = &desthdr->ip6d_nxt; 6978 break; 6979 case IPPROTO_ROUTING: 6980 rthdr = (ip6_rthdr_t *)whereptr; 6981 6982 /* 6983 * If there's more hops left on the routing header, 6984 * return now with DON'T PROCESS. 
6985 */ 6986 if (rthdr->ip6r_segleft > 0) 6987 return (IPSEC_HDR_DONT_PROCESS); 6988 6989 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6990 if ((uchar_t *)rthdr + ehdrlen > endptr) 6991 return (IPSEC_MEMORY_ERROR); 6992 nexthdrp = &rthdr->ip6r_nxt; 6993 break; 6994 case IPPROTO_FRAGMENT: 6995 /* Wait for reassembly */ 6996 return (IPSEC_HDR_DONT_PROCESS); 6997 case IPPROTO_AH: 6998 *nexthdr = IPPROTO_AH; 6999 return (IPSEC_HDR_PROCESS); 7000 case IPPROTO_NONE: 7001 /* No next header means we're finished */ 7002 default: 7003 return (IPSEC_HDR_DONT_PROCESS); 7004 } 7005 length += ehdrlen; 7006 whereptr += ehdrlen; 7007 } 7008 panic("ipsec_needs_processing_v6"); 7009 /*NOTREACHED*/ 7010 } 7011 7012 /* 7013 * Path for AH if options are present. If this is the first time we are 7014 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7015 * Otherwise, just fanout. Return value answers the boolean question: 7016 * "Did I consume the mblk you sent me?" 7017 * 7018 * Sometimes AH needs to be done before other IPv6 headers for security 7019 * reasons. This function (and its ipsec_needs_processing_v6() above) 7020 * indicates if that is so, and fans out to the appropriate IPsec protocol 7021 * for the datagram passed in. 7022 */ 7023 static boolean_t 7024 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7025 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7026 { 7027 mblk_t *mp; 7028 uint8_t nexthdr; 7029 ipsec_in_t *ii = NULL; 7030 ah_t *ah; 7031 ipsec_status_t ipsec_rc; 7032 ip_stack_t *ipst = ill->ill_ipst; 7033 netstack_t *ns = ipst->ips_netstack; 7034 ipsec_stack_t *ipss = ns->netstack_ipsec; 7035 7036 ASSERT((hada_mp == NULL) || (!mctl_present)); 7037 7038 switch (ipsec_needs_processing_v6( 7039 (mctl_present ? 
first_mp->b_cont : first_mp), &nexthdr)) { 7040 case IPSEC_MEMORY_ERROR: 7041 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7042 freemsg(hada_mp); 7043 freemsg(first_mp); 7044 return (B_TRUE); 7045 case IPSEC_HDR_DONT_PROCESS: 7046 return (B_FALSE); 7047 } 7048 7049 /* Default means send it to AH! */ 7050 ASSERT(nexthdr == IPPROTO_AH); 7051 if (!mctl_present) { 7052 mp = first_mp; 7053 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7054 if (first_mp == NULL) { 7055 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7056 "allocation failure.\n")); 7057 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7058 freemsg(hada_mp); 7059 freemsg(mp); 7060 return (B_TRUE); 7061 } 7062 /* 7063 * Store the ill_index so that when we come back 7064 * from IPSEC we ride on the same queue. 7065 */ 7066 ii = (ipsec_in_t *)first_mp->b_rptr; 7067 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7068 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7069 first_mp->b_cont = mp; 7070 } 7071 /* 7072 * Cache hardware acceleration info. 
7073 */ 7074 if (hada_mp != NULL) { 7075 ASSERT(ii != NULL); 7076 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7077 "caching data attr.\n")); 7078 ii->ipsec_in_accelerated = B_TRUE; 7079 ii->ipsec_in_da = hada_mp; 7080 } 7081 7082 if (!ipsec_loaded(ipss)) { 7083 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7084 return (B_TRUE); 7085 } 7086 7087 ah = ipsec_inbound_ah_sa(first_mp, ns); 7088 if (ah == NULL) 7089 return (B_TRUE); 7090 ASSERT(ii->ipsec_in_ah_sa != NULL); 7091 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7092 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7093 7094 switch (ipsec_rc) { 7095 case IPSEC_STATUS_SUCCESS: 7096 /* we're done with IPsec processing, send it up */ 7097 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7098 break; 7099 case IPSEC_STATUS_FAILED: 7100 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7101 break; 7102 case IPSEC_STATUS_PENDING: 7103 /* no action needed */ 7104 break; 7105 } 7106 return (B_TRUE); 7107 } 7108 7109 /* 7110 * Validate the IPv6 mblk for alignment. 7111 */ 7112 int 7113 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7114 { 7115 int pkt_len, ip6_len; 7116 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7117 7118 /* check for alignment and full IPv6 header */ 7119 if (!OK_32PTR((uchar_t *)ip6h) || 7120 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7121 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7123 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7124 return (IP6_MBLK_HDR_ERR); 7125 } 7126 ip6h = (ip6_t *)mp->b_rptr; 7127 } 7128 7129 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7130 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7131 7132 if (mp->b_cont == NULL) 7133 pkt_len = mp->b_wptr - mp->b_rptr; 7134 else 7135 pkt_len = msgdsize(mp); 7136 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7137 7138 /* 7139 * Check for bogus (too short packet) and packet which 7140 * was padded by the link layer. 
7141 */ 7142 if (ip6_len != pkt_len) { 7143 ssize_t diff; 7144 7145 if (ip6_len > pkt_len) { 7146 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7147 ip6_len, pkt_len)); 7148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7149 return (IP6_MBLK_LEN_ERR); 7150 } 7151 diff = (ssize_t)(pkt_len - ip6_len); 7152 7153 if (!adjmsg(mp, -diff)) { 7154 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7156 return (IP6_MBLK_LEN_ERR); 7157 } 7158 } 7159 return (IP6_MBLK_OK); 7160 } 7161 7162 /* 7163 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7164 * ip_rput_v6 has already verified alignment, the min length, the version, 7165 * and db_ref = 1. 7166 * 7167 * The ill passed in (the arg named inill) is the ill that the packet 7168 * actually arrived on. We need to remember this when saving the 7169 * input interface index into potential IPV6_PKTINFO data in 7170 * ip_add_info_v6(). 7171 * 7172 * This routine doesn't free dl_mp; that's the caller's responsibility on 7173 * return. (Note that the callers are complex enough that there's no tail 7174 * recursion here anyway.) 
7175 */ 7176 void 7177 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7178 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7179 { 7180 ire_t *ire = NULL; 7181 ill_t *ill = inill; 7182 ill_t *outill; 7183 ipif_t *ipif; 7184 uint8_t *whereptr; 7185 uint8_t nexthdr; 7186 uint16_t remlen; 7187 uint_t prev_nexthdr_offset; 7188 uint_t used; 7189 size_t old_pkt_len; 7190 size_t pkt_len; 7191 uint16_t ip6_len; 7192 uint_t hdr_len; 7193 boolean_t mctl_present; 7194 mblk_t *first_mp; 7195 mblk_t *first_mp1; 7196 boolean_t no_forward; 7197 ip6_hbh_t *hbhhdr; 7198 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7199 conn_t *connp; 7200 ilm_t *ilm; 7201 uint32_t ports; 7202 zoneid_t zoneid = GLOBAL_ZONEID; 7203 uint16_t hck_flags, reass_hck_flags; 7204 uint32_t reass_sum; 7205 boolean_t cksum_err; 7206 mblk_t *mp1; 7207 ip_stack_t *ipst = inill->ill_ipst; 7208 7209 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7210 7211 if (hada_mp != NULL) { 7212 /* 7213 * It's an IPsec accelerated packet. 7214 * Keep a pointer to the data attributes around until 7215 * we allocate the ipsecinfo structure. 7216 */ 7217 IPSECHW_DEBUG(IPSECHW_PKT, 7218 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7219 hada_mp->b_cont = NULL; 7220 /* 7221 * Since it is accelerated, it came directly from 7222 * the ill. 
7223 */ 7224 ASSERT(mctl_present == B_FALSE); 7225 ASSERT(mp->b_datap->db_type != M_CTL); 7226 } 7227 7228 ip6h = (ip6_t *)mp->b_rptr; 7229 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7230 old_pkt_len = pkt_len = ip6_len; 7231 7232 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7233 hck_flags = DB_CKSUMFLAGS(mp); 7234 else 7235 hck_flags = 0; 7236 7237 /* Clear checksum flags in case we need to forward */ 7238 DB_CKSUMFLAGS(mp) = 0; 7239 reass_sum = reass_hck_flags = 0; 7240 7241 nexthdr = ip6h->ip6_nxt; 7242 7243 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7244 (uchar_t *)ip6h); 7245 whereptr = (uint8_t *)&ip6h[1]; 7246 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7247 7248 /* Process hop by hop header options */ 7249 if (nexthdr == IPPROTO_HOPOPTS) { 7250 uint_t ehdrlen; 7251 uint8_t *optptr; 7252 7253 if (remlen < MIN_EHDR_LEN) 7254 goto pkt_too_short; 7255 if (mp->b_cont != NULL && 7256 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7257 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7258 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7259 freemsg(hada_mp); 7260 freemsg(first_mp); 7261 return; 7262 } 7263 ip6h = (ip6_t *)mp->b_rptr; 7264 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7265 } 7266 hbhhdr = (ip6_hbh_t *)whereptr; 7267 nexthdr = hbhhdr->ip6h_nxt; 7268 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7269 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7270 7271 if (remlen < ehdrlen) 7272 goto pkt_too_short; 7273 if (mp->b_cont != NULL && 7274 whereptr + ehdrlen > mp->b_wptr) { 7275 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7276 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7277 freemsg(hada_mp); 7278 freemsg(first_mp); 7279 return; 7280 } 7281 ip6h = (ip6_t *)mp->b_rptr; 7282 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7283 hbhhdr = (ip6_hbh_t *)whereptr; 7284 } 7285 7286 optptr = whereptr + 2; 7287 whereptr += ehdrlen; 7288 remlen -= ehdrlen; 7289 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7290 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7291 case -1: 7292 /* 7293 * Packet has been consumed and any 7294 * needed ICMP messages sent. 7295 */ 7296 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7297 freemsg(hada_mp); 7298 return; 7299 case 0: 7300 /* no action needed */ 7301 break; 7302 case 1: 7303 /* Known router alert */ 7304 goto ipv6forus; 7305 } 7306 } 7307 7308 /* 7309 * On incoming v6 multicast packets we will bypass the ire table, 7310 * and assume that the read queue corresponds to the targetted 7311 * interface. 7312 * 7313 * The effect of this is the same as the IPv4 original code, but is 7314 * much cleaner I think. See ip_rput for how that was done. 7315 */ 7316 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7317 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7318 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7319 /* 7320 * XXX TODO Give to mrouted to for multicast forwarding. 7321 */ 7322 ILM_WALKER_HOLD(ill); 7323 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7324 ILM_WALKER_RELE(ill); 7325 if (ilm == NULL) { 7326 if (ip_debug > 3) { 7327 /* ip2dbg */ 7328 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7329 " which is not for us: %s\n", AF_INET6, 7330 &ip6h->ip6_dst); 7331 } 7332 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7333 freemsg(hada_mp); 7334 freemsg(first_mp); 7335 return; 7336 } 7337 if (ip_debug > 3) { 7338 /* ip2dbg */ 7339 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7340 AF_INET6, &ip6h->ip6_dst); 7341 } 7342 zoneid = GLOBAL_ZONEID; 7343 goto ipv6forus; 7344 } 7345 7346 ipif = ill->ill_ipif; 7347 7348 /* 7349 * If a packet was received on an interface that is a 6to4 tunnel, 7350 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7351 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7352 * the 6to4 prefix of the address configured on the receiving interface. 
7353 * Otherwise, the packet was delivered to this interface in error and 7354 * the packet must be dropped. 7355 */ 7356 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7357 7358 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7359 &ip6h->ip6_dst)) { 7360 if (ip_debug > 2) { 7361 /* ip1dbg */ 7362 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7363 "addressed packet which is not for us: " 7364 "%s\n", AF_INET6, &ip6h->ip6_dst); 7365 } 7366 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7367 freemsg(first_mp); 7368 return; 7369 } 7370 } 7371 7372 /* 7373 * Find an ire that matches destination. For link-local addresses 7374 * we have to match the ill. 7375 * TBD for site local addresses. 7376 */ 7377 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7378 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7379 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7380 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7381 } else { 7382 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7383 MBLK_GETLABEL(mp), ipst); 7384 7385 if (ire != NULL && ire->ire_stq != NULL && 7386 ire->ire_zoneid != GLOBAL_ZONEID && 7387 ire->ire_zoneid != ALL_ZONES) { 7388 /* 7389 * Should only use IREs that are visible from the 7390 * global zone for forwarding. 7391 */ 7392 ire_refrele(ire); 7393 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7394 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7395 } 7396 } 7397 7398 if (ire == NULL) { 7399 /* 7400 * No matching IRE found. Mark this packet as having 7401 * originated externally. 
7402 */ 7403 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7404 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7405 if (!(ill->ill_flags & ILLF_ROUTER)) { 7406 BUMP_MIB(ill->ill_ip_mib, 7407 ipIfStatsInAddrErrors); 7408 } 7409 freemsg(hada_mp); 7410 freemsg(first_mp); 7411 return; 7412 } 7413 if (ip6h->ip6_hops <= 1) { 7414 if (hada_mp != NULL) 7415 goto hada_drop; 7416 /* Sent by forwarding path, and router is global zone */ 7417 icmp_time_exceeded_v6(WR(q), first_mp, 7418 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7419 GLOBAL_ZONEID, ipst); 7420 return; 7421 } 7422 /* 7423 * Per RFC 3513 section 2.5.2, we must not forward packets with 7424 * an unspecified source address. 7425 */ 7426 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7427 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7428 freemsg(hada_mp); 7429 freemsg(first_mp); 7430 return; 7431 } 7432 mp->b_prev = (mblk_t *)(uintptr_t) 7433 ill->ill_phyint->phyint_ifindex; 7434 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7435 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7436 GLOBAL_ZONEID, ipst); 7437 return; 7438 } 7439 /* we have a matching IRE */ 7440 if (ire->ire_stq != NULL) { 7441 ill_group_t *ill_group; 7442 ill_group_t *ire_group; 7443 7444 /* 7445 * To be quicker, we may wish not to chase pointers 7446 * (ire->ire_ipif->ipif_ill...) and instead store the 7447 * forwarding policy in the ire. An unfortunate side- 7448 * effect of this would be requiring an ire flush whenever 7449 * the ILLF_ROUTER flag changes. For now, chase pointers 7450 * once and store in the boolean no_forward. 7451 * 7452 * This appears twice to keep it out of the non-forwarding, 7453 * yes-it's-for-us-on-the-right-interface case. 7454 */ 7455 no_forward = ((ill->ill_flags & 7456 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7457 7458 7459 ASSERT(first_mp == mp); 7460 /* 7461 * This ire has a send-to queue - forward the packet. 
7462 */ 7463 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7464 freemsg(hada_mp); 7465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7466 if (no_forward) { 7467 BUMP_MIB(ill->ill_ip_mib, 7468 ipIfStatsInAddrErrors); 7469 } 7470 freemsg(mp); 7471 ire_refrele(ire); 7472 return; 7473 } 7474 /* 7475 * ipIfStatsHCInForwDatagrams should only be increment if there 7476 * will be an attempt to forward the packet, which is why we 7477 * increment after the above condition has been checked. 7478 */ 7479 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7480 if (ip6h->ip6_hops <= 1) { 7481 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7482 /* Sent by forwarding path, and router is global zone */ 7483 icmp_time_exceeded_v6(WR(q), mp, 7484 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7485 GLOBAL_ZONEID, ipst); 7486 ire_refrele(ire); 7487 return; 7488 } 7489 /* 7490 * Per RFC 3513 section 2.5.2, we must not forward packets with 7491 * an unspecified source address. 7492 */ 7493 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7494 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7495 freemsg(mp); 7496 ire_refrele(ire); 7497 return; 7498 } 7499 7500 if (is_system_labeled()) { 7501 mblk_t *mp1; 7502 7503 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7504 BUMP_MIB(ill->ill_ip_mib, 7505 ipIfStatsForwProhibits); 7506 freemsg(mp); 7507 ire_refrele(ire); 7508 return; 7509 } 7510 /* Size may have changed */ 7511 mp = mp1; 7512 ip6h = (ip6_t *)mp->b_rptr; 7513 pkt_len = msgdsize(mp); 7514 } 7515 7516 if (pkt_len > ire->ire_max_frag) { 7517 int max_frag = ire->ire_max_frag; 7518 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7519 /* 7520 * Handle labeled packet resizing. 
7521 */ 7522 if (is_system_labeled()) { 7523 max_frag = tsol_pmtu_adjust(mp, max_frag, 7524 pkt_len - old_pkt_len, AF_INET6); 7525 } 7526 7527 /* Sent by forwarding path, and router is global zone */ 7528 icmp_pkt2big_v6(WR(q), mp, max_frag, 7529 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7530 ire_refrele(ire); 7531 return; 7532 } 7533 7534 /* 7535 * Check to see if we're forwarding the packet to a 7536 * different link from which it came. If so, check the 7537 * source and destination addresses since routers must not 7538 * forward any packets with link-local source or 7539 * destination addresses to other links. Otherwise (if 7540 * we're forwarding onto the same link), conditionally send 7541 * a redirect message. 7542 */ 7543 ill_group = ill->ill_group; 7544 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7545 if (ire->ire_rfq != q && (ill_group == NULL || 7546 ill_group != ire_group)) { 7547 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7548 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7549 BUMP_MIB(ill->ill_ip_mib, 7550 ipIfStatsInAddrErrors); 7551 freemsg(mp); 7552 ire_refrele(ire); 7553 return; 7554 } 7555 /* TBD add site-local check at site boundary? */ 7556 } else if (ipst->ips_ipv6_send_redirects) { 7557 in6_addr_t *v6targ; 7558 in6_addr_t gw_addr_v6; 7559 ire_t *src_ire_v6 = NULL; 7560 7561 /* 7562 * Don't send a redirect when forwarding a source 7563 * routed packet. 7564 */ 7565 if (ip_source_routed_v6(ip6h, mp, ipst)) 7566 goto forward; 7567 7568 mutex_enter(&ire->ire_lock); 7569 gw_addr_v6 = ire->ire_gateway_addr_v6; 7570 mutex_exit(&ire->ire_lock); 7571 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7572 v6targ = &gw_addr_v6; 7573 /* 7574 * We won't send redirects to a router 7575 * that doesn't have a link local 7576 * address, but will forward. 
7577 */ 7578 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7579 BUMP_MIB(ill->ill_ip_mib, 7580 ipIfStatsInAddrErrors); 7581 goto forward; 7582 } 7583 } else { 7584 v6targ = &ip6h->ip6_dst; 7585 } 7586 7587 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7588 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7589 GLOBAL_ZONEID, 0, NULL, 7590 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7591 ipst); 7592 7593 if (src_ire_v6 != NULL) { 7594 /* 7595 * The source is directly connected. 7596 */ 7597 mp1 = copymsg(mp); 7598 if (mp1 != NULL) { 7599 icmp_send_redirect_v6(WR(q), 7600 mp1, v6targ, &ip6h->ip6_dst, 7601 ill, B_FALSE); 7602 } 7603 ire_refrele(src_ire_v6); 7604 } 7605 } 7606 7607 forward: 7608 /* Hoplimit verified above */ 7609 ip6h->ip6_hops--; 7610 7611 outill = ire->ire_ipif->ipif_ill; 7612 7613 DTRACE_PROBE4(ip6__forwarding__start, 7614 ill_t *, inill, ill_t *, outill, 7615 ip6_t *, ip6h, mblk_t *, mp); 7616 7617 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7618 ipst->ips_ipv6firewall_forwarding, 7619 inill, outill, ip6h, mp, mp, 0, ipst); 7620 7621 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7622 7623 if (mp != NULL) { 7624 UPDATE_IB_PKT_COUNT(ire); 7625 ire->ire_last_used_time = lbolt; 7626 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7627 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7628 } 7629 IRE_REFRELE(ire); 7630 return; 7631 } 7632 7633 /* 7634 * Need to put on correct queue for reassembly to find it. 7635 * No need to use put() since reassembly has its own locks. 7636 * Note: multicast packets and packets destined to addresses 7637 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7638 * the arriving ill. Unlike the IPv4 case, enabling strict 7639 * destination multihoming will prevent accepting packets 7640 * addressed to an IRE_LOCAL on lo0. 
7641 */ 7642 if (ire->ire_rfq != q) { 7643 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7644 == NULL) { 7645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7646 freemsg(hada_mp); 7647 freemsg(first_mp); 7648 return; 7649 } 7650 if (ire->ire_rfq != NULL) { 7651 q = ire->ire_rfq; 7652 ill = (ill_t *)q->q_ptr; 7653 ASSERT(ill != NULL); 7654 } 7655 } 7656 7657 zoneid = ire->ire_zoneid; 7658 UPDATE_IB_PKT_COUNT(ire); 7659 ire->ire_last_used_time = lbolt; 7660 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7661 ire_refrele(ire); 7662 ire = NULL; 7663 ipv6forus: 7664 /* 7665 * Looks like this packet is for us one way or another. 7666 * This is where we'll process destination headers etc. 7667 */ 7668 for (; ; ) { 7669 switch (nexthdr) { 7670 case IPPROTO_TCP: { 7671 uint16_t *up; 7672 uint32_t sum; 7673 int offset; 7674 7675 hdr_len = pkt_len - remlen; 7676 7677 if (hada_mp != NULL) { 7678 ip0dbg(("tcp hada drop\n")); 7679 goto hada_drop; 7680 } 7681 7682 7683 /* TCP needs all of the TCP header */ 7684 if (remlen < TCP_MIN_HEADER_LENGTH) 7685 goto pkt_too_short; 7686 if (mp->b_cont != NULL && 7687 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7688 if (!pullupmsg(mp, 7689 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7690 BUMP_MIB(ill->ill_ip_mib, 7691 ipIfStatsInDiscards); 7692 freemsg(first_mp); 7693 return; 7694 } 7695 hck_flags = 0; 7696 ip6h = (ip6_t *)mp->b_rptr; 7697 whereptr = (uint8_t *)ip6h + hdr_len; 7698 } 7699 /* 7700 * Extract the offset field from the TCP header. 7701 */ 7702 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7703 if (offset != 5) { 7704 if (offset < 5) { 7705 ip1dbg(("ip_rput_data_v6: short " 7706 "TCP data offset")); 7707 BUMP_MIB(ill->ill_ip_mib, 7708 ipIfStatsInDiscards); 7709 freemsg(first_mp); 7710 return; 7711 } 7712 /* 7713 * There must be TCP options. 7714 * Make sure we can grab them. 
7715 */ 7716 offset <<= 2; 7717 if (remlen < offset) 7718 goto pkt_too_short; 7719 if (mp->b_cont != NULL && 7720 whereptr + offset > mp->b_wptr) { 7721 if (!pullupmsg(mp, 7722 hdr_len + offset)) { 7723 BUMP_MIB(ill->ill_ip_mib, 7724 ipIfStatsInDiscards); 7725 freemsg(first_mp); 7726 return; 7727 } 7728 hck_flags = 0; 7729 ip6h = (ip6_t *)mp->b_rptr; 7730 whereptr = (uint8_t *)ip6h + hdr_len; 7731 } 7732 } 7733 7734 up = (uint16_t *)&ip6h->ip6_src; 7735 /* 7736 * TCP checksum calculation. First sum up the 7737 * pseudo-header fields: 7738 * - Source IPv6 address 7739 * - Destination IPv6 address 7740 * - TCP payload length 7741 * - TCP protocol ID 7742 */ 7743 sum = htons(IPPROTO_TCP + remlen) + 7744 up[0] + up[1] + up[2] + up[3] + 7745 up[4] + up[5] + up[6] + up[7] + 7746 up[8] + up[9] + up[10] + up[11] + 7747 up[12] + up[13] + up[14] + up[15]; 7748 7749 /* Fold initial sum */ 7750 sum = (sum & 0xffff) + (sum >> 16); 7751 7752 mp1 = mp->b_cont; 7753 7754 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7755 IP6_STAT(ipst, ip6_in_sw_cksum); 7756 7757 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7758 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7759 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7760 mp, mp1, cksum_err); 7761 7762 if (cksum_err) { 7763 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7764 7765 if (hck_flags & HCK_FULLCKSUM) { 7766 IP6_STAT(ipst, 7767 ip6_tcp_in_full_hw_cksum_err); 7768 } else if (hck_flags & HCK_PARTIALCKSUM) { 7769 IP6_STAT(ipst, 7770 ip6_tcp_in_part_hw_cksum_err); 7771 } else { 7772 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7773 } 7774 freemsg(first_mp); 7775 return; 7776 } 7777 tcp_fanout: 7778 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7779 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7780 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7781 return; 7782 } 7783 case IPPROTO_SCTP: 7784 { 7785 sctp_hdr_t *sctph; 7786 uint32_t calcsum, pktsum; 7787 uint_t hdr_len = pkt_len - remlen; 7788 sctp_stack_t *sctps; 7789 7790 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7791 7792 /* SCTP needs all of the SCTP header */ 7793 if (remlen < sizeof (*sctph)) { 7794 goto pkt_too_short; 7795 } 7796 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7797 ASSERT(mp->b_cont != NULL); 7798 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7799 BUMP_MIB(ill->ill_ip_mib, 7800 ipIfStatsInDiscards); 7801 freemsg(mp); 7802 return; 7803 } 7804 ip6h = (ip6_t *)mp->b_rptr; 7805 whereptr = (uint8_t *)ip6h + hdr_len; 7806 } 7807 7808 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7809 /* checksum */ 7810 pktsum = sctph->sh_chksum; 7811 sctph->sh_chksum = 0; 7812 calcsum = sctp_cksum(mp, hdr_len); 7813 if (calcsum != pktsum) { 7814 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7815 freemsg(mp); 7816 return; 7817 } 7818 sctph->sh_chksum = pktsum; 7819 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7820 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7821 ports, zoneid, mp, sctps)) == NULL) { 7822 ip_fanout_sctp_raw(first_mp, ill, 7823 (ipha_t *)ip6h, B_FALSE, ports, 7824 mctl_present, 7825 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7826 B_TRUE, zoneid); 7827 return; 7828 } 7829 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7830 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7831 B_FALSE, mctl_present); 7832 return; 7833 } 7834 case IPPROTO_UDP: { 7835 uint16_t *up; 7836 uint32_t sum; 7837 7838 hdr_len = pkt_len - remlen; 7839 7840 if (hada_mp != NULL) { 7841 ip0dbg(("udp hada drop\n")); 7842 goto hada_drop; 7843 } 7844 7845 /* Verify that at least the ports are present */ 7846 if (remlen < UDPH_SIZE) 7847 goto pkt_too_short; 7848 if (mp->b_cont != NULL && 7849 whereptr + UDPH_SIZE > mp->b_wptr) { 7850 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7851 BUMP_MIB(ill->ill_ip_mib, 7852 ipIfStatsInDiscards); 7853 freemsg(first_mp); 7854 return; 7855 } 7856 hck_flags = 0; 7857 ip6h = (ip6_t *)mp->b_rptr; 7858 whereptr = (uint8_t *)ip6h + hdr_len; 7859 } 7860 7861 /* 7862 * Before going through the regular 
checksum 7863 * calculation, make sure the received checksum 7864 * is non-zero. RFC 2460 says, a 0x0000 checksum 7865 * in a UDP packet (within IPv6 packet) is invalid 7866 * and should be replaced by 0xffff. This makes 7867 * sense as regular checksum calculation will 7868 * pass for both the cases i.e. 0x0000 and 0xffff. 7869 * Removing one of the case makes error detection 7870 * stronger. 7871 */ 7872 7873 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7874 /* 0x0000 checksum is invalid */ 7875 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7876 "checksum value 0x0000\n")); 7877 BUMP_MIB(ill->ill_ip_mib, 7878 udpIfStatsInCksumErrs); 7879 freemsg(first_mp); 7880 return; 7881 } 7882 7883 up = (uint16_t *)&ip6h->ip6_src; 7884 7885 /* 7886 * UDP checksum calculation. First sum up the 7887 * pseudo-header fields: 7888 * - Source IPv6 address 7889 * - Destination IPv6 address 7890 * - UDP payload length 7891 * - UDP protocol ID 7892 */ 7893 7894 sum = htons(IPPROTO_UDP + remlen) + 7895 up[0] + up[1] + up[2] + up[3] + 7896 up[4] + up[5] + up[6] + up[7] + 7897 up[8] + up[9] + up[10] + up[11] + 7898 up[12] + up[13] + up[14] + up[15]; 7899 7900 /* Fold initial sum */ 7901 sum = (sum & 0xffff) + (sum >> 16); 7902 7903 if (reass_hck_flags != 0) { 7904 hck_flags = reass_hck_flags; 7905 7906 IP_CKSUM_RECV_REASS(hck_flags, 7907 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7908 sum, reass_sum, cksum_err); 7909 } else { 7910 mp1 = mp->b_cont; 7911 7912 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7913 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7914 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7915 mp, mp1, cksum_err); 7916 } 7917 7918 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7919 IP6_STAT(ipst, ip6_in_sw_cksum); 7920 7921 if (cksum_err) { 7922 BUMP_MIB(ill->ill_ip_mib, 7923 udpIfStatsInCksumErrs); 7924 7925 if (hck_flags & HCK_FULLCKSUM) 7926 IP6_STAT(ipst, 7927 ip6_udp_in_full_hw_cksum_err); 7928 else if (hck_flags & HCK_PARTIALCKSUM) 7929 IP6_STAT(ipst, 7930 
ip6_udp_in_part_hw_cksum_err); 7931 else 7932 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7933 7934 freemsg(first_mp); 7935 return; 7936 } 7937 goto udp_fanout; 7938 } 7939 case IPPROTO_ICMPV6: { 7940 uint16_t *up; 7941 uint32_t sum; 7942 uint_t hdr_len = pkt_len - remlen; 7943 7944 if (hada_mp != NULL) { 7945 ip0dbg(("icmp hada drop\n")); 7946 goto hada_drop; 7947 } 7948 7949 up = (uint16_t *)&ip6h->ip6_src; 7950 sum = htons(IPPROTO_ICMPV6 + remlen) + 7951 up[0] + up[1] + up[2] + up[3] + 7952 up[4] + up[5] + up[6] + up[7] + 7953 up[8] + up[9] + up[10] + up[11] + 7954 up[12] + up[13] + up[14] + up[15]; 7955 sum = (sum & 0xffff) + (sum >> 16); 7956 sum = IP_CSUM(mp, hdr_len, sum); 7957 if (sum != 0) { 7958 /* IPv6 ICMP checksum failed */ 7959 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7960 "failed %x\n", 7961 sum)); 7962 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7963 BUMP_MIB(ill->ill_icmp6_mib, 7964 ipv6IfIcmpInErrors); 7965 freemsg(first_mp); 7966 return; 7967 } 7968 7969 icmp_fanout: 7970 /* Check variable for testing applications */ 7971 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7972 freemsg(first_mp); 7973 return; 7974 } 7975 /* 7976 * Assume that there is always at least one conn for 7977 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7978 * where there is no conn. 7979 */ 7980 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7981 ASSERT(!IS_LOOPBACK((ill))); 7982 /* 7983 * In the multicast case, applications may have 7984 * joined the group from different zones, so we 7985 * need to deliver the packet to each of them. 7986 * Loop through the multicast memberships 7987 * structures (ilm) on the receive ill and send 7988 * a copy of the packet up each matching one. 
7989 */ 7990 ILM_WALKER_HOLD(ill); 7991 for (ilm = ill->ill_ilm; ilm != NULL; 7992 ilm = ilm->ilm_next) { 7993 if (ilm->ilm_flags & ILM_DELETED) 7994 continue; 7995 if (!IN6_ARE_ADDR_EQUAL( 7996 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7997 continue; 7998 if (!ipif_lookup_zoneid(ill, 7999 ilm->ilm_zoneid, IPIF_UP, NULL)) 8000 continue; 8001 8002 first_mp1 = ip_copymsg(first_mp); 8003 if (first_mp1 == NULL) 8004 continue; 8005 icmp_inbound_v6(q, first_mp1, ill, 8006 hdr_len, mctl_present, 0, 8007 ilm->ilm_zoneid, dl_mp); 8008 } 8009 ILM_WALKER_RELE(ill); 8010 } else { 8011 first_mp1 = ip_copymsg(first_mp); 8012 if (first_mp1 != NULL) 8013 icmp_inbound_v6(q, first_mp1, ill, 8014 hdr_len, mctl_present, 0, zoneid, 8015 dl_mp); 8016 } 8017 } 8018 /* FALLTHRU */ 8019 default: { 8020 /* 8021 * Handle protocols with which IPv6 is less intimate. 8022 */ 8023 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8024 8025 if (hada_mp != NULL) { 8026 ip0dbg(("default hada drop\n")); 8027 goto hada_drop; 8028 } 8029 8030 /* 8031 * Enable sending ICMP for "Unknown" nexthdr 8032 * case. i.e. where we did not FALLTHRU from 8033 * IPPROTO_ICMPV6 processing case above. 8034 * If we did FALLTHRU, then the packet has already been 8035 * processed for IPPF, don't process it again in 8036 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8037 * flags 8038 */ 8039 if (nexthdr != IPPROTO_ICMPV6) 8040 proto_flags |= IP_FF_SEND_ICMP; 8041 else 8042 proto_flags |= IP6_NO_IPPOLICY; 8043 8044 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8045 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8046 mctl_present, zoneid); 8047 return; 8048 } 8049 8050 case IPPROTO_DSTOPTS: { 8051 uint_t ehdrlen; 8052 uint8_t *optptr; 8053 ip6_dest_t *desthdr; 8054 8055 /* Check if AH is present. 
*/ 8056 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8057 hada_mp, zoneid)) { 8058 ip0dbg(("dst early hada drop\n")); 8059 return; 8060 } 8061 8062 /* 8063 * Reinitialize pointers, as ipsec_early_ah_v6() does 8064 * complete pullups. We don't have to do more pullups 8065 * as a result. 8066 */ 8067 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8068 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8069 ip6h = (ip6_t *)mp->b_rptr; 8070 8071 if (remlen < MIN_EHDR_LEN) 8072 goto pkt_too_short; 8073 8074 desthdr = (ip6_dest_t *)whereptr; 8075 nexthdr = desthdr->ip6d_nxt; 8076 prev_nexthdr_offset = (uint_t)(whereptr - 8077 (uint8_t *)ip6h); 8078 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8079 if (remlen < ehdrlen) 8080 goto pkt_too_short; 8081 optptr = whereptr + 2; 8082 /* 8083 * Note: XXX This code does not seem to make 8084 * distinction between Destination Options Header 8085 * being before/after Routing Header which can 8086 * happen if we are at the end of source route. 8087 * This may become significant in future. 8088 * (No real significant Destination Options are 8089 * defined/implemented yet ). 8090 */ 8091 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8092 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8093 case -1: 8094 /* 8095 * Packet has been consumed and any needed 8096 * ICMP errors sent. 
8097 */ 8098 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8099 freemsg(hada_mp); 8100 return; 8101 case 0: 8102 /* No action needed continue */ 8103 break; 8104 case 1: 8105 /* 8106 * Unnexpected return value 8107 * (Router alert is a Hop-by-Hop option) 8108 */ 8109 #ifdef DEBUG 8110 panic("ip_rput_data_v6: router " 8111 "alert hbh opt indication in dest opt"); 8112 /*NOTREACHED*/ 8113 #else 8114 freemsg(hada_mp); 8115 freemsg(first_mp); 8116 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8117 return; 8118 #endif 8119 } 8120 used = ehdrlen; 8121 break; 8122 } 8123 case IPPROTO_FRAGMENT: { 8124 ip6_frag_t *fraghdr; 8125 size_t no_frag_hdr_len; 8126 8127 if (hada_mp != NULL) { 8128 ip0dbg(("frag hada drop\n")); 8129 goto hada_drop; 8130 } 8131 8132 ASSERT(first_mp == mp); 8133 if (remlen < sizeof (ip6_frag_t)) 8134 goto pkt_too_short; 8135 8136 if (mp->b_cont != NULL && 8137 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8138 if (!pullupmsg(mp, 8139 pkt_len - remlen + sizeof (ip6_frag_t))) { 8140 BUMP_MIB(ill->ill_ip_mib, 8141 ipIfStatsInDiscards); 8142 freemsg(mp); 8143 return; 8144 } 8145 hck_flags = 0; 8146 ip6h = (ip6_t *)mp->b_rptr; 8147 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8148 } 8149 8150 fraghdr = (ip6_frag_t *)whereptr; 8151 used = (uint_t)sizeof (ip6_frag_t); 8152 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8153 8154 /* 8155 * Invoke the CGTP (multirouting) filtering module to 8156 * process the incoming packet. Packets identified as 8157 * duplicates must be discarded. Filtering is active 8158 * only if the the ip_cgtp_filter ndd variable is 8159 * non-zero. 
8160 */ 8161 if (ipst->ips_ip_cgtp_filter && 8162 ipst->ips_ip_cgtp_filter_ops != NULL) { 8163 int cgtp_flt_pkt; 8164 netstackid_t stackid; 8165 8166 stackid = ipst->ips_netstack->netstack_stackid; 8167 8168 cgtp_flt_pkt = 8169 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8170 stackid, inill->ill_phyint->phyint_ifindex, 8171 ip6h, fraghdr); 8172 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8173 freemsg(mp); 8174 return; 8175 } 8176 } 8177 8178 /* Restore the flags */ 8179 DB_CKSUMFLAGS(mp) = hck_flags; 8180 8181 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8182 remlen - used, &prev_nexthdr_offset, 8183 &reass_sum, &reass_hck_flags); 8184 if (mp == NULL) { 8185 /* Reassembly is still pending */ 8186 return; 8187 } 8188 /* The first mblk are the headers before the frag hdr */ 8189 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8190 8191 first_mp = mp; /* mp has most likely changed! */ 8192 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8193 ip6h = (ip6_t *)mp->b_rptr; 8194 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8195 whereptr = mp->b_rptr + no_frag_hdr_len; 8196 remlen = ntohs(ip6h->ip6_plen) + 8197 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8198 pkt_len = msgdsize(mp); 8199 used = 0; 8200 break; 8201 } 8202 case IPPROTO_HOPOPTS: { 8203 if (hada_mp != NULL) { 8204 ip0dbg(("hop hada drop\n")); 8205 goto hada_drop; 8206 } 8207 /* 8208 * Illegal header sequence. 8209 * (Hop-by-hop headers are processed above 8210 * and required to immediately follow IPv6 header) 8211 */ 8212 icmp_param_problem_v6(WR(q), first_mp, 8213 ICMP6_PARAMPROB_NEXTHEADER, 8214 prev_nexthdr_offset, 8215 B_FALSE, B_FALSE, zoneid, ipst); 8216 return; 8217 } 8218 case IPPROTO_ROUTING: { 8219 uint_t ehdrlen; 8220 ip6_rthdr_t *rthdr; 8221 8222 /* Check if AH is present. 
*/ 8223 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8224 hada_mp, zoneid)) { 8225 ip0dbg(("routing hada drop\n")); 8226 return; 8227 } 8228 8229 /* 8230 * Reinitialize pointers, as ipsec_early_ah_v6() does 8231 * complete pullups. We don't have to do more pullups 8232 * as a result. 8233 */ 8234 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8235 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8236 ip6h = (ip6_t *)mp->b_rptr; 8237 8238 if (remlen < MIN_EHDR_LEN) 8239 goto pkt_too_short; 8240 rthdr = (ip6_rthdr_t *)whereptr; 8241 nexthdr = rthdr->ip6r_nxt; 8242 prev_nexthdr_offset = (uint_t)(whereptr - 8243 (uint8_t *)ip6h); 8244 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8245 if (remlen < ehdrlen) 8246 goto pkt_too_short; 8247 if (rthdr->ip6r_segleft != 0) { 8248 /* Not end of source route */ 8249 if (ll_multicast) { 8250 BUMP_MIB(ill->ill_ip_mib, 8251 ipIfStatsForwProhibits); 8252 freemsg(hada_mp); 8253 freemsg(mp); 8254 return; 8255 } 8256 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8257 flags, hada_mp, dl_mp); 8258 return; 8259 } 8260 used = ehdrlen; 8261 break; 8262 } 8263 case IPPROTO_AH: 8264 case IPPROTO_ESP: { 8265 /* 8266 * Fast path for AH/ESP. If this is the first time 8267 * we are sending a datagram to AH/ESP, allocate 8268 * a IPSEC_IN message and prepend it. Otherwise, 8269 * just fanout. 8270 */ 8271 8272 ipsec_in_t *ii; 8273 int ipsec_rc; 8274 ipsec_stack_t *ipss; 8275 8276 ipss = ipst->ips_netstack->netstack_ipsec; 8277 if (!mctl_present) { 8278 ASSERT(first_mp == mp); 8279 first_mp = ipsec_in_alloc(B_FALSE, 8280 ipst->ips_netstack); 8281 if (first_mp == NULL) { 8282 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8283 "allocation failure.\n")); 8284 BUMP_MIB(ill->ill_ip_mib, 8285 ipIfStatsInDiscards); 8286 freemsg(mp); 8287 return; 8288 } 8289 /* 8290 * Store the ill_index so that when we come back 8291 * from IPSEC we ride on the same queue. 
8292 */ 8293 ii = (ipsec_in_t *)first_mp->b_rptr; 8294 ii->ipsec_in_ill_index = 8295 ill->ill_phyint->phyint_ifindex; 8296 ii->ipsec_in_rill_index = 8297 ii->ipsec_in_ill_index; 8298 first_mp->b_cont = mp; 8299 /* 8300 * Cache hardware acceleration info. 8301 */ 8302 if (hada_mp != NULL) { 8303 IPSECHW_DEBUG(IPSECHW_PKT, 8304 ("ip_rput_data_v6: " 8305 "caching data attr.\n")); 8306 ii->ipsec_in_accelerated = B_TRUE; 8307 ii->ipsec_in_da = hada_mp; 8308 hada_mp = NULL; 8309 } 8310 } else { 8311 ii = (ipsec_in_t *)first_mp->b_rptr; 8312 } 8313 8314 if (!ipsec_loaded(ipss)) { 8315 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8316 zoneid, ipst); 8317 return; 8318 } 8319 8320 /* select inbound SA and have IPsec process the pkt */ 8321 if (nexthdr == IPPROTO_ESP) { 8322 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8323 ipst->ips_netstack); 8324 if (esph == NULL) 8325 return; 8326 ASSERT(ii->ipsec_in_esp_sa != NULL); 8327 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8328 NULL); 8329 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8330 first_mp, esph); 8331 } else { 8332 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8333 ipst->ips_netstack); 8334 if (ah == NULL) 8335 return; 8336 ASSERT(ii->ipsec_in_ah_sa != NULL); 8337 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8338 NULL); 8339 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8340 first_mp, ah); 8341 } 8342 8343 switch (ipsec_rc) { 8344 case IPSEC_STATUS_SUCCESS: 8345 break; 8346 case IPSEC_STATUS_FAILED: 8347 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8348 /* FALLTHRU */ 8349 case IPSEC_STATUS_PENDING: 8350 return; 8351 } 8352 /* we're done with IPsec processing, send it up */ 8353 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8354 return; 8355 } 8356 case IPPROTO_NONE: 8357 /* All processing is done. Count as "delivered". 
*/ 8358 freemsg(hada_mp); 8359 freemsg(first_mp); 8360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8361 return; 8362 } 8363 whereptr += used; 8364 ASSERT(remlen >= used); 8365 remlen -= used; 8366 } 8367 /* NOTREACHED */ 8368 8369 pkt_too_short: 8370 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8371 ip6_len, pkt_len, remlen)); 8372 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8373 freemsg(hada_mp); 8374 freemsg(first_mp); 8375 return; 8376 udp_fanout: 8377 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8378 connp = NULL; 8379 } else { 8380 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8381 ipst); 8382 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8383 CONN_DEC_REF(connp); 8384 connp = NULL; 8385 } 8386 } 8387 8388 if (connp == NULL) { 8389 uint32_t ports; 8390 8391 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8392 UDP_PORTS_OFFSET); 8393 IP6_STAT(ipst, ip6_udp_slow_path); 8394 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8395 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8396 zoneid); 8397 return; 8398 } 8399 8400 if (CONN_UDP_FLOWCTLD(connp)) { 8401 freemsg(first_mp); 8402 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8403 CONN_DEC_REF(connp); 8404 return; 8405 } 8406 8407 /* Initiate IPPF processing */ 8408 if (IP6_IN_IPP(flags, ipst)) { 8409 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8410 if (mp == NULL) { 8411 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8412 CONN_DEC_REF(connp); 8413 return; 8414 } 8415 } 8416 8417 if (connp->conn_ip_recvpktinfo || 8418 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8419 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8420 if (mp == NULL) { 8421 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8422 CONN_DEC_REF(connp); 8423 return; 8424 } 8425 } 8426 8427 IP6_STAT(ipst, ip6_udp_fast_path); 8428 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8429 8430 /* Send it upstream */ 8431 (connp->conn_recv)(connp, mp, NULL); 8432 8433 
CONN_DEC_REF(connp); 8434 freemsg(hada_mp); 8435 return; 8436 8437 hada_drop: 8438 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8439 /* IPsec kstats: bump counter here */ 8440 freemsg(hada_mp); 8441 freemsg(first_mp); 8442 } 8443 8444 /* 8445 * Reassemble fragment. 8446 * When it returns a completed message the first mblk will only contain 8447 * the headers prior to the fragment header. 8448 * 8449 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8450 * of the preceding header. This is needed to patch the previous header's 8451 * nexthdr field when reassembly completes. 8452 */ 8453 static mblk_t * 8454 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8455 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8456 uint32_t *cksum_val, uint16_t *cksum_flags) 8457 { 8458 ill_t *ill = (ill_t *)q->q_ptr; 8459 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8460 uint16_t offset; 8461 boolean_t more_frags; 8462 uint8_t nexthdr = fraghdr->ip6f_nxt; 8463 in6_addr_t *v6dst_ptr; 8464 in6_addr_t *v6src_ptr; 8465 uint_t end; 8466 uint_t hdr_length; 8467 size_t count; 8468 ipf_t *ipf; 8469 ipf_t **ipfp; 8470 ipfb_t *ipfb; 8471 mblk_t *mp1; 8472 uint8_t ecn_info = 0; 8473 size_t msg_len; 8474 mblk_t *tail_mp; 8475 mblk_t *t_mp; 8476 boolean_t pruned = B_FALSE; 8477 uint32_t sum_val; 8478 uint16_t sum_flags; 8479 ip_stack_t *ipst = ill->ill_ipst; 8480 8481 if (cksum_val != NULL) 8482 *cksum_val = 0; 8483 if (cksum_flags != NULL) 8484 *cksum_flags = 0; 8485 8486 /* 8487 * We utilize hardware computed checksum info only for UDP since 8488 * IP fragmentation is a normal occurence for the protocol. In 8489 * addition, checksum offload support for IP fragments carrying 8490 * UDP payload is commonly implemented across network adapters. 
8491 */ 8492 ASSERT(ill != NULL); 8493 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8494 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8495 mblk_t *mp1 = mp->b_cont; 8496 int32_t len; 8497 8498 /* Record checksum information from the packet */ 8499 sum_val = (uint32_t)DB_CKSUM16(mp); 8500 sum_flags = DB_CKSUMFLAGS(mp); 8501 8502 /* fragmented payload offset from beginning of mblk */ 8503 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8504 8505 if ((sum_flags & HCK_PARTIALCKSUM) && 8506 (mp1 == NULL || mp1->b_cont == NULL) && 8507 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8508 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8509 uint32_t adj; 8510 /* 8511 * Partial checksum has been calculated by hardware 8512 * and attached to the packet; in addition, any 8513 * prepended extraneous data is even byte aligned. 8514 * If any such data exists, we adjust the checksum; 8515 * this would also handle any postpended data. 8516 */ 8517 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8518 mp, mp1, len, adj); 8519 8520 /* One's complement subtract extraneous checksum */ 8521 if (adj >= sum_val) 8522 sum_val = ~(adj - sum_val) & 0xFFFF; 8523 else 8524 sum_val -= adj; 8525 } 8526 } else { 8527 sum_val = 0; 8528 sum_flags = 0; 8529 } 8530 8531 /* Clear hardware checksumming flag */ 8532 DB_CKSUMFLAGS(mp) = 0; 8533 8534 /* 8535 * Note: Fragment offset in header is in 8-octet units. 8536 * Clearing least significant 3 bits not only extracts 8537 * it but also gets it in units of octets. 8538 */ 8539 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8540 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8541 8542 /* 8543 * Is the more frags flag on and the payload length not a multiple 8544 * of eight? 
8545 */ 8546 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8547 zoneid_t zoneid; 8548 8549 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8550 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8551 if (zoneid == ALL_ZONES) { 8552 freemsg(mp); 8553 return (NULL); 8554 } 8555 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8556 (uint32_t)((char *)&ip6h->ip6_plen - 8557 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8558 return (NULL); 8559 } 8560 8561 v6src_ptr = &ip6h->ip6_src; 8562 v6dst_ptr = &ip6h->ip6_dst; 8563 end = remlen; 8564 8565 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8566 end += offset; 8567 8568 /* 8569 * Would fragment cause reassembled packet to have a payload length 8570 * greater than IP_MAXPACKET - the max payload size? 8571 */ 8572 if (end > IP_MAXPACKET) { 8573 zoneid_t zoneid; 8574 8575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8576 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8577 if (zoneid == ALL_ZONES) { 8578 freemsg(mp); 8579 return (NULL); 8580 } 8581 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8582 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8583 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8584 return (NULL); 8585 } 8586 8587 /* 8588 * This packet just has one fragment. Reassembly not 8589 * needed. 8590 */ 8591 if (!more_frags && offset == 0) { 8592 goto reass_done; 8593 } 8594 8595 /* 8596 * Drop the fragmented as early as possible, if 8597 * we don't have resource(s) to re-assemble. 8598 */ 8599 if (ipst->ips_ip_reass_queue_bytes == 0) { 8600 freemsg(mp); 8601 return (NULL); 8602 } 8603 8604 /* Record the ECN field info. */ 8605 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8606 /* 8607 * If this is not the first fragment, dump the unfragmentable 8608 * portion of the packet. 8609 */ 8610 if (offset) 8611 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8612 8613 /* 8614 * Fragmentation reassembly. 
Each ILL has a hash table for 8615 * queueing packets undergoing reassembly for all IPIFs 8616 * associated with the ILL. The hash is based on the packet 8617 * IP ident field. The ILL frag hash table was allocated 8618 * as a timer block at the time the ILL was created. Whenever 8619 * there is anything on the reassembly queue, the timer will 8620 * be running. 8621 */ 8622 msg_len = MBLKSIZE(mp); 8623 tail_mp = mp; 8624 while (tail_mp->b_cont != NULL) { 8625 tail_mp = tail_mp->b_cont; 8626 msg_len += MBLKSIZE(tail_mp); 8627 } 8628 /* 8629 * If the reassembly list for this ILL will get too big 8630 * prune it. 8631 */ 8632 8633 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8634 ipst->ips_ip_reass_queue_bytes) { 8635 ill_frag_prune(ill, 8636 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8637 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8638 pruned = B_TRUE; 8639 } 8640 8641 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8642 mutex_enter(&ipfb->ipfb_lock); 8643 8644 ipfp = &ipfb->ipfb_ipf; 8645 /* Try to find an existing fragment queue for this packet. */ 8646 for (;;) { 8647 ipf = ipfp[0]; 8648 if (ipf) { 8649 /* 8650 * It has to match on ident, source address, and 8651 * dest address. 8652 */ 8653 if (ipf->ipf_ident == ident && 8654 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8655 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8656 8657 /* 8658 * If we have received too many 8659 * duplicate fragments for this packet 8660 * free it. 8661 */ 8662 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8663 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8664 freemsg(mp); 8665 mutex_exit(&ipfb->ipfb_lock); 8666 return (NULL); 8667 } 8668 8669 break; 8670 } 8671 ipfp = &ipf->ipf_hash_next; 8672 continue; 8673 } 8674 8675 8676 /* 8677 * If we pruned the list, do we want to store this new 8678 * fragment?. We apply an optimization here based on the 8679 * fact that most fragments will be received in order. 
8680 * So if the offset of this incoming fragment is zero, 8681 * it is the first fragment of a new packet. We will 8682 * keep it. Otherwise drop the fragment, as we have 8683 * probably pruned the packet already (since the 8684 * packet cannot be found). 8685 */ 8686 8687 if (pruned && offset != 0) { 8688 mutex_exit(&ipfb->ipfb_lock); 8689 freemsg(mp); 8690 return (NULL); 8691 } 8692 8693 /* New guy. Allocate a frag message. */ 8694 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8695 if (!mp1) { 8696 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8697 freemsg(mp); 8698 partial_reass_done: 8699 mutex_exit(&ipfb->ipfb_lock); 8700 return (NULL); 8701 } 8702 8703 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8704 /* 8705 * Too many fragmented packets in this hash bucket. 8706 * Free the oldest. 8707 */ 8708 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8709 } 8710 8711 mp1->b_cont = mp; 8712 8713 /* Initialize the fragment header. */ 8714 ipf = (ipf_t *)mp1->b_rptr; 8715 ipf->ipf_mp = mp1; 8716 ipf->ipf_ptphn = ipfp; 8717 ipfp[0] = ipf; 8718 ipf->ipf_hash_next = NULL; 8719 ipf->ipf_ident = ident; 8720 ipf->ipf_v6src = *v6src_ptr; 8721 ipf->ipf_v6dst = *v6dst_ptr; 8722 /* Record reassembly start time. 
*/ 8723 ipf->ipf_timestamp = gethrestime_sec(); 8724 /* Record ipf generation and account for frag header */ 8725 ipf->ipf_gen = ill->ill_ipf_gen++; 8726 ipf->ipf_count = MBLKSIZE(mp1); 8727 ipf->ipf_protocol = nexthdr; 8728 ipf->ipf_nf_hdr_len = 0; 8729 ipf->ipf_prev_nexthdr_offset = 0; 8730 ipf->ipf_last_frag_seen = B_FALSE; 8731 ipf->ipf_ecn = ecn_info; 8732 ipf->ipf_num_dups = 0; 8733 ipfb->ipfb_frag_pkts++; 8734 ipf->ipf_checksum = 0; 8735 ipf->ipf_checksum_flags = 0; 8736 8737 /* Store checksum value in fragment header */ 8738 if (sum_flags != 0) { 8739 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8740 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8741 ipf->ipf_checksum = sum_val; 8742 ipf->ipf_checksum_flags = sum_flags; 8743 } 8744 8745 /* 8746 * We handle reassembly two ways. In the easy case, 8747 * where all the fragments show up in order, we do 8748 * minimal bookkeeping, and just clip new pieces on 8749 * the end. If we ever see a hole, then we go off 8750 * to ip_reassemble which has to mark the pieces and 8751 * keep track of the number of holes, etc. Obviously, 8752 * the point of having both mechanisms is so we can 8753 * handle the easy case as efficiently as possible. 8754 */ 8755 if (offset == 0) { 8756 /* Easy case, in-order reassembly so far. */ 8757 /* Update the byte count */ 8758 ipf->ipf_count += msg_len; 8759 ipf->ipf_tail_mp = tail_mp; 8760 /* 8761 * Keep track of next expected offset in 8762 * ipf_end. 8763 */ 8764 ipf->ipf_end = end; 8765 ipf->ipf_nf_hdr_len = hdr_length; 8766 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8767 } else { 8768 /* Hard case, hole at the beginning. */ 8769 ipf->ipf_tail_mp = NULL; 8770 /* 8771 * ipf_end == 0 means that we have given up 8772 * on easy reassembly. 8773 */ 8774 ipf->ipf_end = 0; 8775 8776 /* Forget checksum offload from now on */ 8777 ipf->ipf_checksum_flags = 0; 8778 8779 /* 8780 * ipf_hole_cnt is set by ip_reassemble. 8781 * ipf_count is updated by ip_reassemble. 
8782 * No need to check for return value here 8783 * as we don't expect reassembly to complete or 8784 * fail for the first fragment itself. 8785 */ 8786 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8787 msg_len); 8788 } 8789 /* Update per ipfb and ill byte counts */ 8790 ipfb->ipfb_count += ipf->ipf_count; 8791 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8792 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8793 /* If the frag timer wasn't already going, start it. */ 8794 mutex_enter(&ill->ill_lock); 8795 ill_frag_timer_start(ill); 8796 mutex_exit(&ill->ill_lock); 8797 goto partial_reass_done; 8798 } 8799 8800 /* 8801 * If the packet's flag has changed (it could be coming up 8802 * from an interface different than the previous, therefore 8803 * possibly different checksum capability), then forget about 8804 * any stored checksum states. Otherwise add the value to 8805 * the existing one stored in the fragment header. 8806 */ 8807 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8808 sum_val += ipf->ipf_checksum; 8809 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8810 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8811 ipf->ipf_checksum = sum_val; 8812 } else if (ipf->ipf_checksum_flags != 0) { 8813 /* Forget checksum offload from now on */ 8814 ipf->ipf_checksum_flags = 0; 8815 } 8816 8817 /* 8818 * We have a new piece of a datagram which is already being 8819 * reassembled. Update the ECN info if all IP fragments 8820 * are ECN capable. If there is one which is not, clear 8821 * all the info. If there is at least one which has CE 8822 * code point, IP needs to report that up to transport. 
8823 */ 8824 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8825 if (ecn_info == IPH_ECN_CE) 8826 ipf->ipf_ecn = IPH_ECN_CE; 8827 } else { 8828 ipf->ipf_ecn = IPH_ECN_NECT; 8829 } 8830 8831 if (offset && ipf->ipf_end == offset) { 8832 /* The new fragment fits at the end */ 8833 ipf->ipf_tail_mp->b_cont = mp; 8834 /* Update the byte count */ 8835 ipf->ipf_count += msg_len; 8836 /* Update per ipfb and ill byte counts */ 8837 ipfb->ipfb_count += msg_len; 8838 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8839 atomic_add_32(&ill->ill_frag_count, msg_len); 8840 if (more_frags) { 8841 /* More to come. */ 8842 ipf->ipf_end = end; 8843 ipf->ipf_tail_mp = tail_mp; 8844 goto partial_reass_done; 8845 } 8846 } else { 8847 /* 8848 * Go do the hard cases. 8849 * Call ip_reassemble(). 8850 */ 8851 int ret; 8852 8853 if (offset == 0) { 8854 if (ipf->ipf_prev_nexthdr_offset == 0) { 8855 ipf->ipf_nf_hdr_len = hdr_length; 8856 ipf->ipf_prev_nexthdr_offset = 8857 *prev_nexthdr_offset; 8858 } 8859 } 8860 /* Save current byte count */ 8861 count = ipf->ipf_count; 8862 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8863 8864 /* Count of bytes added and subtracted (freeb()ed) */ 8865 count = ipf->ipf_count - count; 8866 if (count) { 8867 /* Update per ipfb and ill byte counts */ 8868 ipfb->ipfb_count += count; 8869 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8870 atomic_add_32(&ill->ill_frag_count, count); 8871 } 8872 if (ret == IP_REASS_PARTIAL) { 8873 goto partial_reass_done; 8874 } else if (ret == IP_REASS_FAILED) { 8875 /* Reassembly failed. Free up all resources */ 8876 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8877 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8878 IP_REASS_SET_START(t_mp, 0); 8879 IP_REASS_SET_END(t_mp, 0); 8880 } 8881 freemsg(mp); 8882 goto partial_reass_done; 8883 } 8884 8885 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8886 } 8887 /* 8888 * We have completed reassembly. 
Unhook the frag header from 8889 * the reassembly list. 8890 * 8891 * Grab the unfragmentable header length next header value out 8892 * of the first fragment 8893 */ 8894 ASSERT(ipf->ipf_nf_hdr_len != 0); 8895 hdr_length = ipf->ipf_nf_hdr_len; 8896 8897 /* 8898 * Before we free the frag header, record the ECN info 8899 * to report back to the transport. 8900 */ 8901 ecn_info = ipf->ipf_ecn; 8902 8903 /* 8904 * Store the nextheader field in the header preceding the fragment 8905 * header 8906 */ 8907 nexthdr = ipf->ipf_protocol; 8908 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8909 ipfp = ipf->ipf_ptphn; 8910 8911 /* We need to supply these to caller */ 8912 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8913 sum_val = ipf->ipf_checksum; 8914 else 8915 sum_val = 0; 8916 8917 mp1 = ipf->ipf_mp; 8918 count = ipf->ipf_count; 8919 ipf = ipf->ipf_hash_next; 8920 if (ipf) 8921 ipf->ipf_ptphn = ipfp; 8922 ipfp[0] = ipf; 8923 atomic_add_32(&ill->ill_frag_count, -count); 8924 ASSERT(ipfb->ipfb_count >= count); 8925 ipfb->ipfb_count -= count; 8926 ipfb->ipfb_frag_pkts--; 8927 mutex_exit(&ipfb->ipfb_lock); 8928 /* Ditch the frag header. */ 8929 mp = mp1->b_cont; 8930 freeb(mp1); 8931 8932 /* 8933 * Make sure the packet is good by doing some sanity 8934 * check. If bad we can silentely drop the packet. 8935 */ 8936 reass_done: 8937 if (hdr_length < sizeof (ip6_frag_t)) { 8938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8939 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8940 freemsg(mp); 8941 return (NULL); 8942 } 8943 8944 /* 8945 * Remove the fragment header from the initial header by 8946 * splitting the mblk into the non-fragmentable header and 8947 * everthing after the fragment extension header. This has the 8948 * side effect of putting all the headers that need destination 8949 * processing into the b_cont block-- on return this fact is 8950 * used in order to avoid having to look at the extensions 8951 * already processed. 
8952 * 8953 * Note that this code assumes that the unfragmentable portion 8954 * of the header is in the first mblk and increments 8955 * the read pointer past it. If this assumption is broken 8956 * this code fails badly. 8957 */ 8958 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8959 mblk_t *nmp; 8960 8961 if (!(nmp = dupb(mp))) { 8962 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8963 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8964 freemsg(mp); 8965 return (NULL); 8966 } 8967 nmp->b_cont = mp->b_cont; 8968 mp->b_cont = nmp; 8969 nmp->b_rptr += hdr_length; 8970 } 8971 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8972 8973 ip6h = (ip6_t *)mp->b_rptr; 8974 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8975 8976 /* Restore original IP length in header. */ 8977 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8978 /* Record the ECN info. */ 8979 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8980 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8981 8982 /* Reassembly is successful; return checksum information if needed */ 8983 if (cksum_val != NULL) 8984 *cksum_val = sum_val; 8985 if (cksum_flags != NULL) 8986 *cksum_flags = sum_flags; 8987 8988 return (mp); 8989 } 8990 8991 /* 8992 * Walk through the options to see if there is a routing header. 8993 * If present get the destination which is the last address of 8994 * the option. 
8995 */ 8996 in6_addr_t 8997 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8998 { 8999 uint8_t nexthdr; 9000 uint8_t *whereptr; 9001 ip6_hbh_t *hbhhdr; 9002 ip6_dest_t *dsthdr; 9003 ip6_rthdr0_t *rthdr; 9004 ip6_frag_t *fraghdr; 9005 int ehdrlen; 9006 int left; 9007 in6_addr_t *ap, rv; 9008 9009 if (is_fragment != NULL) 9010 *is_fragment = B_FALSE; 9011 9012 rv = ip6h->ip6_dst; 9013 9014 nexthdr = ip6h->ip6_nxt; 9015 whereptr = (uint8_t *)&ip6h[1]; 9016 for (;;) { 9017 9018 ASSERT(nexthdr != IPPROTO_RAW); 9019 switch (nexthdr) { 9020 case IPPROTO_HOPOPTS: 9021 hbhhdr = (ip6_hbh_t *)whereptr; 9022 nexthdr = hbhhdr->ip6h_nxt; 9023 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9024 break; 9025 case IPPROTO_DSTOPTS: 9026 dsthdr = (ip6_dest_t *)whereptr; 9027 nexthdr = dsthdr->ip6d_nxt; 9028 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9029 break; 9030 case IPPROTO_ROUTING: 9031 rthdr = (ip6_rthdr0_t *)whereptr; 9032 nexthdr = rthdr->ip6r0_nxt; 9033 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9034 9035 left = rthdr->ip6r0_segleft; 9036 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9037 rv = *(ap + left - 1); 9038 /* 9039 * If the caller doesn't care whether the packet 9040 * is a fragment or not, we can stop here since 9041 * we have our destination. 9042 */ 9043 if (is_fragment == NULL) 9044 goto done; 9045 break; 9046 case IPPROTO_FRAGMENT: 9047 fraghdr = (ip6_frag_t *)whereptr; 9048 nexthdr = fraghdr->ip6f_nxt; 9049 ehdrlen = sizeof (ip6_frag_t); 9050 if (is_fragment != NULL) 9051 *is_fragment = B_TRUE; 9052 goto done; 9053 default : 9054 goto done; 9055 } 9056 whereptr += ehdrlen; 9057 } 9058 9059 done: 9060 return (rv); 9061 } 9062 9063 /* 9064 * ip_source_routed_v6: 9065 * This function is called by redirect code in ip_rput_data_v6 to 9066 * know whether this packet is source routed through this node i.e 9067 * whether this node (router) is part of the journey. 
This 9068 * function is called under two cases : 9069 * 9070 * case 1 : Routing header was processed by this node and 9071 * ip_process_rthdr replaced ip6_dst with the next hop 9072 * and we are forwarding the packet to the next hop. 9073 * 9074 * case 2 : Routing header was not processed by this node and we 9075 * are just forwarding the packet. 9076 * 9077 * For case (1) we don't want to send redirects. For case(2) we 9078 * want to send redirects. 9079 */ 9080 static boolean_t 9081 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9082 { 9083 uint8_t nexthdr; 9084 in6_addr_t *addrptr; 9085 ip6_rthdr0_t *rthdr; 9086 uint8_t numaddr; 9087 ip6_hbh_t *hbhhdr; 9088 uint_t ehdrlen; 9089 uint8_t *byteptr; 9090 9091 ip2dbg(("ip_source_routed_v6\n")); 9092 nexthdr = ip6h->ip6_nxt; 9093 ehdrlen = IPV6_HDR_LEN; 9094 9095 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9096 while (nexthdr == IPPROTO_HOPOPTS || 9097 nexthdr == IPPROTO_DSTOPTS) { 9098 byteptr = (uint8_t *)ip6h + ehdrlen; 9099 /* 9100 * Check if we have already processed 9101 * packets or we are just a forwarding 9102 * router which only pulled up msgs up 9103 * to IPV6HDR and one HBH ext header 9104 */ 9105 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9106 ip2dbg(("ip_source_routed_v6: Extension" 9107 " headers not processed\n")); 9108 return (B_FALSE); 9109 } 9110 hbhhdr = (ip6_hbh_t *)byteptr; 9111 nexthdr = hbhhdr->ip6h_nxt; 9112 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9113 } 9114 switch (nexthdr) { 9115 case IPPROTO_ROUTING: 9116 byteptr = (uint8_t *)ip6h + ehdrlen; 9117 /* 9118 * If for some reason, we haven't pulled up 9119 * the routing hdr data mblk, then we must 9120 * not have processed it at all. So for sure 9121 * we are not part of the source routed journey. 
9122 */ 9123 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9124 ip2dbg(("ip_source_routed_v6: Routing" 9125 " header not processed\n")); 9126 return (B_FALSE); 9127 } 9128 rthdr = (ip6_rthdr0_t *)byteptr; 9129 /* 9130 * Either we are an intermediate router or the 9131 * last hop before destination and we have 9132 * already processed the routing header. 9133 * If segment_left is greater than or equal to zero, 9134 * then we must be the (numaddr - segleft) entry 9135 * of the routing header. Although ip6r0_segleft 9136 * is a unit8_t variable, we still check for zero 9137 * or greater value, if in case the data type 9138 * is changed someday in future. 9139 */ 9140 if (rthdr->ip6r0_segleft > 0 || 9141 rthdr->ip6r0_segleft == 0) { 9142 ire_t *ire = NULL; 9143 9144 numaddr = rthdr->ip6r0_len / 2; 9145 addrptr = (in6_addr_t *)((char *)rthdr + 9146 sizeof (*rthdr)); 9147 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9148 if (addrptr != NULL) { 9149 ire = ire_ctable_lookup_v6(addrptr, NULL, 9150 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9151 MATCH_IRE_TYPE, 9152 ipst); 9153 if (ire != NULL) { 9154 ire_refrele(ire); 9155 return (B_TRUE); 9156 } 9157 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9158 } 9159 } 9160 /* FALLTHRU */ 9161 default: 9162 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9163 return (B_FALSE); 9164 } 9165 } 9166 9167 /* 9168 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9169 * Assumes that the following set of headers appear in the first 9170 * mblk: 9171 * ip6i_t (if present) CAN also appear as a separate mblk. 9172 * ip6_t 9173 * Any extension headers 9174 * TCP/UDP/SCTP header (if present) 9175 * The routine can handle an ICMPv6 header that is not in the first mblk. 9176 * 9177 * The order to determine the outgoing interface is as follows: 9178 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9179 * 2. If conn_nofailover_ill is set then use that ill. 9180 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9181 * 4. If q is an ill queue and (link local or multicast destination) then 9182 * use that ill. 9183 * 5. If IPV6_BOUND_IF has been set use that ill. 9184 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9185 * look for the best IRE match for the unspecified group to determine 9186 * the ill. 9187 * 7. For unicast: Just do an IRE lookup for the best match. 9188 * 9189 * arg2 is always a queue_t *. 9190 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9191 * the zoneid. 9192 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9193 */ 9194 void 9195 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9196 { 9197 conn_t *connp = NULL; 9198 queue_t *q = (queue_t *)arg2; 9199 ire_t *ire = NULL; 9200 ire_t *sctp_ire = NULL; 9201 ip6_t *ip6h; 9202 in6_addr_t *v6dstp; 9203 ill_t *ill = NULL; 9204 ipif_t *ipif; 9205 ip6i_t *ip6i; 9206 int cksum_request; /* -1 => normal. */ 9207 /* 1 => Skip TCP/UDP/SCTP checksum */ 9208 /* Otherwise contains insert offset for checksum */ 9209 int unspec_src; 9210 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9211 mib2_ipIfStatsEntry_t *mibptr; 9212 int match_flags = MATCH_IRE_ILL_GROUP; 9213 boolean_t attach_if = B_FALSE; 9214 mblk_t *first_mp; 9215 boolean_t mctl_present; 9216 ipsec_out_t *io; 9217 boolean_t drop_if_delayed = B_FALSE; 9218 boolean_t multirt_need_resolve = B_FALSE; 9219 mblk_t *copy_mp = NULL; 9220 int err = 0; 9221 int ip6i_flags = 0; 9222 zoneid_t zoneid; 9223 ill_t *saved_ill = NULL; 9224 boolean_t conn_lock_held; 9225 boolean_t need_decref = B_FALSE; 9226 ip_stack_t *ipst; 9227 9228 if (q->q_next != NULL) { 9229 ill = (ill_t *)q->q_ptr; 9230 ipst = ill->ill_ipst; 9231 } else { 9232 connp = (conn_t *)arg; 9233 ASSERT(connp != NULL); 9234 ipst = connp->conn_netstack->netstack_ip; 9235 } 9236 9237 /* 9238 * Highest bit in version field is Reachability Confirmation bit 9239 * used by NUD in ip_xmit_v6(). 9240 */ 9241 #ifdef _BIG_ENDIAN 9242 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9243 #else 9244 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9245 #endif 9246 9247 /* 9248 * M_CTL comes from 6 places 9249 * 9250 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9251 * both V4 and V6 datagrams. 9252 * 9253 * 2) AH/ESP sends down M_CTL after doing their job with both 9254 * V4 and V6 datagrams. 9255 * 9256 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9257 * attached. 9258 * 9259 * 4) Notifications from an external resolver (for XRESOLV ifs) 9260 * 9261 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9262 * IPsec hardware acceleration support. 9263 * 9264 * 6) TUN_HELLO. 9265 * 9266 * We need to handle (1)'s IPv6 case and (3) here. For the 9267 * IPv4 case in (1), and (2), IPSEC processing has already 9268 * started. The code in ip_wput() already knows how to handle 9269 * continuing IPSEC processing (for IPv4 and IPv6). All other 9270 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9271 * for handling. 
9272 */ 9273 first_mp = mp; 9274 mctl_present = B_FALSE; 9275 io = NULL; 9276 9277 /* Multidata transmit? */ 9278 if (DB_TYPE(mp) == M_MULTIDATA) { 9279 /* 9280 * We should never get here, since all Multidata messages 9281 * originating from tcp should have been directed over to 9282 * tcp_multisend() in the first place. 9283 */ 9284 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9285 freemsg(mp); 9286 return; 9287 } else if (DB_TYPE(mp) == M_CTL) { 9288 uint32_t mctltype = 0; 9289 uint32_t mlen = MBLKL(first_mp); 9290 9291 mp = mp->b_cont; 9292 mctl_present = B_TRUE; 9293 io = (ipsec_out_t *)first_mp->b_rptr; 9294 9295 /* 9296 * Validate this M_CTL message. The only three types of 9297 * M_CTL messages we expect to see in this code path are 9298 * ipsec_out_t or ipsec_in_t structures (allocated as 9299 * ipsec_info_t unions), or ipsec_ctl_t structures. 9300 * The ipsec_out_type and ipsec_in_type overlap in the two 9301 * data structures, and they are either set to IPSEC_OUT 9302 * or IPSEC_IN depending on which data structure it is. 9303 * ipsec_ctl_t is an IPSEC_CTL. 9304 * 9305 * All other M_CTL messages are sent to ip_wput_nondata() 9306 * for handling. 9307 */ 9308 if (mlen >= sizeof (io->ipsec_out_type)) 9309 mctltype = io->ipsec_out_type; 9310 9311 if ((mlen == sizeof (ipsec_ctl_t)) && 9312 (mctltype == IPSEC_CTL)) { 9313 ip_output(arg, first_mp, arg2, caller); 9314 return; 9315 } 9316 9317 if ((mlen < sizeof (ipsec_info_t)) || 9318 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9319 mp == NULL) { 9320 ip_wput_nondata(NULL, q, first_mp, NULL); 9321 return; 9322 } 9323 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9324 if (q->q_next == NULL) { 9325 ip6h = (ip6_t *)mp->b_rptr; 9326 /* 9327 * For a freshly-generated TCP dgram that needs IPV6 9328 * processing, don't call ip_wput immediately. We can 9329 * tell this by the ipsec_out_proc_begin. 
In-progress 9330 * IPSEC_OUT messages have proc_begin set to TRUE, 9331 * and we want to send all IPSEC_IN messages to 9332 * ip_wput() for IPsec processing or finishing. 9333 */ 9334 if (mctltype == IPSEC_IN || 9335 IPVER(ip6h) != IPV6_VERSION || 9336 io->ipsec_out_proc_begin) { 9337 mibptr = &ipst->ips_ip6_mib; 9338 goto notv6; 9339 } 9340 } 9341 } else if (DB_TYPE(mp) != M_DATA) { 9342 ip_wput_nondata(NULL, q, mp, NULL); 9343 return; 9344 } 9345 9346 ip6h = (ip6_t *)mp->b_rptr; 9347 9348 if (IPVER(ip6h) != IPV6_VERSION) { 9349 mibptr = &ipst->ips_ip6_mib; 9350 goto notv6; 9351 } 9352 9353 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9354 (connp == NULL || !connp->conn_ulp_labeled)) { 9355 if (connp != NULL) { 9356 ASSERT(CONN_CRED(connp) != NULL); 9357 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9358 &mp, connp->conn_mac_exempt, ipst); 9359 } else if (DB_CRED(mp) != NULL) { 9360 err = tsol_check_label_v6(DB_CRED(mp), 9361 &mp, B_FALSE, ipst); 9362 } 9363 if (mctl_present) 9364 first_mp->b_cont = mp; 9365 else 9366 first_mp = mp; 9367 if (err != 0) { 9368 DTRACE_PROBE3( 9369 tsol_ip_log_drop_checklabel_ip6, char *, 9370 "conn(1), failed to check/update mp(2)", 9371 conn_t, connp, mblk_t, mp); 9372 freemsg(first_mp); 9373 return; 9374 } 9375 ip6h = (ip6_t *)mp->b_rptr; 9376 } 9377 if (q->q_next != NULL) { 9378 /* 9379 * We don't know if this ill will be used for IPv6 9380 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9381 * ipif_set_values() sets the ill_isv6 flag to true if 9382 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9383 * just drop the packet. 
9384 */ 9385 if (!ill->ill_isv6) { 9386 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9387 "ILLF_IPV6 was set\n")); 9388 freemsg(first_mp); 9389 return; 9390 } 9391 /* For uniformity do a refhold */ 9392 mutex_enter(&ill->ill_lock); 9393 if (!ILL_CAN_LOOKUP(ill)) { 9394 mutex_exit(&ill->ill_lock); 9395 freemsg(first_mp); 9396 return; 9397 } 9398 ill_refhold_locked(ill); 9399 mutex_exit(&ill->ill_lock); 9400 mibptr = ill->ill_ip_mib; 9401 9402 ASSERT(mibptr != NULL); 9403 unspec_src = 0; 9404 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9405 do_outrequests = B_FALSE; 9406 zoneid = (zoneid_t)(uintptr_t)arg; 9407 } else { 9408 ASSERT(connp != NULL); 9409 zoneid = connp->conn_zoneid; 9410 9411 /* is queue flow controlled? */ 9412 if ((q->q_first || connp->conn_draining) && 9413 (caller == IP_WPUT)) { 9414 /* 9415 * 1) TCP sends down M_CTL for detached connections. 9416 * 2) AH/ESP sends down M_CTL. 9417 * 9418 * We don't flow control either of the above. Only 9419 * UDP and others are flow controlled for which we 9420 * can't have a M_CTL. 9421 */ 9422 ASSERT(first_mp == mp); 9423 (void) putq(q, mp); 9424 return; 9425 } 9426 mibptr = &ipst->ips_ip6_mib; 9427 unspec_src = connp->conn_unspec_src; 9428 do_outrequests = B_TRUE; 9429 if (mp->b_flag & MSGHASREF) { 9430 mp->b_flag &= ~MSGHASREF; 9431 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9432 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9433 need_decref = B_TRUE; 9434 } 9435 9436 /* 9437 * If there is a policy, try to attach an ipsec_out in 9438 * the front. At the end, first_mp either points to a 9439 * M_DATA message or IPSEC_OUT message linked to a 9440 * M_DATA message. We have to do it now as we might 9441 * lose the "conn" if we go through ip_newroute. 9442 */ 9443 if (!mctl_present && 9444 (connp->conn_out_enforce_policy || 9445 connp->conn_latch != NULL)) { 9446 ASSERT(first_mp == mp); 9447 /* XXX Any better way to get the protocol fast ? 
*/ 9448 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9449 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9450 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9451 if (need_decref) 9452 CONN_DEC_REF(connp); 9453 return; 9454 } else { 9455 ASSERT(mp->b_datap->db_type == M_CTL); 9456 first_mp = mp; 9457 mp = mp->b_cont; 9458 mctl_present = B_TRUE; 9459 io = (ipsec_out_t *)first_mp->b_rptr; 9460 } 9461 } 9462 } 9463 9464 /* check for alignment and full IPv6 header */ 9465 if (!OK_32PTR((uchar_t *)ip6h) || 9466 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9467 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9468 if (do_outrequests) 9469 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9470 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9471 freemsg(first_mp); 9472 if (ill != NULL) 9473 ill_refrele(ill); 9474 if (need_decref) 9475 CONN_DEC_REF(connp); 9476 return; 9477 } 9478 v6dstp = &ip6h->ip6_dst; 9479 cksum_request = -1; 9480 ip6i = NULL; 9481 9482 /* 9483 * Once neighbor discovery has completed, ndp_process() will provide 9484 * locally generated packets for which processing can be reattempted. 9485 * In these cases, connp is NULL and the original zone is part of a 9486 * prepended ipsec_out_t. 9487 */ 9488 if (io != NULL) { 9489 /* 9490 * When coming from icmp_input_v6, the zoneid might not match 9491 * for the loopback case, because inside icmp_input_v6 the 9492 * queue_t is a conn queue from the sending side. 9493 */ 9494 zoneid = io->ipsec_out_zoneid; 9495 ASSERT(zoneid != ALL_ZONES); 9496 } 9497 9498 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9499 /* 9500 * This is an ip6i_t header followed by an ip6_hdr. 9501 * Check which fields are set. 9502 * 9503 * When the packet comes from a transport we should have 9504 * all needed headers in the first mblk. However, when 9505 * going through ip_newroute*_v6 the ip6i might be in 9506 * a separate mblk when we return here. 
In that case 9507 * we pullup everything to ensure that extension and transport 9508 * headers "stay" in the first mblk. 9509 */ 9510 ip6i = (ip6i_t *)ip6h; 9511 ip6i_flags = ip6i->ip6i_flags; 9512 9513 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9514 ((mp->b_wptr - (uchar_t *)ip6i) >= 9515 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9516 9517 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9518 if (!pullupmsg(mp, -1)) { 9519 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9520 if (do_outrequests) { 9521 BUMP_MIB(mibptr, 9522 ipIfStatsHCOutRequests); 9523 } 9524 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9525 freemsg(first_mp); 9526 if (ill != NULL) 9527 ill_refrele(ill); 9528 if (need_decref) 9529 CONN_DEC_REF(connp); 9530 return; 9531 } 9532 ip6h = (ip6_t *)mp->b_rptr; 9533 v6dstp = &ip6h->ip6_dst; 9534 ip6i = (ip6i_t *)ip6h; 9535 } 9536 ip6h = (ip6_t *)&ip6i[1]; 9537 9538 /* 9539 * Advance rptr past the ip6i_t to get ready for 9540 * transmitting the packet. However, if the packet gets 9541 * passed to ip_newroute*_v6 then rptr is moved back so 9542 * that the ip6i_t header can be inspected when the 9543 * packet comes back here after passing through 9544 * ire_add_then_send. 9545 */ 9546 mp->b_rptr = (uchar_t *)ip6h; 9547 9548 /* 9549 * IP6I_ATTACH_IF is set in this function when we had a 9550 * conn and it was either bound to the IPFF_NOFAILOVER address 9551 * or IPV6_BOUND_PIF was set. These options override other 9552 * options that set the ifindex. We come here with 9553 * IP6I_ATTACH_IF set when we can't find the ire and 9554 * ip_newroute_v6 is feeding the packet for second time. 
9555 */ 9556 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9557 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9558 ASSERT(ip6i->ip6i_ifindex != 0); 9559 if (ill != NULL) 9560 ill_refrele(ill); 9561 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9562 NULL, NULL, NULL, NULL, ipst); 9563 if (ill == NULL) { 9564 if (do_outrequests) { 9565 BUMP_MIB(mibptr, 9566 ipIfStatsHCOutRequests); 9567 } 9568 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9569 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9570 ip6i->ip6i_ifindex)); 9571 if (need_decref) 9572 CONN_DEC_REF(connp); 9573 freemsg(first_mp); 9574 return; 9575 } 9576 mibptr = ill->ill_ip_mib; 9577 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9578 /* 9579 * Preserve the index so that when we return 9580 * from IPSEC processing, we know where to 9581 * send the packet. 9582 */ 9583 if (mctl_present) { 9584 ASSERT(io != NULL); 9585 io->ipsec_out_ill_index = 9586 ip6i->ip6i_ifindex; 9587 } 9588 } 9589 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9590 /* 9591 * This is a multipathing probe packet that has 9592 * been delayed in ND resolution. Drop the 9593 * packet for the reasons mentioned in 9594 * nce_queue_mp() 9595 */ 9596 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9597 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9598 freemsg(first_mp); 9599 ill_refrele(ill); 9600 if (need_decref) 9601 CONN_DEC_REF(connp); 9602 return; 9603 } 9604 } 9605 } 9606 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9607 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9608 9609 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9610 if (secpolicy_net_rawaccess(cr) != 0) { 9611 /* 9612 * Use IPCL_ZONEID to honor SO_ALLZONES. 9613 */ 9614 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9615 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9616 NULL, connp != NULL ? 
9617 IPCL_ZONEID(connp) : zoneid, NULL, 9618 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9619 if (ire == NULL) { 9620 if (do_outrequests) 9621 BUMP_MIB(mibptr, 9622 ipIfStatsHCOutRequests); 9623 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9624 ip1dbg(("ip_wput_v6: bad source " 9625 "addr\n")); 9626 freemsg(first_mp); 9627 if (ill != NULL) 9628 ill_refrele(ill); 9629 if (need_decref) 9630 CONN_DEC_REF(connp); 9631 return; 9632 } 9633 ire_refrele(ire); 9634 } 9635 /* No need to verify again when using ip_newroute */ 9636 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9637 } 9638 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9639 /* 9640 * Make sure they match since ip_newroute*_v6 etc might 9641 * (unknown to them) inspect ip6i_nexthop when 9642 * they think they access ip6_dst. 9643 */ 9644 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9645 } 9646 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9647 cksum_request = 1; 9648 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9649 cksum_request = ip6i->ip6i_checksum_off; 9650 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9651 unspec_src = 1; 9652 9653 if (do_outrequests && ill != NULL) { 9654 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9655 do_outrequests = B_FALSE; 9656 } 9657 /* 9658 * Store ip6i_t info that we need after we come back 9659 * from IPSEC processing. 9660 */ 9661 if (mctl_present) { 9662 ASSERT(io != NULL); 9663 io->ipsec_out_unspec_src = unspec_src; 9664 } 9665 } 9666 if (connp != NULL && connp->conn_dontroute) 9667 ip6h->ip6_hops = 1; 9668 9669 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9670 goto ipv6multicast; 9671 9672 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9673 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9674 ill_t *conn_outgoing_pill; 9675 9676 conn_outgoing_pill = conn_get_held_ill(connp, 9677 &connp->conn_outgoing_pill, &err); 9678 if (err == ILL_LOOKUP_FAILED) { 9679 if (ill != NULL) 9680 ill_refrele(ill); 9681 if (need_decref) 9682 CONN_DEC_REF(connp); 9683 freemsg(first_mp); 9684 return; 9685 } 9686 if (conn_outgoing_pill != NULL) { 9687 if (ill != NULL) 9688 ill_refrele(ill); 9689 ill = conn_outgoing_pill; 9690 attach_if = B_TRUE; 9691 match_flags = MATCH_IRE_ILL; 9692 mibptr = ill->ill_ip_mib; 9693 9694 /* 9695 * Check if we need an ire that will not be 9696 * looked up by anybody else i.e. HIDDEN. 9697 */ 9698 if (ill_is_probeonly(ill)) 9699 match_flags |= MATCH_IRE_MARK_HIDDEN; 9700 goto send_from_ill; 9701 } 9702 } 9703 9704 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9705 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9706 ill_t *conn_nofailover_ill; 9707 9708 conn_nofailover_ill = conn_get_held_ill(connp, 9709 &connp->conn_nofailover_ill, &err); 9710 if (err == ILL_LOOKUP_FAILED) { 9711 if (ill != NULL) 9712 ill_refrele(ill); 9713 if (need_decref) 9714 CONN_DEC_REF(connp); 9715 freemsg(first_mp); 9716 return; 9717 } 9718 if (conn_nofailover_ill != NULL) { 9719 if (ill != NULL) 9720 ill_refrele(ill); 9721 ill = conn_nofailover_ill; 9722 attach_if = B_TRUE; 9723 /* 9724 * Assumes that ipc_nofailover_ill is used only for 9725 * multipathing probe packets. These packets are better 9726 * dropped, if they are delayed in ND resolution, for 9727 * the reasons described in nce_queue_mp(). 9728 * IP6I_DROP_IFDELAYED will be set later on in this 9729 * function for this packet. 9730 */ 9731 drop_if_delayed = B_TRUE; 9732 match_flags = MATCH_IRE_ILL; 9733 mibptr = ill->ill_ip_mib; 9734 9735 /* 9736 * Check if we need an ire that will not be 9737 * looked up by anybody else i.e. HIDDEN. 
9738 */ 9739 if (ill_is_probeonly(ill)) 9740 match_flags |= MATCH_IRE_MARK_HIDDEN; 9741 goto send_from_ill; 9742 } 9743 } 9744 9745 /* 9746 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9747 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9748 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9749 */ 9750 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9751 ASSERT(ip6i->ip6i_ifindex != 0); 9752 attach_if = B_TRUE; 9753 ASSERT(ill != NULL); 9754 match_flags = MATCH_IRE_ILL; 9755 9756 /* 9757 * Check if we need an ire that will not be 9758 * looked up by anybody else i.e. HIDDEN. 9759 */ 9760 if (ill_is_probeonly(ill)) 9761 match_flags |= MATCH_IRE_MARK_HIDDEN; 9762 goto send_from_ill; 9763 } 9764 9765 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9766 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9767 ASSERT(ill != NULL); 9768 goto send_from_ill; 9769 } 9770 9771 /* 9772 * 4. If q is an ill queue and (link local or multicast destination) 9773 * then use that ill. 9774 */ 9775 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9776 goto send_from_ill; 9777 } 9778 9779 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9780 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9781 ill_t *conn_outgoing_ill; 9782 9783 conn_outgoing_ill = conn_get_held_ill(connp, 9784 &connp->conn_outgoing_ill, &err); 9785 if (err == ILL_LOOKUP_FAILED) { 9786 if (ill != NULL) 9787 ill_refrele(ill); 9788 if (need_decref) 9789 CONN_DEC_REF(connp); 9790 freemsg(first_mp); 9791 return; 9792 } 9793 if (ill != NULL) 9794 ill_refrele(ill); 9795 ill = conn_outgoing_ill; 9796 mibptr = ill->ill_ip_mib; 9797 goto send_from_ill; 9798 } 9799 9800 /* 9801 * 6. For unicast: Just do an IRE lookup for the best match. 9802 * If we get here for a link-local address it is rather random 9803 * what interface we pick on a multihomed host. 
9804 * *If* there is an IRE_CACHE (and the link-local address 9805 * isn't duplicated on multi links) this will find the IRE_CACHE. 9806 * Otherwise it will use one of the matching IRE_INTERFACE routes 9807 * for the link-local prefix. Hence, applications 9808 * *should* be encouraged to specify an outgoing interface when sending 9809 * to a link local address. 9810 */ 9811 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9812 !connp->conn_fully_bound)) { 9813 /* 9814 * We cache IRE_CACHEs to avoid lookups. We don't do 9815 * this for the tcp global queue and listen end point 9816 * as it does not really have a real destination to 9817 * talk to. 9818 */ 9819 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9820 ipst); 9821 } else { 9822 /* 9823 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9824 * grab a lock here to check for CONDEMNED as it is okay 9825 * to send a packet or two with the IRE_CACHE that is going 9826 * away. 9827 */ 9828 mutex_enter(&connp->conn_lock); 9829 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9830 if (ire != NULL && 9831 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9832 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9833 9834 IRE_REFHOLD(ire); 9835 mutex_exit(&connp->conn_lock); 9836 9837 } else { 9838 boolean_t cached = B_FALSE; 9839 9840 connp->conn_ire_cache = NULL; 9841 mutex_exit(&connp->conn_lock); 9842 /* Release the old ire */ 9843 if (ire != NULL && sctp_ire == NULL) 9844 IRE_REFRELE_NOTR(ire); 9845 9846 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9847 MBLK_GETLABEL(mp), ipst); 9848 if (ire != NULL) { 9849 IRE_REFHOLD_NOTR(ire); 9850 9851 mutex_enter(&connp->conn_lock); 9852 if (CONN_CACHE_IRE(connp) && 9853 (connp->conn_ire_cache == NULL)) { 9854 rw_enter(&ire->ire_bucket->irb_lock, 9855 RW_READER); 9856 if (!(ire->ire_marks & 9857 IRE_MARK_CONDEMNED)) { 9858 connp->conn_ire_cache = ire; 9859 cached = B_TRUE; 9860 } 9861 rw_exit(&ire->ire_bucket->irb_lock); 9862 } 9863 mutex_exit(&connp->conn_lock); 9864 9865 /* 9866 * We can continue to use the ire but since it 9867 * was not cached, we should drop the extra 9868 * reference. 9869 */ 9870 if (!cached) 9871 IRE_REFRELE_NOTR(ire); 9872 } 9873 } 9874 } 9875 9876 if (ire != NULL) { 9877 if (do_outrequests) { 9878 /* Handle IRE_LOCAL's that might appear here */ 9879 if (ire->ire_type == IRE_CACHE) { 9880 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9881 ill_ip_mib; 9882 } else { 9883 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9884 } 9885 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9886 } 9887 ASSERT(!attach_if); 9888 9889 /* 9890 * Check if the ire has the RTF_MULTIRT flag, inherited 9891 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9892 */ 9893 if (ire->ire_flags & RTF_MULTIRT) { 9894 /* 9895 * Force hop limit of multirouted packets if required. 9896 * The hop limit of such packets is bounded by the 9897 * ip_multirt_ttl ndd variable. 9898 * NDP packets must have a hop limit of 255; don't 9899 * change the hop limit in that case. 
9900 */ 9901 if ((ipst->ips_ip_multirt_ttl > 0) && 9902 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9903 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9904 if (ip_debug > 3) { 9905 ip2dbg(("ip_wput_v6: forcing multirt " 9906 "hop limit to %d (was %d) ", 9907 ipst->ips_ip_multirt_ttl, 9908 ip6h->ip6_hops)); 9909 pr_addr_dbg("v6dst %s\n", AF_INET6, 9910 &ire->ire_addr_v6); 9911 } 9912 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9913 } 9914 9915 /* 9916 * We look at this point if there are pending 9917 * unresolved routes. ire_multirt_need_resolve_v6() 9918 * checks in O(n) that all IRE_OFFSUBNET ire 9919 * entries for the packet's destination and 9920 * flagged RTF_MULTIRT are currently resolved. 9921 * If some remain unresolved, we do a copy 9922 * of the current message. It will be used 9923 * to initiate additional route resolutions. 9924 */ 9925 multirt_need_resolve = 9926 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9927 MBLK_GETLABEL(first_mp), ipst); 9928 ip2dbg(("ip_wput_v6: ire %p, " 9929 "multirt_need_resolve %d, first_mp %p\n", 9930 (void *)ire, multirt_need_resolve, 9931 (void *)first_mp)); 9932 if (multirt_need_resolve) { 9933 copy_mp = copymsg(first_mp); 9934 if (copy_mp != NULL) { 9935 MULTIRT_DEBUG_TAG(copy_mp); 9936 } 9937 } 9938 } 9939 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9940 connp, caller, 0, ip6i_flags, zoneid); 9941 if (need_decref) { 9942 CONN_DEC_REF(connp); 9943 connp = NULL; 9944 } 9945 IRE_REFRELE(ire); 9946 9947 /* 9948 * Try to resolve another multiroute if 9949 * ire_multirt_need_resolve_v6() deemed it necessary. 9950 * copy_mp will be consumed (sent or freed) by 9951 * ip_newroute_v6(). 
9952 */ 9953 if (copy_mp != NULL) { 9954 if (mctl_present) { 9955 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9956 } else { 9957 ip6h = (ip6_t *)copy_mp->b_rptr; 9958 } 9959 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9960 &ip6h->ip6_src, NULL, zoneid, ipst); 9961 } 9962 if (ill != NULL) 9963 ill_refrele(ill); 9964 return; 9965 } 9966 9967 /* 9968 * No full IRE for this destination. Send it to 9969 * ip_newroute_v6 to see if anything else matches. 9970 * Mark this packet as having originated on this 9971 * machine. 9972 * Update rptr if there was an ip6i_t header. 9973 */ 9974 mp->b_prev = NULL; 9975 mp->b_next = NULL; 9976 if (ip6i != NULL) 9977 mp->b_rptr -= sizeof (ip6i_t); 9978 9979 if (unspec_src) { 9980 if (ip6i == NULL) { 9981 /* 9982 * Add ip6i_t header to carry unspec_src 9983 * until the packet comes back in ip_wput_v6. 9984 */ 9985 mp = ip_add_info_v6(mp, NULL, v6dstp); 9986 if (mp == NULL) { 9987 if (do_outrequests) 9988 BUMP_MIB(mibptr, 9989 ipIfStatsHCOutRequests); 9990 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9991 if (mctl_present) 9992 freeb(first_mp); 9993 if (ill != NULL) 9994 ill_refrele(ill); 9995 if (need_decref) 9996 CONN_DEC_REF(connp); 9997 return; 9998 } 9999 ip6i = (ip6i_t *)mp->b_rptr; 10000 10001 if (mctl_present) { 10002 ASSERT(first_mp != mp); 10003 first_mp->b_cont = mp; 10004 } else { 10005 first_mp = mp; 10006 } 10007 10008 if ((mp->b_wptr - (uchar_t *)ip6i) == 10009 sizeof (ip6i_t)) { 10010 /* 10011 * ndp_resolver called from ip_newroute_v6 10012 * expects pulled up message. 
10013 */ 10014 if (!pullupmsg(mp, -1)) { 10015 ip1dbg(("ip_wput_v6: pullupmsg" 10016 " failed\n")); 10017 if (do_outrequests) { 10018 BUMP_MIB(mibptr, 10019 ipIfStatsHCOutRequests); 10020 } 10021 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10022 freemsg(first_mp); 10023 if (ill != NULL) 10024 ill_refrele(ill); 10025 if (need_decref) 10026 CONN_DEC_REF(connp); 10027 return; 10028 } 10029 ip6i = (ip6i_t *)mp->b_rptr; 10030 } 10031 ip6h = (ip6_t *)&ip6i[1]; 10032 v6dstp = &ip6h->ip6_dst; 10033 } 10034 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10035 if (mctl_present) { 10036 ASSERT(io != NULL); 10037 io->ipsec_out_unspec_src = unspec_src; 10038 } 10039 } 10040 if (do_outrequests) 10041 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10042 if (need_decref) 10043 CONN_DEC_REF(connp); 10044 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10045 if (ill != NULL) 10046 ill_refrele(ill); 10047 return; 10048 10049 10050 /* 10051 * Handle multicast packets with or without an conn. 10052 * Assumes that the transports set ip6_hops taking 10053 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10054 * into account. 10055 */ 10056 ipv6multicast: 10057 ip2dbg(("ip_wput_v6: multicast\n")); 10058 10059 /* 10060 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10061 * 2. If conn_nofailover_ill is set then use that ill. 10062 * 10063 * Hold the conn_lock till we refhold the ill of interest that is 10064 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10065 * while holding any locks, postpone the refrele until after the 10066 * conn_lock is dropped. 
10067 */ 10068 if (connp != NULL) { 10069 mutex_enter(&connp->conn_lock); 10070 conn_lock_held = B_TRUE; 10071 } else { 10072 conn_lock_held = B_FALSE; 10073 } 10074 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10075 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10076 if (err == ILL_LOOKUP_FAILED) { 10077 ip1dbg(("ip_output_v6: multicast" 10078 " conn_outgoing_pill no ipif\n")); 10079 multicast_discard: 10080 ASSERT(saved_ill == NULL); 10081 if (conn_lock_held) 10082 mutex_exit(&connp->conn_lock); 10083 if (ill != NULL) 10084 ill_refrele(ill); 10085 freemsg(first_mp); 10086 if (do_outrequests) 10087 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10088 if (need_decref) 10089 CONN_DEC_REF(connp); 10090 return; 10091 } 10092 saved_ill = ill; 10093 ill = connp->conn_outgoing_pill; 10094 attach_if = B_TRUE; 10095 match_flags = MATCH_IRE_ILL; 10096 mibptr = ill->ill_ip_mib; 10097 10098 /* 10099 * Check if we need an ire that will not be 10100 * looked up by anybody else i.e. HIDDEN. 10101 */ 10102 if (ill_is_probeonly(ill)) 10103 match_flags |= MATCH_IRE_MARK_HIDDEN; 10104 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10105 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10106 if (err == ILL_LOOKUP_FAILED) { 10107 ip1dbg(("ip_output_v6: multicast" 10108 " conn_nofailover_ill no ipif\n")); 10109 goto multicast_discard; 10110 } 10111 saved_ill = ill; 10112 ill = connp->conn_nofailover_ill; 10113 attach_if = B_TRUE; 10114 match_flags = MATCH_IRE_ILL; 10115 10116 /* 10117 * Check if we need an ire that will not be 10118 * looked up by anybody else i.e. HIDDEN. 10119 */ 10120 if (ill_is_probeonly(ill)) 10121 match_flags |= MATCH_IRE_MARK_HIDDEN; 10122 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10123 /* 10124 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10125 * we should have an ip6i_t with IP6I_ATTACH_IF if 10126 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10127 * used on this endpoint. 10128 */ 10129 ASSERT(ip6i->ip6i_ifindex != 0); 10130 attach_if = B_TRUE; 10131 ASSERT(ill != NULL); 10132 match_flags = MATCH_IRE_ILL; 10133 10134 /* 10135 * Check if we need an ire that will not be 10136 * looked up by anybody else i.e. HIDDEN. 10137 */ 10138 if (ill_is_probeonly(ill)) 10139 match_flags |= MATCH_IRE_MARK_HIDDEN; 10140 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10141 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10142 10143 ASSERT(ill != NULL); 10144 } else if (ill != NULL) { 10145 /* 10146 * 4. If q is an ill queue and (link local or multicast 10147 * destination) then use that ill. 10148 * We don't need the ipif initialization here. 10149 * This useless assert below is just to prevent lint from 10150 * reporting a null body if statement. 10151 */ 10152 ASSERT(ill != NULL); 10153 } else if (connp != NULL) { 10154 /* 10155 * 5. If IPV6_BOUND_IF has been set use that ill. 10156 * 10157 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10158 * Otherwise look for the best IRE match for the unspecified 10159 * group to determine the ill. 10160 * 10161 * conn_multicast_ill is used for only IPv6 packets. 10162 * conn_multicast_ipif is used for only IPv4 packets. 10163 * Thus a PF_INET6 socket send both IPv4 and IPv6 10164 * multicast packets using different IP*_MULTICAST_IF 10165 * interfaces. 
10166 */ 10167 if (connp->conn_outgoing_ill != NULL) { 10168 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10169 if (err == ILL_LOOKUP_FAILED) { 10170 ip1dbg(("ip_output_v6: multicast" 10171 " conn_outgoing_ill no ipif\n")); 10172 goto multicast_discard; 10173 } 10174 ill = connp->conn_outgoing_ill; 10175 } else if (connp->conn_multicast_ill != NULL) { 10176 err = ill_check_and_refhold(connp->conn_multicast_ill); 10177 if (err == ILL_LOOKUP_FAILED) { 10178 ip1dbg(("ip_output_v6: multicast" 10179 " conn_multicast_ill no ipif\n")); 10180 goto multicast_discard; 10181 } 10182 ill = connp->conn_multicast_ill; 10183 } else { 10184 mutex_exit(&connp->conn_lock); 10185 conn_lock_held = B_FALSE; 10186 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10187 if (ipif == NULL) { 10188 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10189 goto multicast_discard; 10190 } 10191 /* 10192 * We have a ref to this ipif, so we can safely 10193 * access ipif_ill. 10194 */ 10195 ill = ipif->ipif_ill; 10196 mutex_enter(&ill->ill_lock); 10197 if (!ILL_CAN_LOOKUP(ill)) { 10198 mutex_exit(&ill->ill_lock); 10199 ipif_refrele(ipif); 10200 ill = NULL; 10201 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10202 goto multicast_discard; 10203 } 10204 ill_refhold_locked(ill); 10205 mutex_exit(&ill->ill_lock); 10206 ipif_refrele(ipif); 10207 /* 10208 * Save binding until IPV6_MULTICAST_IF 10209 * changes it 10210 */ 10211 mutex_enter(&connp->conn_lock); 10212 connp->conn_multicast_ill = ill; 10213 connp->conn_orig_multicast_ifindex = 10214 ill->ill_phyint->phyint_ifindex; 10215 mutex_exit(&connp->conn_lock); 10216 } 10217 } 10218 if (conn_lock_held) 10219 mutex_exit(&connp->conn_lock); 10220 10221 if (saved_ill != NULL) 10222 ill_refrele(saved_ill); 10223 10224 ASSERT(ill != NULL); 10225 /* 10226 * For multicast loopback interfaces replace the multicast address 10227 * with a unicast address for the ire lookup. 
10228 */ 10229 if (IS_LOOPBACK(ill)) 10230 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10231 10232 mibptr = ill->ill_ip_mib; 10233 if (do_outrequests) { 10234 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10235 do_outrequests = B_FALSE; 10236 } 10237 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10238 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10239 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10240 10241 /* 10242 * As we may lose the conn by the time we reach ip_wput_ire_v6 10243 * we copy conn_multicast_loop and conn_dontroute on to an 10244 * ipsec_out. In case if this datagram goes out secure, 10245 * we need the ill_index also. Copy that also into the 10246 * ipsec_out. 10247 */ 10248 if (mctl_present) { 10249 io = (ipsec_out_t *)first_mp->b_rptr; 10250 ASSERT(first_mp->b_datap->db_type == M_CTL); 10251 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10252 } else { 10253 ASSERT(mp == first_mp); 10254 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10255 NULL) { 10256 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10257 freemsg(mp); 10258 if (ill != NULL) 10259 ill_refrele(ill); 10260 if (need_decref) 10261 CONN_DEC_REF(connp); 10262 return; 10263 } 10264 io = (ipsec_out_t *)first_mp->b_rptr; 10265 /* This is not a secure packet */ 10266 io->ipsec_out_secure = B_FALSE; 10267 io->ipsec_out_use_global_policy = B_TRUE; 10268 io->ipsec_out_zoneid = 10269 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10270 first_mp->b_cont = mp; 10271 mctl_present = B_TRUE; 10272 } 10273 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10274 io->ipsec_out_unspec_src = unspec_src; 10275 if (connp != NULL) 10276 io->ipsec_out_dontroute = connp->conn_dontroute; 10277 10278 send_from_ill: 10279 ASSERT(ill != NULL); 10280 ASSERT(mibptr == ill->ill_ip_mib); 10281 if (do_outrequests) { 10282 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10283 do_outrequests = B_FALSE; 10284 } 10285 10286 if (io != NULL) 10287 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10288 10289 /* 10290 * When a specific ill is specified (using IPV6_PKTINFO, 10291 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10292 * on routing entries (ftable and ctable) that have a matching 10293 * ire->ire_ipif->ipif_ill. Thus this can only be used 10294 * for destinations that are on-link for the specific ill 10295 * and that can appear on multiple links. Thus it is useful 10296 * for multicast destinations, link-local destinations, and 10297 * at some point perhaps for site-local destinations (if the 10298 * node sits at a site boundary). 10299 * We create the cache entries in the regular ctable since 10300 * it can not "confuse" things for other destinations. 10301 * table. 10302 * 10303 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10304 * It is used only when ire_cache_lookup is used above. 10305 */ 10306 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10307 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10308 if (ire != NULL) { 10309 /* 10310 * Check if the ire has the RTF_MULTIRT flag, inherited 10311 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10312 */ 10313 if (ire->ire_flags & RTF_MULTIRT) { 10314 /* 10315 * Force hop limit of multirouted packets if required. 10316 * The hop limit of such packets is bounded by the 10317 * ip_multirt_ttl ndd variable. 
10318 * NDP packets must have a hop limit of 255; don't 10319 * change the hop limit in that case. 10320 */ 10321 if ((ipst->ips_ip_multirt_ttl > 0) && 10322 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10323 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10324 if (ip_debug > 3) { 10325 ip2dbg(("ip_wput_v6: forcing multirt " 10326 "hop limit to %d (was %d) ", 10327 ipst->ips_ip_multirt_ttl, 10328 ip6h->ip6_hops)); 10329 pr_addr_dbg("v6dst %s\n", AF_INET6, 10330 &ire->ire_addr_v6); 10331 } 10332 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10333 } 10334 10335 /* 10336 * We look at this point if there are pending 10337 * unresolved routes. ire_multirt_need_resolve_v6() 10338 * checks in O(n) that all IRE_OFFSUBNET ire 10339 * entries for the packet's destination and 10340 * flagged RTF_MULTIRT are currently resolved. 10341 * If some remain unresolved, we make a copy 10342 * of the current message. It will be used 10343 * to initiate additional route resolutions. 10344 */ 10345 multirt_need_resolve = 10346 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10347 MBLK_GETLABEL(first_mp), ipst); 10348 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10349 "multirt_need_resolve %d, first_mp %p\n", 10350 (void *)ire, multirt_need_resolve, 10351 (void *)first_mp)); 10352 if (multirt_need_resolve) { 10353 copy_mp = copymsg(first_mp); 10354 if (copy_mp != NULL) { 10355 MULTIRT_DEBUG_TAG(copy_mp); 10356 } 10357 } 10358 } 10359 10360 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10361 ill->ill_name, (void *)ire, 10362 ill->ill_phyint->phyint_ifindex)); 10363 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10364 connp, caller, 10365 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10366 ip6i_flags, zoneid); 10367 ire_refrele(ire); 10368 if (need_decref) { 10369 CONN_DEC_REF(connp); 10370 connp = NULL; 10371 } 10372 10373 /* 10374 * Try to resolve another multiroute if 10375 * ire_multirt_need_resolve_v6() deemed it necessary. 
10376 * copy_mp will be consumed (sent or freed) by 10377 * ip_newroute_[ipif_]v6(). 10378 */ 10379 if (copy_mp != NULL) { 10380 if (mctl_present) { 10381 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10382 } else { 10383 ip6h = (ip6_t *)copy_mp->b_rptr; 10384 } 10385 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10386 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10387 zoneid, ipst); 10388 if (ipif == NULL) { 10389 ip1dbg(("ip_wput_v6: No ipif for " 10390 "multicast\n")); 10391 MULTIRT_DEBUG_UNTAG(copy_mp); 10392 freemsg(copy_mp); 10393 return; 10394 } 10395 ip_newroute_ipif_v6(q, copy_mp, ipif, 10396 ip6h->ip6_dst, unspec_src, zoneid); 10397 ipif_refrele(ipif); 10398 } else { 10399 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10400 &ip6h->ip6_src, ill, zoneid, ipst); 10401 } 10402 } 10403 ill_refrele(ill); 10404 return; 10405 } 10406 if (need_decref) { 10407 CONN_DEC_REF(connp); 10408 connp = NULL; 10409 } 10410 10411 /* Update rptr if there was an ip6i_t header. */ 10412 if (ip6i != NULL) 10413 mp->b_rptr -= sizeof (ip6i_t); 10414 if (unspec_src || attach_if) { 10415 if (ip6i == NULL) { 10416 /* 10417 * Add ip6i_t header to carry unspec_src 10418 * or attach_if until the packet comes back in 10419 * ip_wput_v6. 10420 */ 10421 if (mctl_present) { 10422 first_mp->b_cont = 10423 ip_add_info_v6(mp, NULL, v6dstp); 10424 mp = first_mp->b_cont; 10425 if (mp == NULL) 10426 freeb(first_mp); 10427 } else { 10428 first_mp = mp = ip_add_info_v6(mp, NULL, 10429 v6dstp); 10430 } 10431 if (mp == NULL) { 10432 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10433 ill_refrele(ill); 10434 return; 10435 } 10436 ip6i = (ip6i_t *)mp->b_rptr; 10437 if ((mp->b_wptr - (uchar_t *)ip6i) == 10438 sizeof (ip6i_t)) { 10439 /* 10440 * ndp_resolver called from ip_newroute_v6 10441 * expects a pulled up message. 
10442 */ 10443 if (!pullupmsg(mp, -1)) { 10444 ip1dbg(("ip_wput_v6: pullupmsg" 10445 " failed\n")); 10446 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10447 freemsg(first_mp); 10448 return; 10449 } 10450 ip6i = (ip6i_t *)mp->b_rptr; 10451 } 10452 ip6h = (ip6_t *)&ip6i[1]; 10453 v6dstp = &ip6h->ip6_dst; 10454 } 10455 if (unspec_src) 10456 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10457 if (attach_if) { 10458 /* 10459 * Bind to nofailover/BOUND_PIF overrides ifindex. 10460 */ 10461 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10462 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10463 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10464 if (drop_if_delayed) { 10465 /* This is a multipathing probe packet */ 10466 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10467 } 10468 } 10469 if (mctl_present) { 10470 ASSERT(io != NULL); 10471 io->ipsec_out_unspec_src = unspec_src; 10472 } 10473 } 10474 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10475 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10476 unspec_src, zoneid); 10477 } else { 10478 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10479 zoneid, ipst); 10480 } 10481 ill_refrele(ill); 10482 return; 10483 10484 notv6: 10485 /* FIXME?: assume the caller calls the right version of ip_output? */ 10486 if (q->q_next == NULL) { 10487 connp = Q_TO_CONN(q); 10488 10489 /* 10490 * We can change conn_send for all types of conn, even 10491 * though only TCP uses it right now. 10492 * FIXME: sctp could use conn_send but doesn't currently. 10493 */ 10494 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10495 } 10496 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10497 (void) ip_output(arg, first_mp, arg2, caller); 10498 if (ill != NULL) 10499 ill_refrele(ill); 10500 } 10501 10502 /* 10503 * If this is a conn_t queue, then we pass in the conn. This includes the 10504 * zoneid. 10505 * Otherwise, this is a message for an ill_t queue, 10506 * in which case we use the global zoneid since those are all part of 10507 * the global zone. 
10508 */ /* ip_wput_v6: thin dispatcher in front of ip_output_v6(). The packet is always tagged with the caller value IP_WPUT; the first argument is the conn_t from Q_TO_CONN(q) when CONN_Q(q) is true and GLOBAL_ZONEID otherwise. */ 10509 void 10510 ip_wput_v6(queue_t *q, mblk_t *mp) 10511 { 10512 if (CONN_Q(q)) 10513 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10514 else 10515 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10516 } 10517 /* ipsec_out_attach_if: mark the ipsec_out_t as attached to an interface by setting ipsec_out_attach_if to B_TRUE and storing attach_index in ipsec_out_ill_index. Asserts that io->ipsec_out_type is IPSEC_OUT. */ 10518 static void 10519 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10520 { 10521 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10522 io->ipsec_out_attach_if = B_TRUE; 10523 io->ipsec_out_ill_index = attach_index; 10524 } 10525 10526 /* 10527 * NULL send-to queue - packet is to be delivered locally. 10528 */ /* ip_wput_local_v6 parameters: q is handed on to the fanout and icmp_inbound_v6() calls; ill supplies the ip_stack_t and the MIB and is passed as both ill arguments of the fanout calls; ip6h is the IPv6 header of the packet; first_mp is the message, optionally led by an M_CTL block holding an ipsec_out_t; ire has its packet count and last-used time updated and its ire_zoneid given to the fanout routines; fanout_flags is or'ed into the flags passed to the fanout routines; zoneid is read only inside the ips_ipobs_enabled block. On the paths visible here first_mp is either freed or handed to a fanout routine; whether those routines always consume it cannot be told from this function. */ 10529 void 10530 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10531 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10532 { 10533 uint32_t ports; 10534 mblk_t *mp = first_mp, *first_mp1; 10535 boolean_t mctl_present; 10536 uint8_t nexthdr; 10537 uint16_t hdr_length; 10538 ipsec_out_t *io; 10539 mib2_ipIfStatsEntry_t *mibptr; 10540 ilm_t *ilm; 10541 uint_t nexthdr_offset; 10542 ip_stack_t *ipst = ill->ill_ipst; 10543 /* Split off a leading M_CTL block, if any, and settle mctl_present. */ 10544 if (DB_TYPE(mp) == M_CTL) { 10545 io = (ipsec_out_t *)mp->b_rptr; 10546 if (!io->ipsec_out_secure) { /* Not marked secure: free the M_CTL block and continue with the bare packet. */ 10547 mp = mp->b_cont; 10548 freeb(first_mp); 10549 first_mp = mp; 10550 mctl_present = B_FALSE; 10551 } else { /* Marked secure: keep the M_CTL block; ipsec_out_to_in() presumably rewrites it into its inbound form -- confirm against its definition. */ 10552 mctl_present = B_TRUE; 10553 mp = first_mp->b_cont; 10554 ipsec_out_to_in(first_mp); 10555 } 10556 } else { 10557 mctl_present = B_FALSE; 10558 } 10559 10560 /* 10561 * Remove reachability confirmation bit from version field 10562 * before passing the packet on to any firewall hooks or 10563 * looping back the packet. 
10564 */ 10565 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10566 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10567 10568 DTRACE_PROBE4(ip6__loopback__in__start, 10569 ill_t *, ill, ill_t *, NULL, 10570 ip6_t *, ip6h, mblk_t *, first_mp); 10571 10572 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10573 ipst->ips_ipv6firewall_loopback_in, 10574 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10575 10576 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10577 /* The code expects FW_HOOKS6 to be able to leave first_mp NULL (presumably the hook consumed the packet -- confirm in the macro); nothing is left to deliver then. */ 10578 if (first_mp == NULL) 10579 return; 10580 /* Observability hook: report the packet with source zone szone and destination zone dzone. */ 10581 if (ipst->ips_ipobs_enabled) { 10582 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10583 zoneid_t stackzoneid = netstackid_to_zoneid( 10584 ipst->ips_netstack->netstack_stackid); 10585 10586 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10587 /* 10588 * ::1 is special, as we cannot lookup its zoneid by 10589 * address. For this case, restrict the lookup to the 10590 * source zone. 10591 */ 10592 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10593 lookup_zoneid = zoneid; 10594 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10595 lookup_zoneid); 10596 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10597 IPV6_VERSION, 0, ipst); 10598 } 10599 10600 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10601 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10602 int, 1); 10603 10604 nexthdr = ip6h->ip6_nxt; 10605 mibptr = ill->ill_ip_mib; 10606 10607 /* Fastpath */ /* When ip6_nxt is one of the four protocols below, hdr_length is the fixed IPV6_HDR_LEN and nexthdr_offset is the offset of ip6_nxt itself; any other value goes through ip_hdr_length_nexthdr_v6(), which fills in hdr_length and nexthdrp. */ 10608 switch (nexthdr) { 10609 case IPPROTO_TCP: 10610 case IPPROTO_UDP: 10611 case IPPROTO_ICMPV6: 10612 case IPPROTO_SCTP: 10613 hdr_length = IPV6_HDR_LEN; 10614 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10615 (uchar_t *)ip6h); 10616 break; 10617 default: { 10618 uint8_t *nexthdrp; 10619 10620 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10621 &hdr_length, &nexthdrp)) { 10622 /* Malformed packet */ 10623 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10624 freemsg(first_mp); 10625 return; 10626 } 10627 nexthdr = *nexthdrp; 10628 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10629 break; 10630 } 10631 } 10632 /* Update the ire's packet count through UPDATE_OB_PKT_COUNT and record lbolt as its last-used time. */ 10633 UPDATE_OB_PKT_COUNT(ire); 10634 ire->ire_last_used_time = lbolt; 10635 10636 switch (nexthdr) { 10637 case IPPROTO_TCP: 10638 if (DB_TYPE(mp) == M_DATA) { 10639 /* 10640 * M_DATA mblk, so init mblk (chain) for 10641 * no struio(). 10642 */ 10643 mblk_t *mp1 = mp; 10644 10645 do { 10646 mp1->b_datap->db_struioflag = 0; 10647 } while ((mp1 = mp1->b_cont) != NULL); 10648 } /* NOTE(review): ports is loaded here but is not passed to ip_fanout_tcp_v6() and is not read again before the return -- looks like a dead store; confirm before removing. */ 10649 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10650 TCP_PORTS_OFFSET); 10651 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10652 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10653 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10654 hdr_length, mctl_present, ire->ire_zoneid); 10655 return; 10656 10657 case IPPROTO_UDP: 10658 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10659 UDP_PORTS_OFFSET); 10660 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10661 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10662 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10663 return; 10664 10665 case IPPROTO_SCTP: 10666 { 10667 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10668 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10669 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10670 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10671 return; 10672 } 10673 case IPPROTO_ICMPV6: { 10674 icmp6_t *icmp6; 10675 10676 /* check for full IPv6+ICMPv6 header */ 10677 if ((mp->b_wptr - mp->b_rptr) < 10678 (hdr_length + ICMP6_MINLEN)) { 10679 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10680 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10681 " failed\n")); 10682 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10683 freemsg(first_mp); 10684 return; 10685 } 10686 ip6h = (ip6_t *)mp->b_rptr; 10687 } 10688 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10689 10690 /* Update output mib stats */ 10691 icmp_update_out_mib_v6(ill, icmp6); 10692 10693 /* Check variable for testing applications */ 10694 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10695 
freemsg(first_mp); 10696 return; 10697 } 10698 /* 10699 * Assume that there is always at least one conn for 10700 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10701 * where there is no conn. 10702 */ 10703 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10704 !IS_LOOPBACK(ill)) { 10705 /* 10706 * In the multicast case, applications may have 10707 * joined the group from different zones, so we 10708 * need to deliver the packet to each of them. 10709 * Loop through the multicast memberships 10710 * structures (ilm) on the receive ill and send 10711 * a copy of the packet up each matching one. 10712 * However, we don't do this for multicasts sent 10713 * on the loopback interface (PHYI_LOOPBACK flag 10714 * set) as they must stay in the sender's zone. 10715 */ 10716 ILM_WALKER_HOLD(ill); 10717 for (ilm = ill->ill_ilm; ilm != NULL; 10718 ilm = ilm->ilm_next) { 10719 if (ilm->ilm_flags & ILM_DELETED) 10720 continue; 10721 if (!IN6_ARE_ADDR_EQUAL( 10722 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10723 continue; 10724 if ((fanout_flags & 10725 IP_FF_NO_MCAST_LOOP) && 10726 ilm->ilm_zoneid == ire->ire_zoneid) 10727 continue; 10728 if (!ipif_lookup_zoneid(ill, 10729 ilm->ilm_zoneid, IPIF_UP, NULL)) 10730 continue; 10731 /* Each matching membership gets its own copy; a failed copy just skips that membership. */ 10732 first_mp1 = ip_copymsg(first_mp); 10733 if (first_mp1 == NULL) 10734 continue; 10735 icmp_inbound_v6(q, first_mp1, ill, 10736 hdr_length, mctl_present, 10737 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10738 NULL); 10739 } 10740 ILM_WALKER_RELE(ill); 10741 } else { 10742 first_mp1 = ip_copymsg(first_mp); 10743 if (first_mp1 != NULL) 10744 icmp_inbound_v6(q, first_mp1, ill, 10745 hdr_length, mctl_present, 10746 IP6_NO_IPPOLICY, ire->ire_zoneid, 10747 NULL); 10748 } 10749 } /* icmp_inbound_v6() was only ever given copies; the original first_mp goes on to the generic protocol fanout below. */ 10750 /* FALLTHRU */ 10751 default: { 10752 /* 10753 * Handle protocols with which IPv6 is less intimate. 10754 */ 10755 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10756 10757 /* 10758 * Enable sending ICMP for "Unknown" nexthdr 10759 * case. i.e. 
where we did not FALLTHRU from 10760 * IPPROTO_ICMPV6 processing case above. 10761 */ 10762 if (nexthdr != IPPROTO_ICMPV6) 10763 fanout_flags |= IP_FF_SEND_ICMP; 10764 /* 10765 * Note: There can be more than one stream bound 10766 * to a particular protocol. When this is the case, 10767 * each one gets a copy of any incoming packets. 10768 */ 10769 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10770 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10771 mctl_present, ire->ire_zoneid); 10772 return; 10773 } 10774 } 10775 } 10776 10777 /* 10778 * Send packet using IRE. 10779 * Checksumming is controlled by cksum_request: 10780 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10781 * 1 => Skip TCP/UDP/SCTP checksum 10782 * Otherwise => checksum_request contains insert offset for checksum 10783 * 10784 * Assumes that the following set of headers appear in the first 10785 * mblk: 10786 * ip6_t 10787 * Any extension headers 10788 * TCP/UDP/SCTP header (if present) 10789 * The routine can handle an ICMPv6 header that is not in the first mblk. 10790 * 10791 * NOTE : This function does not ire_refrele the ire passed in as the 10792 * argument unlike ip_wput_ire where the REFRELE is done. 10793 * Refer to ip_wput_ire for more on this. 10794 */ 10795 static void 10796 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10797 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10798 zoneid_t zoneid) 10799 { 10800 ip6_t *ip6h; 10801 uint8_t nexthdr; 10802 uint16_t hdr_length; 10803 uint_t reachable = 0x0; 10804 ill_t *ill; 10805 mib2_ipIfStatsEntry_t *mibptr; 10806 mblk_t *first_mp; 10807 boolean_t mctl_present; 10808 ipsec_out_t *io; 10809 boolean_t conn_dontroute; /* conn value for multicast */ 10810 boolean_t conn_multicast_loop; /* conn value for multicast */ 10811 boolean_t multicast_forward; /* Should we forward ? 
*/ 10812 int max_frag; 10813 ip_stack_t *ipst = ire->ire_ipst; 10814 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10815 10816 ill = ire_to_ill(ire); 10817 first_mp = mp; 10818 multicast_forward = B_FALSE; 10819 10820 if (mp->b_datap->db_type != M_CTL) { 10821 ip6h = (ip6_t *)first_mp->b_rptr; 10822 } else { 10823 io = (ipsec_out_t *)first_mp->b_rptr; 10824 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10825 /* 10826 * Grab the zone id now because the M_CTL can be discarded by 10827 * ip_wput_ire_parse_ipsec_out() below. 10828 */ 10829 ASSERT(zoneid == io->ipsec_out_zoneid); 10830 ASSERT(zoneid != ALL_ZONES); 10831 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10832 /* 10833 * For the multicast case, ipsec_out carries conn_dontroute and 10834 * conn_multicast_loop as conn may not be available here. We 10835 * need this for multicast loopback and forwarding which is done 10836 * later in the code. 10837 */ 10838 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10839 conn_dontroute = io->ipsec_out_dontroute; 10840 conn_multicast_loop = io->ipsec_out_multicast_loop; 10841 /* 10842 * If conn_dontroute is not set or conn_multicast_loop 10843 * is set, we need to do forwarding/loopback. For 10844 * datagrams from ip_wput_multicast, conn_dontroute is 10845 * set to B_TRUE and conn_multicast_loop is set to 10846 * B_FALSE so that we neither do forwarding nor 10847 * loopback. 10848 */ 10849 if (!conn_dontroute || conn_multicast_loop) 10850 multicast_forward = B_TRUE; 10851 } 10852 } 10853 10854 /* 10855 * If the sender didn't supply the hop limit and there is a default 10856 * unicast hop limit associated with the output interface, we use 10857 * that if the packet is unicast. Interface specific unicast hop 10858 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10859 */ 10860 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10861 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10862 ip6h->ip6_hops = ill->ill_max_hops; 10863 } 10864 10865 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10866 ire->ire_zoneid != ALL_ZONES) { 10867 /* 10868 * When a zone sends a packet to another zone, we try to deliver 10869 * the packet under the same conditions as if the destination 10870 * was a real node on the network. To do so, we look for a 10871 * matching route in the forwarding table. 10872 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10873 * ip_newroute_v6() does. 10874 * Note that IRE_LOCAL are special, since they are used 10875 * when the zoneid doesn't match in some cases. This means that 10876 * we need to handle ipha_src differently since ire_src_addr 10877 * belongs to the receiving zone instead of the sending zone. 10878 * When ip_restrict_interzone_loopback is set, then 10879 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10880 * for loopback between zones when the logical "Ethernet" would 10881 * have looped them back. 
10882 */ 10883 ire_t *src_ire; 10884 10885 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10886 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10887 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10888 if (src_ire != NULL && 10889 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10890 (!ipst->ips_ip_restrict_interzone_loopback || 10891 ire_local_same_ill_group(ire, src_ire))) { 10892 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10893 !unspec_src) { 10894 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10895 } 10896 ire_refrele(src_ire); 10897 } else { 10898 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10899 if (src_ire != NULL) { 10900 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10901 ire_refrele(src_ire); 10902 freemsg(first_mp); 10903 return; 10904 } 10905 ire_refrele(src_ire); 10906 } 10907 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10908 /* Failed */ 10909 freemsg(first_mp); 10910 return; 10911 } 10912 icmp_unreachable_v6(q, first_mp, 10913 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10914 zoneid, ipst); 10915 return; 10916 } 10917 } 10918 10919 if (mp->b_datap->db_type == M_CTL || 10920 ipss->ipsec_outbound_v6_policy_present) { 10921 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10922 connp, unspec_src, zoneid); 10923 if (mp == NULL) { 10924 return; 10925 } 10926 } 10927 10928 first_mp = mp; 10929 if (mp->b_datap->db_type == M_CTL) { 10930 io = (ipsec_out_t *)mp->b_rptr; 10931 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10932 mp = mp->b_cont; 10933 mctl_present = B_TRUE; 10934 } else { 10935 mctl_present = B_FALSE; 10936 } 10937 10938 ip6h = (ip6_t *)mp->b_rptr; 10939 nexthdr = ip6h->ip6_nxt; 10940 mibptr = ill->ill_ip_mib; 10941 10942 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10943 ipif_t *ipif; 10944 10945 /* 10946 * Select the source address using ipif_select_source_v6. 
10947 */ 10948 if (attach_index != 0) { 10949 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10950 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10951 } else { 10952 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10953 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10954 } 10955 if (ipif == NULL) { 10956 if (ip_debug > 2) { 10957 /* ip1dbg */ 10958 pr_addr_dbg("ip_wput_ire_v6: no src for " 10959 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10960 printf("ip_wput_ire_v6: interface name %s\n", 10961 ill->ill_name); 10962 } 10963 freemsg(first_mp); 10964 return; 10965 } 10966 ip6h->ip6_src = ipif->ipif_v6src_addr; 10967 ipif_refrele(ipif); 10968 } 10969 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10970 if ((connp != NULL && connp->conn_multicast_loop) || 10971 !IS_LOOPBACK(ill)) { 10972 ilm_t *ilm; 10973 10974 ILM_WALKER_HOLD(ill); 10975 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10976 ILM_WALKER_RELE(ill); 10977 if (ilm != NULL) { 10978 mblk_t *nmp; 10979 int fanout_flags = 0; 10980 10981 if (connp != NULL && 10982 !connp->conn_multicast_loop) { 10983 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10984 } 10985 ip1dbg(("ip_wput_ire_v6: " 10986 "Loopback multicast\n")); 10987 nmp = ip_copymsg(first_mp); 10988 if (nmp != NULL) { 10989 ip6_t *nip6h; 10990 mblk_t *mp_ip6h; 10991 10992 if (mctl_present) { 10993 nip6h = (ip6_t *) 10994 nmp->b_cont->b_rptr; 10995 mp_ip6h = nmp->b_cont; 10996 } else { 10997 nip6h = (ip6_t *)nmp->b_rptr; 10998 mp_ip6h = nmp; 10999 } 11000 11001 DTRACE_PROBE4( 11002 ip6__loopback__out__start, 11003 ill_t *, NULL, 11004 ill_t *, ill, 11005 ip6_t *, nip6h, 11006 mblk_t *, nmp); 11007 11008 FW_HOOKS6( 11009 ipst->ips_ip6_loopback_out_event, 11010 ipst->ips_ipv6firewall_loopback_out, 11011 NULL, ill, nip6h, nmp, mp_ip6h, 11012 0, ipst); 11013 11014 DTRACE_PROBE1( 11015 ip6__loopback__out__end, 11016 mblk_t *, nmp); 11017 11018 /* 11019 * DTrace this as ip:::send. 
A blocked 11020 * packet will fire the send probe, but 11021 * not the receive probe. 11022 */ 11023 DTRACE_IP7(send, mblk_t *, nmp, 11024 conn_t *, NULL, void_ip_t *, nip6h, 11025 __dtrace_ipsr_ill_t *, ill, 11026 ipha_t *, NULL, ip6_t *, nip6h, 11027 int, 1); 11028 11029 if (nmp != NULL) { 11030 /* 11031 * Deliver locally and to 11032 * every local zone, except 11033 * the sending zone when 11034 * IPV6_MULTICAST_LOOP is 11035 * disabled. 11036 */ 11037 ip_wput_local_v6(RD(q), ill, 11038 nip6h, nmp, ire, 11039 fanout_flags, zoneid); 11040 } 11041 } else { 11042 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11043 ip1dbg(("ip_wput_ire_v6: " 11044 "copymsg failed\n")); 11045 } 11046 } 11047 } 11048 if (ip6h->ip6_hops == 0 || 11049 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11050 IS_LOOPBACK(ill)) { 11051 /* 11052 * Local multicast or just loopback on loopback 11053 * interface. 11054 */ 11055 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11056 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11057 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11058 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11059 freemsg(first_mp); 11060 return; 11061 } 11062 } 11063 11064 if (ire->ire_stq != NULL) { 11065 uint32_t sum; 11066 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11067 ill_phyint->phyint_ifindex; 11068 queue_t *dev_q = ire->ire_stq->q_next; 11069 11070 /* 11071 * non-NULL send-to queue - packet is to be sent 11072 * out an interface. 11073 */ 11074 11075 /* Driver is flow-controlling? */ 11076 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11077 DEV_Q_FLOW_BLOCKED(dev_q)) { 11078 /* 11079 * Queue packet if we have an conn to give back 11080 * pressure. We can't queue packets intended for 11081 * hardware acceleration since we've tossed that 11082 * state already. If the packet is being fed back 11083 * from ire_send_v6, we don't know the position in 11084 * the queue to enqueue the packet and we discard 11085 * the packet. 
11086 */ 11087 if (ipst->ips_ip_output_queue && connp != NULL && 11088 !mctl_present && caller != IRE_SEND) { 11089 if (caller == IP_WSRV) { 11090 connp->conn_did_putbq = 1; 11091 (void) putbq(connp->conn_wq, mp); 11092 conn_drain_insert(connp); 11093 /* 11094 * caller == IP_WSRV implies we are 11095 * the service thread, and the 11096 * queue is already noenabled. 11097 * The check for canput and 11098 * the putbq is not atomic. 11099 * So we need to check again. 11100 */ 11101 if (canput(dev_q)) 11102 connp->conn_did_putbq = 0; 11103 } else { 11104 (void) putq(connp->conn_wq, mp); 11105 } 11106 return; 11107 } 11108 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11109 freemsg(first_mp); 11110 return; 11111 } 11112 11113 /* 11114 * Look for reachability confirmations from the transport. 11115 */ 11116 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11117 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11118 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11119 if (mctl_present) 11120 io->ipsec_out_reachable = B_TRUE; 11121 } 11122 /* Fastpath */ 11123 switch (nexthdr) { 11124 case IPPROTO_TCP: 11125 case IPPROTO_UDP: 11126 case IPPROTO_ICMPV6: 11127 case IPPROTO_SCTP: 11128 hdr_length = IPV6_HDR_LEN; 11129 break; 11130 default: { 11131 uint8_t *nexthdrp; 11132 11133 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11134 &hdr_length, &nexthdrp)) { 11135 /* Malformed packet */ 11136 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11137 freemsg(first_mp); 11138 return; 11139 } 11140 nexthdr = *nexthdrp; 11141 break; 11142 } 11143 } 11144 11145 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11146 uint16_t *up; 11147 uint16_t *insp; 11148 11149 /* 11150 * The packet header is processed once for all, even 11151 * in the multirouting case. We disable hardware 11152 * checksum if the packet is multirouted, as it will be 11153 * replicated via several interfaces, and not all of 11154 * them may have this capability. 
11155 */ 11156 if (cksum_request == 1 && 11157 !(ire->ire_flags & RTF_MULTIRT)) { 11158 /* Skip the transport checksum */ 11159 goto cksum_done; 11160 } 11161 /* 11162 * Do user-configured raw checksum. 11163 * Compute checksum and insert at offset "cksum_request" 11164 */ 11165 11166 /* check for enough headers for checksum */ 11167 cksum_request += hdr_length; /* offset from rptr */ 11168 if ((mp->b_wptr - mp->b_rptr) < 11169 (cksum_request + sizeof (int16_t))) { 11170 if (!pullupmsg(mp, 11171 cksum_request + sizeof (int16_t))) { 11172 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11173 " failed\n")); 11174 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11175 freemsg(first_mp); 11176 return; 11177 } 11178 ip6h = (ip6_t *)mp->b_rptr; 11179 } 11180 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11181 ASSERT(((uintptr_t)insp & 0x1) == 0); 11182 up = (uint16_t *)&ip6h->ip6_src; 11183 /* 11184 * icmp has placed length and routing 11185 * header adjustment in *insp. 11186 */ 11187 sum = htons(nexthdr) + 11188 up[0] + up[1] + up[2] + up[3] + 11189 up[4] + up[5] + up[6] + up[7] + 11190 up[8] + up[9] + up[10] + up[11] + 11191 up[12] + up[13] + up[14] + up[15]; 11192 sum = (sum & 0xffff) + (sum >> 16); 11193 *insp = IP_CSUM(mp, hdr_length, sum); 11194 } else if (nexthdr == IPPROTO_TCP) { 11195 uint16_t *up; 11196 11197 /* 11198 * Check for full IPv6 header + enough TCP header 11199 * to get at the checksum field. 
11200 */ 11201 if ((mp->b_wptr - mp->b_rptr) < 11202 (hdr_length + TCP_CHECKSUM_OFFSET + 11203 TCP_CHECKSUM_SIZE)) { 11204 if (!pullupmsg(mp, hdr_length + 11205 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11206 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11207 " failed\n")); 11208 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11209 freemsg(first_mp); 11210 return; 11211 } 11212 ip6h = (ip6_t *)mp->b_rptr; 11213 } 11214 11215 up = (uint16_t *)&ip6h->ip6_src; 11216 /* 11217 * Note: The TCP module has stored the length value 11218 * into the tcp checksum field, so we don't 11219 * need to explicitly sum it in here. 11220 */ 11221 sum = up[0] + up[1] + up[2] + up[3] + 11222 up[4] + up[5] + up[6] + up[7] + 11223 up[8] + up[9] + up[10] + up[11] + 11224 up[12] + up[13] + up[14] + up[15]; 11225 11226 /* Fold the initial sum */ 11227 sum = (sum & 0xffff) + (sum >> 16); 11228 11229 up = (uint16_t *)(((uchar_t *)ip6h) + 11230 hdr_length + TCP_CHECKSUM_OFFSET); 11231 11232 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11233 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11234 ire->ire_max_frag, mctl_present, sum); 11235 11236 /* Software checksum? 
*/ 11237 if (DB_CKSUMFLAGS(mp) == 0) { 11238 IP6_STAT(ipst, ip6_out_sw_cksum); 11239 IP6_STAT_UPDATE(ipst, 11240 ip6_tcp_out_sw_cksum_bytes, 11241 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11242 hdr_length); 11243 } 11244 } else if (nexthdr == IPPROTO_UDP) { 11245 uint16_t *up; 11246 11247 /* 11248 * check for full IPv6 header + enough UDP header 11249 * to get at the UDP checksum field 11250 */ 11251 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11252 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11253 if (!pullupmsg(mp, hdr_length + 11254 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11255 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11256 " failed\n")); 11257 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11258 freemsg(first_mp); 11259 return; 11260 } 11261 ip6h = (ip6_t *)mp->b_rptr; 11262 } 11263 up = (uint16_t *)&ip6h->ip6_src; 11264 /* 11265 * Note: The UDP module has stored the length value 11266 * into the udp checksum field, so we don't 11267 * need to explicitly sum it in here. 11268 */ 11269 sum = up[0] + up[1] + up[2] + up[3] + 11270 up[4] + up[5] + up[6] + up[7] + 11271 up[8] + up[9] + up[10] + up[11] + 11272 up[12] + up[13] + up[14] + up[15]; 11273 11274 /* Fold the initial sum */ 11275 sum = (sum & 0xffff) + (sum >> 16); 11276 11277 up = (uint16_t *)(((uchar_t *)ip6h) + 11278 hdr_length + UDP_CHECKSUM_OFFSET); 11279 11280 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11281 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11282 ire->ire_max_frag, mctl_present, sum); 11283 11284 /* Software checksum? 
*/ 11285 if (DB_CKSUMFLAGS(mp) == 0) { 11286 IP6_STAT(ipst, ip6_out_sw_cksum); 11287 IP6_STAT_UPDATE(ipst, 11288 ip6_udp_out_sw_cksum_bytes, 11289 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11290 hdr_length); 11291 } 11292 } else if (nexthdr == IPPROTO_ICMPV6) { 11293 uint16_t *up; 11294 icmp6_t *icmp6; 11295 11296 /* check for full IPv6+ICMPv6 header */ 11297 if ((mp->b_wptr - mp->b_rptr) < 11298 (hdr_length + ICMP6_MINLEN)) { 11299 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11300 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11301 " failed\n")); 11302 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11303 freemsg(first_mp); 11304 return; 11305 } 11306 ip6h = (ip6_t *)mp->b_rptr; 11307 } 11308 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11309 up = (uint16_t *)&ip6h->ip6_src; 11310 /* 11311 * icmp has placed length and routing 11312 * header adjustment in icmp6_cksum. 11313 */ 11314 sum = htons(IPPROTO_ICMPV6) + 11315 up[0] + up[1] + up[2] + up[3] + 11316 up[4] + up[5] + up[6] + up[7] + 11317 up[8] + up[9] + up[10] + up[11] + 11318 up[12] + up[13] + up[14] + up[15]; 11319 sum = (sum & 0xffff) + (sum >> 16); 11320 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11321 11322 /* Update output mib stats */ 11323 icmp_update_out_mib_v6(ill, icmp6); 11324 } else if (nexthdr == IPPROTO_SCTP) { 11325 sctp_hdr_t *sctph; 11326 11327 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11328 if (!pullupmsg(mp, hdr_length + 11329 sizeof (*sctph))) { 11330 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11331 " failed\n")); 11332 BUMP_MIB(ill->ill_ip_mib, 11333 ipIfStatsOutDiscards); 11334 freemsg(mp); 11335 return; 11336 } 11337 ip6h = (ip6_t *)mp->b_rptr; 11338 } 11339 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11340 sctph->sh_chksum = 0; 11341 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11342 } 11343 11344 cksum_done: 11345 /* 11346 * We force the insertion of a fragment header using the 11347 * IPH_FRAG_HDR flag in two cases: 11348 * - after reception of an ICMPv6 "packet too 
big" message 11349 * with a MTU < 1280 (cf. RFC 2460 section 5) 11350 * - for multirouted IPv6 packets, so that the receiver can 11351 * discard duplicates according to their fragment identifier 11352 * 11353 * Two flags modifed from the API can modify this behavior. 11354 * The first is IPV6_USE_MIN_MTU. With this API the user 11355 * can specify how to manage PMTUD for unicast and multicast. 11356 * 11357 * IPV6_DONTFRAG disallows fragmentation. 11358 */ 11359 max_frag = ire->ire_max_frag; 11360 switch (IP6I_USE_MIN_MTU_API(flags)) { 11361 case IPV6_USE_MIN_MTU_DEFAULT: 11362 case IPV6_USE_MIN_MTU_UNICAST: 11363 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11364 max_frag = IPV6_MIN_MTU; 11365 } 11366 break; 11367 11368 case IPV6_USE_MIN_MTU_NEVER: 11369 max_frag = IPV6_MIN_MTU; 11370 break; 11371 } 11372 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11373 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11374 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11375 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11376 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11377 return; 11378 } 11379 11380 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11381 (mp->b_cont ? 
msgdsize(mp) : 11382 mp->b_wptr - (uchar_t *)ip6h)) { 11383 ip0dbg(("Packet length mismatch: %d, %ld\n", 11384 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11385 msgdsize(mp))); 11386 freemsg(first_mp); 11387 return; 11388 } 11389 /* Do IPSEC processing first */ 11390 if (mctl_present) { 11391 if (attach_index != 0) 11392 ipsec_out_attach_if(io, attach_index); 11393 ipsec_out_process(q, first_mp, ire, ill_index); 11394 return; 11395 } 11396 ASSERT(mp->b_prev == NULL); 11397 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11398 ntohs(ip6h->ip6_plen) + 11399 IPV6_HDR_LEN, max_frag)); 11400 ASSERT(mp == first_mp); 11401 /* Initiate IPPF processing */ 11402 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11403 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11404 if (mp == NULL) { 11405 return; 11406 } 11407 } 11408 ip_wput_frag_v6(mp, ire, reachable, connp, 11409 caller, max_frag); 11410 return; 11411 } 11412 /* Do IPSEC processing first */ 11413 if (mctl_present) { 11414 int extra_len = ipsec_out_extra_length(first_mp); 11415 11416 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11417 max_frag) { 11418 /* 11419 * IPsec headers will push the packet over the 11420 * MTU limit. Issue an ICMPv6 Packet Too Big 11421 * message for this packet if the upper-layer 11422 * that issued this packet will be able to 11423 * react to the icmp_pkt2big_v6() that we'll 11424 * generate. 11425 */ 11426 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11427 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11428 return; 11429 } 11430 if (attach_index != 0) 11431 ipsec_out_attach_if(io, attach_index); 11432 ipsec_out_process(q, first_mp, ire, ill_index); 11433 return; 11434 } 11435 /* 11436 * XXX multicast: add ip_mforward_v6() here. 11437 * Check conn_dontroute 11438 */ 11439 #ifdef lint 11440 /* 11441 * XXX The only purpose of this statement is to avoid lint 11442 * errors. See the above "XXX multicast". When that gets 11443 * fixed, remove this whole #ifdef lint section. 
11444 */ 11445 ip3dbg(("multicast forward is %s.\n", 11446 (multicast_forward ? "TRUE" : "FALSE"))); 11447 #endif 11448 11449 UPDATE_OB_PKT_COUNT(ire); 11450 ire->ire_last_used_time = lbolt; 11451 ASSERT(mp == first_mp); 11452 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11453 } else { 11454 /* 11455 * DTrace this as ip:::send. A blocked packet will fire the 11456 * send probe, but not the receive probe. 11457 */ 11458 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11459 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11460 NULL, ip6_t *, ip6h, int, 1); 11461 DTRACE_PROBE4(ip6__loopback__out__start, 11462 ill_t *, NULL, ill_t *, ill, 11463 ip6_t *, ip6h, mblk_t *, first_mp); 11464 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11465 ipst->ips_ipv6firewall_loopback_out, 11466 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11467 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11468 if (first_mp != NULL) { 11469 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11470 zoneid); 11471 } 11472 } 11473 } 11474

/*
 * Outbound IPv6 fragmentation routine using MDT.
 *
 * Builds a single Multidata message that describes every fragment of the
 * datagram in mp and hands it to ire->ire_stq with one putnext().  The
 * per-fragment headers (write offset + unfragmentable part + fragment
 * header) are laid out back to back in one header buffer; the payload
 * spans recorded for each fragment point into mp's own message blocks.
 *
 *	mp			datagram to fragment; must be M_DATA and its
 *				first block must extend past the
 *				unfragmentable part (both are ASSERTed)
 *	ire			route entry supplying the send-to queue, the
 *				fragment identifier and the packet counters
 *	max_chunk		fragmentable bytes placed in each fragment
 *	unfragmentable_len	number of leading bytes of mp copied into
 *				every fragment
 *	nexthdr			value stored in ip6f_nxt of each fragment
 *				header
 *	prev_nexthdr_offset	offset, from the start of the IPv6 header, of
 *				the next-header byte that is overwritten with
 *				IPPROTO_FRAGMENT
 *
 * If the header buffer, the Multidata or its address attribute cannot be
 * set up, mp is freed and ipIfStatsOutFragFails is bumped.  A failure of
 * mmd_addpldbuf(), or a non-ENOMEM failure of mmd_addpdesc(), is treated
 * as a logic error and panics.
 */
static void
ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk,
    size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset)
{
	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
	uint_t		pkts, wroff, hdr_chunk_len;
	/*
	 * Must be signed: the result of mmd_addpldbuf() is tested with
	 * "< 0" below, which could never be true for an unsigned variable.
	 */
	int		pbuf_idx;
	mblk_t		*hdr_mp, *md_mp = NULL;
	int		i1;
	multidata_t	*mmd;
	unsigned char	*hdr_ptr, *pld_ptr;
	ip_pdescinfo_t	pdi;
	uint32_t	ident;
	size_t		len;
	uint16_t	offset;
	queue_t		*stq = ire->ire_stq;
	ill_t		*ill = (ill_t *)stq->q_ptr;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(MBLKL(mp) > unfragmentable_len);

	/*
	 * Move read ptr past unfragmentable portion, we don't want this part
	 * of the data in our fragments.
	 */
	mp->b_rptr += unfragmentable_len;

	/* Calculate how many packets we will send out */
	i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp);
	pkts = (i1 + max_chunk - 1) / max_chunk;
	ASSERT(pkts > 1);

	/* Allocate a message block which will hold all the IP Headers. */
	wroff = ipst->ips_ip_wroff_extra;
	hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t);

	i1 = pkts * hdr_chunk_len;
	/*
	 * Create the header buffer, Multidata and destination address
	 * and SAP attribute that should be associated with it.
	 */
	if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL ||
	    ((hdr_mp->b_wptr += i1),
	    (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) ||
	    !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) {
		freemsg(mp);
		/*
		 * Once md_mp exists it is what gets freed; before that
		 * only the (possibly NULL) header buffer is released.
		 */
		if (md_mp != NULL)
			goto free_mmd;
		freemsg(hdr_mp);
		goto alloc_fail;
	}
	IP6_STAT(ipst, ip6_frag_mdt_allocd);

	/*
	 * Add a payload buffer to the Multidata; this operation must not
	 * fail, or otherwise our logic in this routine is broken.  There
	 * is no memory allocation done by the routine, so any returned
	 * failure simply tells us that we've done something wrong.
	 *
	 * A failure tells us that either we're adding the same payload
	 * buffer more than once, or we're trying to add more buffers than
	 * allowed.  None of the above cases should happen, and we panic
	 * because either there's horrible heap corruption, and/or
	 * programming mistake.
	 */
	if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) {
		goto pbuf_panic;
	}

	hdr_ptr = hdr_mp->b_rptr;
	pld_ptr = mp->b_rptr;

	pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF;

	ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1));

	/*
	 * len is the total length of the fragmentable data in this
	 * datagram.  For each fragment sent, we will decrement len
	 * by the amount of fragmentable data sent in that fragment
	 * until len reaches zero.
	 */
	len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN);

	offset = 0;
	/* From here on the offset is relative to the start of a slot. */
	prev_nexthdr_offset += wroff;

	while (len != 0) {
		size_t		mlen;
		ip6_t		*fip6h;
		ip6_frag_t	*fraghdr;
		int		error;

		ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr);
		mlen = MIN(len, max_chunk);
		len -= mlen;

		fip6h = (ip6_t *)(hdr_ptr + wroff);
		ASSERT(OK_32PTR(fip6h));
		bcopy(ip6h, fip6h, unfragmentable_len);
		hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;

		fip6h->ip6_plen = htons((uint16_t)(mlen +
		    unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t)));

		fraghdr = (ip6_frag_t *)((unsigned char *)fip6h +
		    unfragmentable_len);
		fraghdr->ip6f_nxt = nexthdr;
		fraghdr->ip6f_reserved = 0;
		fraghdr->ip6f_offlg = htons(offset) |
		    ((len != 0) ? IP6F_MORE_FRAG : 0);
		fraghdr->ip6f_ident = ident;

		/*
		 * Record offset and size of header and data of the next packet
		 * in the multidata message.
		 */
		PDESC_HDR_ADD(&pdi, hdr_ptr, wroff,
		    unfragmentable_len + sizeof (ip6_frag_t), 0);
		PDESC_PLD_INIT(&pdi);
		i1 = MIN(mp->b_wptr - pld_ptr, mlen);
		ASSERT(i1 > 0);
		PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1);
		if (i1 == mlen) {
			pld_ptr += mlen;
		} else {
			/*
			 * This fragment straddles two message blocks; the
			 * remainder comes from the start of the next one.
			 */
			i1 = mlen - i1;
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			ASSERT(MBLKL(mp) >= i1);
			/*
			 * Attach the next payload message block to the
			 * multidata message.
			 */
			if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0)
				goto pbuf_panic;
			PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1);
			pld_ptr = mp->b_rptr + i1;
		}

		if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error,
		    KM_NOSLEEP)) == NULL) {
			/*
			 * Any failure other than ENOMEM indicates that we
			 * have passed in invalid pdesc info or parameters
			 * to mmd_addpdesc, which must not happen.
			 *
			 * EINVAL is a result of failure on boundary checks
			 * against the pdesc info contents.  It should not
			 * happen, and we panic because either there's
			 * horrible heap corruption, and/or programming
			 * mistake.
			 */
			if (error != ENOMEM) {
				cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: "
				    "pdesc logic error detected for "
				    "mmd %p pinfo %p (%d)\n",
				    (void *)mmd, (void *)&pdi, error);
				/* NOTREACHED */
			}
			IP6_STAT(ipst, ip6_frag_mdt_addpdescfail);
			/* Free unattached payload message blocks as well */
			md_mp->b_cont = mp->b_cont;
			goto free_mmd;
		}

		/* Advance fragment offset. */
		offset += mlen;

		/* Advance to location for next header in the buffer. */
		hdr_ptr += hdr_chunk_len;

		/* Did we reach the next payload message block? */
		if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) {
			mp = mp->b_cont;
			/*
			 * Attach the next message block with payload
			 * data to the multidata message.
			 */
			if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0)
				goto pbuf_panic;
			pld_ptr = mp->b_rptr;
		}
	}

	ASSERT(hdr_mp->b_wptr == hdr_ptr);
	ASSERT(mp->b_wptr == pld_ptr);

	/* Update IP statistics */
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts);
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts);
	/*
	 * The ipv6 header len is accounted for in unfragmentable_len so
	 * when calculating the fragmentation overhead just add the frag
	 * header len.
	 */
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) +
	    pkts * (unfragmentable_len + sizeof (ip6_frag_t)));
	IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts);

	ire->ire_ob_pkt_count += pkts;
	if (ire->ire_ipif != NULL)
		atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts);

	ire->ire_last_used_time = lbolt;
	/* Send it down */
	putnext(stq, md_mp);
	return;

free_mmd:
	/* A Multidata message exists; freeing it is counted as a discard. */
	IP6_STAT(ipst, ip6_frag_mdt_discarded);
	freemsg(md_mp);
alloc_fail:
	IP6_STAT(ipst, ip6_frag_mdt_allocfail);
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
	return;

pbuf_panic:
	cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic "
	    "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp,
	    pbuf_idx);
	/* NOTREACHED */
}

11700 11701 /* 11702 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11703 * We have not optimized this in terms of number of mblks 11704 * allocated. For instance, for each fragment sent we always allocate a 11705 * mblk to hold the IPv6 header and fragment header. 11706 * 11707 * Assumes that all the extension headers are contained in the first mblk.
11708 * 11709 * The fragment header is inserted after an hop-by-hop options header 11710 * and after [an optional destinations header followed by] a routing header. 11711 * 11712 * NOTE : This function does not ire_refrele the ire passed in as 11713 * the argument. 11714 */ 11715 void 11716 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11717 int caller, int max_frag) 11718 { 11719 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11720 ip6_t *fip6h; 11721 mblk_t *hmp; 11722 mblk_t *hmp0; 11723 mblk_t *dmp; 11724 ip6_frag_t *fraghdr; 11725 size_t unfragmentable_len; 11726 size_t len; 11727 size_t mlen; 11728 size_t max_chunk; 11729 uint32_t ident; 11730 uint16_t off_flags; 11731 uint16_t offset = 0; 11732 ill_t *ill; 11733 uint8_t nexthdr; 11734 uint_t prev_nexthdr_offset; 11735 uint8_t *ptr; 11736 ip_stack_t *ipst = ire->ire_ipst; 11737 11738 ASSERT(ire->ire_type == IRE_CACHE); 11739 ill = (ill_t *)ire->ire_stq->q_ptr; 11740 11741 if (max_frag <= 0) { 11742 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11743 freemsg(mp); 11744 return; 11745 } 11746 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11747 11748 /* 11749 * Determine the length of the unfragmentable portion of this 11750 * datagram. This consists of the IPv6 header, a potential 11751 * hop-by-hop options header, a potential pre-routing-header 11752 * destination options header, and a potential routing header. 
11753 */ 11754 nexthdr = ip6h->ip6_nxt; 11755 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11756 ptr = (uint8_t *)&ip6h[1]; 11757 11758 if (nexthdr == IPPROTO_HOPOPTS) { 11759 ip6_hbh_t *hbh_hdr; 11760 uint_t hdr_len; 11761 11762 hbh_hdr = (ip6_hbh_t *)ptr; 11763 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11764 nexthdr = hbh_hdr->ip6h_nxt; 11765 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11766 - (uint8_t *)ip6h; 11767 ptr += hdr_len; 11768 } 11769 if (nexthdr == IPPROTO_DSTOPTS) { 11770 ip6_dest_t *dest_hdr; 11771 uint_t hdr_len; 11772 11773 dest_hdr = (ip6_dest_t *)ptr; 11774 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11775 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11776 nexthdr = dest_hdr->ip6d_nxt; 11777 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11778 - (uint8_t *)ip6h; 11779 ptr += hdr_len; 11780 } 11781 } 11782 if (nexthdr == IPPROTO_ROUTING) { 11783 ip6_rthdr_t *rthdr; 11784 uint_t hdr_len; 11785 11786 rthdr = (ip6_rthdr_t *)ptr; 11787 nexthdr = rthdr->ip6r_nxt; 11788 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11789 - (uint8_t *)ip6h; 11790 hdr_len = 8 * (rthdr->ip6r_len + 1); 11791 ptr += hdr_len; 11792 } 11793 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11794 11795 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11796 sizeof (ip6_frag_t)) & ~7; 11797 11798 /* Check if we can use MDT to send out the frags. */ 11799 ASSERT(!IRE_IS_LOCAL(ire)); 11800 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11801 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11802 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11803 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11804 nexthdr, prev_nexthdr_offset); 11805 return; 11806 } 11807 11808 /* 11809 * Allocate an mblk with enough room for the link-layer 11810 * header, the unfragmentable part of the datagram, and the 11811 * fragment header. 
This (or a copy) will be used as the 11812 * first mblk for each fragment we send. 11813 */ 11814 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11815 ipst->ips_ip_wroff_extra, BPRI_HI); 11816 if (hmp == NULL) { 11817 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11818 freemsg(mp); 11819 return; 11820 } 11821 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11822 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11823 11824 fip6h = (ip6_t *)hmp->b_rptr; 11825 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11826 11827 bcopy(ip6h, fip6h, unfragmentable_len); 11828 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11829 11830 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11831 11832 fraghdr->ip6f_nxt = nexthdr; 11833 fraghdr->ip6f_reserved = 0; 11834 fraghdr->ip6f_offlg = 0; 11835 fraghdr->ip6f_ident = htonl(ident); 11836 11837 /* 11838 * len is the total length of the fragmentable data in this 11839 * datagram. For each fragment sent, we will decrement len 11840 * by the amount of fragmentable data sent in that fragment 11841 * until len reaches zero. 11842 */ 11843 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11844 11845 /* 11846 * Move read ptr past unfragmentable portion, we don't want this part 11847 * of the data in our fragments. 
11848 */ 11849 mp->b_rptr += unfragmentable_len; 11850 11851 while (len != 0) { 11852 mlen = MIN(len, max_chunk); 11853 len -= mlen; 11854 if (len != 0) { 11855 /* Not last */ 11856 hmp0 = copyb(hmp); 11857 if (hmp0 == NULL) { 11858 freeb(hmp); 11859 freemsg(mp); 11860 BUMP_MIB(ill->ill_ip_mib, 11861 ipIfStatsOutFragFails); 11862 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11863 return; 11864 } 11865 off_flags = IP6F_MORE_FRAG; 11866 } else { 11867 /* Last fragment */ 11868 hmp0 = hmp; 11869 hmp = NULL; 11870 off_flags = 0; 11871 } 11872 fip6h = (ip6_t *)(hmp0->b_rptr); 11873 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11874 11875 fip6h->ip6_plen = htons((uint16_t)(mlen + 11876 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11877 /* 11878 * Note: Optimization alert. 11879 * In IPv6 (and IPv4) protocol header, Fragment Offset 11880 * ("offset") is 13 bits wide and in 8-octet units. 11881 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11882 * it occupies the most significant 13 bits. 11883 * (least significant 13 bits in IPv4). 11884 * We do not do any shifts here. Not shifting is same effect 11885 * as taking offset value in octet units, dividing by 8 and 11886 * then shifting 3 bits left to line it up in place in proper 11887 * place protocol header. 
11888 */ 11889 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11890 11891 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11892 /* mp has already been freed by ip_carve_mp() */ 11893 if (hmp != NULL) 11894 freeb(hmp); 11895 freeb(hmp0); 11896 ip1dbg(("ip_carve_mp: failed\n")); 11897 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11898 return; 11899 } 11900 hmp0->b_cont = dmp; 11901 /* Get the priority marking, if any */ 11902 hmp0->b_band = dmp->b_band; 11903 UPDATE_OB_PKT_COUNT(ire); 11904 ire->ire_last_used_time = lbolt; 11905 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11906 caller, NULL); 11907 reachable = 0; /* No need to redo state machine in loop */ 11908 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11909 offset += mlen; 11910 } 11911 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11912 } 11913 11914 /* 11915 * Determine if the ill and multicast aspects of that packets 11916 * "matches" the conn. 11917 */ 11918 boolean_t 11919 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11920 zoneid_t zoneid) 11921 { 11922 ill_t *in_ill; 11923 boolean_t wantpacket = B_TRUE; 11924 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11925 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11926 11927 /* 11928 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11929 * unicast and multicast reception to conn_incoming_ill. 11930 * conn_wantpacket_v6 is called both for unicast and 11931 * multicast. 11932 * 11933 * 1) The unicast copy of the packet can come anywhere in 11934 * the ill group if it is part of the group. Thus, we 11935 * need to check to see whether the ill group matches 11936 * if in_ill is part of a group. 11937 * 11938 * 2) ip_rput does not suppress duplicate multicast packets. 
11939 * If there are two interfaces in a ill group and we have 11940 * 2 applications (conns) joined a multicast group G on 11941 * both the interfaces, ilm_lookup_ill filter in ip_rput 11942 * will give us two packets because we join G on both the 11943 * interfaces rather than nominating just one interface 11944 * for receiving multicast like broadcast above. So, 11945 * we have to call ilg_lookup_ill to filter out duplicate 11946 * copies, if ill is part of a group, to supress duplicates. 11947 */ 11948 in_ill = connp->conn_incoming_ill; 11949 if (in_ill != NULL) { 11950 mutex_enter(&connp->conn_lock); 11951 in_ill = connp->conn_incoming_ill; 11952 mutex_enter(&ill->ill_lock); 11953 /* 11954 * No IPMP, and the packet did not arrive on conn_incoming_ill 11955 * OR, IPMP in use and the packet arrived on an IPMP group 11956 * different from the conn_incoming_ill's IPMP group. 11957 * Reject the packet. 11958 */ 11959 if ((in_ill->ill_group == NULL && in_ill != ill) || 11960 (in_ill->ill_group != NULL && 11961 in_ill->ill_group != ill->ill_group)) { 11962 wantpacket = B_FALSE; 11963 } 11964 mutex_exit(&ill->ill_lock); 11965 mutex_exit(&connp->conn_lock); 11966 if (!wantpacket) 11967 return (B_FALSE); 11968 } 11969 11970 if (connp->conn_multi_router) 11971 return (B_TRUE); 11972 11973 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11974 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11975 /* 11976 * Unicast case: we match the conn only if it's in the specified 11977 * zone. 11978 */ 11979 return (IPCL_ZONE_MATCH(connp, zoneid)); 11980 } 11981 11982 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11983 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11984 /* 11985 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11986 * disabled, therefore we don't dispatch the multicast packet to 11987 * the sending zone. 
11988 */ 11989 return (B_FALSE); 11990 } 11991 11992 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11993 zoneid != ALL_ZONES) { 11994 /* 11995 * Multicast packet on the loopback interface: we only match 11996 * conns who joined the group in the specified zone. 11997 */ 11998 return (B_FALSE); 11999 } 12000 12001 mutex_enter(&connp->conn_lock); 12002 wantpacket = 12003 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12004 mutex_exit(&connp->conn_lock); 12005 12006 return (wantpacket); 12007 } 12008 12009 12010 /* 12011 * Transmit a packet and update any NUD state based on the flags 12012 * XXX need to "recover" any ip6i_t when doing putq! 12013 * 12014 * NOTE : This function does not ire_refrele the ire passed in as the 12015 * argument. 12016 */ 12017 void 12018 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12019 int caller, ipsec_out_t *io) 12020 { 12021 mblk_t *mp1; 12022 nce_t *nce = ire->ire_nce; 12023 ill_t *ill; 12024 ill_t *out_ill; 12025 uint64_t delta; 12026 ip6_t *ip6h; 12027 queue_t *stq = ire->ire_stq; 12028 ire_t *ire1 = NULL; 12029 ire_t *save_ire = ire; 12030 boolean_t multirt_send = B_FALSE; 12031 mblk_t *next_mp = NULL; 12032 ip_stack_t *ipst = ire->ire_ipst; 12033 boolean_t fp_prepend = B_FALSE; 12034 uint32_t hlen; 12035 12036 ip6h = (ip6_t *)mp->b_rptr; 12037 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12038 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12039 ASSERT(nce != NULL); 12040 ASSERT(mp->b_datap->db_type == M_DATA); 12041 ASSERT(stq != NULL); 12042 12043 ill = ire_to_ill(ire); 12044 if (!ill) { 12045 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12046 freemsg(mp); 12047 return; 12048 } 12049 12050 /* 12051 * If a packet is to be sent out an interface that is a 6to4 12052 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12053 * destination, must be checked to have a 6to4 prefix 12054 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12055 * address configured on 
the sending interface. Otherwise, 12056 * the packet was delivered to this interface in error and the 12057 * packet must be dropped. 12058 */ 12059 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12060 ipif_t *ipif = ill->ill_ipif; 12061 12062 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12063 &ip6h->ip6_dst)) { 12064 if (ip_debug > 2) { 12065 /* ip1dbg */ 12066 pr_addr_dbg("ip_xmit_v6: attempting to " 12067 "send 6to4 addressed IPv6 " 12068 "destination (%s) out the wrong " 12069 "interface.\n", AF_INET6, 12070 &ip6h->ip6_dst); 12071 } 12072 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12073 freemsg(mp); 12074 return; 12075 } 12076 } 12077 12078 /* Flow-control check has been done in ip_wput_ire_v6 */ 12079 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12080 caller == IP_WSRV || canput(stq->q_next)) { 12081 uint32_t ill_index; 12082 12083 /* 12084 * In most cases, the emission loop below is entered only 12085 * once. Only in the case where the ire holds the 12086 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12087 * flagged ires in the bucket, and send the packet 12088 * through all crossed RTF_MULTIRT routes. 12089 */ 12090 if (ire->ire_flags & RTF_MULTIRT) { 12091 /* 12092 * Multirouting case. The bucket where ire is stored 12093 * probably holds other RTF_MULTIRT flagged ires 12094 * to the destination. In this call to ip_xmit_v6, 12095 * we attempt to send the packet through all 12096 * those ires. Thus, we first ensure that ire is the 12097 * first RTF_MULTIRT ire in the bucket, 12098 * before walking the ire list. 12099 */ 12100 ire_t *first_ire; 12101 irb_t *irb = ire->ire_bucket; 12102 ASSERT(irb != NULL); 12103 multirt_send = B_TRUE; 12104 12105 /* Make sure we do not omit any multiroute ire. 
*/ 12106 IRB_REFHOLD(irb); 12107 for (first_ire = irb->irb_ire; 12108 first_ire != NULL; 12109 first_ire = first_ire->ire_next) { 12110 if ((first_ire->ire_flags & RTF_MULTIRT) && 12111 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12112 &ire->ire_addr_v6)) && 12113 !(first_ire->ire_marks & 12114 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12115 break; 12116 } 12117 12118 if ((first_ire != NULL) && (first_ire != ire)) { 12119 IRE_REFHOLD(first_ire); 12120 /* ire will be released by the caller */ 12121 ire = first_ire; 12122 nce = ire->ire_nce; 12123 stq = ire->ire_stq; 12124 ill = ire_to_ill(ire); 12125 } 12126 IRB_REFRELE(irb); 12127 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12128 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12129 ILL_MDT_USABLE(ill)) { 12130 /* 12131 * This tcp connection was marked as MDT-capable, but 12132 * it has been turned off due changes in the interface. 12133 * Now that the interface support is back, turn it on 12134 * by notifying tcp. We don't directly modify tcp_mdt, 12135 * since we leave all the details to the tcp code that 12136 * knows better. 12137 */ 12138 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12139 12140 if (mdimp == NULL) { 12141 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12142 "connp %p (ENOMEM)\n", (void *)connp)); 12143 } else { 12144 CONN_INC_REF(connp); 12145 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 12146 tcp_input, connp, SQ_FILL, 12147 SQTAG_TCP_INPUT_MCTL); 12148 } 12149 } 12150 12151 do { 12152 mblk_t *mp_ip6h; 12153 12154 if (multirt_send) { 12155 irb_t *irb; 12156 /* 12157 * We are in a multiple send case, need to get 12158 * the next ire and make a duplicate of the 12159 * packet. ire1 holds here the next ire to 12160 * process in the bucket. If multirouting is 12161 * expected, any non-RTF_MULTIRT ire that has 12162 * the right destination address is ignored. 
12163 */ 12164 irb = ire->ire_bucket; 12165 ASSERT(irb != NULL); 12166 12167 IRB_REFHOLD(irb); 12168 for (ire1 = ire->ire_next; 12169 ire1 != NULL; 12170 ire1 = ire1->ire_next) { 12171 if (!(ire1->ire_flags & RTF_MULTIRT)) 12172 continue; 12173 if (!IN6_ARE_ADDR_EQUAL( 12174 &ire1->ire_addr_v6, 12175 &ire->ire_addr_v6)) 12176 continue; 12177 if (ire1->ire_marks & 12178 (IRE_MARK_CONDEMNED| 12179 IRE_MARK_HIDDEN)) 12180 continue; 12181 12182 /* Got one */ 12183 if (ire1 != save_ire) { 12184 IRE_REFHOLD(ire1); 12185 } 12186 break; 12187 } 12188 IRB_REFRELE(irb); 12189 12190 if (ire1 != NULL) { 12191 next_mp = copyb(mp); 12192 if ((next_mp == NULL) || 12193 ((mp->b_cont != NULL) && 12194 ((next_mp->b_cont = 12195 dupmsg(mp->b_cont)) == NULL))) { 12196 freemsg(next_mp); 12197 next_mp = NULL; 12198 ire_refrele(ire1); 12199 ire1 = NULL; 12200 } 12201 } 12202 12203 /* Last multiroute ire; don't loop anymore. */ 12204 if (ire1 == NULL) { 12205 multirt_send = B_FALSE; 12206 } 12207 } 12208 12209 ill_index = 12210 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12211 12212 /* Initiate IPPF processing */ 12213 if (IP6_OUT_IPP(flags, ipst)) { 12214 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12215 if (mp == NULL) { 12216 BUMP_MIB(ill->ill_ip_mib, 12217 ipIfStatsOutDiscards); 12218 if (next_mp != NULL) 12219 freemsg(next_mp); 12220 if (ire != save_ire) { 12221 ire_refrele(ire); 12222 } 12223 return; 12224 } 12225 ip6h = (ip6_t *)mp->b_rptr; 12226 } 12227 mp_ip6h = mp; 12228 12229 /* 12230 * Check for fastpath, we need to hold nce_lock to 12231 * prevent fastpath update from chaining nce_fp_mp. 
12232 */ 12233 12234 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12235 mutex_enter(&nce->nce_lock); 12236 if ((mp1 = nce->nce_fp_mp) != NULL) { 12237 uchar_t *rptr; 12238 12239 hlen = MBLKL(mp1); 12240 rptr = mp->b_rptr - hlen; 12241 /* 12242 * make sure there is room for the fastpath 12243 * datalink header 12244 */ 12245 if (rptr < mp->b_datap->db_base) { 12246 mp1 = copyb(mp1); 12247 mutex_exit(&nce->nce_lock); 12248 if (mp1 == NULL) { 12249 BUMP_MIB(ill->ill_ip_mib, 12250 ipIfStatsOutDiscards); 12251 freemsg(mp); 12252 if (next_mp != NULL) 12253 freemsg(next_mp); 12254 if (ire != save_ire) { 12255 ire_refrele(ire); 12256 } 12257 return; 12258 } 12259 mp1->b_cont = mp; 12260 12261 /* Get the priority marking, if any */ 12262 mp1->b_band = mp->b_band; 12263 mp = mp1; 12264 } else { 12265 mp->b_rptr = rptr; 12266 /* 12267 * fastpath - pre-pend datalink 12268 * header 12269 */ 12270 bcopy(mp1->b_rptr, rptr, hlen); 12271 mutex_exit(&nce->nce_lock); 12272 fp_prepend = B_TRUE; 12273 } 12274 } else { 12275 /* 12276 * Get the DL_UNITDATA_REQ. 12277 */ 12278 mp1 = nce->nce_res_mp; 12279 if (mp1 == NULL) { 12280 mutex_exit(&nce->nce_lock); 12281 ip1dbg(("ip_xmit_v6: No resolution " 12282 "block ire = %p\n", (void *)ire)); 12283 freemsg(mp); 12284 if (next_mp != NULL) 12285 freemsg(next_mp); 12286 if (ire != save_ire) { 12287 ire_refrele(ire); 12288 } 12289 return; 12290 } 12291 /* 12292 * Prepend the DL_UNITDATA_REQ. 
12293 */ 12294 mp1 = copyb(mp1); 12295 mutex_exit(&nce->nce_lock); 12296 if (mp1 == NULL) { 12297 BUMP_MIB(ill->ill_ip_mib, 12298 ipIfStatsOutDiscards); 12299 freemsg(mp); 12300 if (next_mp != NULL) 12301 freemsg(next_mp); 12302 if (ire != save_ire) { 12303 ire_refrele(ire); 12304 } 12305 return; 12306 } 12307 mp1->b_cont = mp; 12308 12309 /* Get the priority marking, if any */ 12310 mp1->b_band = mp->b_band; 12311 mp = mp1; 12312 } 12313 12314 out_ill = (ill_t *)stq->q_ptr; 12315 12316 DTRACE_PROBE4(ip6__physical__out__start, 12317 ill_t *, NULL, ill_t *, out_ill, 12318 ip6_t *, ip6h, mblk_t *, mp); 12319 12320 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12321 ipst->ips_ipv6firewall_physical_out, 12322 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12323 12324 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12325 12326 if (mp == NULL) { 12327 if (multirt_send) { 12328 ASSERT(ire1 != NULL); 12329 if (ire != save_ire) { 12330 ire_refrele(ire); 12331 } 12332 /* 12333 * Proceed with the next RTF_MULTIRT 12334 * ire, also set up the send-to queue 12335 * accordingly. 12336 */ 12337 ire = ire1; 12338 ire1 = NULL; 12339 stq = ire->ire_stq; 12340 nce = ire->ire_nce; 12341 ill = ire_to_ill(ire); 12342 mp = next_mp; 12343 next_mp = NULL; 12344 continue; 12345 } else { 12346 ASSERT(next_mp == NULL); 12347 ASSERT(ire1 == NULL); 12348 break; 12349 } 12350 } 12351 12352 if (ipst->ips_ipobs_enabled) { 12353 zoneid_t szone; 12354 12355 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12356 mp_ip6h, out_ill, ipst, ALL_ZONES); 12357 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12358 ALL_ZONES, out_ill, IPV6_VERSION, 12359 fp_prepend ? hlen : 0, ipst); 12360 } 12361 12362 /* 12363 * Update ire and MIB counters; for save_ire, this has 12364 * been done by the caller. 
12365 */ 12366 if (ire != save_ire) { 12367 UPDATE_OB_PKT_COUNT(ire); 12368 ire->ire_last_used_time = lbolt; 12369 12370 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12371 BUMP_MIB(ill->ill_ip_mib, 12372 ipIfStatsHCOutMcastPkts); 12373 UPDATE_MIB(ill->ill_ip_mib, 12374 ipIfStatsHCOutMcastOctets, 12375 ntohs(ip6h->ip6_plen) + 12376 IPV6_HDR_LEN); 12377 } 12378 } 12379 12380 /* 12381 * Send it down. XXX Do we want to flow control AH/ESP 12382 * packets that carry TCP payloads? We don't flow 12383 * control TCP packets, but we should also not 12384 * flow-control TCP packets that have been protected. 12385 * We don't have an easy way to find out if an AH/ESP 12386 * packet was originally TCP or not currently. 12387 */ 12388 if (io == NULL) { 12389 BUMP_MIB(ill->ill_ip_mib, 12390 ipIfStatsHCOutTransmits); 12391 UPDATE_MIB(ill->ill_ip_mib, 12392 ipIfStatsHCOutOctets, 12393 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12394 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12395 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12396 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12397 int, 0); 12398 12399 putnext(stq, mp); 12400 } else { 12401 /* 12402 * Safety Pup says: make sure this is 12403 * going to the right interface! 
12404 */ 12405 if (io->ipsec_out_capab_ill_index != 12406 ill_index) { 12407 /* IPsec kstats: bump lose counter */ 12408 freemsg(mp1); 12409 } else { 12410 BUMP_MIB(ill->ill_ip_mib, 12411 ipIfStatsHCOutTransmits); 12412 UPDATE_MIB(ill->ill_ip_mib, 12413 ipIfStatsHCOutOctets, 12414 ntohs(ip6h->ip6_plen) + 12415 IPV6_HDR_LEN); 12416 DTRACE_IP7(send, mblk_t *, mp, 12417 conn_t *, NULL, void_ip_t *, ip6h, 12418 __dtrace_ipsr_ill_t *, out_ill, 12419 ipha_t *, NULL, ip6_t *, ip6h, int, 12420 0); 12421 ipsec_hw_putnext(stq, mp); 12422 } 12423 } 12424 12425 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12426 if (ire != save_ire) { 12427 ire_refrele(ire); 12428 } 12429 if (multirt_send) { 12430 ASSERT(ire1 != NULL); 12431 /* 12432 * Proceed with the next RTF_MULTIRT 12433 * ire, also set up the send-to queue 12434 * accordingly. 12435 */ 12436 ire = ire1; 12437 ire1 = NULL; 12438 stq = ire->ire_stq; 12439 nce = ire->ire_nce; 12440 ill = ire_to_ill(ire); 12441 mp = next_mp; 12442 next_mp = NULL; 12443 continue; 12444 } 12445 ASSERT(next_mp == NULL); 12446 ASSERT(ire1 == NULL); 12447 return; 12448 } 12449 12450 ASSERT(nce->nce_state != ND_INCOMPLETE); 12451 12452 /* 12453 * Check for upper layer advice 12454 */ 12455 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12456 /* 12457 * It should be o.k. to check the state without 12458 * a lock here, at most we lose an advice. 
12459 */ 12460 nce->nce_last = TICK_TO_MSEC(lbolt64); 12461 if (nce->nce_state != ND_REACHABLE) { 12462 12463 mutex_enter(&nce->nce_lock); 12464 nce->nce_state = ND_REACHABLE; 12465 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12466 mutex_exit(&nce->nce_lock); 12467 (void) untimeout(nce->nce_timeout_id); 12468 if (ip_debug > 2) { 12469 /* ip1dbg */ 12470 pr_addr_dbg("ip_xmit_v6: state" 12471 " for %s changed to" 12472 " REACHABLE\n", AF_INET6, 12473 &ire->ire_addr_v6); 12474 } 12475 } 12476 if (ire != save_ire) { 12477 ire_refrele(ire); 12478 } 12479 if (multirt_send) { 12480 ASSERT(ire1 != NULL); 12481 /* 12482 * Proceed with the next RTF_MULTIRT 12483 * ire, also set up the send-to queue 12484 * accordingly. 12485 */ 12486 ire = ire1; 12487 ire1 = NULL; 12488 stq = ire->ire_stq; 12489 nce = ire->ire_nce; 12490 ill = ire_to_ill(ire); 12491 mp = next_mp; 12492 next_mp = NULL; 12493 continue; 12494 } 12495 ASSERT(next_mp == NULL); 12496 ASSERT(ire1 == NULL); 12497 return; 12498 } 12499 12500 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12501 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12502 " ill_reachable_time = %d \n", delta, 12503 ill->ill_reachable_time)); 12504 if (delta > (uint64_t)ill->ill_reachable_time) { 12505 nce = ire->ire_nce; 12506 mutex_enter(&nce->nce_lock); 12507 switch (nce->nce_state) { 12508 case ND_REACHABLE: 12509 case ND_STALE: 12510 /* 12511 * ND_REACHABLE is identical to 12512 * ND_STALE in this specific case. If 12513 * reachable time has expired for this 12514 * neighbor (delta is greater than 12515 * reachable time), conceptually, the 12516 * neighbor cache is no longer in 12517 * REACHABLE state, but already in 12518 * STALE state. So the correct 12519 * transition here is to ND_DELAY. 
12520 */ 12521 nce->nce_state = ND_DELAY; 12522 mutex_exit(&nce->nce_lock); 12523 NDP_RESTART_TIMER(nce, 12524 ipst->ips_delay_first_probe_time); 12525 if (ip_debug > 3) { 12526 /* ip2dbg */ 12527 pr_addr_dbg("ip_xmit_v6: state" 12528 " for %s changed to" 12529 " DELAY\n", AF_INET6, 12530 &ire->ire_addr_v6); 12531 } 12532 break; 12533 case ND_DELAY: 12534 case ND_PROBE: 12535 mutex_exit(&nce->nce_lock); 12536 /* Timers have already started */ 12537 break; 12538 case ND_UNREACHABLE: 12539 /* 12540 * ndp timer has detected that this nce 12541 * is unreachable and initiated deleting 12542 * this nce and all its associated IREs. 12543 * This is a race where we found the 12544 * ire before it was deleted and have 12545 * just sent out a packet using this 12546 * unreachable nce. 12547 */ 12548 mutex_exit(&nce->nce_lock); 12549 break; 12550 default: 12551 ASSERT(0); 12552 } 12553 } 12554 12555 if (multirt_send) { 12556 ASSERT(ire1 != NULL); 12557 /* 12558 * Proceed with the next RTF_MULTIRT ire, 12559 * Also set up the send-to queue accordingly. 12560 */ 12561 if (ire != save_ire) { 12562 ire_refrele(ire); 12563 } 12564 ire = ire1; 12565 ire1 = NULL; 12566 stq = ire->ire_stq; 12567 nce = ire->ire_nce; 12568 ill = ire_to_ill(ire); 12569 mp = next_mp; 12570 next_mp = NULL; 12571 } 12572 } while (multirt_send); 12573 /* 12574 * In the multirouting case, release the last ire used for 12575 * emission. save_ire will be released by the caller. 12576 */ 12577 if (ire != save_ire) { 12578 ire_refrele(ire); 12579 } 12580 } else { 12581 /* 12582 * Can't apply backpressure, just discard the packet. 12583 */ 12584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12585 freemsg(mp); 12586 return; 12587 } 12588 } 12589 12590 /* 12591 * pr_addr_dbg function provides the needed buffer space to call 12592 * inet_ntop() function's 3rd argument. This function should be 12593 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12594 * stack buffer space in it's own stack frame. 
This function uses 12595 * a buffer from it's own stack and prints the information. 12596 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12597 * 12598 * Note: This function can call inet_ntop() once. 12599 */ 12600 void 12601 pr_addr_dbg(char *fmt1, int af, const void *addr) 12602 { 12603 char buf[INET6_ADDRSTRLEN]; 12604 12605 if (fmt1 == NULL) { 12606 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12607 return; 12608 } 12609 12610 /* 12611 * This does not compare debug level and just prints 12612 * out. Thus it is the responsibility of the caller 12613 * to check the appropriate debug-level before calling 12614 * this function. 12615 */ 12616 if (ip_debug > 0) { 12617 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12618 } 12619 12620 12621 } 12622 12623 12624 /* 12625 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12626 * if needed and extension headers) that will be needed based on the 12627 * ip6_pkt_t structure passed by the caller. 12628 * 12629 * The returned length does not include the length of the upper level 12630 * protocol (ULP) header. 
12631 */ 12632 int 12633 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12634 { 12635 int len; 12636 12637 len = IPV6_HDR_LEN; 12638 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12639 len += sizeof (ip6i_t); 12640 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12641 ASSERT(ipp->ipp_hopoptslen != 0); 12642 len += ipp->ipp_hopoptslen; 12643 } 12644 if (ipp->ipp_fields & IPPF_RTHDR) { 12645 ASSERT(ipp->ipp_rthdrlen != 0); 12646 len += ipp->ipp_rthdrlen; 12647 } 12648 /* 12649 * En-route destination options 12650 * Only do them if there's a routing header as well 12651 */ 12652 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12653 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12654 ASSERT(ipp->ipp_rtdstoptslen != 0); 12655 len += ipp->ipp_rtdstoptslen; 12656 } 12657 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12658 ASSERT(ipp->ipp_dstoptslen != 0); 12659 len += ipp->ipp_dstoptslen; 12660 } 12661 return (len); 12662 } 12663 12664 /* 12665 * All-purpose routine to build a header chain of an IPv6 header 12666 * followed by any required extension headers and a proto header, 12667 * preceeded (where necessary) by an ip6i_t private header. 12668 * 12669 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12670 * will be filled in appropriately. 12671 * Thus the caller must fill in the rest of the IPv6 header, such as 12672 * traffic class/flowid, source address (if not set here), hoplimit (if not 12673 * set here) and destination address. 12674 * 12675 * The extension headers and ip6i_t header will all be fully filled in. 12676 */ 12677 void 12678 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12679 ip6_pkt_t *ipp, uint8_t protocol) 12680 { 12681 uint8_t *nxthdr_ptr; 12682 uint8_t *cp; 12683 ip6i_t *ip6i; 12684 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12685 12686 /* 12687 * If sending private ip6i_t header down (checksum info, nexthop, 12688 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12689 * then fill it in. (The checksum info will be filled in by icmp). 
12690 */ 12691 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12692 ip6i = (ip6i_t *)ip6h; 12693 ip6h = (ip6_t *)&ip6i[1]; 12694 12695 ip6i->ip6i_flags = 0; 12696 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12697 if (ipp->ipp_fields & IPPF_IFINDEX || 12698 ipp->ipp_fields & IPPF_SCOPE_ID) { 12699 ASSERT(ipp->ipp_ifindex != 0); 12700 ip6i->ip6i_flags |= IP6I_IFINDEX; 12701 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12702 } 12703 if (ipp->ipp_fields & IPPF_ADDR) { 12704 /* 12705 * Enable per-packet source address verification if 12706 * IPV6_PKTINFO specified the source address. 12707 * ip6_src is set in the transport's _wput function. 12708 */ 12709 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12710 &ipp->ipp_addr)); 12711 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12712 } 12713 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12714 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12715 /* 12716 * We need to set this flag so that IP doesn't 12717 * rewrite the IPv6 header's hoplimit with the 12718 * current default value. 12719 */ 12720 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12721 } 12722 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12723 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12724 &ipp->ipp_nexthop)); 12725 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12726 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12727 } 12728 /* 12729 * tell IP this is an ip6i_t private header 12730 */ 12731 ip6i->ip6i_nxt = IPPROTO_RAW; 12732 } 12733 /* Initialize IPv6 header */ 12734 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12735 if (ipp->ipp_fields & IPPF_TCLASS) { 12736 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12737 (ipp->ipp_tclass << 20); 12738 } 12739 if (ipp->ipp_fields & IPPF_ADDR) 12740 ip6h->ip6_src = ipp->ipp_addr; 12741 12742 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12743 cp = (uint8_t *)&ip6h[1]; 12744 /* 12745 * Here's where we have to start stringing together 12746 * any extension headers in the right order: 12747 * Hop-by-hop, destination, routing, and final destination opts. 
12748 */ 12749 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12750 /* Hop-by-hop options */ 12751 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12752 12753 *nxthdr_ptr = IPPROTO_HOPOPTS; 12754 nxthdr_ptr = &hbh->ip6h_nxt; 12755 12756 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12757 cp += ipp->ipp_hopoptslen; 12758 } 12759 /* 12760 * En-route destination options 12761 * Only do them if there's a routing header as well 12762 */ 12763 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12764 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12765 ip6_dest_t *dst = (ip6_dest_t *)cp; 12766 12767 *nxthdr_ptr = IPPROTO_DSTOPTS; 12768 nxthdr_ptr = &dst->ip6d_nxt; 12769 12770 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12771 cp += ipp->ipp_rtdstoptslen; 12772 } 12773 /* 12774 * Routing header next 12775 */ 12776 if (ipp->ipp_fields & IPPF_RTHDR) { 12777 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12778 12779 *nxthdr_ptr = IPPROTO_ROUTING; 12780 nxthdr_ptr = &rt->ip6r_nxt; 12781 12782 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12783 cp += ipp->ipp_rthdrlen; 12784 } 12785 /* 12786 * Do ultimate destination options 12787 */ 12788 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12789 ip6_dest_t *dest = (ip6_dest_t *)cp; 12790 12791 *nxthdr_ptr = IPPROTO_DSTOPTS; 12792 nxthdr_ptr = &dest->ip6d_nxt; 12793 12794 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12795 cp += ipp->ipp_dstoptslen; 12796 } 12797 /* 12798 * Now set the last header pointer to the proto passed in 12799 */ 12800 *nxthdr_ptr = protocol; 12801 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12802 } 12803 12804 /* 12805 * Return a pointer to the routing header extension header 12806 * in the IPv6 header(s) chain passed in. 
12807 * If none found, return NULL 12808 * Assumes that all extension headers are in same mblk as the v6 header 12809 */ 12810 ip6_rthdr_t * 12811 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12812 { 12813 ip6_dest_t *desthdr; 12814 ip6_frag_t *fraghdr; 12815 uint_t hdrlen; 12816 uint8_t nexthdr; 12817 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12818 12819 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12820 return ((ip6_rthdr_t *)ptr); 12821 12822 /* 12823 * The routing header will precede all extension headers 12824 * other than the hop-by-hop and destination options 12825 * extension headers, so if we see anything other than those, 12826 * we're done and didn't find it. 12827 * We could see a destination options header alone but no 12828 * routing header, in which case we'll return NULL as soon as 12829 * we see anything after that. 12830 * Hop-by-hop and destination option headers are identical, 12831 * so we can use either one we want as a template. 12832 */ 12833 nexthdr = ip6h->ip6_nxt; 12834 while (ptr < endptr) { 12835 /* Is there enough left for len + nexthdr? */ 12836 if (ptr + MIN_EHDR_LEN > endptr) 12837 return (NULL); 12838 12839 switch (nexthdr) { 12840 case IPPROTO_HOPOPTS: 12841 case IPPROTO_DSTOPTS: 12842 /* Assumes the headers are identical for hbh and dst */ 12843 desthdr = (ip6_dest_t *)ptr; 12844 hdrlen = 8 * (desthdr->ip6d_len + 1); 12845 nexthdr = desthdr->ip6d_nxt; 12846 break; 12847 12848 case IPPROTO_ROUTING: 12849 return ((ip6_rthdr_t *)ptr); 12850 12851 case IPPROTO_FRAGMENT: 12852 fraghdr = (ip6_frag_t *)ptr; 12853 hdrlen = sizeof (ip6_frag_t); 12854 nexthdr = fraghdr->ip6f_nxt; 12855 break; 12856 12857 default: 12858 return (NULL); 12859 } 12860 ptr += hdrlen; 12861 } 12862 return (NULL); 12863 } 12864 12865 /* 12866 * Called for source-routed packets originating on this node. 
12867 * Manipulates the original routing header by moving every entry up 12868 * one slot, placing the first entry in the v6 header's v6_dst field, 12869 * and placing the ultimate destination in the routing header's last 12870 * slot. 12871 * 12872 * Returns the checksum diference between the ultimate destination 12873 * (last hop in the routing header when the packet is sent) and 12874 * the first hop (ip6_dst when the packet is sent) 12875 */ 12876 /* ARGSUSED2 */ 12877 uint32_t 12878 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12879 { 12880 uint_t numaddr; 12881 uint_t i; 12882 in6_addr_t *addrptr; 12883 in6_addr_t tmp; 12884 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12885 uint32_t cksm; 12886 uint32_t addrsum = 0; 12887 uint16_t *ptr; 12888 12889 /* 12890 * Perform any processing needed for source routing. 12891 * We know that all extension headers will be in the same mblk 12892 * as the IPv6 header. 12893 */ 12894 12895 /* 12896 * If no segments left in header, or the header length field is zero, 12897 * don't move hop addresses around; 12898 * Checksum difference is zero. 12899 */ 12900 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12901 return (0); 12902 12903 ptr = (uint16_t *)&ip6h->ip6_dst; 12904 cksm = 0; 12905 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12906 cksm += ptr[i]; 12907 } 12908 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12909 12910 /* 12911 * Here's where the fun begins - we have to 12912 * move all addresses up one spot, take the 12913 * first hop and make it our first ip6_dst, 12914 * and place the ultimate destination in the 12915 * newly-opened last slot. 
12916 */ 12917 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12918 numaddr = rthdr->ip6r0_len / 2; 12919 tmp = *addrptr; 12920 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12921 *addrptr = addrptr[1]; 12922 } 12923 *addrptr = ip6h->ip6_dst; 12924 ip6h->ip6_dst = tmp; 12925 12926 /* 12927 * From the checksummed ultimate destination subtract the checksummed 12928 * current ip6_dst (the first hop address). Return that number. 12929 * (In the v4 case, the second part of this is done in each routine 12930 * that calls ip_massage_options(). We do it all in this one place 12931 * for v6). 12932 */ 12933 ptr = (uint16_t *)&ip6h->ip6_dst; 12934 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12935 addrsum += ptr[i]; 12936 } 12937 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12938 if ((int)cksm < 0) 12939 cksm--; 12940 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12941 12942 return (cksm); 12943 } 12944 12945 /* 12946 * Propagate a multicast group membership operation (join/leave) (*fn) on 12947 * all interfaces crossed by the related multirt routes. 12948 * The call is considered successful if the operation succeeds 12949 * on at least one interface. 12950 * The function is called if the destination address in the packet to send 12951 * is multirouted. 
12952 */ 12953 int 12954 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12955 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12956 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12957 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12958 { 12959 ire_t *ire_gw; 12960 irb_t *irb; 12961 int index, error = 0; 12962 opt_restart_t *or; 12963 ip_stack_t *ipst = ire->ire_ipst; 12964 12965 irb = ire->ire_bucket; 12966 ASSERT(irb != NULL); 12967 12968 ASSERT(DB_TYPE(first_mp) == M_CTL); 12969 or = (opt_restart_t *)first_mp->b_rptr; 12970 12971 IRB_REFHOLD(irb); 12972 for (; ire != NULL; ire = ire->ire_next) { 12973 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12974 continue; 12975 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12976 continue; 12977 12978 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12979 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12980 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12981 /* No resolver exists for the gateway; skip this ire. */ 12982 if (ire_gw == NULL) 12983 continue; 12984 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12985 /* 12986 * A resolver exists: we can get the interface on which we have 12987 * to apply the operation. 12988 */ 12989 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12990 first_mp); 12991 if (error == 0) 12992 or->or_private = CGTP_MCAST_SUCCESS; 12993 12994 if (ip_debug > 0) { 12995 ulong_t off; 12996 char *ksym; 12997 12998 ksym = kobj_getsymname((uintptr_t)fn, &off); 12999 ip2dbg(("ip_multirt_apply_membership_v6: " 13000 "called %s, multirt group 0x%08x via itf 0x%08x, " 13001 "error %d [success %u]\n", 13002 ksym ? 
ksym : "?", 13003 ntohl(V4_PART_OF_V6((*v6grp))), 13004 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13005 error, or->or_private)); 13006 } 13007 13008 ire_refrele(ire_gw); 13009 if (error == EINPROGRESS) { 13010 IRB_REFRELE(irb); 13011 return (error); 13012 } 13013 } 13014 IRB_REFRELE(irb); 13015 /* 13016 * Consider the call as successful if we succeeded on at least 13017 * one interface. Otherwise, return the last encountered error. 13018 */ 13019 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13020 } 13021 13022 void 13023 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13024 { 13025 kstat_t *ksp; 13026 13027 ip6_stat_t template = { 13028 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13029 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13030 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13031 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13032 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13033 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13034 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13035 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13036 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13037 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13038 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13039 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13040 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13041 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13042 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13043 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13044 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13045 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13046 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13047 }; 13048 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13049 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13050 KSTAT_FLAG_VIRTUAL, stackid); 13051 13052 if (ksp == NULL) 13053 return (NULL); 13054 13055 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13056 ksp->ks_data = (void *)ip6_statisticsp; 13057 ksp->ks_private = (void *)(uintptr_t)stackid; 13058 13059 kstat_install(ksp); 13060 return (ksp); 13061 } 13062 13063 void 13064 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13065 { 13066 if (ksp != NULL) { 13067 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13068 kstat_delete_netstack(ksp, stackid); 13069 } 13070 } 13071 13072 /* 13073 * The following two functions set and get the value for the 13074 * IPV6_SRC_PREFERENCES socket option. 13075 */ 13076 int 13077 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13078 { 13079 /* 13080 * We only support preferences that are covered by 13081 * IPV6_PREFER_SRC_MASK. 13082 */ 13083 if (prefs & ~IPV6_PREFER_SRC_MASK) 13084 return (EINVAL); 13085 13086 /* 13087 * Look for conflicting preferences or default preferences. If 13088 * both bits of a related pair are clear, the application wants the 13089 * system's default value for that pair. Both bits in a pair can't 13090 * be set. 
13091 */ 13092 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13093 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13094 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13095 IPV6_PREFER_SRC_MIPMASK) { 13096 return (EINVAL); 13097 } 13098 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13099 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13100 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13101 IPV6_PREFER_SRC_TMPMASK) { 13102 return (EINVAL); 13103 } 13104 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13105 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13106 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13107 IPV6_PREFER_SRC_CGAMASK) { 13108 return (EINVAL); 13109 } 13110 13111 connp->conn_src_preferences = prefs; 13112 return (0); 13113 } 13114 13115 size_t 13116 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13117 { 13118 *val = connp->conn_src_preferences; 13119 return (sizeof (connp->conn_src_preferences)); 13120 } 13121 13122 int 13123 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13124 { 13125 ill_t *ill; 13126 ire_t *ire; 13127 int error; 13128 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13129 13130 /* 13131 * Verify the source address and ifindex. Privileged users can use 13132 * any source address. For ancillary data the source address is 13133 * checked in ip_wput_v6. 13134 */ 13135 if (pkti->ipi6_ifindex != 0) { 13136 ASSERT(connp != NULL); 13137 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13138 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13139 if (ill == NULL) { 13140 /* 13141 * We just want to know if the interface exists, we 13142 * don't really care about the ill pointer itself. 
13143 */ 13144 if (error != EINPROGRESS) 13145 return (error); 13146 error = 0; /* Ensure we don't use it below */ 13147 } else { 13148 ill_refrele(ill); 13149 } 13150 } 13151 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13152 secpolicy_net_rawaccess(cr) != 0) { 13153 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13154 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13155 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13156 if (ire != NULL) 13157 ire_refrele(ire); 13158 else 13159 return (ENXIO); 13160 } 13161 return (0); 13162 } 13163 13164 /* 13165 * Get the size of the IP options (including the IP headers size) 13166 * without including the AH header's size. If till_ah is B_FALSE, 13167 * and if AH header is present, dest options beyond AH header will 13168 * also be included in the returned size. 13169 */ 13170 int 13171 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13172 { 13173 ip6_t *ip6h; 13174 uint8_t nexthdr; 13175 uint8_t *whereptr; 13176 ip6_hbh_t *hbhhdr; 13177 ip6_dest_t *dsthdr; 13178 ip6_rthdr_t *rthdr; 13179 int ehdrlen; 13180 int size; 13181 ah_t *ah; 13182 13183 ip6h = (ip6_t *)mp->b_rptr; 13184 size = IPV6_HDR_LEN; 13185 nexthdr = ip6h->ip6_nxt; 13186 whereptr = (uint8_t *)&ip6h[1]; 13187 for (;;) { 13188 /* Assume IP has already stripped it */ 13189 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13190 switch (nexthdr) { 13191 case IPPROTO_HOPOPTS: 13192 hbhhdr = (ip6_hbh_t *)whereptr; 13193 nexthdr = hbhhdr->ip6h_nxt; 13194 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13195 break; 13196 case IPPROTO_DSTOPTS: 13197 dsthdr = (ip6_dest_t *)whereptr; 13198 nexthdr = dsthdr->ip6d_nxt; 13199 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13200 break; 13201 case IPPROTO_ROUTING: 13202 rthdr = (ip6_rthdr_t *)whereptr; 13203 nexthdr = rthdr->ip6r_nxt; 13204 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13205 break; 13206 default : 13207 if (till_ah) { 13208 ASSERT(nexthdr == IPPROTO_AH); 13209 return (size); 13210 } 13211 /* 13212 * If we don't have a 
AH header to traverse, 13213 * return now. This happens normally for 13214 * outbound datagrams where we have not inserted 13215 * the AH header. 13216 */ 13217 if (nexthdr != IPPROTO_AH) { 13218 return (size); 13219 } 13220 13221 /* 13222 * We don't include the AH header's size 13223 * to be symmetrical with other cases where 13224 * we either don't have a AH header (outbound) 13225 * or peek into the AH header yet (inbound and 13226 * not pulled up yet). 13227 */ 13228 ah = (ah_t *)whereptr; 13229 nexthdr = ah->ah_nexthdr; 13230 ehdrlen = (ah->ah_length << 2) + 8; 13231 13232 if (nexthdr == IPPROTO_DSTOPTS) { 13233 if (whereptr + ehdrlen >= mp->b_wptr) { 13234 /* 13235 * The destination options header 13236 * is not part of the first mblk. 13237 */ 13238 whereptr = mp->b_cont->b_rptr; 13239 } else { 13240 whereptr += ehdrlen; 13241 } 13242 13243 dsthdr = (ip6_dest_t *)whereptr; 13244 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13245 size += ehdrlen; 13246 } 13247 return (size); 13248 } 13249 whereptr += ehdrlen; 13250 size += ehdrlen; 13251 } 13252 } 13253