1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/udp_impl.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue_impl.h> 102 #include <sys/squeue.h> 103 104 #include <sys/tsol/label.h> 105 #include 
<sys/tsol/tnet.h> 106 107 #include <rpc/pmap_prot.h> 108 109 /* Temporary; for CR 6451644 work-around */ 110 #include <sys/ethernet.h> 111 112 extern int ip_squeue_flag; 113 114 /* 115 * Naming conventions: 116 * These rules should be judiciously applied 117 * if there is a need to identify something as IPv6 versus IPv4 118 * IPv6 functions will end with _v6 in the ip module. 119 * IPv6 functions will end with _ipv6 in the transport modules. 120 * IPv6 macros: 121 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 122 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 123 * And then there are ..V4_PART_OF_V6. 124 * The intent is that macros in the ip module end with _V6. 125 * IPv6 global variables will start with ipv6_ 126 * IPv6 structures will start with ipv6 127 * IPv6 defined constants should start with IPV6_ 128 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 129 */ 130 131 /* 132 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 133 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 134 * from IANA. This mechanism will remain in effect until an official 135 * number is obtained. 
 */
uchar_t ip6opt_ls;

/*
 * Well-known IPv6 address constants.  Each initializer lists the four
 * 32-bit words of the address.  Where a word is not byte-symmetric, a
 * _BIG_ENDIAN and a little-endian variant are supplied so that the bytes
 * laid out in memory are the same on either kind of host.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* In-memory bytes: ff 00 00 00 followed by zeros, i.e. ff00:: */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* In-memory bytes: all zeros except a final 01, i.e. ::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* In-memory bytes: ff 02 ... 00 01, i.e. ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* In-memory bytes: ff 02 ... 00 02, i.e. ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/*
 * In-memory bytes: ff 02 ... 00 16, i.e. ff02::16.
 * NOTE(review): presumably the "all MLDv2-capable routers" group, going by
 * the variable name -- confirm against the MLD code that uses it.
 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/*
 * In-memory bytes: ff 02 00 00 | 00 00 00 00 | 00 00 00 01 | ff 00 00 00,
 * i.e. the prefix ff02::1:ff00:0 with the low three bytes left as zero.
 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * Evaluates true when mp is non-NULL and its first block holds at least
 * two IPv6 addresses' worth of data between b_rptr and b_wptr.
 * Note that mp is evaluated more than once.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/*
 * Status codes for an mblk sanity check.
 * NOTE(review): the consumer of these values is not in this part of the
 * file; the names suggest OK / bad header / bad length -- confirm at the
 * point of use.
 */
#define	IP6_MBLK_OK		0
#define	IP6_MBLK_HDR_ERR	1
#define	IP6_MBLK_LEN_ERR	2

/* Forward declarations for file-local functions. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t);
static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *,
    iulp_t *, ip_stack_t *);
static void	ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t,
    boolean_t, ip_stack_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t,
    const in6_addr_t *, uint16_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 *
 * The offsets below describe the layout that follows the fixed areq_t:
 * target address, then sender address, then the name.
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
IPV6_ADDR_LEN, /* sender addr length */ 230 6, /* xmit_count */ 231 1000, /* (re)xmit_interval in milliseconds */ 232 4 /* max # of requests to buffer */ 233 /* anything else filled in by the code */ 234 }; 235 236 /* 237 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 238 * The message has already been checksummed and if needed, 239 * a copy has been made to be sent any interested ICMP client (conn) 240 * Note that this is different than icmp_inbound() which does the fanout 241 * to conn's as well as local processing of the ICMP packets. 242 * 243 * All error messages are passed to the matching transport stream. 244 * 245 * Zones notes: 246 * The packet is only processed in the context of the specified zone: typically 247 * only this zone will reply to an echo request. This means that the caller must 248 * call icmp_inbound_v6() for each relevant zone. 249 */ 250 static void 251 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 252 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 253 { 254 icmp6_t *icmp6; 255 ip6_t *ip6h; 256 boolean_t interested; 257 ip6i_t *ip6i; 258 in6_addr_t origsrc; 259 ire_t *ire; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages == 0) { 288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 
NULL, ip6h, mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 358 * a response. 359 * If not, we copy it. 
360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 /* 426 * ICMP echo replies should go out on the same interface 427 * the request came on as probes used by in.mpathd for 428 * detecting NIC failures are ECHO packets. 
We turn-off load 429 * spreading by allocating a ip6i and setting ip6i_attach_if 430 * to B_TRUE which is handled both by ip_wput_v6 and 431 * ip_newroute_v6. If we don't turnoff load spreading, 432 * the packets might get dropped if there are no 433 * non-FAILED/INACTIVE interfaces for it to go out on and 434 * in.mpathd would wrongly detect a failure or mis-detect 435 * a NIC failure as a link failure. As load spreading can 436 * happen only if ill_group is not NULL, we do only for 437 * that case and this does not affect the normal case. 438 * 439 * We force this only on echo packets that came from on-link 440 * hosts. We restrict this to link-local addresses which 441 * is used by in.mpathd for probing. In the IPv6 case, 442 * default routes typically have an ire_ipif pointer and 443 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 444 * might work. As a default route out of this interface 445 * may not be present, enforcing this packet to go out in 446 * this case may not work. 447 */ 448 if (ill->ill_group != NULL && 449 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 450 /* 451 * If we are sending replies to ourselves, don't 452 * set ATTACH_IF as we may not be able to find 453 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 454 * causes ip_wput_v6 to look for an IRE_LOCAL on 455 * "ill" which it may not find and will try to 456 * create an IRE_CACHE for our local address. Once 457 * we do this, we will try to forward all packets 458 * meant to our LOCAL address. 
459 */ 460 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 461 NULL, ipst); 462 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 463 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 464 if (mp == NULL) { 465 BUMP_MIB(ill->ill_icmp6_mib, 466 ipv6IfIcmpInErrors); 467 if (ire != NULL) 468 ire_refrele(ire); 469 if (mctl_present) 470 freeb(first_mp); 471 return; 472 } else if (mctl_present) { 473 first_mp->b_cont = mp; 474 } else { 475 first_mp = mp; 476 } 477 ip6i = (ip6i_t *)mp->b_rptr; 478 ip6i->ip6i_flags = IP6I_ATTACH_IF; 479 ip6i->ip6i_ifindex = 480 ill->ill_phyint->phyint_ifindex; 481 } 482 if (ire != NULL) 483 ire_refrele(ire); 484 } 485 486 if (!mctl_present) { 487 /* 488 * This packet should go out the same way as it 489 * came in i.e in clear. To make sure that global 490 * policy will not be applied to this in ip_wput, 491 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 492 */ 493 ASSERT(first_mp == mp); 494 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 495 if (first_mp == NULL) { 496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 497 freemsg(mp); 498 return; 499 } 500 ii = (ipsec_in_t *)first_mp->b_rptr; 501 502 /* This is not a secure packet */ 503 ii->ipsec_in_secure = B_FALSE; 504 first_mp->b_cont = mp; 505 } 506 ii->ipsec_in_zoneid = zoneid; 507 ASSERT(zoneid != ALL_ZONES); 508 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 509 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 510 return; 511 } 512 put(WR(q), first_mp); 513 return; 514 515 case ICMP6_ECHO_REPLY: 516 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 517 break; 518 519 case ND_ROUTER_SOLICIT: 520 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 521 break; 522 523 case ND_ROUTER_ADVERT: 524 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 525 break; 526 527 case ND_NEIGHBOR_SOLICIT: 528 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 529 if (mctl_present) 530 freeb(first_mp); 531 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 532 ndp_input(ill, mp, dl_mp); 533 return; 534 535 case ND_NEIGHBOR_ADVERT: 536 BUMP_MIB(ill->ill_icmp6_mib, 537 ipv6IfIcmpInNeighborAdvertisements); 538 if (mctl_present) 539 freeb(first_mp); 540 /* XXX may wish to pass first_mp up to ndp_input someday. */ 541 ndp_input(ill, mp, dl_mp); 542 return; 543 544 case ND_REDIRECT: { 545 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 546 547 if (ipst->ips_ipv6_ignore_redirect) 548 break; 549 550 /* 551 * As there is no upper client to deliver, we don't 552 * need the first_mp any more. 553 */ 554 if (mctl_present) 555 freeb(first_mp); 556 if (!pullupmsg(mp, -1)) { 557 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 558 break; 559 } 560 icmp_redirect_v6(q, mp, ill); 561 return; 562 } 563 564 /* 565 * The next three icmp messages will be handled by MLD. 566 * Pass all valid MLD packets up to any process(es) 567 * listening on a raw ICMP socket. MLD messages are 568 * freed by mld_input function. 569 */ 570 case MLD_LISTENER_QUERY: 571 case MLD_LISTENER_REPORT: 572 case MLD_LISTENER_REDUCTION: 573 if (mctl_present) 574 freeb(first_mp); 575 mld_input(q, mp, ill); 576 return; 577 default: 578 break; 579 } 580 if (interested) { 581 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 582 mctl_present, zoneid); 583 } else { 584 freemsg(first_mp); 585 } 586 } 587 588 /* 589 * Process received IPv6 ICMP Packet too big. 590 * After updating any IRE it does the fanout to any matching transport streams. 591 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
592 */ 593 /* ARGSUSED */ 594 static void 595 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 596 boolean_t mctl_present, zoneid_t zoneid) 597 { 598 ip6_t *ip6h; 599 ip6_t *inner_ip6h; 600 icmp6_t *icmp6; 601 uint16_t hdr_length; 602 uint32_t mtu; 603 ire_t *ire, *first_ire; 604 mblk_t *first_mp; 605 ip_stack_t *ipst = ill->ill_ipst; 606 607 first_mp = mp; 608 if (mctl_present) 609 mp = first_mp->b_cont; 610 /* 611 * We must have exclusive use of the mblk to update the MTU 612 * in the packet. 613 * If not, we copy it. 614 * 615 * If there's an M_CTL present, we know that allocated first_mp 616 * earlier in this function, so we know first_mp has refcnt of one. 617 */ 618 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 619 if (mp->b_datap->db_ref > 1) { 620 mblk_t *mp1; 621 622 mp1 = copymsg(mp); 623 freemsg(mp); 624 if (mp1 == NULL) { 625 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 626 if (mctl_present) 627 freeb(first_mp); 628 return; 629 } 630 mp = mp1; 631 if (mctl_present) 632 first_mp->b_cont = mp; 633 else 634 first_mp = mp; 635 } 636 ip6h = (ip6_t *)mp->b_rptr; 637 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 638 hdr_length = ip_hdr_length_v6(mp, ip6h); 639 else 640 hdr_length = IPV6_HDR_LEN; 641 642 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 643 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 644 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 645 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 646 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 647 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 648 freemsg(first_mp); 649 return; 650 } 651 ip6h = (ip6_t *)mp->b_rptr; 652 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 653 inner_ip6h = (ip6_t *)&icmp6[1]; 654 } 655 656 /* 657 * For link local destinations matching simply on IRE type is not 658 * sufficient. Same link local addresses for different ILL's is 659 * possible. 
660 */ 661 662 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 663 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 664 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 665 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 666 667 if (first_ire == NULL) { 668 if (ip_debug > 2) { 669 /* ip1dbg */ 670 pr_addr_dbg("icmp_inbound_too_big_v6:" 671 "no ire for dst %s\n", AF_INET6, 672 &inner_ip6h->ip6_dst); 673 } 674 freemsg(first_mp); 675 return; 676 } 677 678 mtu = ntohl(icmp6->icmp6_mtu); 679 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 680 for (ire = first_ire; ire != NULL && 681 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 682 ire = ire->ire_next) { 683 mutex_enter(&ire->ire_lock); 684 if (mtu < IPV6_MIN_MTU) { 685 ip1dbg(("Received mtu less than IPv6 " 686 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 687 mtu = IPV6_MIN_MTU; 688 /* 689 * If an mtu less than IPv6 min mtu is received, 690 * we must include a fragment header in 691 * subsequent packets. 692 */ 693 ire->ire_frag_flag |= IPH_FRAG_HDR; 694 } 695 ip1dbg(("Received mtu from router: %d\n", mtu)); 696 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 697 /* Record the new max frag size for the ULP. */ 698 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 699 /* 700 * If we need a fragment header in every packet 701 * (above case or multirouting), make sure the 702 * ULP takes it into account when computing the 703 * payload size. 
704 */ 705 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 706 sizeof (ip6_frag_t)); 707 } else { 708 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 709 } 710 mutex_exit(&ire->ire_lock); 711 } 712 rw_exit(&first_ire->ire_bucket->irb_lock); 713 ire_refrele(first_ire); 714 } else { 715 irb_t *irb = NULL; 716 /* 717 * for non-link local destinations we match only on the IRE type 718 */ 719 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 720 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 721 ipst); 722 if (ire == NULL) { 723 if (ip_debug > 2) { 724 /* ip1dbg */ 725 pr_addr_dbg("icmp_inbound_too_big_v6:" 726 "no ire for dst %s\n", 727 AF_INET6, &inner_ip6h->ip6_dst); 728 } 729 freemsg(first_mp); 730 return; 731 } 732 irb = ire->ire_bucket; 733 ire_refrele(ire); 734 rw_enter(&irb->irb_lock, RW_READER); 735 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 736 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 737 &inner_ip6h->ip6_dst)) { 738 mtu = ntohl(icmp6->icmp6_mtu); 739 mutex_enter(&ire->ire_lock); 740 if (mtu < IPV6_MIN_MTU) { 741 ip1dbg(("Received mtu less than IPv6" 742 "min mtu %d: %d\n", 743 IPV6_MIN_MTU, mtu)); 744 mtu = IPV6_MIN_MTU; 745 /* 746 * If an mtu less than IPv6 min mtu is 747 * received, we must include a fragment 748 * header in subsequent packets. 749 */ 750 ire->ire_frag_flag |= IPH_FRAG_HDR; 751 } 752 753 ip1dbg(("Received mtu from router: %d\n", mtu)); 754 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 755 /* Record the new max frag size for the ULP. */ 756 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 757 /* 758 * If we need a fragment header in 759 * every packet (above case or 760 * multirouting), make sure the ULP 761 * takes it into account when computing 762 * the payload size. 
763 */ 764 icmp6->icmp6_mtu = 765 htonl(ire->ire_max_frag - 766 sizeof (ip6_frag_t)); 767 } else { 768 icmp6->icmp6_mtu = 769 htonl(ire->ire_max_frag); 770 } 771 mutex_exit(&ire->ire_lock); 772 } 773 } 774 rw_exit(&irb->irb_lock); 775 } 776 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 777 mctl_present, zoneid); 778 } 779 780 /* 781 * Fanout received ICMPv6 error packets to the transports. 782 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 783 */ 784 void 785 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 786 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 787 { 788 uint16_t *up; /* Pointer to ports in ULP header */ 789 uint32_t ports; /* reversed ports for fanout */ 790 ip6_t rip6h; /* With reversed addresses */ 791 uint16_t hdr_length; 792 uint8_t *nexthdrp; 793 uint8_t nexthdr; 794 mblk_t *first_mp; 795 ipsec_in_t *ii; 796 tcpha_t *tcpha; 797 conn_t *connp; 798 ip_stack_t *ipst = ill->ill_ipst; 799 800 first_mp = mp; 801 if (mctl_present) { 802 mp = first_mp->b_cont; 803 ASSERT(mp != NULL); 804 805 ii = (ipsec_in_t *)first_mp->b_rptr; 806 ASSERT(ii->ipsec_in_type == IPSEC_IN); 807 } else { 808 ii = NULL; 809 } 810 811 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 812 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 813 814 /* 815 * Need to pullup everything in order to use 816 * ip_hdr_length_nexthdr_v6() 817 */ 818 if (mp->b_cont != NULL) { 819 if (!pullupmsg(mp, -1)) { 820 ip1dbg(("icmp_inbound_error_fanout_v6: " 821 "pullupmsg failed\n")); 822 goto drop_pkt; 823 } 824 ip6h = (ip6_t *)mp->b_rptr; 825 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 826 } 827 828 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 829 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 830 goto drop_pkt; 831 832 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 833 goto drop_pkt; 834 nexthdr = *nexthdrp; 835 836 /* Set message type, must be done after pullups */ 837 
	mp->b_datap->db_type = M_CTL;

	/*
	 * Try to pass the ICMP message to clients who need it.
	 * Dispatch is on the upper-layer protocol of the packet in error.
	 * A "break" out of this switch lands on drop_pkt.
	 */
	switch (nexthdr) {
	case IPPROTO_UDP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * UDP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			break;
		}
		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IPCL_UDP_MATCH_V6
		 * and we only set the src and dst addresses and nexthdr.
		 */
		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		/* Swap the two 16-bit port fields of the embedded header. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];

		ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	case IPPROTO_TCP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the TCP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			break;
		}

		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and
		 * we only set the src and dst addresses and nexthdr.
		 *
		 * (As written, this case does not fill in rip6h; the embedded
		 * header is passed directly to
		 * ipcl_tcp_lookup_reversed_ipv6().)
		 */

		tcpha = (tcpha_t *)((char *)ip6h + hdr_length);
		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
		if (connp == NULL) {
			goto drop_pkt;
		}

		/* Hand the error to TCP on the connection's squeue. */
		SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp,
		    SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR);
		return;

	}
	case IPPROTO_SCTP:
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the SCTP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			break;
		}

		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		/* Swap the two 16-bit port fields, as in the UDP case. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];
		ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0,
		    mctl_present, IP6_NO_IPPOLICY, zoneid);
		return;
	case IPPROTO_ESP:
	case IPPROTO_AH: {
		int ipsec_rc;
		ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

		/*
		 * We need a IPSEC_IN in the front to fanout to AH/ESP.
		 * We will re-use the IPSEC_IN if it is already present as
		 * AH/ESP will not affect any fields in the IPSEC_IN for
		 * ICMP errors. If there is no IPSEC_IN, allocate a new
		 * one and attach it in the front.
		 */
		if (ii != NULL) {
			/*
			 * ip_fanout_proto_again converts the ICMP errors
			 * that come back from AH/ESP to M_DATA so that
			 * if it is non-AH/ESP and we do a pullupmsg in
			 * this function, it would work. Convert it back
			 * to M_CTL before we send up as this is a ICMP
			 * error. This could have been generated locally or
			 * by some router. Validate the inner IPSEC
			 * headers.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(ill != NULL);
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
			first_mp->b_cont->b_datap->db_type = M_CTL;
		} else {
			/*
			 * IPSEC_IN is not present. We attach a ipsec_in
			 * message and send up to IPSEC for validating
			 * and removing the IPSEC headers. Clear
			 * ipsec_in_secure so that when we return
			 * from IPSEC, we don't mistakenly think that this
			 * is a secure packet came from the network.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(first_mp == mp);
			first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
			ASSERT(ill != NULL);
			if (first_mp == NULL) {
				freemsg(mp);
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				return;
			}
			ii = (ipsec_in_t *)first_mp->b_rptr;

			/* This is not a secure packet */
			ii->ipsec_in_secure = B_FALSE;
			first_mp->b_cont = mp;
			mp->b_datap->db_type = M_CTL;
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
		}

		if (!ipsec_loaded(ipss)) {
			ip_proto_not_sup(q, first_mp, 0, zoneid, ipst);
			return;
		}

		if (nexthdr == IPPROTO_ESP)
			ipsec_rc = ipsecesp_icmp_error(first_mp);
		else
			ipsec_rc = ipsecah_icmp_error(first_mp);
		/*
		 * NOTE(review): on IPSEC_STATUS_FAILED the message is not
		 * touched again here, so the AH/ESP routine has presumably
		 * disposed of it -- confirm.
		 */
		if (ipsec_rc == IPSEC_STATUS_FAILED)
			return;

		ip_fanout_proto_again(first_mp, ill, ill, NULL);
		return;
	}
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/*
		 * The packet in error carries another IP packet.  Make sure
		 * the whole inner IPv4 or IPv6 header is present.
		 */
		if ((uint8_t *)ip6h + hdr_length +
		    (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) :
		    sizeof (ip6_t)) > mp->b_wptr) {
			goto drop_pkt;
		}

		/*
		 * IPv4-in-IPv6, or IPv6-in-IPv6 where the inner and outer
		 * addresses differ, is treated as a tunnel.  IPv6-in-IPv6
		 * with identical source and destination is the
		 * self-encapsulated case handled in the else branch.
		 */
		if (nexthdr == IPPROTO_ENCAP ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src,
		    &ip6h->ip6_src) ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst,
		    &ip6h->ip6_dst)) {
			/*
			 * For tunnels that have used IPsec protection,
			 * we need to adjust the MTU to take into account
			 * the IPsec overhead.
			 */
			if (ii != NULL)
				icmp6->icmp6_mtu = htonl(
				    ntohl(icmp6->icmp6_mtu) -
				    ipsec_in_extra_length(first_mp));
		} else {
			/*
			 * Self-encapsulated case. As in the ipv4 case,
			 * we need to strip the 2nd IP header. Since mp
			 * is already pulled-up, we can simply bcopy
			 * the 3rd header + data over the 2nd header.
			 */
			uint16_t unused_len;
			ip6_t *inner_ip6h = (ip6_t *)
			    ((uchar_t *)ip6h + hdr_length);

			/*
			 * Make sure we don't do recursion more than once.
			 */
			if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h,
			    &unused_len, &nexthdrp) ||
			    *nexthdrp == IPPROTO_IPV6) {
				goto drop_pkt;
			}

			/*
			 * We are about to modify the packet. Make a copy if
			 * someone else has a reference to it.
			 */
			if (DB_REF(mp) > 1) {
				mblk_t	*mp1;
				uint16_t icmp6_offset;

				mp1 = copymsg(mp);
				if (mp1 == NULL) {
					goto drop_pkt;
				}
				/*
				 * Remember where the ICMPv6 header sits so
				 * the pointers can be rebuilt in the copy.
				 */
				icmp6_offset = (uint16_t)
				    ((uchar_t *)icmp6 - mp->b_rptr);
				freemsg(mp);
				mp = mp1;

				icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset);
				ip6h = (ip6_t *)&icmp6[1];
				inner_ip6h = (ip6_t *)
				    ((uchar_t *)ip6h + hdr_length);

				if (mctl_present)
					first_mp->b_cont = mp;
				else
					first_mp = mp;
			}

			/*
			 * Need to set db_type back to M_DATA before
			 * refeeding mp into this function.
			 */
			DB_TYPE(mp) = M_DATA;

			/*
			 * Copy the 3rd header + remaining data on top
			 * of the 2nd header.
			 *
			 * NOTE(review): source and destination overlap here;
			 * this relies on bcopy() coping with overlapping
			 * regions -- confirm for the target platform.
			 */
			bcopy(inner_ip6h, ip6h,
			    mp->b_wptr - (uchar_t *)inner_ip6h);

			/*
			 * Subtract length of the 2nd header.
			 */
			mp->b_wptr -= hdr_length;

			/*
			 * Now recurse, and see what I _really_ should be
			 * doing here.
			 */
			icmp_inbound_error_fanout_v6(q, first_mp,
			    (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present,
			    zoneid);
			return;
		}
		/* FALLTHRU */
	default:
		/*
		 * The rip6h header is only used for the lookup and we
		 * only set the src and dst addresses and nexthdr.
		 */
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	/*
	 * Reached by the "break" statements above (transport header too
	 * short) as well as by the explicit gotos.
	 */
	/* NOTREACHED */
drop_pkt:
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
	freemsg(first_mp);
}

/*
 * Process received IPv6 ICMP Redirect messages.
1112 */ 1113 /* ARGSUSED */ 1114 static void 1115 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1116 { 1117 ip6_t *ip6h; 1118 uint16_t hdr_length; 1119 nd_redirect_t *rd; 1120 ire_t *ire; 1121 ire_t *prev_ire; 1122 ire_t *redir_ire; 1123 in6_addr_t *src, *dst, *gateway; 1124 nd_opt_hdr_t *opt; 1125 nce_t *nce; 1126 int nce_flags = 0; 1127 int err = 0; 1128 boolean_t redirect_to_router = B_FALSE; 1129 int len; 1130 int optlen; 1131 iulp_t ulp_info = { 0 }; 1132 ill_t *prev_ire_ill; 1133 ipif_t *ipif; 1134 ip_stack_t *ipst = ill->ill_ipst; 1135 1136 ip6h = (ip6_t *)mp->b_rptr; 1137 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1138 hdr_length = ip_hdr_length_v6(mp, ip6h); 1139 else 1140 hdr_length = IPV6_HDR_LEN; 1141 1142 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1143 len = mp->b_wptr - mp->b_rptr - hdr_length; 1144 src = &ip6h->ip6_src; 1145 dst = &rd->nd_rd_dst; 1146 gateway = &rd->nd_rd_target; 1147 1148 /* Verify if it is a valid redirect */ 1149 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1150 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1151 (rd->nd_rd_code != 0) || 1152 (len < sizeof (nd_redirect_t)) || 1153 (IN6_IS_ADDR_V4MAPPED(dst)) || 1154 (IN6_IS_ADDR_MULTICAST(dst))) { 1155 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1156 freemsg(mp); 1157 return; 1158 } 1159 1160 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1161 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1162 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1163 freemsg(mp); 1164 return; 1165 } 1166 1167 if (len > sizeof (nd_redirect_t)) { 1168 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1169 len - sizeof (nd_redirect_t))) { 1170 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1171 freemsg(mp); 1172 return; 1173 } 1174 } 1175 1176 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1177 redirect_to_router = B_TRUE; 1178 nce_flags |= NCE_F_ISROUTER; 1179 } 1180 1181 /* ipif will be refreleased afterwards */ 1182 ipif = ipif_get_next_ipif(NULL, ill); 1183 if (ipif == NULL) { 1184 freemsg(mp); 1185 return; 1186 
} 1187 1188 /* 1189 * Verify that the IP source address of the redirect is 1190 * the same as the current first-hop router for the specified 1191 * ICMP destination address. 1192 * Also, Make sure we had a route for the dest in question and 1193 * that route was pointing to the old gateway (the source of the 1194 * redirect packet.) 1195 */ 1196 1197 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1198 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1199 MATCH_IRE_DEFAULT, ipst); 1200 1201 /* 1202 * Check that 1203 * the redirect was not from ourselves 1204 * old gateway is still directly reachable 1205 */ 1206 if (prev_ire == NULL || 1207 prev_ire->ire_type == IRE_LOCAL) { 1208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1209 ipif_refrele(ipif); 1210 goto fail_redirect; 1211 } 1212 prev_ire_ill = ire_to_ill(prev_ire); 1213 ASSERT(prev_ire_ill != NULL); 1214 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1215 nce_flags |= NCE_F_NONUD; 1216 1217 /* 1218 * Should we use the old ULP info to create the new gateway? From 1219 * a user's perspective, we should inherit the info so that it 1220 * is a "smooth" transition. If we do not do that, then new 1221 * connections going thru the new gateway will have no route metrics, 1222 * which is counter-intuitive to user. From a network point of 1223 * view, this may or may not make sense even though the new gateway 1224 * is still directly connected to us so the route metrics should not 1225 * change much. 1226 * 1227 * But if the old ire_uinfo is not initialized, we do another 1228 * recursive lookup on the dest using the new gateway. There may 1229 * be a route to that. If so, use it to initialize the redirect 1230 * route. 1231 */ 1232 if (prev_ire->ire_uinfo.iulp_set) { 1233 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1234 } else if (redirect_to_router) { 1235 /* 1236 * Only do the following if the redirection is really to 1237 * a router. 
1238 */ 1239 ire_t *tmp_ire; 1240 ire_t *sire; 1241 1242 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1243 ALL_ZONES, 0, NULL, 1244 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1245 ipst); 1246 if (sire != NULL) { 1247 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1248 ASSERT(tmp_ire != NULL); 1249 ire_refrele(tmp_ire); 1250 ire_refrele(sire); 1251 } else if (tmp_ire != NULL) { 1252 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1253 sizeof (iulp_t)); 1254 ire_refrele(tmp_ire); 1255 } 1256 } 1257 1258 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1259 opt = (nd_opt_hdr_t *)&rd[1]; 1260 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1261 if (opt != NULL) { 1262 err = ndp_lookup_then_add_v6(ill, 1263 (uchar_t *)&opt[1], /* Link layer address */ 1264 gateway, 1265 &ipv6_all_ones, /* prefix mask */ 1266 &ipv6_all_zeros, /* Mapping mask */ 1267 0, 1268 nce_flags, 1269 ND_STALE, 1270 &nce); 1271 switch (err) { 1272 case 0: 1273 NCE_REFRELE(nce); 1274 break; 1275 case EEXIST: 1276 /* 1277 * Check to see if link layer address has changed and 1278 * process the nce_state accordingly. 1279 */ 1280 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1281 NCE_REFRELE(nce); 1282 break; 1283 default: 1284 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1285 err)); 1286 ipif_refrele(ipif); 1287 goto fail_redirect; 1288 } 1289 } 1290 if (redirect_to_router) { 1291 /* icmp_redirect_ok_v6() must have already verified this */ 1292 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1293 1294 /* 1295 * Create a Route Association. This will allow us to remember 1296 * a router told us to use the particular gateway. 
1297 */ 1298 ire = ire_create_v6( 1299 dst, 1300 &ipv6_all_ones, /* mask */ 1301 &prev_ire->ire_src_addr_v6, /* source addr */ 1302 gateway, /* gateway addr */ 1303 &prev_ire->ire_max_frag, /* max frag */ 1304 NULL, /* no src nce */ 1305 NULL, /* no rfq */ 1306 NULL, /* no stq */ 1307 IRE_HOST, 1308 prev_ire->ire_ipif, 1309 NULL, 1310 0, 1311 0, 1312 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1313 &ulp_info, 1314 NULL, 1315 NULL, 1316 ipst); 1317 } else { 1318 queue_t *stq; 1319 1320 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1321 ? ipif->ipif_rq : ipif->ipif_wq; 1322 1323 /* 1324 * Just create an on link entry, i.e. interface route. 1325 */ 1326 ire = ire_create_v6( 1327 dst, /* gateway == dst */ 1328 &ipv6_all_ones, /* mask */ 1329 &prev_ire->ire_src_addr_v6, /* source addr */ 1330 &ipv6_all_zeros, /* gateway addr */ 1331 &prev_ire->ire_max_frag, /* max frag */ 1332 NULL, /* no src nce */ 1333 NULL, /* ire rfq */ 1334 stq, /* ire stq */ 1335 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1336 prev_ire->ire_ipif, 1337 &ipv6_all_ones, 1338 0, 1339 0, 1340 (RTF_DYNAMIC | RTF_HOST), 1341 &ulp_info, 1342 NULL, 1343 NULL, 1344 ipst); 1345 } 1346 1347 /* Release reference from earlier ipif_get_next_ipif() */ 1348 ipif_refrele(ipif); 1349 1350 if (ire == NULL) 1351 goto fail_redirect; 1352 1353 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1354 1355 /* tell routing sockets that we received a redirect */ 1356 ip_rts_change_v6(RTM_REDIRECT, 1357 &rd->nd_rd_dst, 1358 &rd->nd_rd_target, 1359 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1360 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1361 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1362 1363 /* 1364 * Delete any existing IRE_HOST type ires for this destination. 1365 * This together with the added IRE has the effect of 1366 * modifying an existing redirect. 
1367 */ 1368 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1369 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1370 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1371 ipst); 1372 1373 ire_refrele(ire); /* Held in ire_add_v6 */ 1374 1375 if (redir_ire != NULL) { 1376 if (redir_ire->ire_flags & RTF_DYNAMIC) 1377 ire_delete(redir_ire); 1378 ire_refrele(redir_ire); 1379 } 1380 } 1381 1382 if (prev_ire->ire_type == IRE_CACHE) 1383 ire_delete(prev_ire); 1384 ire_refrele(prev_ire); 1385 prev_ire = NULL; 1386 1387 fail_redirect: 1388 if (prev_ire != NULL) 1389 ire_refrele(prev_ire); 1390 freemsg(mp); 1391 } 1392 1393 static ill_t * 1394 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1395 { 1396 ill_t *ill; 1397 1398 ASSERT(WR(q) == q); 1399 1400 if (q->q_next != NULL) { 1401 ill = (ill_t *)q->q_ptr; 1402 if (ILL_CAN_LOOKUP(ill)) 1403 ill_refhold(ill); 1404 else 1405 ill = NULL; 1406 } else { 1407 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1408 NULL, NULL, NULL, NULL, NULL, ipst); 1409 } 1410 if (ill == NULL) 1411 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1412 return (ill); 1413 } 1414 1415 /* 1416 * Assigns an appropriate source address to the packet. 1417 * If origdst is one of our IP addresses that use it as the source. 1418 * If the queue is an ill queue then select a source from that ill. 1419 * Otherwise pick a source based on a route lookup back to the origsrc. 1420 * 1421 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1422 */ 1423 static in6_addr_t * 1424 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1425 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1426 { 1427 ill_t *ill; 1428 ire_t *ire; 1429 ipif_t *ipif; 1430 1431 ASSERT(!(wq->q_flag & QREADR)); 1432 if (wq->q_next != NULL) { 1433 ill = (ill_t *)wq->q_ptr; 1434 } else { 1435 ill = NULL; 1436 } 1437 1438 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1439 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1440 ipst); 1441 if (ire != NULL) { 1442 /* Destined to one of our addresses */ 1443 *src = *origdst; 1444 ire_refrele(ire); 1445 return (src); 1446 } 1447 if (ire != NULL) { 1448 ire_refrele(ire); 1449 ire = NULL; 1450 } 1451 if (ill == NULL) { 1452 /* What is the route back to the original source? */ 1453 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1454 NULL, NULL, zoneid, NULL, 1455 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1456 if (ire == NULL) { 1457 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1458 return (NULL); 1459 } 1460 /* 1461 * Does not matter whether we use ire_stq or ire_ipif here. 1462 * Just pick an ill for ICMP replies. 1463 */ 1464 ASSERT(ire->ire_ipif != NULL); 1465 ill = ire->ire_ipif->ipif_ill; 1466 ire_refrele(ire); 1467 } 1468 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1469 IPV6_PREFER_SRC_DEFAULT, zoneid); 1470 if (ipif != NULL) { 1471 *src = ipif->ipif_v6src_addr; 1472 ipif_refrele(ipif); 1473 return (src); 1474 } 1475 /* 1476 * Unusual case - can't find a usable source address to reach the 1477 * original source. Use what in the route to the source. 
1478 */ 1479 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1480 NULL, NULL, zoneid, NULL, 1481 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1482 if (ire == NULL) { 1483 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1484 return (NULL); 1485 } 1486 ASSERT(ire != NULL); 1487 *src = ire->ire_src_addr_v6; 1488 ire_refrele(ire); 1489 return (src); 1490 } 1491 1492 /* 1493 * Build and ship an IPv6 ICMP message using the packet data in mp, 1494 * and the ICMP header pointed to by "stuff". (May be called as 1495 * writer.) 1496 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1497 * verify that an icmp error packet can be sent. 1498 * 1499 * If q is an ill write side queue (which is the case when packets 1500 * arrive from ip_rput) then ip_wput code will ensure that packets to 1501 * link-local destinations are sent out that ill. 1502 * 1503 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1504 * source address (see above function). 1505 */ 1506 static void 1507 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1508 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1509 ip_stack_t *ipst) 1510 { 1511 ip6_t *ip6h; 1512 in6_addr_t v6dst; 1513 size_t len_needed; 1514 size_t msg_len; 1515 mblk_t *mp1; 1516 icmp6_t *icmp6; 1517 ill_t *ill; 1518 in6_addr_t v6src; 1519 mblk_t *ipsec_mp; 1520 ipsec_out_t *io; 1521 1522 ill = ip_queue_to_ill_v6(q, ipst); 1523 if (ill == NULL) { 1524 freemsg(mp); 1525 return; 1526 } 1527 1528 if (mctl_present) { 1529 /* 1530 * If it is : 1531 * 1532 * 1) a IPSEC_OUT, then this is caused by outbound 1533 * datagram originating on this host. IPSEC processing 1534 * may or may not have been done. Refer to comments above 1535 * icmp_inbound_error_fanout for details. 1536 * 1537 * 2) a IPSEC_IN if we are generating a icmp_message 1538 * for an incoming datagram destined for us i.e called 1539 * from ip_fanout_send_icmp. 
1540 */ 1541 ipsec_info_t *in; 1542 1543 ipsec_mp = mp; 1544 mp = ipsec_mp->b_cont; 1545 1546 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1547 ip6h = (ip6_t *)mp->b_rptr; 1548 1549 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1550 in->ipsec_info_type == IPSEC_IN); 1551 1552 if (in->ipsec_info_type == IPSEC_IN) { 1553 /* 1554 * Convert the IPSEC_IN to IPSEC_OUT. 1555 */ 1556 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1557 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1558 ill_refrele(ill); 1559 return; 1560 } 1561 } else { 1562 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1563 io = (ipsec_out_t *)in; 1564 /* 1565 * Clear out ipsec_out_proc_begin, so we do a fresh 1566 * ire lookup. 1567 */ 1568 io->ipsec_out_proc_begin = B_FALSE; 1569 } 1570 } else { 1571 /* 1572 * This is in clear. The icmp message we are building 1573 * here should go out in clear. 1574 */ 1575 ipsec_in_t *ii; 1576 ASSERT(mp->b_datap->db_type == M_DATA); 1577 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1578 if (ipsec_mp == NULL) { 1579 freemsg(mp); 1580 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1581 ill_refrele(ill); 1582 return; 1583 } 1584 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1585 1586 /* This is not a secure packet */ 1587 ii->ipsec_in_secure = B_FALSE; 1588 /* 1589 * For trusted extensions using a shared IP address we can 1590 * send using any zoneid. 1591 */ 1592 if (zoneid == ALL_ZONES) 1593 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1594 else 1595 ii->ipsec_in_zoneid = zoneid; 1596 ipsec_mp->b_cont = mp; 1597 ip6h = (ip6_t *)mp->b_rptr; 1598 /* 1599 * Convert the IPSEC_IN to IPSEC_OUT. 
1600 */ 1601 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1602 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1603 ill_refrele(ill); 1604 return; 1605 } 1606 } 1607 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1608 1609 if (v6src_ptr != NULL) { 1610 v6src = *v6src_ptr; 1611 } else { 1612 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1613 &v6src, zoneid, ipst) == NULL) { 1614 freemsg(ipsec_mp); 1615 ill_refrele(ill); 1616 return; 1617 } 1618 } 1619 v6dst = ip6h->ip6_src; 1620 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1621 msg_len = msgdsize(mp); 1622 if (msg_len > len_needed) { 1623 if (!adjmsg(mp, len_needed - msg_len)) { 1624 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1625 freemsg(ipsec_mp); 1626 ill_refrele(ill); 1627 return; 1628 } 1629 msg_len = len_needed; 1630 } 1631 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1632 if (mp1 == NULL) { 1633 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1634 freemsg(ipsec_mp); 1635 ill_refrele(ill); 1636 return; 1637 } 1638 ill_refrele(ill); 1639 mp1->b_cont = mp; 1640 mp = mp1; 1641 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1642 io->ipsec_out_type == IPSEC_OUT); 1643 ipsec_mp->b_cont = mp; 1644 1645 /* 1646 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1647 * node generates be accepted in peace by all on-host destinations. 1648 * If we do NOT assume that all on-host destinations trust 1649 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1650 * (Look for ipsec_out_icmp_loopback). 
1651 */ 1652 io->ipsec_out_icmp_loopback = B_TRUE; 1653 1654 ip6h = (ip6_t *)mp->b_rptr; 1655 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1656 1657 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1658 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1659 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1660 ip6h->ip6_dst = v6dst; 1661 ip6h->ip6_src = v6src; 1662 msg_len += IPV6_HDR_LEN + len; 1663 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1664 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1665 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1666 } 1667 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1668 icmp6 = (icmp6_t *)&ip6h[1]; 1669 bcopy(stuff, (char *)icmp6, len); 1670 /* 1671 * Prepare for checksum by putting icmp length in the icmp 1672 * checksum field. The checksum is calculated in ip_wput_v6. 1673 */ 1674 icmp6->icmp6_cksum = ip6h->ip6_plen; 1675 if (icmp6->icmp6_type == ND_REDIRECT) { 1676 ip6h->ip6_hops = IPV6_MAX_HOPS; 1677 } 1678 /* Send to V6 writeside put routine */ 1679 put(q, ipsec_mp); 1680 } 1681 1682 /* 1683 * Update the output mib when ICMPv6 packets are sent. 
1684 */ 1685 static void 1686 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1687 { 1688 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1689 1690 switch (icmp6->icmp6_type) { 1691 case ICMP6_DST_UNREACH: 1692 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1693 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1694 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1695 break; 1696 1697 case ICMP6_TIME_EXCEEDED: 1698 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1699 break; 1700 1701 case ICMP6_PARAM_PROB: 1702 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1703 break; 1704 1705 case ICMP6_PACKET_TOO_BIG: 1706 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1707 break; 1708 1709 case ICMP6_ECHO_REQUEST: 1710 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1711 break; 1712 1713 case ICMP6_ECHO_REPLY: 1714 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1715 break; 1716 1717 case ND_ROUTER_SOLICIT: 1718 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1719 break; 1720 1721 case ND_ROUTER_ADVERT: 1722 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1723 break; 1724 1725 case ND_NEIGHBOR_SOLICIT: 1726 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1727 break; 1728 1729 case ND_NEIGHBOR_ADVERT: 1730 BUMP_MIB(ill->ill_icmp6_mib, 1731 ipv6IfIcmpOutNeighborAdvertisements); 1732 break; 1733 1734 case ND_REDIRECT: 1735 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1736 break; 1737 1738 case MLD_LISTENER_QUERY: 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1740 break; 1741 1742 case MLD_LISTENER_REPORT: 1743 case MLD_V2_LISTENER_REPORT: 1744 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1745 break; 1746 1747 case MLD_LISTENER_REDUCTION: 1748 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1749 break; 1750 } 1751 } 1752 1753 /* 1754 * Check if it is ok to send an ICMPv6 error packet in 1755 * response to the IP packet in 
mp. 1756 * Free the message and return null if no 1757 * ICMP error packet should be sent. 1758 */ 1759 static mblk_t * 1760 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1761 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1762 { 1763 ip6_t *ip6h; 1764 1765 if (!mp) 1766 return (NULL); 1767 1768 ip6h = (ip6_t *)mp->b_rptr; 1769 1770 /* Check if source address uniquely identifies the host */ 1771 1772 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1773 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1774 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1775 freemsg(mp); 1776 return (NULL); 1777 } 1778 1779 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1780 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1781 icmp6_t *icmp6; 1782 1783 if (mp->b_wptr - mp->b_rptr < len_needed) { 1784 if (!pullupmsg(mp, len_needed)) { 1785 ill_t *ill; 1786 1787 ill = ip_queue_to_ill_v6(q, ipst); 1788 if (ill == NULL) { 1789 BUMP_MIB(&ipst->ips_icmp6_mib, 1790 ipv6IfIcmpInErrors); 1791 } else { 1792 BUMP_MIB(ill->ill_icmp6_mib, 1793 ipv6IfIcmpInErrors); 1794 ill_refrele(ill); 1795 } 1796 freemsg(mp); 1797 return (NULL); 1798 } 1799 ip6h = (ip6_t *)mp->b_rptr; 1800 } 1801 icmp6 = (icmp6_t *)&ip6h[1]; 1802 /* Explicitly do not generate errors in response to redirects */ 1803 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1804 icmp6->icmp6_type == ND_REDIRECT) { 1805 freemsg(mp); 1806 return (NULL); 1807 } 1808 } 1809 /* 1810 * Check that the destination is not multicast and that the packet 1811 * was not sent on link layer broadcast or multicast. (Exception 1812 * is Packet too big message as per the draft - when mcast_ok is set.) 1813 */ 1814 if (!mcast_ok && 1815 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1816 freemsg(mp); 1817 return (NULL); 1818 } 1819 if (icmp_err_rate_limit(ipst)) { 1820 /* 1821 * Only send ICMP error packets every so often. 1822 * This should be done on a per port/source basis, 1823 * but for now this will suffice. 
1824 */ 1825 freemsg(mp); 1826 return (NULL); 1827 } 1828 return (mp); 1829 } 1830 1831 /* 1832 * Generate an ICMPv6 redirect message. 1833 * Include target link layer address option if it exits. 1834 * Always include redirect header. 1835 */ 1836 static void 1837 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1838 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1839 { 1840 nd_redirect_t *rd; 1841 nd_opt_rd_hdr_t *rdh; 1842 uchar_t *buf; 1843 nce_t *nce = NULL; 1844 nd_opt_hdr_t *opt; 1845 int len; 1846 int ll_opt_len = 0; 1847 int max_redir_hdr_data_len; 1848 int pkt_len; 1849 in6_addr_t *srcp; 1850 ip_stack_t *ipst = ill->ill_ipst; 1851 1852 /* 1853 * We are called from ip_rput where we could 1854 * not have attached an IPSEC_IN. 1855 */ 1856 ASSERT(mp->b_datap->db_type == M_DATA); 1857 1858 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1859 if (mp == NULL) 1860 return; 1861 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1862 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1863 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1864 ill->ill_phys_addr_length + 7)/8 * 8; 1865 } 1866 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1867 ASSERT(len % 4 == 0); 1868 buf = kmem_alloc(len, KM_NOSLEEP); 1869 if (buf == NULL) { 1870 if (nce != NULL) 1871 NCE_REFRELE(nce); 1872 freemsg(mp); 1873 return; 1874 } 1875 1876 rd = (nd_redirect_t *)buf; 1877 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1878 rd->nd_rd_code = 0; 1879 rd->nd_rd_reserved = 0; 1880 rd->nd_rd_target = *targetp; 1881 rd->nd_rd_dst = *dest; 1882 1883 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1884 if (nce != NULL && ll_opt_len != 0) { 1885 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1886 opt->nd_opt_len = ll_opt_len/8; 1887 bcopy((char *)nce->nce_res_mp->b_rptr + 1888 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1889 ill->ill_phys_addr_length); 1890 } 1891 if (nce != NULL) 1892 NCE_REFRELE(nce); 1893 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1894 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1895 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1896 max_redir_hdr_data_len = 1897 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1898 pkt_len = msgdsize(mp); 1899 /* Make sure mp is 8 byte aligned */ 1900 if (pkt_len > max_redir_hdr_data_len) { 1901 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1902 sizeof (nd_opt_rd_hdr_t))/8; 1903 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1904 } else { 1905 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1906 (void) adjmsg(mp, -(pkt_len % 8)); 1907 } 1908 rdh->nd_opt_rh_reserved1 = 0; 1909 rdh->nd_opt_rh_reserved2 = 0; 1910 /* ipif_v6src_addr contains the link-local source address */ 1911 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1912 if (ill->ill_group != NULL) { 1913 /* 1914 * The receiver of the redirect will verify whether it 1915 * had a route through us (srcp that we will use in 1916 * the redirect) or not. As we load spread even link-locals, 1917 * we don't know which source address the receiver of 1918 * redirect has in its route for communicating with us. 1919 * Thus we randomly choose a source here and finally we 1920 * should get to the right one and it will eventually 1921 * accept the redirect from us. We can't call 1922 * ip_lookup_scope_v6 because we don't have the right 1923 * link-local address here. Thus we randomly choose one. 
1924 */ 1925 int cnt = ill->ill_group->illgrp_ill_count; 1926 1927 ill = ill->ill_group->illgrp_ill; 1928 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1929 while (cnt--) 1930 ill = ill->ill_group_next; 1931 srcp = &ill->ill_ipif->ipif_v6src_addr; 1932 } else { 1933 srcp = &ill->ill_ipif->ipif_v6src_addr; 1934 } 1935 rw_exit(&ipst->ips_ill_g_lock); 1936 /* Redirects sent by router, and router is global zone */ 1937 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1938 kmem_free(buf, len); 1939 } 1940 1941 1942 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1943 void 1944 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1945 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1946 ip_stack_t *ipst) 1947 { 1948 icmp6_t icmp6; 1949 boolean_t mctl_present; 1950 mblk_t *first_mp; 1951 1952 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1953 1954 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1955 if (mp == NULL) { 1956 if (mctl_present) 1957 freeb(first_mp); 1958 return; 1959 } 1960 bzero(&icmp6, sizeof (icmp6_t)); 1961 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1962 icmp6.icmp6_code = code; 1963 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1964 zoneid, ipst); 1965 } 1966 1967 /* 1968 * Generate an ICMP unreachable message. 
1969 */ 1970 void 1971 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1972 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1973 ip_stack_t *ipst) 1974 { 1975 icmp6_t icmp6; 1976 boolean_t mctl_present; 1977 mblk_t *first_mp; 1978 1979 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1980 1981 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1982 if (mp == NULL) { 1983 if (mctl_present) 1984 freeb(first_mp); 1985 return; 1986 } 1987 bzero(&icmp6, sizeof (icmp6_t)); 1988 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1989 icmp6.icmp6_code = code; 1990 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1991 zoneid, ipst); 1992 } 1993 1994 /* 1995 * Generate an ICMP pkt too big message. 1996 */ 1997 static void 1998 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1999 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 2000 { 2001 icmp6_t icmp6; 2002 mblk_t *first_mp; 2003 boolean_t mctl_present; 2004 2005 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2006 2007 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2008 if (mp == NULL) { 2009 if (mctl_present) 2010 freeb(first_mp); 2011 return; 2012 } 2013 bzero(&icmp6, sizeof (icmp6_t)); 2014 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2015 icmp6.icmp6_code = 0; 2016 icmp6.icmp6_mtu = htonl(mtu); 2017 2018 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2019 zoneid, ipst); 2020 } 2021 2022 /* 2023 * Generate an ICMP parameter problem message. (May be called as writer.) 2024 * 'offset' is the offset from the beginning of the packet in error. 
2025 */ 2026 static void 2027 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2028 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2029 ip_stack_t *ipst) 2030 { 2031 icmp6_t icmp6; 2032 boolean_t mctl_present; 2033 mblk_t *first_mp; 2034 2035 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2036 2037 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2038 if (mp == NULL) { 2039 if (mctl_present) 2040 freeb(first_mp); 2041 return; 2042 } 2043 bzero((char *)&icmp6, sizeof (icmp6_t)); 2044 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2045 icmp6.icmp6_code = code; 2046 icmp6.icmp6_pptr = htonl(offset); 2047 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2048 zoneid, ipst); 2049 } 2050 2051 /* 2052 * This code will need to take into account the possibility of binding 2053 * to a link local address on a multi-homed host, in which case the 2054 * outgoing interface (from the conn) will need to be used when getting 2055 * an ire for the dst. Going through proper outgoing interface and 2056 * choosing the source address corresponding to the outgoing interface 2057 * is necessary when the destination address is a link-local address and 2058 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2059 * This can happen when active connection is setup; thus ipp pointer 2060 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2061 * pointer is passed as ipp pointer. 
2062 */ 2063 mblk_t * 2064 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2065 { 2066 ssize_t len; 2067 int protocol; 2068 struct T_bind_req *tbr; 2069 sin6_t *sin6; 2070 ipa6_conn_t *ac6; 2071 in6_addr_t *v6srcp; 2072 in6_addr_t *v6dstp; 2073 uint16_t lport; 2074 uint16_t fport; 2075 uchar_t *ucp; 2076 int error = 0; 2077 boolean_t local_bind; 2078 ipa6_conn_x_t *acx6; 2079 boolean_t verify_dst; 2080 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2081 2082 ASSERT(connp->conn_af_isv6); 2083 len = mp->b_wptr - mp->b_rptr; 2084 if (len < (sizeof (*tbr) + 1)) { 2085 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2086 "ip_bind_v6: bogus msg, len %ld", len); 2087 goto bad_addr; 2088 } 2089 /* Back up and extract the protocol identifier. */ 2090 mp->b_wptr--; 2091 tbr = (struct T_bind_req *)mp->b_rptr; 2092 /* Reset the message type in preparation for shipping it back. */ 2093 mp->b_datap->db_type = M_PCPROTO; 2094 2095 protocol = *mp->b_wptr & 0xFF; 2096 connp->conn_ulp = (uint8_t)protocol; 2097 2098 /* 2099 * Check for a zero length address. This is from a protocol that 2100 * wants to register to receive all packets of its type. 2101 */ 2102 if (tbr->ADDR_length == 0) { 2103 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2104 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2105 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2106 NULL) { 2107 /* 2108 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2109 * Do not allow others to bind to these. 2110 */ 2111 goto bad_addr; 2112 } 2113 2114 /* 2115 * 2116 * The udp module never sends down a zero-length address, 2117 * and allowing this on a labeled system will break MLP 2118 * functionality. 
2119 */ 2120 if (is_system_labeled() && protocol == IPPROTO_UDP) 2121 goto bad_addr; 2122 2123 /* Allow ipsec plumbing */ 2124 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2125 protocol != IPPROTO_ESP) 2126 goto bad_addr; 2127 2128 connp->conn_srcv6 = ipv6_all_zeros; 2129 ipcl_proto_insert_v6(connp, protocol); 2130 2131 tbr->PRIM_type = T_BIND_ACK; 2132 return (mp); 2133 } 2134 2135 /* Extract the address pointer from the message. */ 2136 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2137 tbr->ADDR_length); 2138 if (ucp == NULL) { 2139 ip1dbg(("ip_bind_v6: no address\n")); 2140 goto bad_addr; 2141 } 2142 if (!OK_32PTR(ucp)) { 2143 ip1dbg(("ip_bind_v6: unaligned address\n")); 2144 goto bad_addr; 2145 } 2146 2147 switch (tbr->ADDR_length) { 2148 default: 2149 ip1dbg(("ip_bind_v6: bad address length %d\n", 2150 (int)tbr->ADDR_length)); 2151 goto bad_addr; 2152 2153 case IPV6_ADDR_LEN: 2154 /* Verification of local address only */ 2155 v6srcp = (in6_addr_t *)ucp; 2156 lport = 0; 2157 local_bind = B_TRUE; 2158 break; 2159 2160 case sizeof (sin6_t): 2161 sin6 = (sin6_t *)ucp; 2162 v6srcp = &sin6->sin6_addr; 2163 lport = sin6->sin6_port; 2164 local_bind = B_TRUE; 2165 break; 2166 2167 case sizeof (ipa6_conn_t): 2168 /* 2169 * Verify that both the source and destination addresses 2170 * are valid. 2171 */ 2172 ac6 = (ipa6_conn_t *)ucp; 2173 v6srcp = &ac6->ac6_laddr; 2174 v6dstp = &ac6->ac6_faddr; 2175 fport = ac6->ac6_fport; 2176 /* For raw socket, the local port is not set. */ 2177 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2178 connp->conn_lport; 2179 local_bind = B_FALSE; 2180 /* Always verify destination reachability. */ 2181 verify_dst = B_TRUE; 2182 break; 2183 2184 case sizeof (ipa6_conn_x_t): 2185 /* 2186 * Verify that the source address is valid. 
2187 */ 2188 acx6 = (ipa6_conn_x_t *)ucp; 2189 ac6 = &acx6->ac6x_conn; 2190 v6srcp = &ac6->ac6_laddr; 2191 v6dstp = &ac6->ac6_faddr; 2192 fport = ac6->ac6_fport; 2193 lport = ac6->ac6_lport; 2194 local_bind = B_FALSE; 2195 /* 2196 * Client that passed ipa6_conn_x_t to us specifies whether to 2197 * verify destination reachability. 2198 */ 2199 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2200 break; 2201 } 2202 if (local_bind) { 2203 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2204 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2205 } else { 2206 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2207 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst); 2208 } 2209 2210 if (error == 0) { 2211 /* Send it home. */ 2212 mp->b_datap->db_type = M_PCPROTO; 2213 tbr->PRIM_type = T_BIND_ACK; 2214 return (mp); 2215 } 2216 2217 bad_addr: 2218 ASSERT(error != EINPROGRESS); 2219 if (error > 0) 2220 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2221 else 2222 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2223 return (mp); 2224 } 2225 2226 static void 2227 ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, 2228 boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) 2229 { 2230 /* Update conn_send and pktversion if v4/v6 changed */ 2231 if (version_changed) { 2232 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2233 } 2234 /* 2235 * Pass the IPSEC headers size in ire_ipsec_overhead. 2236 * We can't do this in ip_bind_insert_ire because the policy 2237 * may not have been inherited at that point in time and hence 2238 * conn_out_enforce_policy may not be set. 2239 */ 2240 if (ire_requested && connp->conn_out_enforce_policy && 2241 mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { 2242 ire_t *ire = (ire_t *)mp->b_rptr; 2243 ASSERT(MBLKL(mp) >= sizeof (ire_t)); 2244 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2245 } 2246 } 2247 2248 /* 2249 * Here address is verified to be a valid local address. 
2250 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2251 * address is also considered a valid local address. 2252 * In the case of a multicast address, however, the 2253 * upper protocol is expected to reset the src address 2254 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2255 * no packets are emitted with multicast address as 2256 * source address. 2257 * The addresses valid for bind are: 2258 * (1) - in6addr_any 2259 * (2) - IP address of an UP interface 2260 * (3) - IP address of a DOWN interface 2261 * (4) - a multicast address. In this case 2262 * the conn will only receive packets destined to 2263 * the specified multicast address. Note: the 2264 * application still has to issue an 2265 * IPV6_JOIN_GROUP socket option. 2266 * 2267 * In all the above cases, the bound address must be valid in the current zone. 2268 * When the address is loopback or multicast, there might be many matching IREs 2269 * so bind has to look up based on the zone. 2270 */ 2271 /* 2272 * Verify the local IP address. Does not change the conn_t except 2273 * conn_fully_bound and conn_policy_cached. 2274 */ 2275 static int 2276 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2277 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2278 { 2279 int error = 0; 2280 ire_t *src_ire = NULL; 2281 zoneid_t zoneid; 2282 mblk_t *mp = NULL; 2283 boolean_t ire_requested; 2284 boolean_t ipsec_policy_set; 2285 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2286 2287 if (mpp) 2288 mp = *mpp; 2289 2290 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2291 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2292 2293 /* 2294 * If it was previously connected, conn_fully_bound would have 2295 * been set. 
2296 */ 2297 connp->conn_fully_bound = B_FALSE; 2298 2299 zoneid = connp->conn_zoneid; 2300 2301 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2302 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2303 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2304 /* 2305 * If an address other than in6addr_any is requested, 2306 * we verify that it is a valid address for bind 2307 * Note: Following code is in if-else-if form for 2308 * readability compared to a condition check. 2309 */ 2310 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2311 /* LINTED - statement has no consequent */ 2312 if (IRE_IS_LOCAL(src_ire)) { 2313 /* 2314 * (2) Bind to address of local UP interface 2315 */ 2316 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2317 ipif_t *multi_ipif = NULL; 2318 ire_t *save_ire; 2319 /* 2320 * (4) bind to multicast address. 2321 * Fake out the IRE returned to upper 2322 * layer to be a broadcast IRE in 2323 * ip_bind_insert_ire_v6(). 2324 * Pass other information that matches 2325 * the ipif (e.g. the source address). 
2326 * conn_multicast_ill is only used for 2327 * IPv6 packets 2328 */ 2329 mutex_enter(&connp->conn_lock); 2330 if (connp->conn_multicast_ill != NULL) { 2331 (void) ipif_lookup_zoneid( 2332 connp->conn_multicast_ill, zoneid, 0, 2333 &multi_ipif); 2334 } else { 2335 /* 2336 * Look for default like 2337 * ip_wput_v6 2338 */ 2339 multi_ipif = ipif_lookup_group_v6( 2340 &ipv6_unspecified_group, zoneid, ipst); 2341 } 2342 mutex_exit(&connp->conn_lock); 2343 save_ire = src_ire; 2344 src_ire = NULL; 2345 if (multi_ipif == NULL || !ire_requested || 2346 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2347 src_ire = save_ire; 2348 error = EADDRNOTAVAIL; 2349 } else { 2350 ASSERT(src_ire != NULL); 2351 if (save_ire != NULL) 2352 ire_refrele(save_ire); 2353 } 2354 if (multi_ipif != NULL) 2355 ipif_refrele(multi_ipif); 2356 } else { 2357 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2358 /* 2359 * Not a valid address for bind 2360 */ 2361 error = EADDRNOTAVAIL; 2362 } 2363 } 2364 2365 if (error != 0) { 2366 /* Red Alert! Attempting to be a bogon! */ 2367 if (ip_debug > 2) { 2368 /* ip1dbg */ 2369 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2370 " address %s\n", AF_INET6, v6src); 2371 } 2372 goto bad_addr; 2373 } 2374 } 2375 2376 /* 2377 * Allow setting new policies. For example, disconnects come 2378 * down as ipa_t bind. As we would have set conn_policy_cached 2379 * to B_TRUE before, we should set it to B_FALSE, so that policy 2380 * can change after the disconnect. 2381 */ 2382 connp->conn_policy_cached = B_FALSE; 2383 2384 /* If not fanout_insert this was just an address verification */ 2385 if (fanout_insert) { 2386 /* 2387 * The addresses have been verified. Time to insert in 2388 * the correct fanout list. 
2389 */ 2390 connp->conn_srcv6 = *v6src; 2391 connp->conn_remv6 = ipv6_all_zeros; 2392 connp->conn_lport = lport; 2393 connp->conn_fport = 0; 2394 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2395 } 2396 if (error == 0) { 2397 if (ire_requested) { 2398 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2399 ipst)) { 2400 error = -1; 2401 goto bad_addr; 2402 } 2403 mp = *mpp; 2404 } else if (ipsec_policy_set) { 2405 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2406 error = -1; 2407 goto bad_addr; 2408 } 2409 } 2410 } 2411 bad_addr: 2412 if (error != 0) { 2413 if (connp->conn_anon_port) { 2414 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2415 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2416 B_FALSE); 2417 } 2418 connp->conn_mlp_type = mlptSingle; 2419 } 2420 2421 if (src_ire != NULL) 2422 ire_refrele(src_ire); 2423 2424 if (ipsec_policy_set) { 2425 ASSERT(mp != NULL); 2426 freeb(mp); 2427 /* 2428 * As of now assume that nothing else accompanies 2429 * IPSEC_POLICY_SET. 2430 */ 2431 *mpp = NULL; 2432 } 2433 2434 return (error); 2435 } 2436 int 2437 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2438 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2439 { 2440 int error; 2441 boolean_t ire_requested; 2442 mblk_t *mp = NULL; 2443 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2444 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2445 2446 /* 2447 * Note that we allow connect to broadcast and multicast 2448 * address when ire_requested is set. Thus the ULP 2449 * has to check for IRE_BROADCAST and multicast. 
2450 */ 2451 if (mpp) 2452 mp = *mpp; 2453 ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2454 2455 ASSERT(connp->conn_af_isv6); 2456 connp->conn_ulp = protocol; 2457 2458 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2459 /* Bind to IPv4 address */ 2460 ipaddr_t v4src; 2461 2462 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2463 2464 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2465 fanout_insert); 2466 if (error != 0) 2467 goto bad_addr; 2468 connp->conn_pkt_isv6 = B_FALSE; 2469 } else { 2470 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2471 error = 0; 2472 goto bad_addr; 2473 } 2474 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2475 lport, fanout_insert); 2476 if (error != 0) 2477 goto bad_addr; 2478 connp->conn_pkt_isv6 = B_TRUE; 2479 } 2480 2481 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2482 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2483 return (0); 2484 2485 bad_addr: 2486 if (error < 0) 2487 error = -TBADADDR; 2488 return (error); 2489 } 2490 2491 /* 2492 * Verify that both the source and destination addresses 2493 * are valid. If verify_dst, then destination address must also be reachable, 2494 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2495 * It takes ip6_pkt_t * as one of the arguments to determine correct 2496 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2497 * destination address. Note that parameter ipp is only useful for TCP connect 2498 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2499 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2500 * 2501 */ 2502 int 2503 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2504 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2505 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2506 boolean_t verify_dst) 2507 { 2508 ire_t *src_ire; 2509 ire_t *dst_ire; 2510 int error = 0; 2511 ire_t *sire = NULL; 2512 ire_t *md_dst_ire = NULL; 2513 ill_t *md_ill = NULL; 2514 ill_t *dst_ill = NULL; 2515 ipif_t *src_ipif = NULL; 2516 zoneid_t zoneid; 2517 boolean_t ill_held = B_FALSE; 2518 mblk_t *mp = NULL; 2519 boolean_t ire_requested = B_FALSE; 2520 boolean_t ipsec_policy_set = B_FALSE; 2521 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2522 ts_label_t *tsl = NULL; 2523 2524 if (mpp) 2525 mp = *mpp; 2526 2527 if (mp != NULL) { 2528 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2529 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2530 tsl = MBLK_GETLABEL(mp); 2531 } 2532 2533 src_ire = dst_ire = NULL; 2534 /* 2535 * If we never got a disconnect before, clear it now. 2536 */ 2537 connp->conn_fully_bound = B_FALSE; 2538 2539 zoneid = connp->conn_zoneid; 2540 2541 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2542 ipif_t *ipif; 2543 2544 /* 2545 * Use an "emulated" IRE_BROADCAST to tell the transport it 2546 * is a multicast. 2547 * Pass other information that matches 2548 * the ipif (e.g. the source address). 
2549 * 2550 * conn_multicast_ill is only used for IPv6 packets 2551 */ 2552 mutex_enter(&connp->conn_lock); 2553 if (connp->conn_multicast_ill != NULL) { 2554 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2555 zoneid, 0, &ipif); 2556 } else { 2557 /* Look for default like ip_wput_v6 */ 2558 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2559 } 2560 mutex_exit(&connp->conn_lock); 2561 if (ipif == NULL || ire_requested || 2562 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2563 if (ipif != NULL) 2564 ipif_refrele(ipif); 2565 if (ip_debug > 2) { 2566 /* ip1dbg */ 2567 pr_addr_dbg("ip_bind_connected_v6: bad " 2568 "connected multicast %s\n", AF_INET6, 2569 v6dst); 2570 } 2571 error = ENETUNREACH; 2572 goto bad_addr; 2573 } 2574 if (ipif != NULL) 2575 ipif_refrele(ipif); 2576 } else { 2577 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2578 NULL, &sire, zoneid, tsl, 2579 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2580 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2581 ipst); 2582 /* 2583 * We also prevent ire's with src address INADDR_ANY to 2584 * be used, which are created temporarily for 2585 * sending out packets from endpoints that have 2586 * conn_unspec_src set. 2587 */ 2588 if (dst_ire == NULL || 2589 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2590 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2591 /* 2592 * When verifying destination reachability, we always 2593 * complain. 2594 * 2595 * When not verifying destination reachability but we 2596 * found an IRE, i.e. the destination is reachable, 2597 * then the other tests still apply and we complain. 
2598 */ 2599 if (verify_dst || (dst_ire != NULL)) { 2600 if (ip_debug > 2) { 2601 /* ip1dbg */ 2602 pr_addr_dbg("ip_bind_connected_v6: bad" 2603 " connected dst %s\n", AF_INET6, 2604 v6dst); 2605 } 2606 if (dst_ire == NULL || 2607 !(dst_ire->ire_type & IRE_HOST)) { 2608 error = ENETUNREACH; 2609 } else { 2610 error = EHOSTUNREACH; 2611 } 2612 goto bad_addr; 2613 } 2614 } 2615 } 2616 2617 /* 2618 * We now know that routing will allow us to reach the destination. 2619 * Check whether Trusted Solaris policy allows communication with this 2620 * host, and pretend that the destination is unreachable if not. 2621 * 2622 * This is never a problem for TCP, since that transport is known to 2623 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2624 * handling. If the remote is unreachable, it will be detected at that 2625 * point, so there's no reason to check it here. 2626 * 2627 * Note that for sendto (and other datagram-oriented friends), this 2628 * check is done as part of the data path label computation instead. 2629 * The check here is just to make non-TCP connect() report the right 2630 * error. 2631 */ 2632 if (dst_ire != NULL && is_system_labeled() && 2633 !IPCL_IS_TCP(connp) && 2634 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), 2635 v6dst, NULL, connp->conn_mac_exempt, ipst) != 0) { 2636 error = EHOSTUNREACH; 2637 if (ip_debug > 2) { 2638 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2639 AF_INET6, v6dst); 2640 } 2641 goto bad_addr; 2642 } 2643 2644 /* 2645 * If the app does a connect(), it means that it will most likely 2646 * send more than 1 packet to the destination. It makes sense 2647 * to clear the temporary flag. 
2648 */ 2649 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2650 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2651 irb_t *irb = dst_ire->ire_bucket; 2652 2653 rw_enter(&irb->irb_lock, RW_WRITER); 2654 /* 2655 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2656 * the lock in order to guarantee irb_tmp_ire_cnt. 2657 */ 2658 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2659 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2660 irb->irb_tmp_ire_cnt--; 2661 } 2662 rw_exit(&irb->irb_lock); 2663 } 2664 2665 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2666 2667 /* 2668 * See if we should notify ULP about MDT; we do this whether or not 2669 * ire_requested is TRUE, in order to handle active connects; MDT 2670 * eligibility tests for passive connects are handled separately 2671 * through tcp_adapt_ire(). We do this before the source address 2672 * selection, because dst_ire may change after a call to 2673 * ipif_select_source_v6(). This is a best-effort check, as the 2674 * packet for this connection may not actually go through 2675 * dst_ire->ire_stq, and the exact IRE can only be known after 2676 * calling ip_newroute_v6(). This is why we further check on the 2677 * IRE during Multidata packet transmission in tcp_multisend(). 
2678 */ 2679 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2680 dst_ire != NULL && 2681 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2682 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2683 ILL_MDT_CAPABLE(md_ill)) { 2684 md_dst_ire = dst_ire; 2685 IRE_REFHOLD(md_dst_ire); 2686 } 2687 2688 if (dst_ire != NULL && 2689 dst_ire->ire_type == IRE_LOCAL && 2690 dst_ire->ire_zoneid != zoneid && 2691 dst_ire->ire_zoneid != ALL_ZONES) { 2692 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2693 zoneid, 0, NULL, 2694 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2695 MATCH_IRE_RJ_BHOLE, ipst); 2696 if (src_ire == NULL) { 2697 error = EHOSTUNREACH; 2698 goto bad_addr; 2699 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2700 if (!(src_ire->ire_type & IRE_HOST)) 2701 error = ENETUNREACH; 2702 else 2703 error = EHOSTUNREACH; 2704 goto bad_addr; 2705 } 2706 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2707 src_ipif = src_ire->ire_ipif; 2708 ipif_refhold(src_ipif); 2709 *v6src = src_ipif->ipif_v6lcl_addr; 2710 } 2711 ire_refrele(src_ire); 2712 src_ire = NULL; 2713 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2714 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2715 *v6src = sire->ire_src_addr_v6; 2716 ire_refrele(dst_ire); 2717 dst_ire = sire; 2718 sire = NULL; 2719 } else if (dst_ire->ire_type == IRE_CACHE && 2720 (dst_ire->ire_flags & RTF_SETSRC)) { 2721 ASSERT(dst_ire->ire_zoneid == zoneid || 2722 dst_ire->ire_zoneid == ALL_ZONES); 2723 *v6src = dst_ire->ire_src_addr_v6; 2724 } else { 2725 /* 2726 * Pick a source address so that a proper inbound load 2727 * spreading would happen. Use dst_ill specified by the 2728 * app. when socket option or scopeid is set. 
2729 */ 2730 int err; 2731 2732 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2733 uint_t if_index; 2734 2735 /* 2736 * Scope id or IPV6_PKTINFO 2737 */ 2738 2739 if_index = ipp->ipp_ifindex; 2740 dst_ill = ill_lookup_on_ifindex( 2741 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2742 ipst); 2743 if (dst_ill == NULL) { 2744 ip1dbg(("ip_bind_connected_v6:" 2745 " bad ifindex %d\n", if_index)); 2746 error = EADDRNOTAVAIL; 2747 goto bad_addr; 2748 } 2749 ill_held = B_TRUE; 2750 } else if (connp->conn_outgoing_ill != NULL) { 2751 /* 2752 * For IPV6_BOUND_IF socket option, 2753 * conn_outgoing_ill should be set 2754 * already in TCP or UDP/ICMP. 2755 */ 2756 dst_ill = conn_get_held_ill(connp, 2757 &connp->conn_outgoing_ill, &err); 2758 if (err == ILL_LOOKUP_FAILED) { 2759 ip1dbg(("ip_bind_connected_v6:" 2760 "no ill for bound_if\n")); 2761 error = EADDRNOTAVAIL; 2762 goto bad_addr; 2763 } 2764 ill_held = B_TRUE; 2765 } else if (dst_ire->ire_stq != NULL) { 2766 /* No need to hold ill here */ 2767 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2768 } else { 2769 /* No need to hold ill here */ 2770 dst_ill = dst_ire->ire_ipif->ipif_ill; 2771 } 2772 if (ip6_asp_can_lookup(ipst)) { 2773 src_ipif = ipif_select_source_v6(dst_ill, 2774 v6dst, RESTRICT_TO_NONE, 2775 connp->conn_src_preferences, zoneid); 2776 ip6_asp_table_refrele(ipst); 2777 if (src_ipif == NULL) { 2778 pr_addr_dbg("ip_bind_connected_v6: " 2779 "no usable source address for " 2780 "connection to %s\n", 2781 AF_INET6, v6dst); 2782 error = EADDRNOTAVAIL; 2783 goto bad_addr; 2784 } 2785 *v6src = src_ipif->ipif_v6lcl_addr; 2786 } else { 2787 error = EADDRNOTAVAIL; 2788 goto bad_addr; 2789 } 2790 } 2791 } 2792 2793 /* 2794 * We do ire_route_lookup_v6() here (and not an interface lookup) 2795 * as we assert that v6src should only come from an 2796 * UP interface for hard binding. 
2797 */ 2798 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2799 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2800 2801 /* src_ire must be a local|loopback */ 2802 if (!IRE_IS_LOCAL(src_ire)) { 2803 if (ip_debug > 2) { 2804 /* ip1dbg */ 2805 pr_addr_dbg("ip_bind_connected_v6: bad " 2806 "connected src %s\n", AF_INET6, v6src); 2807 } 2808 error = EADDRNOTAVAIL; 2809 goto bad_addr; 2810 } 2811 2812 /* 2813 * If the source address is a loopback address, the 2814 * destination had best be local or multicast. 2815 * The transports that can't handle multicast will reject 2816 * those addresses. 2817 */ 2818 if (src_ire->ire_type == IRE_LOOPBACK && 2819 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2820 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2821 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2822 error = -1; 2823 goto bad_addr; 2824 } 2825 /* 2826 * Allow setting new policies. For example, disconnects come 2827 * down as ipa_t bind. As we would have set conn_policy_cached 2828 * to B_TRUE before, we should set it to B_FALSE, so that policy 2829 * can change after the disconnect. 2830 */ 2831 connp->conn_policy_cached = B_FALSE; 2832 2833 /* 2834 * The addresses have been verified. Initialize the conn 2835 * before calling the policy as they expect the conns 2836 * initialized. 2837 */ 2838 connp->conn_srcv6 = *v6src; 2839 connp->conn_remv6 = *v6dst; 2840 connp->conn_lport = lport; 2841 connp->conn_fport = fport; 2842 2843 ASSERT(!(ipsec_policy_set && ire_requested)); 2844 if (ire_requested) { 2845 iulp_t *ulp_info = NULL; 2846 2847 /* 2848 * Note that sire will not be NULL if this is an off-link 2849 * connection and there is not cache for that dest yet. 2850 * 2851 * XXX Because of an existing bug, if there are multiple 2852 * default routes, the IRE returned now may not be the actual 2853 * default route used (default routes are chosen in a 2854 * round robin fashion). 
So if the metrics for different 2855 * default routes are different, we may return the wrong 2856 * metrics. This will not be a problem if the existing 2857 * bug is fixed. 2858 */ 2859 if (sire != NULL) 2860 ulp_info = &(sire->ire_uinfo); 2861 2862 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2863 ipst)) { 2864 error = -1; 2865 goto bad_addr; 2866 } 2867 } else if (ipsec_policy_set) { 2868 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2869 error = -1; 2870 goto bad_addr; 2871 } 2872 } 2873 2874 /* 2875 * Cache IPsec policy in this conn. If we have per-socket policy, 2876 * we'll cache that. If we don't, we'll inherit global policy. 2877 * 2878 * We can't insert until the conn reflects the policy. Note that 2879 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2880 * connections where we don't have a policy. This is to prevent 2881 * global policy lookups in the inbound path. 2882 * 2883 * If we insert before we set conn_policy_cached, 2884 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2885 * because global policy cound be non-empty. We normally call 2886 * ipsec_check_policy() for conn_policy_cached connections only if 2887 * conn_in_enforce_policy is set. But in this case, 2888 * conn_policy_cached can get set anytime since we made the 2889 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2890 * is called, which will make the above assumption false. Thus, we 2891 * need to insert after we set conn_policy_cached. 2892 */ 2893 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2894 goto bad_addr; 2895 2896 /* If not fanout_insert this was just an address verification */ 2897 if (fanout_insert) { 2898 /* 2899 * The addresses have been verified. Time to insert in 2900 * the correct fanout list. 2901 */ 2902 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2903 connp->conn_ports, 2904 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2905 } 2906 if (error == 0) { 2907 connp->conn_fully_bound = B_TRUE; 2908 /* 2909 * Our initial checks for MDT have passed; the IRE is not 2910 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2911 * be supporting MDT. Pass the IRE, IPC and ILL into 2912 * ip_mdinfo_return(), which performs further checks 2913 * against them and upon success, returns the MDT info 2914 * mblk which we will attach to the bind acknowledgment. 2915 */ 2916 if (md_dst_ire != NULL) { 2917 mblk_t *mdinfo_mp; 2918 2919 ASSERT(md_ill != NULL); 2920 ASSERT(md_ill->ill_mdt_capab != NULL); 2921 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2922 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2923 if (mp == NULL) { 2924 *mpp = mdinfo_mp; 2925 } else { 2926 linkb(mp, mdinfo_mp); 2927 } 2928 } 2929 } 2930 } 2931 bad_addr: 2932 if (ipsec_policy_set) { 2933 ASSERT(mp != NULL); 2934 freeb(mp); 2935 /* 2936 * As of now assume that nothing else accompanies 2937 * IPSEC_POLICY_SET. 2938 */ 2939 *mpp = NULL; 2940 } 2941 refrele_and_quit: 2942 if (src_ire != NULL) 2943 IRE_REFRELE(src_ire); 2944 if (dst_ire != NULL) 2945 IRE_REFRELE(dst_ire); 2946 if (sire != NULL) 2947 IRE_REFRELE(sire); 2948 if (src_ipif != NULL) 2949 ipif_refrele(src_ipif); 2950 if (md_dst_ire != NULL) 2951 IRE_REFRELE(md_dst_ire); 2952 if (ill_held && dst_ill != NULL) 2953 ill_refrele(dst_ill); 2954 return (error); 2955 } 2956 2957 /* ARGSUSED */ 2958 int 2959 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2960 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2961 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2962 boolean_t verify_dst) 2963 { 2964 int error = 0; 2965 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2966 boolean_t ire_requested; 2967 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2968 2969 /* 2970 * Note that we allow connect to broadcast and multicast 2971 * address when ire_requested is set. 
Thus the ULP 2972 * has to check for IRE_BROADCAST and multicast. 2973 */ 2974 ASSERT(mpp != NULL); 2975 ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); 2976 2977 ASSERT(connp->conn_af_isv6); 2978 connp->conn_ulp = protocol; 2979 2980 /* For raw socket, the local port is not set. */ 2981 lport = lport != 0 ? lport : connp->conn_lport; 2982 2983 /* 2984 * Bind to local and remote address. Local might be 2985 * unspecified in which case it will be extracted from 2986 * ire_src_addr_v6 2987 */ 2988 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2989 /* Connect to IPv4 address */ 2990 ipaddr_t v4src; 2991 ipaddr_t v4dst; 2992 2993 /* Is the source unspecified or mapped? */ 2994 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2995 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2996 ip1dbg(("ip_proto_bind_connected_v6: " 2997 "dst is mapped, but not the src\n")); 2998 goto bad_addr; 2999 } 3000 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 3001 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 3002 3003 /* Always verify destination reachability. */ 3004 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 3005 lport, v4dst, fport, B_TRUE, B_TRUE); 3006 if (error != 0) 3007 goto bad_addr; 3008 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 3009 connp->conn_pkt_isv6 = B_FALSE; 3010 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 3011 ip1dbg(("ip_proto_bind_connected_v6: " 3012 "src is mapped, but not the dst\n")); 3013 goto bad_addr; 3014 } else { 3015 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 3016 lport, v6dstp, ipp, fport, B_TRUE, verify_dst); 3017 if (error != 0) 3018 goto bad_addr; 3019 connp->conn_pkt_isv6 = B_TRUE; 3020 } 3021 3022 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 3023 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 3024 3025 /* Send it home. */ 3026 return (0); 3027 3028 bad_addr: 3029 if (error == 0) 3030 error = -TBADADDR; 3031 return (error); 3032 } 3033 3034 /* 3035 * Get the ire in *mpp. 
Returns false if it fails (due to lack of space). 3036 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3037 */ 3038 /* ARGSUSED4 */ 3039 static boolean_t 3040 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 3041 iulp_t *ulp_info, ip_stack_t *ipst) 3042 { 3043 mblk_t *mp = *mpp; 3044 ire_t *ret_ire; 3045 3046 ASSERT(mp != NULL); 3047 3048 if (ire != NULL) { 3049 /* 3050 * mp initialized above to IRE_DB_REQ_TYPE 3051 * appended mblk. Its <upper protocol>'s 3052 * job to make sure there is room. 3053 */ 3054 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 3055 return (B_FALSE); 3056 3057 mp->b_datap->db_type = IRE_DB_TYPE; 3058 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 3059 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 3060 ret_ire = (ire_t *)mp->b_rptr; 3061 if (IN6_IS_ADDR_MULTICAST(dst) || 3062 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3063 ret_ire->ire_type = IRE_BROADCAST; 3064 ret_ire->ire_addr_v6 = *dst; 3065 } 3066 if (ulp_info != NULL) { 3067 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3068 sizeof (iulp_t)); 3069 } 3070 ret_ire->ire_mp = mp; 3071 } else { 3072 /* 3073 * No IRE was found. Remove IRE mblk. 3074 */ 3075 *mpp = mp->b_cont; 3076 freeb(mp); 3077 } 3078 return (B_TRUE); 3079 } 3080 3081 /* 3082 * Add an ip6i_t header to the front of the mblk. 3083 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3084 * Returns NULL if allocation fails (and frees original message). 3085 * Used in outgoing path when going through ip_newroute_*v6(). 3086 * Used in incoming path to pass ifindex to transports. 
3087 */ 3088 mblk_t * 3089 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3090 { 3091 mblk_t *mp1; 3092 ip6i_t *ip6i; 3093 ip6_t *ip6h; 3094 3095 ip6h = (ip6_t *)mp->b_rptr; 3096 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3097 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3098 mp->b_datap->db_ref > 1) { 3099 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3100 if (mp1 == NULL) { 3101 freemsg(mp); 3102 return (NULL); 3103 } 3104 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3105 mp1->b_cont = mp; 3106 mp = mp1; 3107 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3108 } 3109 mp->b_rptr = (uchar_t *)ip6i; 3110 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3111 ip6i->ip6i_nxt = IPPROTO_RAW; 3112 if (ill != NULL) { 3113 ip6i->ip6i_flags = IP6I_IFINDEX; 3114 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3115 } else { 3116 ip6i->ip6i_flags = 0; 3117 } 3118 ip6i->ip6i_nexthop = *dst; 3119 return (mp); 3120 } 3121 3122 /* 3123 * Handle protocols with which IP is less intimate. There 3124 * can be more than one stream bound to a particular 3125 * protocol. When this is the case, normally each one gets a copy 3126 * of any incoming packets. 3127 * However, if the packet was tunneled and not multicast we only send to it 3128 * the first match. 3129 * 3130 * Zones notes: 3131 * Packets will be distributed to streams in all zones. This is really only 3132 * useful for ICMPv6 as only applications in the global zone can create raw 3133 * sockets for other protocols. 
3134 */ 3135 static void 3136 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3137 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3138 boolean_t mctl_present, zoneid_t zoneid) 3139 { 3140 queue_t *rq; 3141 mblk_t *mp1, *first_mp1; 3142 in6_addr_t dst = ip6h->ip6_dst; 3143 in6_addr_t src = ip6h->ip6_src; 3144 boolean_t one_only; 3145 mblk_t *first_mp = mp; 3146 boolean_t secure, shared_addr; 3147 conn_t *connp, *first_connp, *next_connp; 3148 connf_t *connfp; 3149 ip_stack_t *ipst = inill->ill_ipst; 3150 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3151 3152 if (mctl_present) { 3153 mp = first_mp->b_cont; 3154 secure = ipsec_in_is_secure(first_mp); 3155 ASSERT(mp != NULL); 3156 } else { 3157 secure = B_FALSE; 3158 } 3159 3160 /* 3161 * If the packet was tunneled and not multicast we only send to it 3162 * the first match. 3163 */ 3164 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3165 !IN6_IS_ADDR_MULTICAST(&dst)); 3166 3167 shared_addr = (zoneid == ALL_ZONES); 3168 if (shared_addr) { 3169 /* 3170 * We don't allow multilevel ports for raw IP, so no need to 3171 * check for that here. 3172 */ 3173 zoneid = tsol_packet_to_zoneid(mp); 3174 } 3175 3176 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3177 mutex_enter(&connfp->connf_lock); 3178 connp = connfp->connf_head; 3179 for (connp = connfp->connf_head; connp != NULL; 3180 connp = connp->conn_next) { 3181 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3182 zoneid) && 3183 (!is_system_labeled() || 3184 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3185 connp))) 3186 break; 3187 } 3188 3189 if (connp == NULL) { 3190 /* 3191 * No one bound to this port. Is 3192 * there a client that wants all 3193 * unclaimed datagrams? 
3194 */ 3195 mutex_exit(&connfp->connf_lock); 3196 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3197 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3198 nexthdr_offset, mctl_present, zoneid, ipst)) { 3199 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3200 } 3201 3202 return; 3203 } 3204 3205 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3206 3207 CONN_INC_REF(connp); 3208 first_connp = connp; 3209 3210 /* 3211 * XXX: Fix the multiple protocol listeners case. We should not 3212 * be walking the conn->next list here. 3213 */ 3214 if (one_only) { 3215 /* 3216 * Only send message to one tunnel driver by immediately 3217 * terminating the loop. 3218 */ 3219 connp = NULL; 3220 } else { 3221 connp = connp->conn_next; 3222 3223 } 3224 for (;;) { 3225 while (connp != NULL) { 3226 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3227 flags, zoneid) && 3228 (!is_system_labeled() || 3229 tsol_receive_local(mp, &dst, IPV6_VERSION, 3230 shared_addr, connp))) 3231 break; 3232 connp = connp->conn_next; 3233 } 3234 3235 /* 3236 * Just copy the data part alone. The mctl part is 3237 * needed just for verifying policy and it is never 3238 * sent up. 3239 */ 3240 if (connp == NULL || 3241 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3242 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3243 /* 3244 * No more intested clients or memory 3245 * allocation failed 3246 */ 3247 connp = first_connp; 3248 break; 3249 } 3250 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3251 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3252 CONN_INC_REF(connp); 3253 mutex_exit(&connfp->connf_lock); 3254 rq = connp->conn_rq; 3255 /* 3256 * For link-local always add ifindex so that transport can set 3257 * sin6_scope_id. Avoid it for ICMP error fanout. 
3258 */ 3259 if ((connp->conn_ip_recvpktinfo || 3260 IN6_IS_ADDR_LINKLOCAL(&src)) && 3261 (flags & IP_FF_IPINFO)) { 3262 /* Add header */ 3263 mp1 = ip_add_info_v6(mp1, inill, &dst); 3264 } 3265 if (mp1 == NULL) { 3266 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3267 } else if ( 3268 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3269 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3270 if (flags & IP_FF_RAWIP) { 3271 BUMP_MIB(ill->ill_ip_mib, 3272 rawipIfStatsInOverflows); 3273 } else { 3274 BUMP_MIB(ill->ill_icmp6_mib, 3275 ipv6IfIcmpInOverflows); 3276 } 3277 3278 freemsg(mp1); 3279 } else { 3280 /* 3281 * Don't enforce here if we're a tunnel - let "tun" do 3282 * it instead. 3283 */ 3284 if (!IPCL_IS_IPTUN(connp) && 3285 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3286 secure)) { 3287 first_mp1 = ipsec_check_inbound_policy( 3288 first_mp1, connp, NULL, ip6h, mctl_present); 3289 } 3290 if (first_mp1 != NULL) { 3291 if (mctl_present) 3292 freeb(first_mp1); 3293 BUMP_MIB(ill->ill_ip_mib, 3294 ipIfStatsHCInDelivers); 3295 (connp->conn_recv)(connp, mp1, NULL); 3296 } 3297 } 3298 mutex_enter(&connfp->connf_lock); 3299 /* Follow the next pointer before releasing the conn. */ 3300 next_connp = connp->conn_next; 3301 CONN_DEC_REF(connp); 3302 connp = next_connp; 3303 } 3304 3305 /* Last one. Send it upstream. */ 3306 mutex_exit(&connfp->connf_lock); 3307 3308 /* Initiate IPPF processing */ 3309 if (IP6_IN_IPP(flags, ipst)) { 3310 uint_t ifindex; 3311 3312 mutex_enter(&ill->ill_lock); 3313 ifindex = ill->ill_phyint->phyint_ifindex; 3314 mutex_exit(&ill->ill_lock); 3315 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3316 if (mp == NULL) { 3317 CONN_DEC_REF(connp); 3318 if (mctl_present) 3319 freeb(first_mp); 3320 return; 3321 } 3322 } 3323 3324 /* 3325 * For link-local always add ifindex so that transport can set 3326 * sin6_scope_id. Avoid it for ICMP error fanout. 
3327 */ 3328 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3329 (flags & IP_FF_IPINFO)) { 3330 /* Add header */ 3331 mp = ip_add_info_v6(mp, inill, &dst); 3332 if (mp == NULL) { 3333 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3334 CONN_DEC_REF(connp); 3335 if (mctl_present) 3336 freeb(first_mp); 3337 return; 3338 } else if (mctl_present) { 3339 first_mp->b_cont = mp; 3340 } else { 3341 first_mp = mp; 3342 } 3343 } 3344 3345 rq = connp->conn_rq; 3346 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3347 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3348 3349 if (flags & IP_FF_RAWIP) { 3350 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3351 } else { 3352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3353 } 3354 3355 freemsg(first_mp); 3356 } else { 3357 if (IPCL_IS_IPTUN(connp)) { 3358 /* 3359 * Tunneled packet. We enforce policy in the tunnel 3360 * module itself. 3361 * 3362 * Send the WHOLE packet up (incl. IPSEC_IN) without 3363 * a policy check. 3364 */ 3365 putnext(rq, first_mp); 3366 CONN_DEC_REF(connp); 3367 return; 3368 } 3369 /* 3370 * Don't enforce here if we're a tunnel - let "tun" do 3371 * it instead. 3372 */ 3373 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3374 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3375 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3376 NULL, ip6h, mctl_present); 3377 if (first_mp == NULL) { 3378 CONN_DEC_REF(connp); 3379 return; 3380 } 3381 } 3382 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3383 (connp->conn_recv)(connp, mp, NULL); 3384 if (mctl_present) 3385 freeb(first_mp); 3386 } 3387 CONN_DEC_REF(connp); 3388 } 3389 3390 /* 3391 * Send an ICMP error after patching up the packet appropriately. Returns 3392 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3393 */ 3394 int 3395 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3396 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3397 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3398 { 3399 ip6_t *ip6h; 3400 mblk_t *first_mp; 3401 boolean_t secure; 3402 unsigned char db_type; 3403 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3404 3405 first_mp = mp; 3406 if (mctl_present) { 3407 mp = mp->b_cont; 3408 secure = ipsec_in_is_secure(first_mp); 3409 ASSERT(mp != NULL); 3410 } else { 3411 /* 3412 * If this is an ICMP error being reported - which goes 3413 * up as M_CTLs, we need to convert them to M_DATA till 3414 * we finish checking with global policy because 3415 * ipsec_check_global_policy() assumes M_DATA as clear 3416 * and M_CTL as secure. 3417 */ 3418 db_type = mp->b_datap->db_type; 3419 mp->b_datap->db_type = M_DATA; 3420 secure = B_FALSE; 3421 } 3422 /* 3423 * We are generating an icmp error for some inbound packet. 3424 * Called from all ip_fanout_(udp, tcp, proto) functions. 3425 * Before we generate an error, check with global policy 3426 * to see whether this is allowed to enter the system. As 3427 * there is no "conn", we are checking with global policy. 
3428 */ 3429 ip6h = (ip6_t *)mp->b_rptr; 3430 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3431 first_mp = ipsec_check_global_policy(first_mp, NULL, 3432 NULL, ip6h, mctl_present, ipst->ips_netstack); 3433 if (first_mp == NULL) 3434 return (0); 3435 } 3436 3437 if (!mctl_present) 3438 mp->b_datap->db_type = db_type; 3439 3440 if (flags & IP_FF_SEND_ICMP) { 3441 if (flags & IP_FF_HDR_COMPLETE) { 3442 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3443 freemsg(first_mp); 3444 return (1); 3445 } 3446 } 3447 switch (icmp_type) { 3448 case ICMP6_DST_UNREACH: 3449 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3450 B_FALSE, B_FALSE, zoneid, ipst); 3451 break; 3452 case ICMP6_PARAM_PROB: 3453 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3454 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3455 break; 3456 default: 3457 #ifdef DEBUG 3458 panic("ip_fanout_send_icmp_v6: wrong type"); 3459 /*NOTREACHED*/ 3460 #else 3461 freemsg(first_mp); 3462 break; 3463 #endif 3464 } 3465 } else { 3466 freemsg(first_mp); 3467 return (0); 3468 } 3469 3470 return (1); 3471 } 3472 3473 3474 /* 3475 * Fanout for TCP packets 3476 * The caller puts <fport, lport> in the ports parameter. 
3477 */ 3478 static void 3479 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3480 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3481 { 3482 mblk_t *first_mp; 3483 boolean_t secure; 3484 conn_t *connp; 3485 tcph_t *tcph; 3486 boolean_t syn_present = B_FALSE; 3487 ip_stack_t *ipst = inill->ill_ipst; 3488 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3489 3490 first_mp = mp; 3491 if (mctl_present) { 3492 mp = first_mp->b_cont; 3493 secure = ipsec_in_is_secure(first_mp); 3494 ASSERT(mp != NULL); 3495 } else { 3496 secure = B_FALSE; 3497 } 3498 3499 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3500 3501 if (connp == NULL || 3502 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3503 /* 3504 * No hard-bound match. Send Reset. 3505 */ 3506 dblk_t *dp = mp->b_datap; 3507 uint32_t ill_index; 3508 3509 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3510 3511 /* Initiate IPPf processing, if needed. */ 3512 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3513 (flags & IP6_NO_IPPOLICY)) { 3514 ill_index = ill->ill_phyint->phyint_ifindex; 3515 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3516 if (first_mp == NULL) { 3517 if (connp != NULL) 3518 CONN_DEC_REF(connp); 3519 return; 3520 } 3521 } 3522 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3523 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3524 ipst->ips_netstack->netstack_tcp, connp); 3525 if (connp != NULL) 3526 CONN_DEC_REF(connp); 3527 return; 3528 } 3529 3530 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3531 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3532 if (connp->conn_flags & IPCL_TCP) { 3533 squeue_t *sqp; 3534 3535 /* 3536 * For fused tcp loopback, assign the eager's 3537 * squeue to be that of the active connect's. 
3538 */ 3539 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3540 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3541 !secure && 3542 !IP6_IN_IPP(flags, ipst)) { 3543 ASSERT(Q_TO_CONN(q) != NULL); 3544 sqp = Q_TO_CONN(q)->conn_sqp; 3545 } else { 3546 sqp = IP_SQUEUE_GET(lbolt); 3547 } 3548 3549 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3550 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3551 3552 /* 3553 * db_cksumstuff is unused in the incoming 3554 * path; Thus store the ifindex here. It will 3555 * be cleared in tcp_conn_create_v6(). 3556 */ 3557 DB_CKSUMSTUFF(mp) = 3558 (intptr_t)ill->ill_phyint->phyint_ifindex; 3559 syn_present = B_TRUE; 3560 } 3561 } 3562 3563 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3564 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3565 if ((flags & TH_RST) || (flags & TH_URG)) { 3566 CONN_DEC_REF(connp); 3567 freemsg(first_mp); 3568 return; 3569 } 3570 if (flags & TH_ACK) { 3571 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3572 ipst->ips_netstack->netstack_tcp, connp); 3573 CONN_DEC_REF(connp); 3574 return; 3575 } 3576 3577 CONN_DEC_REF(connp); 3578 freemsg(first_mp); 3579 return; 3580 } 3581 3582 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3583 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3584 NULL, ip6h, mctl_present); 3585 if (first_mp == NULL) { 3586 CONN_DEC_REF(connp); 3587 return; 3588 } 3589 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3590 ASSERT(syn_present); 3591 if (mctl_present) { 3592 ASSERT(first_mp != mp); 3593 first_mp->b_datap->db_struioflag |= 3594 STRUIO_POLICY; 3595 } else { 3596 ASSERT(first_mp == mp); 3597 mp->b_datap->db_struioflag &= 3598 ~STRUIO_EAGER; 3599 mp->b_datap->db_struioflag |= 3600 STRUIO_POLICY; 3601 } 3602 } else { 3603 /* 3604 * Discard first_mp early since we're dealing with a 3605 * fully-connected conn_t and tcp doesn't do policy in 3606 * this case. 
Also, if someone is bound to IPPROTO_TCP 3607 * over raw IP, they don't expect to see a M_CTL. 3608 */ 3609 if (mctl_present) { 3610 freeb(first_mp); 3611 mctl_present = B_FALSE; 3612 } 3613 first_mp = mp; 3614 } 3615 } 3616 3617 /* Initiate IPPF processing */ 3618 if (IP6_IN_IPP(flags, ipst)) { 3619 uint_t ifindex; 3620 3621 mutex_enter(&ill->ill_lock); 3622 ifindex = ill->ill_phyint->phyint_ifindex; 3623 mutex_exit(&ill->ill_lock); 3624 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3625 if (mp == NULL) { 3626 CONN_DEC_REF(connp); 3627 if (mctl_present) { 3628 freeb(first_mp); 3629 } 3630 return; 3631 } else if (mctl_present) { 3632 /* 3633 * ip_add_info_v6 might return a new mp. 3634 */ 3635 ASSERT(first_mp != mp); 3636 first_mp->b_cont = mp; 3637 } else { 3638 first_mp = mp; 3639 } 3640 } 3641 3642 /* 3643 * For link-local always add ifindex so that TCP can bind to that 3644 * interface. Avoid it for ICMP error fanout. 3645 */ 3646 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3647 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3648 (flags & IP_FF_IPINFO))) { 3649 /* Add header */ 3650 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3651 if (mp == NULL) { 3652 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3653 CONN_DEC_REF(connp); 3654 if (mctl_present) 3655 freeb(first_mp); 3656 return; 3657 } else if (mctl_present) { 3658 ASSERT(first_mp != mp); 3659 first_mp->b_cont = mp; 3660 } else { 3661 first_mp = mp; 3662 } 3663 } 3664 3665 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3666 if (IPCL_IS_TCP(connp)) { 3667 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3668 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3669 } else { 3670 /* SOCK_RAW, IPPROTO_TCP case */ 3671 (connp->conn_recv)(connp, first_mp, NULL); 3672 CONN_DEC_REF(connp); 3673 } 3674 } 3675 3676 /* 3677 * Fanout for UDP packets. 3678 * The caller puts <fport, lport> in the ports parameter. 3679 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3680 * 3681 * If SO_REUSEADDR is set all multicast and broadcast packets 3682 * will be delivered to all streams bound to the same port. 3683 * 3684 * Zones notes: 3685 * Multicast packets will be distributed to streams in all zones. 3686 */ 3687 static void 3688 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3689 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3690 zoneid_t zoneid) 3691 { 3692 uint32_t dstport, srcport; 3693 in6_addr_t dst; 3694 mblk_t *first_mp; 3695 boolean_t secure; 3696 conn_t *connp; 3697 connf_t *connfp; 3698 conn_t *first_conn; 3699 conn_t *next_conn; 3700 mblk_t *mp1, *first_mp1; 3701 in6_addr_t src; 3702 boolean_t shared_addr; 3703 ip_stack_t *ipst = inill->ill_ipst; 3704 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3705 3706 first_mp = mp; 3707 if (mctl_present) { 3708 mp = first_mp->b_cont; 3709 secure = ipsec_in_is_secure(first_mp); 3710 ASSERT(mp != NULL); 3711 } else { 3712 secure = B_FALSE; 3713 } 3714 3715 /* Extract ports in net byte order */ 3716 dstport = htons(ntohl(ports) & 0xFFFF); 3717 srcport = htons(ntohl(ports) >> 16); 3718 dst = ip6h->ip6_dst; 3719 src = ip6h->ip6_src; 3720 3721 shared_addr = (zoneid == ALL_ZONES); 3722 if (shared_addr) { 3723 /* 3724 * No need to handle exclusive-stack zones since ALL_ZONES 3725 * only applies to the shared stack. 3726 */ 3727 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3728 /* 3729 * If no shared MLP is found, tsol_mlp_findzone returns 3730 * ALL_ZONES. In that case, we assume it's SLP, and 3731 * search for the zone based on the packet label. 3732 * That will also return ALL_ZONES on failure, but 3733 * we never allow conn_zoneid to be set to ALL_ZONES. 3734 */ 3735 if (zoneid == ALL_ZONES) 3736 zoneid = tsol_packet_to_zoneid(mp); 3737 } 3738 3739 /* Attempt to find a client stream based on destination port. 
*/ 3740 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3741 mutex_enter(&connfp->connf_lock); 3742 connp = connfp->connf_head; 3743 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3744 /* 3745 * Not multicast. Send to the one (first) client we find. 3746 */ 3747 while (connp != NULL) { 3748 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3749 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3750 conn_wantpacket_v6(connp, ill, ip6h, 3751 flags, zoneid)) { 3752 break; 3753 } 3754 connp = connp->conn_next; 3755 } 3756 if (connp == NULL || connp->conn_upq == NULL) 3757 goto notfound; 3758 3759 if (is_system_labeled() && 3760 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3761 connp)) 3762 goto notfound; 3763 3764 /* Found a client */ 3765 CONN_INC_REF(connp); 3766 mutex_exit(&connfp->connf_lock); 3767 3768 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3769 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3770 freemsg(first_mp); 3771 CONN_DEC_REF(connp); 3772 return; 3773 } 3774 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3775 first_mp = ipsec_check_inbound_policy(first_mp, 3776 connp, NULL, ip6h, mctl_present); 3777 if (first_mp == NULL) { 3778 CONN_DEC_REF(connp); 3779 return; 3780 } 3781 } 3782 /* Initiate IPPF processing */ 3783 if (IP6_IN_IPP(flags, ipst)) { 3784 uint_t ifindex; 3785 3786 mutex_enter(&ill->ill_lock); 3787 ifindex = ill->ill_phyint->phyint_ifindex; 3788 mutex_exit(&ill->ill_lock); 3789 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3790 if (mp == NULL) { 3791 CONN_DEC_REF(connp); 3792 if (mctl_present) 3793 freeb(first_mp); 3794 return; 3795 } 3796 } 3797 /* 3798 * For link-local always add ifindex so that 3799 * transport can set sin6_scope_id. Avoid it for 3800 * ICMP error fanout. 
3801 */ 3802 if ((connp->conn_ip_recvpktinfo || 3803 IN6_IS_ADDR_LINKLOCAL(&src)) && 3804 (flags & IP_FF_IPINFO)) { 3805 /* Add header */ 3806 mp = ip_add_info_v6(mp, inill, &dst); 3807 if (mp == NULL) { 3808 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3809 CONN_DEC_REF(connp); 3810 if (mctl_present) 3811 freeb(first_mp); 3812 return; 3813 } else if (mctl_present) { 3814 first_mp->b_cont = mp; 3815 } else { 3816 first_mp = mp; 3817 } 3818 } 3819 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3820 3821 /* Send it upstream */ 3822 (connp->conn_recv)(connp, mp, NULL); 3823 3824 IP6_STAT(ipst, ip6_udp_fannorm); 3825 CONN_DEC_REF(connp); 3826 if (mctl_present) 3827 freeb(first_mp); 3828 return; 3829 } 3830 3831 while (connp != NULL) { 3832 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3833 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3834 (!is_system_labeled() || 3835 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3836 connp))) 3837 break; 3838 connp = connp->conn_next; 3839 } 3840 3841 if (connp == NULL || connp->conn_upq == NULL) 3842 goto notfound; 3843 3844 first_conn = connp; 3845 3846 CONN_INC_REF(connp); 3847 connp = connp->conn_next; 3848 for (;;) { 3849 while (connp != NULL) { 3850 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3851 src) && conn_wantpacket_v6(connp, ill, ip6h, 3852 flags, zoneid) && 3853 (!is_system_labeled() || 3854 tsol_receive_local(mp, &dst, IPV6_VERSION, 3855 shared_addr, connp))) 3856 break; 3857 connp = connp->conn_next; 3858 } 3859 /* 3860 * Just copy the data part alone. The mctl part is 3861 * needed just for verifying policy and it is never 3862 * sent up. 3863 */ 3864 if (connp == NULL || 3865 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3866 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3867 /* 3868 * No more interested clients or memory 3869 * allocation failed 3870 */ 3871 connp = first_conn; 3872 break; 3873 } 3874 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3875 CONN_INC_REF(connp); 3876 mutex_exit(&connfp->connf_lock); 3877 /* 3878 * For link-local always add ifindex so that transport 3879 * can set sin6_scope_id. Avoid it for ICMP error 3880 * fanout. 3881 */ 3882 if ((connp->conn_ip_recvpktinfo || 3883 IN6_IS_ADDR_LINKLOCAL(&src)) && 3884 (flags & IP_FF_IPINFO)) { 3885 /* Add header */ 3886 mp1 = ip_add_info_v6(mp1, inill, &dst); 3887 } 3888 /* mp1 could have changed */ 3889 if (mctl_present) 3890 first_mp1->b_cont = mp1; 3891 else 3892 first_mp1 = mp1; 3893 if (mp1 == NULL) { 3894 if (mctl_present) 3895 freeb(first_mp1); 3896 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3897 goto next_one; 3898 } 3899 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3900 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3901 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3902 freemsg(first_mp1); 3903 goto next_one; 3904 } 3905 3906 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3907 first_mp1 = ipsec_check_inbound_policy 3908 (first_mp1, connp, NULL, ip6h, 3909 mctl_present); 3910 } 3911 if (first_mp1 != NULL) { 3912 if (mctl_present) 3913 freeb(first_mp1); 3914 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3915 3916 /* Send it upstream */ 3917 (connp->conn_recv)(connp, mp1, NULL); 3918 } 3919 next_one: 3920 mutex_enter(&connfp->connf_lock); 3921 /* Follow the next pointer before releasing the conn. */ 3922 next_conn = connp->conn_next; 3923 IP6_STAT(ipst, ip6_udp_fanmb); 3924 CONN_DEC_REF(connp); 3925 connp = next_conn; 3926 } 3927 3928 /* Last one. Send it upstream. 
*/ 3929 mutex_exit(&connfp->connf_lock); 3930 3931 /* Initiate IPPF processing */ 3932 if (IP6_IN_IPP(flags, ipst)) { 3933 uint_t ifindex; 3934 3935 mutex_enter(&ill->ill_lock); 3936 ifindex = ill->ill_phyint->phyint_ifindex; 3937 mutex_exit(&ill->ill_lock); 3938 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3939 if (mp == NULL) { 3940 CONN_DEC_REF(connp); 3941 if (mctl_present) { 3942 freeb(first_mp); 3943 } 3944 return; 3945 } 3946 } 3947 3948 /* 3949 * For link-local always add ifindex so that transport can set 3950 * sin6_scope_id. Avoid it for ICMP error fanout. 3951 */ 3952 if ((connp->conn_ip_recvpktinfo || 3953 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3954 /* Add header */ 3955 mp = ip_add_info_v6(mp, inill, &dst); 3956 if (mp == NULL) { 3957 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3958 CONN_DEC_REF(connp); 3959 if (mctl_present) 3960 freeb(first_mp); 3961 return; 3962 } else if (mctl_present) { 3963 first_mp->b_cont = mp; 3964 } else { 3965 first_mp = mp; 3966 } 3967 } 3968 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3969 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3970 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3971 freemsg(mp); 3972 } else { 3973 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3974 first_mp = ipsec_check_inbound_policy(first_mp, 3975 connp, NULL, ip6h, mctl_present); 3976 if (first_mp == NULL) { 3977 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3978 CONN_DEC_REF(connp); 3979 return; 3980 } 3981 } 3982 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3983 3984 /* Send it upstream */ 3985 (connp->conn_recv)(connp, mp, NULL); 3986 } 3987 IP6_STAT(ipst, ip6_udp_fanmb); 3988 CONN_DEC_REF(connp); 3989 if (mctl_present) 3990 freeb(first_mp); 3991 return; 3992 3993 notfound: 3994 mutex_exit(&connfp->connf_lock); 3995 /* 3996 * No one bound to this port. Is 3997 * there a client that wants all 3998 * unclaimed datagrams? 
3999 */ 4000 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4001 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4002 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 4003 zoneid); 4004 } else { 4005 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4006 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4007 mctl_present, zoneid, ipst)) { 4008 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 4009 } 4010 } 4011 } 4012 4013 /* 4014 * int ip_find_hdr_v6() 4015 * 4016 * This routine is used by the upper layer protocols and the IP tunnel 4017 * module to: 4018 * - Set extension header pointers to appropriate locations 4019 * - Determine IPv6 header length and return it 4020 * - Return a pointer to the last nexthdr value 4021 * 4022 * The caller must initialize ipp_fields. 4023 * 4024 * NOTE: If multiple extension headers of the same type are present, 4025 * ip_find_hdr_v6() will set the respective extension header pointers 4026 * to the first one that it encounters in the IPv6 header. It also 4027 * skips fragment headers. This routine deals with malformed packets 4028 * of various sorts in which case the returned length is up to the 4029 * malformed part. 4030 */ 4031 int 4032 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4033 { 4034 uint_t length, ehdrlen; 4035 uint8_t nexthdr; 4036 uint8_t *whereptr, *endptr; 4037 ip6_dest_t *tmpdstopts; 4038 ip6_rthdr_t *tmprthdr; 4039 ip6_hbh_t *tmphopopts; 4040 ip6_frag_t *tmpfraghdr; 4041 4042 length = IPV6_HDR_LEN; 4043 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4044 endptr = mp->b_wptr; 4045 4046 nexthdr = ip6h->ip6_nxt; 4047 while (whereptr < endptr) { 4048 /* Is there enough left for len + nexthdr? 
*/ 4049 if (whereptr + MIN_EHDR_LEN > endptr) 4050 goto done; 4051 4052 switch (nexthdr) { 4053 case IPPROTO_HOPOPTS: 4054 tmphopopts = (ip6_hbh_t *)whereptr; 4055 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4056 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4057 goto done; 4058 nexthdr = tmphopopts->ip6h_nxt; 4059 /* return only 1st hbh */ 4060 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4061 ipp->ipp_fields |= IPPF_HOPOPTS; 4062 ipp->ipp_hopopts = tmphopopts; 4063 ipp->ipp_hopoptslen = ehdrlen; 4064 } 4065 break; 4066 case IPPROTO_DSTOPTS: 4067 tmpdstopts = (ip6_dest_t *)whereptr; 4068 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4069 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4070 goto done; 4071 nexthdr = tmpdstopts->ip6d_nxt; 4072 /* 4073 * ipp_dstopts is set to the destination header after a 4074 * routing header. 4075 * Assume it is a post-rthdr destination header 4076 * and adjust when we find an rthdr. 4077 */ 4078 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4079 ipp->ipp_fields |= IPPF_DSTOPTS; 4080 ipp->ipp_dstopts = tmpdstopts; 4081 ipp->ipp_dstoptslen = ehdrlen; 4082 } 4083 break; 4084 case IPPROTO_ROUTING: 4085 tmprthdr = (ip6_rthdr_t *)whereptr; 4086 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4087 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4088 goto done; 4089 nexthdr = tmprthdr->ip6r_nxt; 4090 /* return only 1st rthdr */ 4091 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4092 ipp->ipp_fields |= IPPF_RTHDR; 4093 ipp->ipp_rthdr = tmprthdr; 4094 ipp->ipp_rthdrlen = ehdrlen; 4095 } 4096 /* 4097 * Make any destination header we've seen be a 4098 * pre-rthdr destination header. 
4099 */ 4100 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4101 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4102 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4103 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4104 ipp->ipp_dstopts = NULL; 4105 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4106 ipp->ipp_dstoptslen = 0; 4107 } 4108 break; 4109 case IPPROTO_FRAGMENT: 4110 tmpfraghdr = (ip6_frag_t *)whereptr; 4111 ehdrlen = sizeof (ip6_frag_t); 4112 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4113 goto done; 4114 nexthdr = tmpfraghdr->ip6f_nxt; 4115 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4116 ipp->ipp_fields |= IPPF_FRAGHDR; 4117 ipp->ipp_fraghdr = tmpfraghdr; 4118 ipp->ipp_fraghdrlen = ehdrlen; 4119 } 4120 break; 4121 case IPPROTO_NONE: 4122 default: 4123 goto done; 4124 } 4125 length += ehdrlen; 4126 whereptr += ehdrlen; 4127 } 4128 done: 4129 if (nexthdrp != NULL) 4130 *nexthdrp = nexthdr; 4131 return (length); 4132 } 4133 4134 int 4135 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4136 { 4137 ire_t *ire; 4138 4139 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4140 ire = ire_lookup_local_v6(zoneid, ipst); 4141 if (ire == NULL) { 4142 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4143 return (1); 4144 } 4145 ip6h->ip6_src = ire->ire_addr_v6; 4146 ire_refrele(ire); 4147 } 4148 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4149 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4150 return (0); 4151 } 4152 4153 /* 4154 * Try to determine where and what are the IPv6 header length and 4155 * pointer to nexthdr value for the upper layer protocol (or an 4156 * unknown next hdr). 4157 * 4158 * Parameters returns a pointer to the nexthdr value; 4159 * Must handle malformed packets of various sorts. 4160 * Function returns failure for malformed cases. 
4161 */ 4162 boolean_t 4163 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4164 uint8_t **nexthdrpp) 4165 { 4166 uint16_t length; 4167 uint_t ehdrlen; 4168 uint8_t *nexthdrp; 4169 uint8_t *whereptr; 4170 uint8_t *endptr; 4171 ip6_dest_t *desthdr; 4172 ip6_rthdr_t *rthdr; 4173 ip6_frag_t *fraghdr; 4174 4175 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4176 length = IPV6_HDR_LEN; 4177 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4178 endptr = mp->b_wptr; 4179 4180 nexthdrp = &ip6h->ip6_nxt; 4181 while (whereptr < endptr) { 4182 /* Is there enough left for len + nexthdr? */ 4183 if (whereptr + MIN_EHDR_LEN > endptr) 4184 break; 4185 4186 switch (*nexthdrp) { 4187 case IPPROTO_HOPOPTS: 4188 case IPPROTO_DSTOPTS: 4189 /* Assumes the headers are identical for hbh and dst */ 4190 desthdr = (ip6_dest_t *)whereptr; 4191 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4192 if ((uchar_t *)desthdr + ehdrlen > endptr) 4193 return (B_FALSE); 4194 nexthdrp = &desthdr->ip6d_nxt; 4195 break; 4196 case IPPROTO_ROUTING: 4197 rthdr = (ip6_rthdr_t *)whereptr; 4198 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4199 if ((uchar_t *)rthdr + ehdrlen > endptr) 4200 return (B_FALSE); 4201 nexthdrp = &rthdr->ip6r_nxt; 4202 break; 4203 case IPPROTO_FRAGMENT: 4204 fraghdr = (ip6_frag_t *)whereptr; 4205 ehdrlen = sizeof (ip6_frag_t); 4206 if ((uchar_t *)&fraghdr[1] > endptr) 4207 return (B_FALSE); 4208 nexthdrp = &fraghdr->ip6f_nxt; 4209 break; 4210 case IPPROTO_NONE: 4211 /* No next header means we're finished */ 4212 default: 4213 *hdr_length_ptr = length; 4214 *nexthdrpp = nexthdrp; 4215 return (B_TRUE); 4216 } 4217 length += ehdrlen; 4218 whereptr += ehdrlen; 4219 *hdr_length_ptr = length; 4220 *nexthdrpp = nexthdrp; 4221 } 4222 switch (*nexthdrp) { 4223 case IPPROTO_HOPOPTS: 4224 case IPPROTO_DSTOPTS: 4225 case IPPROTO_ROUTING: 4226 case IPPROTO_FRAGMENT: 4227 /* 4228 * If any know extension headers are still to be processed, 4229 * the 
packet's malformed (or at least all the IP header(s) are 4230 * not in the same mblk - and that should never happen. 4231 */ 4232 return (B_FALSE); 4233 4234 default: 4235 /* 4236 * If we get here, we know that all of the IP headers were in 4237 * the same mblk, even if the ULP header is in the next mblk. 4238 */ 4239 *hdr_length_ptr = length; 4240 *nexthdrpp = nexthdrp; 4241 return (B_TRUE); 4242 } 4243 } 4244 4245 /* 4246 * Return the length of the IPv6 related headers (including extension headers) 4247 * Returns a length even if the packet is malformed. 4248 */ 4249 int 4250 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4251 { 4252 uint16_t hdr_len; 4253 uint8_t *nexthdrp; 4254 4255 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4256 return (hdr_len); 4257 } 4258 4259 /* 4260 * Select an ill for the packet by considering load spreading across 4261 * a different ill in the group if dst_ill is part of some group. 4262 */ 4263 static ill_t * 4264 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4265 { 4266 ill_t *ill; 4267 4268 /* 4269 * We schedule irrespective of whether the source address is 4270 * INADDR_UNSPECIED or not. 4271 */ 4272 ill = illgrp_scheduler(dst_ill); 4273 if (ill == NULL) 4274 return (NULL); 4275 4276 /* 4277 * For groups with names ip_sioctl_groupname ensures that all 4278 * ills are of same type. For groups without names, ifgrp_insert 4279 * ensures this. 4280 */ 4281 ASSERT(dst_ill->ill_type == ill->ill_type); 4282 4283 return (ill); 4284 } 4285 4286 /* 4287 * IPv6 - 4288 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4289 * to send out a packet to a destination address for which we do not have 4290 * specific routing information. 4291 * 4292 * Handle non-multicast packets. If ill is non-NULL the match is done 4293 * for that ill. 
4294 * 4295 * When a specific ill is specified (using IPV6_PKTINFO, 4296 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4297 * on routing entries (ftable and ctable) that have a matching 4298 * ire->ire_ipif->ipif_ill. Thus this can only be used 4299 * for destinations that are on-link for the specific ill 4300 * and that can appear on multiple links. Thus it is useful 4301 * for multicast destinations, link-local destinations, and 4302 * at some point perhaps for site-local destinations (if the 4303 * node sits at a site boundary). 4304 * We create the cache entries in the regular ctable since 4305 * it can not "confuse" things for other destinations. 4306 * table. 4307 * 4308 * When ill is part of a ill group, we subject the packets 4309 * to load spreading even if the ill is specified by the 4310 * means described above. We disable only for IPV6_BOUND_PIF 4311 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4312 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4313 * set. 4314 * 4315 * NOTE : These are the scopes of some of the variables that point at IRE, 4316 * which needs to be followed while making any future modifications 4317 * to avoid memory leaks. 4318 * 4319 * - ire and sire are the entries looked up initially by 4320 * ire_ftable_lookup_v6. 4321 * - ipif_ire is used to hold the interface ire associated with 4322 * the new cache ire. But it's scope is limited, so we always REFRELE 4323 * it before branching out to error paths. 4324 * - save_ire is initialized before ire_create, so that ire returned 4325 * by ire_create will not over-write the ire. We REFRELE save_ire 4326 * before breaking out of the switch. 4327 * 4328 * Thus on failures, we have to REFRELE only ire and sire, if they 4329 * are not NULL. 4330 * 4331 * v6srcp may be used in the future. Currently unused. 
4332 */ 4333 /* ARGSUSED */ 4334 void 4335 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4336 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4337 { 4338 in6_addr_t v6gw; 4339 in6_addr_t dst; 4340 ire_t *ire = NULL; 4341 ipif_t *src_ipif = NULL; 4342 ill_t *dst_ill = NULL; 4343 ire_t *sire = NULL; 4344 ire_t *save_ire; 4345 ip6_t *ip6h; 4346 int err = 0; 4347 mblk_t *first_mp; 4348 ipsec_out_t *io; 4349 ill_t *attach_ill = NULL; 4350 ushort_t ire_marks = 0; 4351 int match_flags; 4352 boolean_t ip6i_present; 4353 ire_t *first_sire = NULL; 4354 mblk_t *copy_mp = NULL; 4355 mblk_t *xmit_mp = NULL; 4356 in6_addr_t save_dst; 4357 uint32_t multirt_flags = 4358 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4359 boolean_t multirt_is_resolvable; 4360 boolean_t multirt_resolve_next; 4361 boolean_t need_rele = B_FALSE; 4362 boolean_t do_attach_ill = B_FALSE; 4363 boolean_t ip6_asp_table_held = B_FALSE; 4364 tsol_ire_gw_secattr_t *attrp = NULL; 4365 tsol_gcgrp_t *gcgrp = NULL; 4366 tsol_gcgrp_addr_t ga; 4367 4368 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4369 4370 first_mp = mp; 4371 if (mp->b_datap->db_type == M_CTL) { 4372 mp = mp->b_cont; 4373 io = (ipsec_out_t *)first_mp->b_rptr; 4374 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4375 } else { 4376 io = NULL; 4377 } 4378 4379 /* 4380 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4381 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4382 * could be NULL. 4383 * 4384 * This information can appear either in an ip6i_t or an IPSEC_OUT 4385 * message. 4386 */ 4387 ip6h = (ip6_t *)mp->b_rptr; 4388 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4389 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4390 if (!ip6i_present || 4391 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4392 attach_ill = ip_grab_attach_ill(ill, first_mp, 4393 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4394 io->ipsec_out_ill_index), B_TRUE, ipst); 4395 /* Failure case frees things for us. */ 4396 if (attach_ill == NULL) 4397 return; 4398 4399 /* 4400 * Check if we need an ire that will not be 4401 * looked up by anybody else i.e. HIDDEN. 4402 */ 4403 if (ill_is_probeonly(attach_ill)) 4404 ire_marks = IRE_MARK_HIDDEN; 4405 } 4406 } 4407 4408 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4409 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4410 goto icmp_err_ret; 4411 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4412 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4413 goto icmp_err_ret; 4414 } 4415 4416 /* 4417 * If this IRE is created for forwarding or it is not for 4418 * TCP traffic, mark it as temporary. 4419 * 4420 * Is it sufficient just to check the next header?? 4421 */ 4422 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4423 ire_marks |= IRE_MARK_TEMPORARY; 4424 4425 /* 4426 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4427 * chain until it gets the most specific information available. 4428 * For example, we know that there is no IRE_CACHE for this dest, 4429 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4430 * ire_ftable_lookup_v6 will look up the gateway, etc. 4431 */ 4432 4433 if (ill == NULL) { 4434 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4435 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4436 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4437 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4438 match_flags, ipst); 4439 /* 4440 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4441 * in a NULL ill, but the packet could be a neighbor 4442 * solicitation/advertisment and could have a valid attach_ill. 4443 */ 4444 if (attach_ill != NULL) 4445 ill_refrele(attach_ill); 4446 } else { 4447 if (attach_ill != NULL) { 4448 /* 4449 * attach_ill is set only for communicating with 4450 * on-link hosts. 
So, don't look for DEFAULT. 4451 * ip_wput_v6 passes the right ill in this case and 4452 * hence we can assert. 4453 */ 4454 ASSERT(ill == attach_ill); 4455 ill_refrele(attach_ill); 4456 do_attach_ill = B_TRUE; 4457 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4458 } else { 4459 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4460 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4461 } 4462 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4463 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4464 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4465 } 4466 4467 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4468 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4469 4470 /* 4471 * We enter a loop that will be run only once in most cases. 4472 * The loop is re-entered in the case where the destination 4473 * can be reached through multiple RTF_MULTIRT-flagged routes. 4474 * The intention is to compute multiple routes to a single 4475 * destination in a single ip_newroute_v6 call. 4476 * The information is contained in sire->ire_flags. 4477 */ 4478 do { 4479 multirt_resolve_next = B_FALSE; 4480 4481 if (dst_ill != NULL) { 4482 ill_refrele(dst_ill); 4483 dst_ill = NULL; 4484 } 4485 if (src_ipif != NULL) { 4486 ipif_refrele(src_ipif); 4487 src_ipif = NULL; 4488 } 4489 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4490 ip3dbg(("ip_newroute_v6: starting new resolution " 4491 "with first_mp %p, tag %d\n", 4492 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4493 4494 /* 4495 * We check if there are trailing unresolved routes for 4496 * the destination contained in sire. 
4497 */ 4498 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4499 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4500 4501 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4502 "ire %p, sire %p\n", 4503 multirt_is_resolvable, (void *)ire, (void *)sire)); 4504 4505 if (!multirt_is_resolvable) { 4506 /* 4507 * No more multirt routes to resolve; give up 4508 * (all routes resolved or no more resolvable 4509 * routes). 4510 */ 4511 if (ire != NULL) { 4512 ire_refrele(ire); 4513 ire = NULL; 4514 } 4515 } else { 4516 ASSERT(sire != NULL); 4517 ASSERT(ire != NULL); 4518 /* 4519 * We simply use first_sire as a flag that 4520 * indicates if a resolvable multirt route has 4521 * already been found during the preceding 4522 * loops. If it is not the case, we may have 4523 * to send an ICMP error to report that the 4524 * destination is unreachable. We do not 4525 * IRE_REFHOLD first_sire. 4526 */ 4527 if (first_sire == NULL) { 4528 first_sire = sire; 4529 } 4530 } 4531 } 4532 if ((ire == NULL) || (ire == sire)) { 4533 /* 4534 * either ire == NULL (the destination cannot be 4535 * resolved) or ire == sire (the gateway cannot be 4536 * resolved). At this point, there are no more routes 4537 * to resolve for the destination, thus we exit. 4538 */ 4539 if (ip_debug > 3) { 4540 /* ip2dbg */ 4541 pr_addr_dbg("ip_newroute_v6: " 4542 "can't resolve %s\n", AF_INET6, v6dstp); 4543 } 4544 ip3dbg(("ip_newroute_v6: " 4545 "ire %p, sire %p, first_sire %p\n", 4546 (void *)ire, (void *)sire, (void *)first_sire)); 4547 4548 if (sire != NULL) { 4549 ire_refrele(sire); 4550 sire = NULL; 4551 } 4552 4553 if (first_sire != NULL) { 4554 /* 4555 * At least one multirt route has been found 4556 * in the same ip_newroute() call; there is no 4557 * need to report an ICMP error. 4558 * first_sire was not IRE_REFHOLDed. 
4559 */ 4560 MULTIRT_DEBUG_UNTAG(first_mp); 4561 freemsg(first_mp); 4562 return; 4563 } 4564 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4565 RTA_DST, ipst); 4566 goto icmp_err_ret; 4567 } 4568 4569 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4570 4571 /* 4572 * Verify that the returned IRE does not have either the 4573 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4574 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4575 */ 4576 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4577 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4578 goto icmp_err_ret; 4579 4580 /* 4581 * Increment the ire_ob_pkt_count field for ire if it is an 4582 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4583 * increment the same for the parent IRE, sire, if it is some 4584 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4585 */ 4586 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4587 UPDATE_OB_PKT_COUNT(ire); 4588 ire->ire_last_used_time = lbolt; 4589 } 4590 4591 if (sire != NULL) { 4592 mutex_enter(&sire->ire_lock); 4593 v6gw = sire->ire_gateway_addr_v6; 4594 mutex_exit(&sire->ire_lock); 4595 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4596 IRE_INTERFACE)) == 0); 4597 UPDATE_OB_PKT_COUNT(sire); 4598 sire->ire_last_used_time = lbolt; 4599 } else { 4600 v6gw = ipv6_all_zeros; 4601 } 4602 4603 /* 4604 * We have a route to reach the destination. 4605 * 4606 * 1) If the interface is part of ill group, try to get a new 4607 * ill taking load spreading into account. 4608 * 4609 * 2) After selecting the ill, get a source address that might 4610 * create good inbound load spreading and that matches the 4611 * right scope. ipif_select_source_v6 does this for us. 4612 * 4613 * If the application specified the ill (ifindex), we still 4614 * load spread. Only if the packets needs to go out specifically 4615 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4616 * IPV6_BOUND_PIF we don't try to use a different ill for load 4617 * spreading. 4618 */ 4619 if (!do_attach_ill) { 4620 /* 4621 * If the interface belongs to an interface group, 4622 * make sure the next possible interface in the group 4623 * is used. This encourages load spreading among 4624 * peers in an interface group. However, in the case 4625 * of multirouting, load spreading is not used, as we 4626 * actually want to replicate outgoing packets through 4627 * particular interfaces. 4628 * 4629 * Note: While we pick a dst_ill we are really only 4630 * interested in the ill for load spreading. 4631 * The source ipif is determined by source address 4632 * selection below. 4633 */ 4634 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4635 dst_ill = ire->ire_ipif->ipif_ill; 4636 /* For uniformity do a refhold */ 4637 ill_refhold(dst_ill); 4638 } else { 4639 /* 4640 * If we are here trying to create an IRE_CACHE 4641 * for an offlink destination and have the 4642 * IRE_CACHE for the next hop and the latter is 4643 * using virtual IP source address selection i.e 4644 * it's ire->ire_ipif is pointing to a virtual 4645 * network interface (vni) then 4646 * ip_newroute_get_dst_ll() will return the vni 4647 * interface as the dst_ill. Since the vni is 4648 * virtual i.e not associated with any physical 4649 * interface, it cannot be the dst_ill, hence 4650 * in such a case call ip_newroute_get_dst_ll() 4651 * with the stq_ill instead of the ire_ipif ILL. 4652 * The function returns a refheld ill. 
4653 */ 4654 if ((ire->ire_type == IRE_CACHE) && 4655 IS_VNI(ire->ire_ipif->ipif_ill)) 4656 dst_ill = ip_newroute_get_dst_ill_v6( 4657 ire->ire_stq->q_ptr); 4658 else 4659 dst_ill = ip_newroute_get_dst_ill_v6( 4660 ire->ire_ipif->ipif_ill); 4661 } 4662 if (dst_ill == NULL) { 4663 if (ip_debug > 2) { 4664 pr_addr_dbg("ip_newroute_v6 : no dst " 4665 "ill for dst %s\n", 4666 AF_INET6, v6dstp); 4667 } 4668 goto icmp_err_ret; 4669 } else if (dst_ill->ill_group == NULL && ill != NULL && 4670 dst_ill != ill) { 4671 /* 4672 * If "ill" is not part of any group, we should 4673 * have found a route matching "ill" as we 4674 * called ire_ftable_lookup_v6 with 4675 * MATCH_IRE_ILL_GROUP. 4676 * Rather than asserting when there is a 4677 * mismatch, we just drop the packet. 4678 */ 4679 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4680 "dst_ill %s ill %s\n", 4681 dst_ill->ill_name, 4682 ill->ill_name)); 4683 goto icmp_err_ret; 4684 } 4685 } else { 4686 dst_ill = ire->ire_ipif->ipif_ill; 4687 /* For uniformity do refhold */ 4688 ill_refhold(dst_ill); 4689 /* 4690 * We should have found a route matching ill as we 4691 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4692 * Rather than asserting, while there is a mismatch, 4693 * we just drop the packet. 4694 */ 4695 if (dst_ill != ill) { 4696 ip0dbg(("ip_newroute_v6: Packet dropped as " 4697 "IP6I_ATTACH_IF ill is %s, " 4698 "ire->ire_ipif->ipif_ill is %s\n", 4699 ill->ill_name, 4700 dst_ill->ill_name)); 4701 goto icmp_err_ret; 4702 } 4703 } 4704 /* 4705 * Pick a source address which matches the scope of the 4706 * destination address. 4707 * For RTF_SETSRC routes, the source address is imposed by the 4708 * parent ire (sire). 4709 */ 4710 ASSERT(src_ipif == NULL); 4711 if (ire->ire_type == IRE_IF_RESOLVER && 4712 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4713 ip6_asp_can_lookup(ipst)) { 4714 /* 4715 * The ire cache entry we're adding is for the 4716 * gateway itself. 
The source address in this case 4717 * is relative to the gateway's address. 4718 */ 4719 ip6_asp_table_held = B_TRUE; 4720 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4721 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4722 if (src_ipif != NULL) 4723 ire_marks |= IRE_MARK_USESRC_CHECK; 4724 } else { 4725 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4726 /* 4727 * Check that the ipif matching the requested 4728 * source address still exists. 4729 */ 4730 src_ipif = ipif_lookup_addr_v6( 4731 &sire->ire_src_addr_v6, NULL, zoneid, 4732 NULL, NULL, NULL, NULL, ipst); 4733 } 4734 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4735 uint_t restrict_ill = RESTRICT_TO_NONE; 4736 4737 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4738 & IP6I_ATTACH_IF) 4739 restrict_ill = RESTRICT_TO_ILL; 4740 ip6_asp_table_held = B_TRUE; 4741 src_ipif = ipif_select_source_v6(dst_ill, 4742 v6dstp, restrict_ill, 4743 IPV6_PREFER_SRC_DEFAULT, zoneid); 4744 if (src_ipif != NULL) 4745 ire_marks |= IRE_MARK_USESRC_CHECK; 4746 } 4747 } 4748 4749 if (src_ipif == NULL) { 4750 if (ip_debug > 2) { 4751 /* ip1dbg */ 4752 pr_addr_dbg("ip_newroute_v6: no src for " 4753 "dst %s\n, ", AF_INET6, v6dstp); 4754 printf("ip_newroute_v6: interface name %s\n", 4755 dst_ill->ill_name); 4756 } 4757 goto icmp_err_ret; 4758 } 4759 4760 if (ip_debug > 3) { 4761 /* ip2dbg */ 4762 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4763 AF_INET6, &v6gw); 4764 } 4765 ip2dbg(("\tire type %s (%d)\n", 4766 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4767 4768 /* 4769 * At this point in ip_newroute_v6(), ire is either the 4770 * IRE_CACHE of the next-hop gateway for an off-subnet 4771 * destination or an IRE_INTERFACE type that should be used 4772 * to resolve an on-subnet destination or an on-subnet 4773 * next-hop gateway. 4774 * 4775 * In the IRE_CACHE case, we have the following : 4776 * 4777 * 1) src_ipif - used for getting a source address. 
4778 * 4779 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4780 * means packets using this IRE_CACHE will go out on dst_ill. 4781 * 4782 * 3) The IRE sire will point to the prefix that is the longest 4783 * matching route for the destination. These prefix types 4784 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4785 * 4786 * The newly created IRE_CACHE entry for the off-subnet 4787 * destination is tied to both the prefix route and the 4788 * interface route used to resolve the next-hop gateway 4789 * via the ire_phandle and ire_ihandle fields, respectively. 4790 * 4791 * In the IRE_INTERFACE case, we have the following : 4792 * 4793 * 1) src_ipif - used for getting a source address. 4794 * 4795 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4796 * means packets using the IRE_CACHE that we will build 4797 * here will go out on dst_ill. 4798 * 4799 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4800 * to be created will only be tied to the IRE_INTERFACE that 4801 * was derived from the ire_ihandle field. 4802 * 4803 * If sire is non-NULL, it means the destination is off-link 4804 * and we will first create the IRE_CACHE for the gateway. 4805 * Next time through ip_newroute_v6, we will create the 4806 * IRE_CACHE for the final destination as described above. 4807 */ 4808 save_ire = ire; 4809 switch (ire->ire_type) { 4810 case IRE_CACHE: { 4811 ire_t *ipif_ire; 4812 4813 ASSERT(sire != NULL); 4814 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4815 mutex_enter(&ire->ire_lock); 4816 v6gw = ire->ire_gateway_addr_v6; 4817 mutex_exit(&ire->ire_lock); 4818 } 4819 /* 4820 * We need 3 ire's to create a new cache ire for an 4821 * off-link destination from the cache ire of the 4822 * gateway. 4823 * 4824 * 1. The prefix ire 'sire' 4825 * 2. The cache ire of the gateway 'ire' 4826 * 3. The interface ire 'ipif_ire' 4827 * 4828 * We have (1) and (2). We lookup (3) below. 
4829 * 4830 * If there is no interface route to the gateway, 4831 * it is a race condition, where we found the cache 4832 * but the inteface route has been deleted. 4833 */ 4834 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4835 if (ipif_ire == NULL) { 4836 ip1dbg(("ip_newroute_v6:" 4837 "ire_ihandle_lookup_offlink_v6 failed\n")); 4838 goto icmp_err_ret; 4839 } 4840 /* 4841 * Assume DL_UNITDATA_REQ is same for all physical 4842 * interfaces in the ifgrp. If it isn't, this code will 4843 * have to be seriously rewhacked to allow the 4844 * fastpath probing (such that I cache the link 4845 * header in the IRE_CACHE) to work over ifgrps. 4846 * We have what we need to build an IRE_CACHE. 4847 */ 4848 /* 4849 * Note: the new ire inherits RTF_SETSRC 4850 * and RTF_MULTIRT to propagate these flags from prefix 4851 * to cache. 4852 */ 4853 4854 /* 4855 * Check cached gateway IRE for any security 4856 * attributes; if found, associate the gateway 4857 * credentials group to the destination IRE. 
4858 */ 4859 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4860 mutex_enter(&attrp->igsa_lock); 4861 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4862 GCGRP_REFHOLD(gcgrp); 4863 mutex_exit(&attrp->igsa_lock); 4864 } 4865 4866 ire = ire_create_v6( 4867 v6dstp, /* dest address */ 4868 &ipv6_all_ones, /* mask */ 4869 &src_ipif->ipif_v6src_addr, /* source address */ 4870 &v6gw, /* gateway address */ 4871 &save_ire->ire_max_frag, 4872 NULL, /* src nce */ 4873 dst_ill->ill_rq, /* recv-from queue */ 4874 dst_ill->ill_wq, /* send-to queue */ 4875 IRE_CACHE, 4876 src_ipif, 4877 &sire->ire_mask_v6, /* Parent mask */ 4878 sire->ire_phandle, /* Parent handle */ 4879 ipif_ire->ire_ihandle, /* Interface handle */ 4880 sire->ire_flags & /* flags if any */ 4881 (RTF_SETSRC | RTF_MULTIRT), 4882 &(sire->ire_uinfo), 4883 NULL, 4884 gcgrp, 4885 ipst); 4886 4887 if (ire == NULL) { 4888 if (gcgrp != NULL) { 4889 GCGRP_REFRELE(gcgrp); 4890 gcgrp = NULL; 4891 } 4892 ire_refrele(save_ire); 4893 ire_refrele(ipif_ire); 4894 break; 4895 } 4896 4897 /* reference now held by IRE */ 4898 gcgrp = NULL; 4899 4900 ire->ire_marks |= ire_marks; 4901 4902 /* 4903 * Prevent sire and ipif_ire from getting deleted. The 4904 * newly created ire is tied to both of them via the 4905 * phandle and ihandle respectively. 4906 */ 4907 IRB_REFHOLD(sire->ire_bucket); 4908 /* Has it been removed already ? */ 4909 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4910 IRB_REFRELE(sire->ire_bucket); 4911 ire_refrele(ipif_ire); 4912 ire_refrele(save_ire); 4913 break; 4914 } 4915 4916 IRB_REFHOLD(ipif_ire->ire_bucket); 4917 /* Has it been removed already ? 
*/ 4918 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4919 IRB_REFRELE(ipif_ire->ire_bucket); 4920 IRB_REFRELE(sire->ire_bucket); 4921 ire_refrele(ipif_ire); 4922 ire_refrele(save_ire); 4923 break; 4924 } 4925 4926 xmit_mp = first_mp; 4927 if (ire->ire_flags & RTF_MULTIRT) { 4928 copy_mp = copymsg(first_mp); 4929 if (copy_mp != NULL) { 4930 xmit_mp = copy_mp; 4931 MULTIRT_DEBUG_TAG(first_mp); 4932 } 4933 } 4934 ire_add_then_send(q, ire, xmit_mp); 4935 if (ip6_asp_table_held) { 4936 ip6_asp_table_refrele(ipst); 4937 ip6_asp_table_held = B_FALSE; 4938 } 4939 ire_refrele(save_ire); 4940 4941 /* Assert that sire is not deleted yet. */ 4942 ASSERT(sire->ire_ptpn != NULL); 4943 IRB_REFRELE(sire->ire_bucket); 4944 4945 /* Assert that ipif_ire is not deleted yet. */ 4946 ASSERT(ipif_ire->ire_ptpn != NULL); 4947 IRB_REFRELE(ipif_ire->ire_bucket); 4948 ire_refrele(ipif_ire); 4949 4950 if (copy_mp != NULL) { 4951 /* 4952 * Search for the next unresolved 4953 * multirt route. 4954 */ 4955 copy_mp = NULL; 4956 ipif_ire = NULL; 4957 ire = NULL; 4958 /* re-enter the loop */ 4959 multirt_resolve_next = B_TRUE; 4960 continue; 4961 } 4962 ire_refrele(sire); 4963 ill_refrele(dst_ill); 4964 ipif_refrele(src_ipif); 4965 return; 4966 } 4967 case IRE_IF_NORESOLVER: 4968 /* 4969 * We have what we need to build an IRE_CACHE. 4970 * 4971 * handle the Gated case, where we create 4972 * a NORESOLVER route for loopback. 4973 */ 4974 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4975 break; 4976 /* 4977 * TSol note: We are creating the ire cache for the 4978 * destination 'dst'. If 'dst' is offlink, going 4979 * through the first hop 'gw', the security attributes 4980 * of 'dst' must be set to point to the gateway 4981 * credentials of gateway 'gw'. If 'dst' is onlink, it 4982 * is possible that 'dst' is a potential gateway that is 4983 * referenced by some route that has some security 4984 * attributes. 
Thus in the former case, we need to do a 4985 * gcgrp_lookup of 'gw' while in the latter case we 4986 * need to do gcgrp_lookup of 'dst' itself. 4987 */ 4988 ga.ga_af = AF_INET6; 4989 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4990 ga.ga_addr = v6gw; 4991 else 4992 ga.ga_addr = *v6dstp; 4993 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4994 4995 /* 4996 * Note: the new ire inherits sire flags RTF_SETSRC 4997 * and RTF_MULTIRT to propagate those rules from prefix 4998 * to cache. 4999 */ 5000 ire = ire_create_v6( 5001 v6dstp, /* dest address */ 5002 &ipv6_all_ones, /* mask */ 5003 &src_ipif->ipif_v6src_addr, /* source address */ 5004 &v6gw, /* gateway address */ 5005 &save_ire->ire_max_frag, 5006 NULL, /* no src nce */ 5007 dst_ill->ill_rq, /* recv-from queue */ 5008 dst_ill->ill_wq, /* send-to queue */ 5009 IRE_CACHE, 5010 src_ipif, 5011 &save_ire->ire_mask_v6, /* Parent mask */ 5012 (sire != NULL) ? /* Parent handle */ 5013 sire->ire_phandle : 0, 5014 save_ire->ire_ihandle, /* Interface handle */ 5015 (sire != NULL) ? /* flags if any */ 5016 sire->ire_flags & 5017 (RTF_SETSRC | RTF_MULTIRT) : 0, 5018 &(save_ire->ire_uinfo), 5019 NULL, 5020 gcgrp, 5021 ipst); 5022 5023 if (ire == NULL) { 5024 if (gcgrp != NULL) { 5025 GCGRP_REFRELE(gcgrp); 5026 gcgrp = NULL; 5027 } 5028 ire_refrele(save_ire); 5029 break; 5030 } 5031 5032 /* reference now held by IRE */ 5033 gcgrp = NULL; 5034 5035 ire->ire_marks |= ire_marks; 5036 5037 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5038 dst = v6gw; 5039 else 5040 dst = *v6dstp; 5041 err = ndp_noresolver(dst_ill, &dst); 5042 if (err != 0) { 5043 ire_refrele(save_ire); 5044 break; 5045 } 5046 5047 /* Prevent save_ire from getting deleted */ 5048 IRB_REFHOLD(save_ire->ire_bucket); 5049 /* Has it been removed already ? 
*/ 5050 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5051 IRB_REFRELE(save_ire->ire_bucket); 5052 ire_refrele(save_ire); 5053 break; 5054 } 5055 5056 xmit_mp = first_mp; 5057 /* 5058 * In case of MULTIRT, a copy of the current packet 5059 * to send is made to further re-enter the 5060 * loop and attempt another route resolution 5061 */ 5062 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5063 copy_mp = copymsg(first_mp); 5064 if (copy_mp != NULL) { 5065 xmit_mp = copy_mp; 5066 MULTIRT_DEBUG_TAG(first_mp); 5067 } 5068 } 5069 ire_add_then_send(q, ire, xmit_mp); 5070 if (ip6_asp_table_held) { 5071 ip6_asp_table_refrele(ipst); 5072 ip6_asp_table_held = B_FALSE; 5073 } 5074 5075 /* Assert that it is not deleted yet. */ 5076 ASSERT(save_ire->ire_ptpn != NULL); 5077 IRB_REFRELE(save_ire->ire_bucket); 5078 ire_refrele(save_ire); 5079 5080 if (copy_mp != NULL) { 5081 /* 5082 * If we found a (no)resolver, we ignore any 5083 * trailing top priority IRE_CACHE in 5084 * further loops. This ensures that we do not 5085 * omit any (no)resolver despite the priority 5086 * in this call. 5087 * IRE_CACHE, if any, will be processed 5088 * by another thread entering ip_newroute(), 5089 * (on resolver response, for example). 5090 * We use this to force multiple parallel 5091 * resolution as soon as a packet needs to be 5092 * sent. The result is, after one packet 5093 * emission all reachable routes are generally 5094 * resolved. 5095 * Otherwise, complete resolution of MULTIRT 5096 * routes would require several emissions as 5097 * side effect. 5098 */ 5099 multirt_flags &= ~MULTIRT_CACHEGW; 5100 5101 /* 5102 * Search for the next unresolved multirt 5103 * route. 
5104 */ 5105 copy_mp = NULL; 5106 save_ire = NULL; 5107 ire = NULL; 5108 /* re-enter the loop */ 5109 multirt_resolve_next = B_TRUE; 5110 continue; 5111 } 5112 5113 /* Don't need sire anymore */ 5114 if (sire != NULL) 5115 ire_refrele(sire); 5116 ill_refrele(dst_ill); 5117 ipif_refrele(src_ipif); 5118 return; 5119 5120 case IRE_IF_RESOLVER: 5121 /* 5122 * We can't build an IRE_CACHE yet, but at least we 5123 * found a resolver that can help. 5124 */ 5125 dst = *v6dstp; 5126 5127 /* 5128 * To be at this point in the code with a non-zero gw 5129 * means that dst is reachable through a gateway that 5130 * we have never resolved. By changing dst to the gw 5131 * addr we resolve the gateway first. When 5132 * ire_add_then_send() tries to put the IP dg to dst, 5133 * it will reenter ip_newroute() at which time we will 5134 * find the IRE_CACHE for the gw and create another 5135 * IRE_CACHE above (for dst itself). 5136 */ 5137 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5138 save_dst = dst; 5139 dst = v6gw; 5140 v6gw = ipv6_all_zeros; 5141 } 5142 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5143 /* 5144 * Ask the external resolver to do its thing. 
5145 * Make an mblk chain in the following form: 5146 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5147 */ 5148 mblk_t *ire_mp; 5149 mblk_t *areq_mp; 5150 areq_t *areq; 5151 in6_addr_t *addrp; 5152 5153 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5154 if (ip6_asp_table_held) { 5155 ip6_asp_table_refrele(ipst); 5156 ip6_asp_table_held = B_FALSE; 5157 } 5158 ire = ire_create_mp_v6( 5159 &dst, /* dest address */ 5160 &ipv6_all_ones, /* mask */ 5161 &src_ipif->ipif_v6src_addr, 5162 /* source address */ 5163 &v6gw, /* gateway address */ 5164 NULL, /* no src nce */ 5165 dst_ill->ill_rq, /* recv-from queue */ 5166 dst_ill->ill_wq, /* send-to queue */ 5167 IRE_CACHE, 5168 src_ipif, 5169 &save_ire->ire_mask_v6, /* Parent mask */ 5170 0, 5171 save_ire->ire_ihandle, 5172 /* Interface handle */ 5173 0, /* flags if any */ 5174 &(save_ire->ire_uinfo), 5175 NULL, 5176 NULL, 5177 ipst); 5178 5179 ire_refrele(save_ire); 5180 if (ire == NULL) { 5181 ip1dbg(("ip_newroute_v6:" 5182 "ire is NULL\n")); 5183 break; 5184 } 5185 5186 if ((sire != NULL) && 5187 (sire->ire_flags & RTF_MULTIRT)) { 5188 /* 5189 * processing a copy of the packet to 5190 * send for further resolution loops 5191 */ 5192 copy_mp = copymsg(first_mp); 5193 if (copy_mp != NULL) 5194 MULTIRT_DEBUG_TAG(copy_mp); 5195 } 5196 ire->ire_marks |= ire_marks; 5197 ire_mp = ire->ire_mp; 5198 /* 5199 * Now create or find an nce for this interface. 5200 * The hw addr will need to to be set from 5201 * the reply to the AR_ENTRY_QUERY that 5202 * we're about to send. This will be done in 5203 * ire_add_v6(). 5204 */ 5205 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5206 switch (err) { 5207 case 0: 5208 /* 5209 * New cache entry created. 5210 * Break, then ask the external 5211 * resolver. 5212 */ 5213 break; 5214 case EINPROGRESS: 5215 /* 5216 * Resolution in progress; 5217 * packet has been queued by 5218 * ndp_resolver(). 5219 */ 5220 ire_delete(ire); 5221 ire = NULL; 5222 /* 5223 * Check if another multirt 5224 * route must be resolved. 
5225 */ 5226 if (copy_mp != NULL) { 5227 /* 5228 * If we found a resolver, we 5229 * ignore any trailing top 5230 * priority IRE_CACHE in 5231 * further loops. The reason is 5232 * the same as for noresolver. 5233 */ 5234 multirt_flags &= 5235 ~MULTIRT_CACHEGW; 5236 /* 5237 * Search for the next 5238 * unresolved multirt route. 5239 */ 5240 first_mp = copy_mp; 5241 copy_mp = NULL; 5242 mp = first_mp; 5243 if (mp->b_datap->db_type == 5244 M_CTL) { 5245 mp = mp->b_cont; 5246 } 5247 ASSERT(sire != NULL); 5248 dst = save_dst; 5249 /* 5250 * re-enter the loop 5251 */ 5252 multirt_resolve_next = 5253 B_TRUE; 5254 continue; 5255 } 5256 5257 if (sire != NULL) 5258 ire_refrele(sire); 5259 ill_refrele(dst_ill); 5260 ipif_refrele(src_ipif); 5261 return; 5262 default: 5263 /* 5264 * Transient error; packet will be 5265 * freed. 5266 */ 5267 ire_delete(ire); 5268 ire = NULL; 5269 break; 5270 } 5271 if (err != 0) 5272 break; 5273 /* 5274 * Now set up the AR_ENTRY_QUERY and send it. 5275 */ 5276 areq_mp = ill_arp_alloc(dst_ill, 5277 (uchar_t *)&ipv6_areq_template, 5278 (caddr_t)&dst); 5279 if (areq_mp == NULL) { 5280 ip1dbg(("ip_newroute_v6:" 5281 "areq_mp is NULL\n")); 5282 freemsg(ire_mp); 5283 break; 5284 } 5285 areq = (areq_t *)areq_mp->b_rptr; 5286 addrp = (in6_addr_t *)((char *)areq + 5287 areq->areq_target_addr_offset); 5288 *addrp = dst; 5289 addrp = (in6_addr_t *)((char *)areq + 5290 areq->areq_sender_addr_offset); 5291 *addrp = src_ipif->ipif_v6src_addr; 5292 /* 5293 * link the chain, then send up to the resolver. 5294 */ 5295 linkb(areq_mp, ire_mp); 5296 linkb(areq_mp, mp); 5297 ip1dbg(("ip_newroute_v6:" 5298 "putnext to resolver\n")); 5299 putnext(dst_ill->ill_rq, areq_mp); 5300 /* 5301 * Check if another multirt route 5302 * must be resolved. 5303 */ 5304 ire = NULL; 5305 if (copy_mp != NULL) { 5306 /* 5307 * If we find a resolver, we ignore any 5308 * trailing top priority IRE_CACHE in 5309 * further loops. The reason is the 5310 * same as for noresolver. 
5311 */ 5312 multirt_flags &= ~MULTIRT_CACHEGW; 5313 /* 5314 * Search for the next unresolved 5315 * multirt route. 5316 */ 5317 first_mp = copy_mp; 5318 copy_mp = NULL; 5319 mp = first_mp; 5320 if (mp->b_datap->db_type == M_CTL) { 5321 mp = mp->b_cont; 5322 } 5323 ASSERT(sire != NULL); 5324 dst = save_dst; 5325 /* 5326 * re-enter the loop 5327 */ 5328 multirt_resolve_next = B_TRUE; 5329 continue; 5330 } 5331 5332 if (sire != NULL) 5333 ire_refrele(sire); 5334 ill_refrele(dst_ill); 5335 ipif_refrele(src_ipif); 5336 return; 5337 } 5338 /* 5339 * Non-external resolver case. 5340 * 5341 * TSol note: Please see the note above the 5342 * IRE_IF_NORESOLVER case. 5343 */ 5344 ga.ga_af = AF_INET6; 5345 ga.ga_addr = dst; 5346 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5347 5348 ire = ire_create_v6( 5349 &dst, /* dest address */ 5350 &ipv6_all_ones, /* mask */ 5351 &src_ipif->ipif_v6src_addr, /* source address */ 5352 &v6gw, /* gateway address */ 5353 &save_ire->ire_max_frag, 5354 NULL, /* no src nce */ 5355 dst_ill->ill_rq, /* recv-from queue */ 5356 dst_ill->ill_wq, /* send-to queue */ 5357 IRE_CACHE, 5358 src_ipif, 5359 &save_ire->ire_mask_v6, /* Parent mask */ 5360 0, 5361 save_ire->ire_ihandle, /* Interface handle */ 5362 0, /* flags if any */ 5363 &(save_ire->ire_uinfo), 5364 NULL, 5365 gcgrp, 5366 ipst); 5367 5368 if (ire == NULL) { 5369 if (gcgrp != NULL) { 5370 GCGRP_REFRELE(gcgrp); 5371 gcgrp = NULL; 5372 } 5373 ire_refrele(save_ire); 5374 break; 5375 } 5376 5377 /* reference now held by IRE */ 5378 gcgrp = NULL; 5379 5380 if ((sire != NULL) && 5381 (sire->ire_flags & RTF_MULTIRT)) { 5382 copy_mp = copymsg(first_mp); 5383 if (copy_mp != NULL) 5384 MULTIRT_DEBUG_TAG(copy_mp); 5385 } 5386 5387 ire->ire_marks |= ire_marks; 5388 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5389 switch (err) { 5390 case 0: 5391 /* Prevent save_ire from getting deleted */ 5392 IRB_REFHOLD(save_ire->ire_bucket); 5393 /* Has it been removed already ? 
*/ 5394 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5395 IRB_REFRELE(save_ire->ire_bucket); 5396 ire_refrele(save_ire); 5397 break; 5398 } 5399 5400 /* 5401 * We have a resolved cache entry, 5402 * add in the IRE. 5403 */ 5404 ire_add_then_send(q, ire, first_mp); 5405 if (ip6_asp_table_held) { 5406 ip6_asp_table_refrele(ipst); 5407 ip6_asp_table_held = B_FALSE; 5408 } 5409 5410 /* Assert that it is not deleted yet. */ 5411 ASSERT(save_ire->ire_ptpn != NULL); 5412 IRB_REFRELE(save_ire->ire_bucket); 5413 ire_refrele(save_ire); 5414 /* 5415 * Check if another multirt route 5416 * must be resolved. 5417 */ 5418 ire = NULL; 5419 if (copy_mp != NULL) { 5420 /* 5421 * If we find a resolver, we ignore any 5422 * trailing top priority IRE_CACHE in 5423 * further loops. The reason is the 5424 * same as for noresolver. 5425 */ 5426 multirt_flags &= ~MULTIRT_CACHEGW; 5427 /* 5428 * Search for the next unresolved 5429 * multirt route. 5430 */ 5431 first_mp = copy_mp; 5432 copy_mp = NULL; 5433 mp = first_mp; 5434 if (mp->b_datap->db_type == M_CTL) { 5435 mp = mp->b_cont; 5436 } 5437 ASSERT(sire != NULL); 5438 dst = save_dst; 5439 /* 5440 * re-enter the loop 5441 */ 5442 multirt_resolve_next = B_TRUE; 5443 continue; 5444 } 5445 5446 if (sire != NULL) 5447 ire_refrele(sire); 5448 ill_refrele(dst_ill); 5449 ipif_refrele(src_ipif); 5450 return; 5451 5452 case EINPROGRESS: 5453 /* 5454 * mp was consumed - presumably queued. 5455 * No need for ire, presumably resolution is 5456 * in progress, and ire will be added when the 5457 * address is resolved. 5458 */ 5459 if (ip6_asp_table_held) { 5460 ip6_asp_table_refrele(ipst); 5461 ip6_asp_table_held = B_FALSE; 5462 } 5463 ASSERT(ire->ire_nce == NULL); 5464 ire_delete(ire); 5465 ire_refrele(save_ire); 5466 /* 5467 * Check if another multirt route 5468 * must be resolved. 
5469 */ 5470 ire = NULL; 5471 if (copy_mp != NULL) { 5472 /* 5473 * If we find a resolver, we ignore any 5474 * trailing top priority IRE_CACHE in 5475 * further loops. The reason is the 5476 * same as for noresolver. 5477 */ 5478 multirt_flags &= ~MULTIRT_CACHEGW; 5479 /* 5480 * Search for the next unresolved 5481 * multirt route. 5482 */ 5483 first_mp = copy_mp; 5484 copy_mp = NULL; 5485 mp = first_mp; 5486 if (mp->b_datap->db_type == M_CTL) { 5487 mp = mp->b_cont; 5488 } 5489 ASSERT(sire != NULL); 5490 dst = save_dst; 5491 /* 5492 * re-enter the loop 5493 */ 5494 multirt_resolve_next = B_TRUE; 5495 continue; 5496 } 5497 if (sire != NULL) 5498 ire_refrele(sire); 5499 ill_refrele(dst_ill); 5500 ipif_refrele(src_ipif); 5501 return; 5502 default: 5503 /* Some transient error */ 5504 ASSERT(ire->ire_nce == NULL); 5505 ire_refrele(save_ire); 5506 break; 5507 } 5508 break; 5509 default: 5510 break; 5511 } 5512 if (ip6_asp_table_held) { 5513 ip6_asp_table_refrele(ipst); 5514 ip6_asp_table_held = B_FALSE; 5515 } 5516 } while (multirt_resolve_next); 5517 5518 err_ret: 5519 ip1dbg(("ip_newroute_v6: dropped\n")); 5520 if (src_ipif != NULL) 5521 ipif_refrele(src_ipif); 5522 if (dst_ill != NULL) { 5523 need_rele = B_TRUE; 5524 ill = dst_ill; 5525 } 5526 if (ill != NULL) { 5527 if (mp->b_prev != NULL) { 5528 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5529 } else { 5530 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5531 } 5532 5533 if (need_rele) 5534 ill_refrele(ill); 5535 } else { 5536 if (mp->b_prev != NULL) { 5537 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5538 } else { 5539 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5540 } 5541 } 5542 /* Did this packet originate externally? 
*/ 5543 if (mp->b_prev) { 5544 mp->b_next = NULL; 5545 mp->b_prev = NULL; 5546 } 5547 if (copy_mp != NULL) { 5548 MULTIRT_DEBUG_UNTAG(copy_mp); 5549 freemsg(copy_mp); 5550 } 5551 MULTIRT_DEBUG_UNTAG(first_mp); 5552 freemsg(first_mp); 5553 if (ire != NULL) 5554 ire_refrele(ire); 5555 if (sire != NULL) 5556 ire_refrele(sire); 5557 return; 5558 5559 icmp_err_ret: 5560 if (ip6_asp_table_held) 5561 ip6_asp_table_refrele(ipst); 5562 if (src_ipif != NULL) 5563 ipif_refrele(src_ipif); 5564 if (dst_ill != NULL) { 5565 need_rele = B_TRUE; 5566 ill = dst_ill; 5567 } 5568 ip1dbg(("ip_newroute_v6: no route\n")); 5569 if (sire != NULL) 5570 ire_refrele(sire); 5571 /* 5572 * We need to set sire to NULL to avoid double freeing if we 5573 * ever goto err_ret from below. 5574 */ 5575 sire = NULL; 5576 ip6h = (ip6_t *)mp->b_rptr; 5577 /* Skip ip6i_t header if present */ 5578 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5579 /* Make sure the IPv6 header is present */ 5580 if ((mp->b_wptr - (uchar_t *)ip6h) < 5581 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5582 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5583 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5584 goto err_ret; 5585 } 5586 } 5587 mp->b_rptr += sizeof (ip6i_t); 5588 ip6h = (ip6_t *)mp->b_rptr; 5589 } 5590 /* Did this packet originate externally? 
*/ 5591 if (mp->b_prev) { 5592 if (ill != NULL) { 5593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5594 } else { 5595 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5596 } 5597 mp->b_next = NULL; 5598 mp->b_prev = NULL; 5599 q = WR(q); 5600 } else { 5601 if (ill != NULL) { 5602 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5603 } else { 5604 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5605 } 5606 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5607 /* Failed */ 5608 if (copy_mp != NULL) { 5609 MULTIRT_DEBUG_UNTAG(copy_mp); 5610 freemsg(copy_mp); 5611 } 5612 MULTIRT_DEBUG_UNTAG(first_mp); 5613 freemsg(first_mp); 5614 if (ire != NULL) 5615 ire_refrele(ire); 5616 if (need_rele) 5617 ill_refrele(ill); 5618 return; 5619 } 5620 } 5621 5622 if (need_rele) 5623 ill_refrele(ill); 5624 5625 /* 5626 * At this point we will have ire only if RTF_BLACKHOLE 5627 * or RTF_REJECT flags are set on the IRE. It will not 5628 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5629 */ 5630 if (ire != NULL) { 5631 if (ire->ire_flags & RTF_BLACKHOLE) { 5632 ire_refrele(ire); 5633 if (copy_mp != NULL) { 5634 MULTIRT_DEBUG_UNTAG(copy_mp); 5635 freemsg(copy_mp); 5636 } 5637 MULTIRT_DEBUG_UNTAG(first_mp); 5638 freemsg(first_mp); 5639 return; 5640 } 5641 ire_refrele(ire); 5642 } 5643 if (ip_debug > 3) { 5644 /* ip2dbg */ 5645 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5646 AF_INET6, v6dstp); 5647 } 5648 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5649 B_FALSE, B_FALSE, zoneid, ipst); 5650 } 5651 5652 /* 5653 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5654 * we need to send out a packet to a destination address for which we do not 5655 * have specific routing information. It is only used for multicast packets. 5656 * 5657 * If unspec_src we allow creating an IRE with source address zero. 5658 * ire_send_v6() will delete it after the packet is sent. 
5659 */ 5660 void 5661 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5662 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5663 { 5664 ire_t *ire = NULL; 5665 ipif_t *src_ipif = NULL; 5666 int err = 0; 5667 ill_t *dst_ill = NULL; 5668 ire_t *save_ire; 5669 ushort_t ire_marks = 0; 5670 ipsec_out_t *io; 5671 ill_t *attach_ill = NULL; 5672 ill_t *ill; 5673 ip6_t *ip6h; 5674 mblk_t *first_mp; 5675 boolean_t ip6i_present; 5676 ire_t *fire = NULL; 5677 mblk_t *copy_mp = NULL; 5678 boolean_t multirt_resolve_next; 5679 in6_addr_t *v6dstp = &v6dst; 5680 boolean_t ipif_held = B_FALSE; 5681 boolean_t ill_held = B_FALSE; 5682 boolean_t ip6_asp_table_held = B_FALSE; 5683 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5684 5685 /* 5686 * This loop is run only once in most cases. 5687 * We loop to resolve further routes only when the destination 5688 * can be reached through multiple RTF_MULTIRT-flagged ires. 5689 */ 5690 do { 5691 multirt_resolve_next = B_FALSE; 5692 if (dst_ill != NULL) { 5693 ill_refrele(dst_ill); 5694 dst_ill = NULL; 5695 } 5696 5697 if (src_ipif != NULL) { 5698 ipif_refrele(src_ipif); 5699 src_ipif = NULL; 5700 } 5701 ASSERT(ipif != NULL); 5702 ill = ipif->ipif_ill; 5703 5704 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5705 if (ip_debug > 2) { 5706 /* ip1dbg */ 5707 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5708 AF_INET6, v6dstp); 5709 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5710 ill->ill_name, ipif->ipif_isv6); 5711 } 5712 5713 first_mp = mp; 5714 if (mp->b_datap->db_type == M_CTL) { 5715 mp = mp->b_cont; 5716 io = (ipsec_out_t *)first_mp->b_rptr; 5717 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5718 } else { 5719 io = NULL; 5720 } 5721 5722 /* 5723 * If the interface is a pt-pt interface we look for an 5724 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5725 * local_address and the pt-pt destination address. 5726 * Otherwise we just match the local address. 
5727 */ 5728 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5729 goto err_ret; 5730 } 5731 /* 5732 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5733 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5734 * as it could be NULL. 5735 * 5736 * This information can appear either in an ip6i_t or an 5737 * IPSEC_OUT message. 5738 */ 5739 ip6h = (ip6_t *)mp->b_rptr; 5740 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5741 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5742 if (!ip6i_present || 5743 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5744 attach_ill = ip_grab_attach_ill(ill, first_mp, 5745 (ip6i_present ? 5746 ((ip6i_t *)ip6h)->ip6i_ifindex : 5747 io->ipsec_out_ill_index), B_TRUE, ipst); 5748 /* Failure case frees things for us. */ 5749 if (attach_ill == NULL) 5750 return; 5751 5752 /* 5753 * Check if we need an ire that will not be 5754 * looked up by anybody else i.e. HIDDEN. 5755 */ 5756 if (ill_is_probeonly(attach_ill)) 5757 ire_marks = IRE_MARK_HIDDEN; 5758 } 5759 } 5760 5761 /* 5762 * We check if an IRE_OFFSUBNET for the addr that goes through 5763 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5764 * RTF_MULTIRT flags must be honored. 5765 */ 5766 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5767 ip2dbg(("ip_newroute_ipif_v6: " 5768 "ipif_lookup_multi_ire_v6(" 5769 "ipif %p, dst %08x) = fire %p\n", 5770 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5771 (void *)fire)); 5772 5773 /* 5774 * If the application specified the ill (ifindex), we still 5775 * load spread. Only if the packets needs to go out specifically 5776 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5777 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5778 * multirouting, then we don't try to use a different ill for 5779 * load spreading. 
5780 */ 5781 if (attach_ill == NULL) { 5782 /* 5783 * If the interface belongs to an interface group, 5784 * make sure the next possible interface in the group 5785 * is used. This encourages load spreading among peers 5786 * in an interface group. 5787 * 5788 * Note: While we pick a dst_ill we are really only 5789 * interested in the ill for load spreading. The source 5790 * ipif is determined by source address selection below. 5791 */ 5792 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5793 dst_ill = ipif->ipif_ill; 5794 /* For uniformity do a refhold */ 5795 ill_refhold(dst_ill); 5796 } else { 5797 /* refheld by ip_newroute_get_dst_ill_v6 */ 5798 dst_ill = 5799 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5800 } 5801 if (dst_ill == NULL) { 5802 if (ip_debug > 2) { 5803 pr_addr_dbg("ip_newroute_ipif_v6: " 5804 "no dst ill for dst %s\n", 5805 AF_INET6, v6dstp); 5806 } 5807 goto err_ret; 5808 } 5809 } else { 5810 dst_ill = ipif->ipif_ill; 5811 /* 5812 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5813 * and IPV6_BOUND_PIF case. 5814 */ 5815 ASSERT(dst_ill == attach_ill); 5816 /* attach_ill is already refheld */ 5817 } 5818 /* 5819 * Pick a source address which matches the scope of the 5820 * destination address. 5821 * For RTF_SETSRC routes, the source address is imposed by the 5822 * parent ire (fire). 5823 */ 5824 ASSERT(src_ipif == NULL); 5825 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5826 /* 5827 * Check that the ipif matching the requested source 5828 * address still exists. 
5829 */ 5830 src_ipif = 5831 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5832 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5833 } 5834 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5835 uint_t restrict_ill = RESTRICT_TO_NONE; 5836 5837 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 5838 & IP6I_ATTACH_IF) 5839 restrict_ill = RESTRICT_TO_ILL; 5840 ip6_asp_table_held = B_TRUE; 5841 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5842 restrict_ill, IPV6_PREFER_SRC_DEFAULT, zoneid); 5843 } 5844 5845 if (src_ipif == NULL) { 5846 if (!unspec_src) { 5847 if (ip_debug > 2) { 5848 /* ip1dbg */ 5849 pr_addr_dbg("ip_newroute_ipif_v6: " 5850 "no src for dst %s\n,", 5851 AF_INET6, v6dstp); 5852 printf(" through interface %s\n", 5853 dst_ill->ill_name); 5854 } 5855 goto err_ret; 5856 } 5857 src_ipif = ipif; 5858 ipif_refhold(src_ipif); 5859 } 5860 ire = ipif_to_ire_v6(ipif); 5861 if (ire == NULL) { 5862 if (ip_debug > 2) { 5863 /* ip1dbg */ 5864 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5865 AF_INET6, &ipif->ipif_v6lcl_addr); 5866 printf("ip_newroute_ipif_v6: " 5867 "if %s\n", dst_ill->ill_name); 5868 } 5869 goto err_ret; 5870 } 5871 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5872 goto err_ret; 5873 5874 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5875 5876 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5877 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5878 if (ip_debug > 2) { 5879 /* ip1dbg */ 5880 pr_addr_dbg(" address %s\n", 5881 AF_INET6, &ire->ire_src_addr_v6); 5882 } 5883 save_ire = ire; 5884 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5885 (void *)ire, (void *)ipif)); 5886 5887 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5888 /* 5889 * an IRE_OFFSUBET was looked up 5890 * on that interface. 5891 * this ire has RTF_MULTIRT flag, 5892 * so the resolution loop 5893 * will be re-entered to resolve 5894 * additional routes on other 5895 * interfaces. 
For that purpose, 5896 * a copy of the packet is 5897 * made at this point. 5898 */ 5899 fire->ire_last_used_time = lbolt; 5900 copy_mp = copymsg(first_mp); 5901 if (copy_mp) { 5902 MULTIRT_DEBUG_TAG(copy_mp); 5903 } 5904 } 5905 5906 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5907 switch (ire->ire_type) { 5908 case IRE_IF_NORESOLVER: { 5909 /* 5910 * We have what we need to build an IRE_CACHE. 5911 * 5912 * handle the Gated case, where we create 5913 * a NORESOLVER route for loopback. 5914 */ 5915 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5916 break; 5917 /* 5918 * The newly created ire will inherit the flags of the 5919 * parent ire, if any. 5920 */ 5921 ire = ire_create_v6( 5922 v6dstp, /* dest address */ 5923 &ipv6_all_ones, /* mask */ 5924 &src_ipif->ipif_v6src_addr, /* source address */ 5925 NULL, /* gateway address */ 5926 &save_ire->ire_max_frag, 5927 NULL, /* no src nce */ 5928 dst_ill->ill_rq, /* recv-from queue */ 5929 dst_ill->ill_wq, /* send-to queue */ 5930 IRE_CACHE, 5931 src_ipif, 5932 NULL, 5933 (fire != NULL) ? /* Parent handle */ 5934 fire->ire_phandle : 0, 5935 save_ire->ire_ihandle, /* Interface handle */ 5936 (fire != NULL) ? 5937 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5938 0, 5939 &ire_uinfo_null, 5940 NULL, 5941 NULL, 5942 ipst); 5943 5944 if (ire == NULL) { 5945 ire_refrele(save_ire); 5946 break; 5947 } 5948 5949 ire->ire_marks |= ire_marks; 5950 5951 err = ndp_noresolver(dst_ill, v6dstp); 5952 if (err != 0) { 5953 ire_refrele(save_ire); 5954 break; 5955 } 5956 5957 /* Prevent save_ire from getting deleted */ 5958 IRB_REFHOLD(save_ire->ire_bucket); 5959 /* Has it been removed already ? 
*/ 5960 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5961 IRB_REFRELE(save_ire->ire_bucket); 5962 ire_refrele(save_ire); 5963 break; 5964 } 5965 5966 ire_add_then_send(q, ire, first_mp); 5967 if (ip6_asp_table_held) { 5968 ip6_asp_table_refrele(ipst); 5969 ip6_asp_table_held = B_FALSE; 5970 } 5971 5972 /* Assert that it is not deleted yet. */ 5973 ASSERT(save_ire->ire_ptpn != NULL); 5974 IRB_REFRELE(save_ire->ire_bucket); 5975 ire_refrele(save_ire); 5976 if (fire != NULL) { 5977 ire_refrele(fire); 5978 fire = NULL; 5979 } 5980 5981 /* 5982 * The resolution loop is re-entered if we 5983 * actually are in a multirouting case. 5984 */ 5985 if (copy_mp != NULL) { 5986 boolean_t need_resolve = 5987 ire_multirt_need_resolve_v6(v6dstp, 5988 MBLK_GETLABEL(copy_mp), ipst); 5989 if (!need_resolve) { 5990 MULTIRT_DEBUG_UNTAG(copy_mp); 5991 freemsg(copy_mp); 5992 copy_mp = NULL; 5993 } else { 5994 /* 5995 * ipif_lookup_group_v6() calls 5996 * ire_lookup_multi_v6() that uses 5997 * ire_ftable_lookup_v6() to find 5998 * an IRE_INTERFACE for the group. 5999 * In the multirt case, 6000 * ire_lookup_multi_v6() then invokes 6001 * ire_multirt_lookup_v6() to find 6002 * the next resolvable ire. 6003 * As a result, we obtain a new 6004 * interface, derived from the 6005 * next ire. 
6006 */ 6007 if (ipif_held) { 6008 ipif_refrele(ipif); 6009 ipif_held = B_FALSE; 6010 } 6011 ipif = ipif_lookup_group_v6(v6dstp, 6012 zoneid, ipst); 6013 ip2dbg(("ip_newroute_ipif: " 6014 "multirt dst %08x, ipif %p\n", 6015 ntohl(V4_PART_OF_V6((*v6dstp))), 6016 (void *)ipif)); 6017 if (ipif != NULL) { 6018 ipif_held = B_TRUE; 6019 mp = copy_mp; 6020 copy_mp = NULL; 6021 multirt_resolve_next = 6022 B_TRUE; 6023 continue; 6024 } else { 6025 freemsg(copy_mp); 6026 } 6027 } 6028 } 6029 ill_refrele(dst_ill); 6030 if (ipif_held) { 6031 ipif_refrele(ipif); 6032 ipif_held = B_FALSE; 6033 } 6034 if (src_ipif != NULL) 6035 ipif_refrele(src_ipif); 6036 return; 6037 } 6038 case IRE_IF_RESOLVER: { 6039 6040 ASSERT(dst_ill->ill_isv6); 6041 6042 /* 6043 * We obtain a partial IRE_CACHE which we will pass 6044 * along with the resolver query. When the response 6045 * comes back it will be there ready for us to add. 6046 */ 6047 /* 6048 * the newly created ire will inherit the flags of the 6049 * parent ire, if any. 6050 */ 6051 ire = ire_create_v6( 6052 v6dstp, /* dest address */ 6053 &ipv6_all_ones, /* mask */ 6054 &src_ipif->ipif_v6src_addr, /* source address */ 6055 NULL, /* gateway address */ 6056 &save_ire->ire_max_frag, 6057 NULL, /* src nce */ 6058 dst_ill->ill_rq, /* recv-from queue */ 6059 dst_ill->ill_wq, /* send-to queue */ 6060 IRE_CACHE, 6061 src_ipif, 6062 NULL, 6063 (fire != NULL) ? /* Parent handle */ 6064 fire->ire_phandle : 0, 6065 save_ire->ire_ihandle, /* Interface handle */ 6066 (fire != NULL) ? 
6067 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6068 0, 6069 &ire_uinfo_null, 6070 NULL, 6071 NULL, 6072 ipst); 6073 6074 if (ire == NULL) { 6075 ire_refrele(save_ire); 6076 break; 6077 } 6078 6079 ire->ire_marks |= ire_marks; 6080 6081 /* Resolve and add ire to the ctable */ 6082 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6083 switch (err) { 6084 case 0: 6085 /* Prevent save_ire from getting deleted */ 6086 IRB_REFHOLD(save_ire->ire_bucket); 6087 /* Has it been removed already ? */ 6088 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6089 IRB_REFRELE(save_ire->ire_bucket); 6090 ire_refrele(save_ire); 6091 break; 6092 } 6093 /* 6094 * We have a resolved cache entry, 6095 * add in the IRE. 6096 */ 6097 ire_add_then_send(q, ire, first_mp); 6098 if (ip6_asp_table_held) { 6099 ip6_asp_table_refrele(ipst); 6100 ip6_asp_table_held = B_FALSE; 6101 } 6102 6103 /* Assert that it is not deleted yet. */ 6104 ASSERT(save_ire->ire_ptpn != NULL); 6105 IRB_REFRELE(save_ire->ire_bucket); 6106 ire_refrele(save_ire); 6107 if (fire != NULL) { 6108 ire_refrele(fire); 6109 fire = NULL; 6110 } 6111 6112 /* 6113 * The resolution loop is re-entered if we 6114 * actually are in a multirouting case. 6115 */ 6116 if (copy_mp != NULL) { 6117 boolean_t need_resolve = 6118 ire_multirt_need_resolve_v6(v6dstp, 6119 MBLK_GETLABEL(copy_mp), ipst); 6120 if (!need_resolve) { 6121 MULTIRT_DEBUG_UNTAG(copy_mp); 6122 freemsg(copy_mp); 6123 copy_mp = NULL; 6124 } else { 6125 /* 6126 * ipif_lookup_group_v6() calls 6127 * ire_lookup_multi_v6() that 6128 * uses ire_ftable_lookup_v6() 6129 * to find an IRE_INTERFACE for 6130 * the group. In the multirt 6131 * case, ire_lookup_multi_v6() 6132 * then invokes 6133 * ire_multirt_lookup_v6() to 6134 * find the next resolvable ire. 6135 * As a result, we obtain a new 6136 * interface, derived from the 6137 * next ire. 
6138 */ 6139 if (ipif_held) { 6140 ipif_refrele(ipif); 6141 ipif_held = B_FALSE; 6142 } 6143 ipif = ipif_lookup_group_v6( 6144 v6dstp, zoneid, ipst); 6145 ip2dbg(("ip_newroute_ipif: " 6146 "multirt dst %08x, " 6147 "ipif %p\n", 6148 ntohl(V4_PART_OF_V6( 6149 (*v6dstp))), 6150 (void *)ipif)); 6151 if (ipif != NULL) { 6152 ipif_held = B_TRUE; 6153 mp = copy_mp; 6154 copy_mp = NULL; 6155 multirt_resolve_next = 6156 B_TRUE; 6157 continue; 6158 } else { 6159 freemsg(copy_mp); 6160 } 6161 } 6162 } 6163 ill_refrele(dst_ill); 6164 if (ipif_held) { 6165 ipif_refrele(ipif); 6166 ipif_held = B_FALSE; 6167 } 6168 if (src_ipif != NULL) 6169 ipif_refrele(src_ipif); 6170 return; 6171 6172 case EINPROGRESS: 6173 /* 6174 * mp was consumed - presumably queued. 6175 * No need for ire, presumably resolution is 6176 * in progress, and ire will be added when the 6177 * address is resolved. 6178 */ 6179 if (ip6_asp_table_held) { 6180 ip6_asp_table_refrele(ipst); 6181 ip6_asp_table_held = B_FALSE; 6182 } 6183 ire_delete(ire); 6184 ire_refrele(save_ire); 6185 if (fire != NULL) { 6186 ire_refrele(fire); 6187 fire = NULL; 6188 } 6189 6190 /* 6191 * The resolution loop is re-entered if we 6192 * actually are in a multirouting case. 6193 */ 6194 if (copy_mp != NULL) { 6195 boolean_t need_resolve = 6196 ire_multirt_need_resolve_v6(v6dstp, 6197 MBLK_GETLABEL(copy_mp), ipst); 6198 if (!need_resolve) { 6199 MULTIRT_DEBUG_UNTAG(copy_mp); 6200 freemsg(copy_mp); 6201 copy_mp = NULL; 6202 } else { 6203 /* 6204 * ipif_lookup_group_v6() calls 6205 * ire_lookup_multi_v6() that 6206 * uses ire_ftable_lookup_v6() 6207 * to find an IRE_INTERFACE for 6208 * the group. In the multirt 6209 * case, ire_lookup_multi_v6() 6210 * then invokes 6211 * ire_multirt_lookup_v6() to 6212 * find the next resolvable ire. 6213 * As a result, we obtain a new 6214 * interface, derived from the 6215 * next ire. 
6216 */ 6217 if (ipif_held) { 6218 ipif_refrele(ipif); 6219 ipif_held = B_FALSE; 6220 } 6221 ipif = ipif_lookup_group_v6( 6222 v6dstp, zoneid, ipst); 6223 ip2dbg(("ip_newroute_ipif: " 6224 "multirt dst %08x, " 6225 "ipif %p\n", 6226 ntohl(V4_PART_OF_V6( 6227 (*v6dstp))), 6228 (void *)ipif)); 6229 if (ipif != NULL) { 6230 ipif_held = B_TRUE; 6231 mp = copy_mp; 6232 copy_mp = NULL; 6233 multirt_resolve_next = 6234 B_TRUE; 6235 continue; 6236 } else { 6237 freemsg(copy_mp); 6238 } 6239 } 6240 } 6241 ill_refrele(dst_ill); 6242 if (ipif_held) { 6243 ipif_refrele(ipif); 6244 ipif_held = B_FALSE; 6245 } 6246 if (src_ipif != NULL) 6247 ipif_refrele(src_ipif); 6248 return; 6249 default: 6250 /* Some transient error */ 6251 ire_refrele(save_ire); 6252 break; 6253 } 6254 break; 6255 } 6256 default: 6257 break; 6258 } 6259 if (ip6_asp_table_held) { 6260 ip6_asp_table_refrele(ipst); 6261 ip6_asp_table_held = B_FALSE; 6262 } 6263 } while (multirt_resolve_next); 6264 6265 err_ret: 6266 if (ip6_asp_table_held) 6267 ip6_asp_table_refrele(ipst); 6268 if (ire != NULL) 6269 ire_refrele(ire); 6270 if (fire != NULL) 6271 ire_refrele(fire); 6272 if (ipif != NULL && ipif_held) 6273 ipif_refrele(ipif); 6274 if (src_ipif != NULL) 6275 ipif_refrele(src_ipif); 6276 /* Multicast - no point in trying to generate ICMP error */ 6277 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6278 if (dst_ill != NULL) { 6279 ill = dst_ill; 6280 ill_held = B_TRUE; 6281 } 6282 if (mp->b_prev || mp->b_next) { 6283 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6284 } else { 6285 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6286 } 6287 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6288 mp->b_next = NULL; 6289 mp->b_prev = NULL; 6290 freemsg(first_mp); 6291 if (ill_held) 6292 ill_refrele(ill); 6293 } 6294 6295 /* 6296 * Parse and process any hop-by-hop or destination options. 
6297 * 6298 * Assumes that q is an ill read queue so that ICMP errors for link-local 6299 * destinations are sent out the correct interface. 6300 * 6301 * Returns -1 if there was an error and mp has been consumed. 6302 * Returns 0 if no special action is needed. 6303 * Returns 1 if the packet contained a router alert option for this node 6304 * which is verified to be "interesting/known" for our implementation. 6305 * 6306 * XXX Note: In future as more hbh or dest options are defined, 6307 * it may be better to have different routines for hbh and dest 6308 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6309 * may have same value in different namespaces. Or is it same namespace ?? 6310 * Current code checks for each opt_type (other than pads) if it is in 6311 * the expected nexthdr (hbh or dest) 6312 */ 6313 static int 6314 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6315 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6316 { 6317 uint8_t opt_type; 6318 uint_t optused; 6319 int ret = 0; 6320 mblk_t *first_mp; 6321 const char *errtype; 6322 zoneid_t zoneid; 6323 ill_t *ill = q->q_ptr; 6324 ipif_t *ipif; 6325 6326 first_mp = mp; 6327 if (mp->b_datap->db_type == M_CTL) { 6328 mp = mp->b_cont; 6329 } 6330 6331 while (optlen != 0) { 6332 opt_type = *optptr; 6333 if (opt_type == IP6OPT_PAD1) { 6334 optused = 1; 6335 } else { 6336 if (optlen < 2) 6337 goto bad_opt; 6338 errtype = "malformed"; 6339 if (opt_type == ip6opt_ls) { 6340 optused = 2 + optptr[1]; 6341 if (optused > optlen) 6342 goto bad_opt; 6343 } else switch (opt_type) { 6344 case IP6OPT_PADN: 6345 /* 6346 * Note:We don't verify that (N-2) pad octets 6347 * are zero as required by spec. Adhere to 6348 * "be liberal in what you accept..." 
part of 6349 * implementation philosophy (RFC791,RFC1122) 6350 */ 6351 optused = 2 + optptr[1]; 6352 if (optused > optlen) 6353 goto bad_opt; 6354 break; 6355 6356 case IP6OPT_JUMBO: 6357 if (hdr_type != IPPROTO_HOPOPTS) 6358 goto opt_error; 6359 goto opt_error; /* XXX Not implemented! */ 6360 6361 case IP6OPT_ROUTER_ALERT: { 6362 struct ip6_opt_router *or; 6363 6364 if (hdr_type != IPPROTO_HOPOPTS) 6365 goto opt_error; 6366 optused = 2 + optptr[1]; 6367 if (optused > optlen) 6368 goto bad_opt; 6369 or = (struct ip6_opt_router *)optptr; 6370 /* Check total length and alignment */ 6371 if (optused != sizeof (*or) || 6372 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6373 goto opt_error; 6374 /* Check value */ 6375 switch (*((uint16_t *)or->ip6or_value)) { 6376 case IP6_ALERT_MLD: 6377 case IP6_ALERT_RSVP: 6378 ret = 1; 6379 } 6380 break; 6381 } 6382 case IP6OPT_HOME_ADDRESS: { 6383 /* 6384 * Minimal support for the home address option 6385 * (which is required by all IPv6 nodes). 6386 * Implement by just swapping the home address 6387 * and source address. 6388 * XXX Note: this has IPsec implications since 6389 * AH needs to take this into account. 6390 * Also, when IPsec is used we need to ensure 6391 * that this is only processed once 6392 * in the received packet (to avoid swapping 6393 * back and forth). 6394 * NOTE:This option processing is considered 6395 * to be unsafe and prone to a denial of 6396 * service attack. 6397 * The current processing is not safe even with 6398 * IPsec secured IP packets. Since the home 6399 * address option processing requirement still 6400 * is in the IETF draft and in the process of 6401 * being redefined for its usage, it has been 6402 * decided to turn off the option by default. 6403 * If this section of code needs to be executed, 6404 * ndd variable ip6_ignore_home_address_opt 6405 * should be set to 0 at the user's own risk. 
6406 */ 6407 struct ip6_opt_home_address *oh; 6408 in6_addr_t tmp; 6409 6410 if (ipst->ips_ipv6_ignore_home_address_opt) 6411 goto opt_error; 6412 6413 if (hdr_type != IPPROTO_DSTOPTS) 6414 goto opt_error; 6415 optused = 2 + optptr[1]; 6416 if (optused > optlen) 6417 goto bad_opt; 6418 6419 /* 6420 * We did this dest. opt the first time 6421 * around (i.e. before AH processing). 6422 * If we've done AH... stop now. 6423 */ 6424 if (first_mp != mp) { 6425 ipsec_in_t *ii; 6426 6427 ii = (ipsec_in_t *)first_mp->b_rptr; 6428 if (ii->ipsec_in_ah_sa != NULL) 6429 break; 6430 } 6431 6432 oh = (struct ip6_opt_home_address *)optptr; 6433 /* Check total length and alignment */ 6434 if (optused < sizeof (*oh) || 6435 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6436 goto opt_error; 6437 /* Swap ip6_src and the home address */ 6438 tmp = ip6h->ip6_src; 6439 /* XXX Note: only 8 byte alignment option */ 6440 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6441 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6442 break; 6443 } 6444 6445 case IP6OPT_TUNNEL_LIMIT: 6446 if (hdr_type != IPPROTO_DSTOPTS) { 6447 goto opt_error; 6448 } 6449 optused = 2 + optptr[1]; 6450 if (optused > optlen) { 6451 goto bad_opt; 6452 } 6453 if (optused != 3) { 6454 goto opt_error; 6455 } 6456 break; 6457 6458 default: 6459 errtype = "unknown"; 6460 /* FALLTHROUGH */ 6461 opt_error: 6462 /* Determine which zone should send error */ 6463 zoneid = ipif_lookup_addr_zoneid_v6( 6464 &ip6h->ip6_dst, ill, ipst); 6465 switch (IP6OPT_TYPE(opt_type)) { 6466 case IP6OPT_TYPE_SKIP: 6467 optused = 2 + optptr[1]; 6468 if (optused > optlen) 6469 goto bad_opt; 6470 ip1dbg(("ip_process_options_v6: %s " 6471 "opt 0x%x skipped\n", 6472 errtype, opt_type)); 6473 break; 6474 case IP6OPT_TYPE_DISCARD: 6475 ip1dbg(("ip_process_options_v6: %s " 6476 "opt 0x%x; packet dropped\n", 6477 errtype, opt_type)); 6478 freemsg(first_mp); 6479 return (-1); 6480 case IP6OPT_TYPE_ICMP: 6481 if (zoneid == ALL_ZONES) { 6482 freemsg(first_mp); 6483 return 
(-1); 6484 } 6485 icmp_param_problem_v6(WR(q), first_mp, 6486 ICMP6_PARAMPROB_OPTION, 6487 (uint32_t)(optptr - 6488 (uint8_t *)ip6h), 6489 B_FALSE, B_FALSE, zoneid, ipst); 6490 return (-1); 6491 case IP6OPT_TYPE_FORCEICMP: 6492 /* 6493 * If we don't have a zone and the dst 6494 * addr is multicast, then pick a zone 6495 * based on the inbound interface. 6496 */ 6497 if (zoneid == ALL_ZONES && 6498 IN6_IS_ADDR_MULTICAST( 6499 &ip6h->ip6_dst)) { 6500 ipif = ipif_select_source_v6( 6501 ill, &ip6h->ip6_src, 6502 RESTRICT_TO_GROUP, 6503 IPV6_PREFER_SRC_DEFAULT, 6504 ALL_ZONES); 6505 if (ipif != NULL) { 6506 zoneid = 6507 ipif->ipif_zoneid; 6508 ipif_refrele(ipif); 6509 } 6510 } 6511 if (zoneid == ALL_ZONES) { 6512 freemsg(first_mp); 6513 return (-1); 6514 } 6515 icmp_param_problem_v6(WR(q), first_mp, 6516 ICMP6_PARAMPROB_OPTION, 6517 (uint32_t)(optptr - 6518 (uint8_t *)ip6h), 6519 B_FALSE, B_TRUE, zoneid, ipst); 6520 return (-1); 6521 default: 6522 ASSERT(0); 6523 } 6524 } 6525 } 6526 optlen -= optused; 6527 optptr += optused; 6528 } 6529 return (ret); 6530 6531 bad_opt: 6532 /* Determine which zone should send error */ 6533 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6534 if (zoneid == ALL_ZONES) { 6535 freemsg(first_mp); 6536 } else { 6537 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6538 (uint32_t)(optptr - (uint8_t *)ip6h), 6539 B_FALSE, B_FALSE, zoneid, ipst); 6540 } 6541 return (-1); 6542 } 6543 6544 /* 6545 * Process a routing header that is not yet empty. 6546 * Only handles type 0 routing headers. 
6547 */ 6548 static void 6549 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6550 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6551 { 6552 ip6_rthdr0_t *rthdr; 6553 uint_t ehdrlen; 6554 uint_t numaddr; 6555 in6_addr_t *addrptr; 6556 in6_addr_t tmp; 6557 ip_stack_t *ipst = ill->ill_ipst; 6558 6559 ASSERT(rth->ip6r_segleft != 0); 6560 6561 if (!ipst->ips_ipv6_forward_src_routed) { 6562 /* XXX Check for source routed out same interface? */ 6563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6564 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6565 freemsg(hada_mp); 6566 freemsg(mp); 6567 return; 6568 } 6569 6570 if (rth->ip6r_type != 0) { 6571 if (hada_mp != NULL) 6572 goto hada_drop; 6573 /* Sent by forwarding path, and router is global zone */ 6574 icmp_param_problem_v6(WR(q), mp, 6575 ICMP6_PARAMPROB_HEADER, 6576 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6577 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6578 return; 6579 } 6580 rthdr = (ip6_rthdr0_t *)rth; 6581 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6582 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6583 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6584 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6585 if (rthdr->ip6r0_len & 0x1) { 6586 /* An odd length is impossible */ 6587 if (hada_mp != NULL) 6588 goto hada_drop; 6589 /* Sent by forwarding path, and router is global zone */ 6590 icmp_param_problem_v6(WR(q), mp, 6591 ICMP6_PARAMPROB_HEADER, 6592 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6593 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6594 return; 6595 } 6596 numaddr = rthdr->ip6r0_len / 2; 6597 if (rthdr->ip6r0_segleft > numaddr) { 6598 /* segleft exceeds number of addresses in routing header */ 6599 if (hada_mp != NULL) 6600 goto hada_drop; 6601 /* Sent by forwarding path, and router is global zone */ 6602 icmp_param_problem_v6(WR(q), mp, 6603 ICMP6_PARAMPROB_HEADER, 6604 (uint32_t)((uchar_t 
*)&rthdr->ip6r0_segleft - 6605 (uchar_t *)ip6h), 6606 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6607 return; 6608 } 6609 addrptr += (numaddr - rthdr->ip6r0_segleft); 6610 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6611 IN6_IS_ADDR_MULTICAST(addrptr)) { 6612 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6613 freemsg(hada_mp); 6614 freemsg(mp); 6615 return; 6616 } 6617 /* Swap */ 6618 tmp = *addrptr; 6619 *addrptr = ip6h->ip6_dst; 6620 ip6h->ip6_dst = tmp; 6621 rthdr->ip6r0_segleft--; 6622 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6623 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6624 if (hada_mp != NULL) 6625 goto hada_drop; 6626 /* Sent by forwarding path, and router is global zone */ 6627 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6628 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6629 return; 6630 } 6631 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6632 ip6h = (ip6_t *)mp->b_rptr; 6633 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6634 } else { 6635 freemsg(mp); 6636 } 6637 return; 6638 hada_drop: 6639 /* IPsec kstats: bean counter? */ 6640 freemsg(hada_mp); 6641 freemsg(mp); 6642 } 6643 6644 /* 6645 * Read side put procedure for IPv6 module. 6646 */ 6647 void 6648 ip_rput_v6(queue_t *q, mblk_t *mp) 6649 { 6650 mblk_t *first_mp; 6651 mblk_t *hada_mp = NULL; 6652 ip6_t *ip6h; 6653 boolean_t ll_multicast = B_FALSE; 6654 boolean_t mctl_present = B_FALSE; 6655 ill_t *ill; 6656 struct iocblk *iocp; 6657 uint_t flags = 0; 6658 mblk_t *dl_mp; 6659 ip_stack_t *ipst; 6660 int check; 6661 6662 ill = (ill_t *)q->q_ptr; 6663 ipst = ill->ill_ipst; 6664 if (ill->ill_state_flags & ILL_CONDEMNED) { 6665 union DL_primitives *dl; 6666 6667 dl = (union DL_primitives *)mp->b_rptr; 6668 /* 6669 * Things are opening or closing - only accept DLPI 6670 * ack messages. If the stream is closing and ip_wsrv 6671 * has completed, ip_close is out of the qwait, but has 6672 * not yet completed qprocsoff. 
Don't proceed any further 6673 * because the ill has been cleaned up and things hanging 6674 * off the ill have been freed. 6675 */ 6676 if ((mp->b_datap->db_type != M_PCPROTO) || 6677 (dl->dl_primitive == DL_UNITDATA_IND)) { 6678 inet_freemsg(mp); 6679 return; 6680 } 6681 } 6682 6683 dl_mp = NULL; 6684 switch (mp->b_datap->db_type) { 6685 case M_DATA: { 6686 int hlen; 6687 uchar_t *ucp; 6688 struct ether_header *eh; 6689 dl_unitdata_ind_t *dui; 6690 6691 /* 6692 * This is a work-around for CR 6451644, a bug in Nemo. It 6693 * should be removed when that problem is fixed. 6694 */ 6695 if (ill->ill_mactype == DL_ETHER && 6696 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6697 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6698 ucp[-2] == (IP6_DL_SAP >> 8)) { 6699 if (hlen >= sizeof (struct ether_vlan_header) && 6700 ucp[-5] == 0 && ucp[-6] == 0x81) 6701 ucp -= sizeof (struct ether_vlan_header); 6702 else 6703 ucp -= sizeof (struct ether_header); 6704 /* 6705 * If it's a group address, then fabricate a 6706 * DL_UNITDATA_IND message. 
6707 */ 6708 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6709 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6710 BPRI_HI)) != NULL) { 6711 eh = (struct ether_header *)ucp; 6712 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6713 DB_TYPE(dl_mp) = M_PROTO; 6714 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6715 dui->dl_primitive = DL_UNITDATA_IND; 6716 dui->dl_dest_addr_length = 8; 6717 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6718 dui->dl_src_addr_length = 8; 6719 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6720 8; 6721 dui->dl_group_address = 1; 6722 ucp = (uchar_t *)(dui + 1); 6723 if (ill->ill_sap_length > 0) 6724 ucp += ill->ill_sap_length; 6725 bcopy(&eh->ether_dhost, ucp, 6); 6726 bcopy(&eh->ether_shost, ucp + 8, 6); 6727 ucp = (uchar_t *)(dui + 1); 6728 if (ill->ill_sap_length < 0) 6729 ucp += 8 + ill->ill_sap_length; 6730 bcopy(&eh->ether_type, ucp, 2); 6731 bcopy(&eh->ether_type, ucp + 8, 2); 6732 } 6733 } 6734 break; 6735 } 6736 6737 case M_PROTO: 6738 case M_PCPROTO: 6739 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6740 DL_UNITDATA_IND) { 6741 /* Go handle anything other than data elsewhere. */ 6742 ip_rput_dlpi(q, mp); 6743 return; 6744 } 6745 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6746 6747 /* Save the DLPI header. 
*/ 6748 dl_mp = mp; 6749 mp = mp->b_cont; 6750 dl_mp->b_cont = NULL; 6751 break; 6752 case M_BREAK: 6753 panic("ip_rput_v6: got an M_BREAK"); 6754 /*NOTREACHED*/ 6755 case M_IOCACK: 6756 iocp = (struct iocblk *)mp->b_rptr; 6757 switch (iocp->ioc_cmd) { 6758 case DL_IOC_HDR_INFO: 6759 ill = (ill_t *)q->q_ptr; 6760 ill_fastpath_ack(ill, mp); 6761 return; 6762 6763 case SIOCGTUNPARAM: 6764 case OSIOCGTUNPARAM: 6765 ip_rput_other(NULL, q, mp, NULL); 6766 return; 6767 6768 case SIOCSTUNPARAM: 6769 case OSIOCSTUNPARAM: 6770 /* Go through qwriter */ 6771 break; 6772 default: 6773 putnext(q, mp); 6774 return; 6775 } 6776 /* FALLTHRU */ 6777 case M_ERROR: 6778 case M_HANGUP: 6779 mutex_enter(&ill->ill_lock); 6780 if (ill->ill_state_flags & ILL_CONDEMNED) { 6781 mutex_exit(&ill->ill_lock); 6782 freemsg(mp); 6783 return; 6784 } 6785 ill_refhold_locked(ill); 6786 mutex_exit(&ill->ill_lock); 6787 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6788 return; 6789 case M_CTL: 6790 if ((MBLKL(mp) > sizeof (int)) && 6791 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6792 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6793 mctl_present = B_TRUE; 6794 break; 6795 } 6796 putnext(q, mp); 6797 return; 6798 case M_IOCNAK: 6799 iocp = (struct iocblk *)mp->b_rptr; 6800 switch (iocp->ioc_cmd) { 6801 case DL_IOC_HDR_INFO: 6802 case SIOCGTUNPARAM: 6803 case OSIOCGTUNPARAM: 6804 ip_rput_other(NULL, q, mp, NULL); 6805 return; 6806 6807 case SIOCSTUNPARAM: 6808 case OSIOCSTUNPARAM: 6809 mutex_enter(&ill->ill_lock); 6810 if (ill->ill_state_flags & ILL_CONDEMNED) { 6811 mutex_exit(&ill->ill_lock); 6812 freemsg(mp); 6813 return; 6814 } 6815 ill_refhold_locked(ill); 6816 mutex_exit(&ill->ill_lock); 6817 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6818 return; 6819 default: 6820 break; 6821 } 6822 /* FALLTHRU */ 6823 default: 6824 putnext(q, mp); 6825 return; 6826 } 6827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6828 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6829 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6830 /* 6831 * if db_ref > 1 then copymsg and free original. Packet may be 6832 * changed and do not want other entity who has a reference to this 6833 * message to trip over the changes. This is a blind change because 6834 * trying to catch all places that might change packet is too 6835 * difficult (since it may be a module above this one). 6836 */ 6837 if (mp->b_datap->db_ref > 1) { 6838 mblk_t *mp1; 6839 6840 mp1 = copymsg(mp); 6841 freemsg(mp); 6842 if (mp1 == NULL) { 6843 first_mp = NULL; 6844 goto discard; 6845 } 6846 mp = mp1; 6847 } 6848 first_mp = mp; 6849 if (mctl_present) { 6850 hada_mp = first_mp; 6851 mp = first_mp->b_cont; 6852 } 6853 6854 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6855 freemsg(mp); 6856 return; 6857 } 6858 6859 ip6h = (ip6_t *)mp->b_rptr; 6860 6861 /* 6862 * ip:::receive must see ipv6 packets with a full header, 6863 * and so is placed after the IP6_MBLK_HDR_ERR check. 6864 */ 6865 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6866 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6867 int, 0); 6868 6869 if (check != IP6_MBLK_OK) { 6870 freemsg(mp); 6871 return; 6872 } 6873 6874 DTRACE_PROBE4(ip6__physical__in__start, 6875 ill_t *, ill, ill_t *, NULL, 6876 ip6_t *, ip6h, mblk_t *, first_mp); 6877 6878 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6879 ipst->ips_ipv6firewall_physical_in, 6880 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6881 6882 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6883 6884 if (first_mp == NULL) 6885 return; 6886 6887 /* 6888 * Attach any necessary label information to this packet. 6889 */ 6890 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6891 if (ip6opt_ls != 0) 6892 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6893 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6894 goto discard; 6895 } 6896 6897 /* IP observability hook. 
*/ 6898 if (ipst->ips_ipobs_enabled) { 6899 zoneid_t dzone; 6900 6901 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6902 ALL_ZONES); 6903 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6904 IPV6_VERSION, 0, ipst); 6905 } 6906 6907 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6908 IPV6_DEFAULT_VERS_AND_FLOW) { 6909 /* 6910 * It may be a bit too expensive to do this mapped address 6911 * check here, but in the interest of robustness, it seems 6912 * like the correct place. 6913 * TODO: Avoid this check for e.g. connected TCP sockets 6914 */ 6915 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6916 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6917 goto discard; 6918 } 6919 6920 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6921 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6922 goto discard; 6923 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6924 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6925 goto discard; 6926 } 6927 6928 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6929 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6930 } else { 6931 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6932 goto discard; 6933 } 6934 freemsg(dl_mp); 6935 return; 6936 6937 discard: 6938 if (dl_mp != NULL) 6939 freeb(dl_mp); 6940 freemsg(first_mp); 6941 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6942 } 6943 6944 /* 6945 * Walk through the IPv6 packet in mp and see if there's an AH header 6946 * in it. See if the AH header needs to get done before other headers in 6947 * the packet. (Worker function for ipsec_early_ah_v6().) 
6948 */ 6949 #define IPSEC_HDR_DONT_PROCESS 0 6950 #define IPSEC_HDR_PROCESS 1 6951 #define IPSEC_MEMORY_ERROR 2 6952 static int 6953 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6954 { 6955 uint_t length; 6956 uint_t ehdrlen; 6957 uint8_t *whereptr; 6958 uint8_t *endptr; 6959 uint8_t *nexthdrp; 6960 ip6_dest_t *desthdr; 6961 ip6_rthdr_t *rthdr; 6962 ip6_t *ip6h; 6963 6964 /* 6965 * For now just pullup everything. In general, the less pullups, 6966 * the better, but there's so much squirrelling through anyway, 6967 * it's just easier this way. 6968 */ 6969 if (!pullupmsg(mp, -1)) { 6970 return (IPSEC_MEMORY_ERROR); 6971 } 6972 6973 ip6h = (ip6_t *)mp->b_rptr; 6974 length = IPV6_HDR_LEN; 6975 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6976 endptr = mp->b_wptr; 6977 6978 /* 6979 * We can't just use the argument nexthdr in the place 6980 * of nexthdrp becaue we don't dereference nexthdrp 6981 * till we confirm whether it is a valid address. 6982 */ 6983 nexthdrp = &ip6h->ip6_nxt; 6984 while (whereptr < endptr) { 6985 /* Is there enough left for len + nexthdr? */ 6986 if (whereptr + MIN_EHDR_LEN > endptr) 6987 return (IPSEC_MEMORY_ERROR); 6988 6989 switch (*nexthdrp) { 6990 case IPPROTO_HOPOPTS: 6991 case IPPROTO_DSTOPTS: 6992 /* Assumes the headers are identical for hbh and dst */ 6993 desthdr = (ip6_dest_t *)whereptr; 6994 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6995 if ((uchar_t *)desthdr + ehdrlen > endptr) 6996 return (IPSEC_MEMORY_ERROR); 6997 /* 6998 * Return DONT_PROCESS because the destination 6999 * options header may be for each hop in a 7000 * routing-header, and we only want AH if we're 7001 * finished with routing headers. 7002 */ 7003 if (*nexthdrp == IPPROTO_DSTOPTS) 7004 return (IPSEC_HDR_DONT_PROCESS); 7005 nexthdrp = &desthdr->ip6d_nxt; 7006 break; 7007 case IPPROTO_ROUTING: 7008 rthdr = (ip6_rthdr_t *)whereptr; 7009 7010 /* 7011 * If there's more hops left on the routing header, 7012 * return now with DON'T PROCESS. 
7013 */ 7014 if (rthdr->ip6r_segleft > 0) 7015 return (IPSEC_HDR_DONT_PROCESS); 7016 7017 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7018 if ((uchar_t *)rthdr + ehdrlen > endptr) 7019 return (IPSEC_MEMORY_ERROR); 7020 nexthdrp = &rthdr->ip6r_nxt; 7021 break; 7022 case IPPROTO_FRAGMENT: 7023 /* Wait for reassembly */ 7024 return (IPSEC_HDR_DONT_PROCESS); 7025 case IPPROTO_AH: 7026 *nexthdr = IPPROTO_AH; 7027 return (IPSEC_HDR_PROCESS); 7028 case IPPROTO_NONE: 7029 /* No next header means we're finished */ 7030 default: 7031 return (IPSEC_HDR_DONT_PROCESS); 7032 } 7033 length += ehdrlen; 7034 whereptr += ehdrlen; 7035 } 7036 panic("ipsec_needs_processing_v6"); 7037 /*NOTREACHED*/ 7038 } 7039 7040 /* 7041 * Path for AH if options are present. If this is the first time we are 7042 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7043 * Otherwise, just fanout. Return value answers the boolean question: 7044 * "Did I consume the mblk you sent me?" 7045 * 7046 * Sometimes AH needs to be done before other IPv6 headers for security 7047 * reasons. This function (and its ipsec_needs_processing_v6() above) 7048 * indicates if that is so, and fans out to the appropriate IPsec protocol 7049 * for the datagram passed in. 7050 */ 7051 static boolean_t 7052 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7053 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7054 { 7055 mblk_t *mp; 7056 uint8_t nexthdr; 7057 ipsec_in_t *ii = NULL; 7058 ah_t *ah; 7059 ipsec_status_t ipsec_rc; 7060 ip_stack_t *ipst = ill->ill_ipst; 7061 netstack_t *ns = ipst->ips_netstack; 7062 ipsec_stack_t *ipss = ns->netstack_ipsec; 7063 7064 ASSERT((hada_mp == NULL) || (!mctl_present)); 7065 7066 switch (ipsec_needs_processing_v6( 7067 (mctl_present ? 
first_mp->b_cont : first_mp), &nexthdr)) { 7068 case IPSEC_MEMORY_ERROR: 7069 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7070 freemsg(hada_mp); 7071 freemsg(first_mp); 7072 return (B_TRUE); 7073 case IPSEC_HDR_DONT_PROCESS: 7074 return (B_FALSE); 7075 } 7076 7077 /* Default means send it to AH! */ 7078 ASSERT(nexthdr == IPPROTO_AH); 7079 if (!mctl_present) { 7080 mp = first_mp; 7081 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7082 if (first_mp == NULL) { 7083 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7084 "allocation failure.\n")); 7085 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7086 freemsg(hada_mp); 7087 freemsg(mp); 7088 return (B_TRUE); 7089 } 7090 /* 7091 * Store the ill_index so that when we come back 7092 * from IPSEC we ride on the same queue. 7093 */ 7094 ii = (ipsec_in_t *)first_mp->b_rptr; 7095 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7096 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7097 first_mp->b_cont = mp; 7098 } 7099 /* 7100 * Cache hardware acceleration info. 
7101 */ 7102 if (hada_mp != NULL) { 7103 ASSERT(ii != NULL); 7104 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7105 "caching data attr.\n")); 7106 ii->ipsec_in_accelerated = B_TRUE; 7107 ii->ipsec_in_da = hada_mp; 7108 } 7109 7110 if (!ipsec_loaded(ipss)) { 7111 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7112 return (B_TRUE); 7113 } 7114 7115 ah = ipsec_inbound_ah_sa(first_mp, ns); 7116 if (ah == NULL) 7117 return (B_TRUE); 7118 ASSERT(ii->ipsec_in_ah_sa != NULL); 7119 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7120 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7121 7122 switch (ipsec_rc) { 7123 case IPSEC_STATUS_SUCCESS: 7124 /* we're done with IPsec processing, send it up */ 7125 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7126 break; 7127 case IPSEC_STATUS_FAILED: 7128 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7129 break; 7130 case IPSEC_STATUS_PENDING: 7131 /* no action needed */ 7132 break; 7133 } 7134 return (B_TRUE); 7135 } 7136 7137 /* 7138 * Validate the IPv6 mblk for alignment. 7139 */ 7140 int 7141 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7142 { 7143 int pkt_len, ip6_len; 7144 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7145 7146 /* check for alignment and full IPv6 header */ 7147 if (!OK_32PTR((uchar_t *)ip6h) || 7148 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7149 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7150 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7151 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7152 return (IP6_MBLK_HDR_ERR); 7153 } 7154 ip6h = (ip6_t *)mp->b_rptr; 7155 } 7156 7157 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7158 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7159 7160 if (mp->b_cont == NULL) 7161 pkt_len = mp->b_wptr - mp->b_rptr; 7162 else 7163 pkt_len = msgdsize(mp); 7164 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7165 7166 /* 7167 * Check for bogus (too short packet) and packet which 7168 * was padded by the link layer. 
7169 */ 7170 if (ip6_len != pkt_len) { 7171 ssize_t diff; 7172 7173 if (ip6_len > pkt_len) { 7174 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7175 ip6_len, pkt_len)); 7176 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7177 return (IP6_MBLK_LEN_ERR); 7178 } 7179 diff = (ssize_t)(pkt_len - ip6_len); 7180 7181 if (!adjmsg(mp, -diff)) { 7182 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7183 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7184 return (IP6_MBLK_LEN_ERR); 7185 } 7186 } 7187 return (IP6_MBLK_OK); 7188 } 7189 7190 /* 7191 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7192 * ip_rput_v6 has already verified alignment, the min length, the version, 7193 * and db_ref = 1. 7194 * 7195 * The ill passed in (the arg named inill) is the ill that the packet 7196 * actually arrived on. We need to remember this when saving the 7197 * input interface index into potential IPV6_PKTINFO data in 7198 * ip_add_info_v6(). 7199 * 7200 * This routine doesn't free dl_mp; that's the caller's responsibility on 7201 * return. (Note that the callers are complex enough that there's no tail 7202 * recursion here anyway.) 
7203 */ 7204 void 7205 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7206 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7207 { 7208 ire_t *ire = NULL; 7209 ill_t *ill = inill; 7210 ill_t *outill; 7211 ipif_t *ipif; 7212 uint8_t *whereptr; 7213 uint8_t nexthdr; 7214 uint16_t remlen; 7215 uint_t prev_nexthdr_offset; 7216 uint_t used; 7217 size_t old_pkt_len; 7218 size_t pkt_len; 7219 uint16_t ip6_len; 7220 uint_t hdr_len; 7221 boolean_t mctl_present; 7222 mblk_t *first_mp; 7223 mblk_t *first_mp1; 7224 boolean_t no_forward; 7225 ip6_hbh_t *hbhhdr; 7226 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7227 conn_t *connp; 7228 ilm_t *ilm; 7229 uint32_t ports; 7230 zoneid_t zoneid = GLOBAL_ZONEID; 7231 uint16_t hck_flags, reass_hck_flags; 7232 uint32_t reass_sum; 7233 boolean_t cksum_err; 7234 mblk_t *mp1; 7235 ip_stack_t *ipst = inill->ill_ipst; 7236 7237 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7238 7239 if (hada_mp != NULL) { 7240 /* 7241 * It's an IPsec accelerated packet. 7242 * Keep a pointer to the data attributes around until 7243 * we allocate the ipsecinfo structure. 7244 */ 7245 IPSECHW_DEBUG(IPSECHW_PKT, 7246 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7247 hada_mp->b_cont = NULL; 7248 /* 7249 * Since it is accelerated, it came directly from 7250 * the ill. 
7251 */ 7252 ASSERT(mctl_present == B_FALSE); 7253 ASSERT(mp->b_datap->db_type != M_CTL); 7254 } 7255 7256 ip6h = (ip6_t *)mp->b_rptr; 7257 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7258 old_pkt_len = pkt_len = ip6_len; 7259 7260 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7261 hck_flags = DB_CKSUMFLAGS(mp); 7262 else 7263 hck_flags = 0; 7264 7265 /* Clear checksum flags in case we need to forward */ 7266 DB_CKSUMFLAGS(mp) = 0; 7267 reass_sum = reass_hck_flags = 0; 7268 7269 nexthdr = ip6h->ip6_nxt; 7270 7271 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7272 (uchar_t *)ip6h); 7273 whereptr = (uint8_t *)&ip6h[1]; 7274 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7275 7276 /* Process hop by hop header options */ 7277 if (nexthdr == IPPROTO_HOPOPTS) { 7278 uint_t ehdrlen; 7279 uint8_t *optptr; 7280 7281 if (remlen < MIN_EHDR_LEN) 7282 goto pkt_too_short; 7283 if (mp->b_cont != NULL && 7284 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7285 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7287 freemsg(hada_mp); 7288 freemsg(first_mp); 7289 return; 7290 } 7291 ip6h = (ip6_t *)mp->b_rptr; 7292 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7293 } 7294 hbhhdr = (ip6_hbh_t *)whereptr; 7295 nexthdr = hbhhdr->ip6h_nxt; 7296 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7297 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7298 7299 if (remlen < ehdrlen) 7300 goto pkt_too_short; 7301 if (mp->b_cont != NULL && 7302 whereptr + ehdrlen > mp->b_wptr) { 7303 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7304 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7305 freemsg(hada_mp); 7306 freemsg(first_mp); 7307 return; 7308 } 7309 ip6h = (ip6_t *)mp->b_rptr; 7310 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7311 hbhhdr = (ip6_hbh_t *)whereptr; 7312 } 7313 7314 optptr = whereptr + 2; 7315 whereptr += ehdrlen; 7316 remlen -= ehdrlen; 7317 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7318 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7319 case -1: 7320 /* 7321 * Packet has been consumed and any 7322 * needed ICMP messages sent. 7323 */ 7324 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7325 freemsg(hada_mp); 7326 return; 7327 case 0: 7328 /* no action needed */ 7329 break; 7330 case 1: 7331 /* Known router alert */ 7332 goto ipv6forus; 7333 } 7334 } 7335 7336 /* 7337 * On incoming v6 multicast packets we will bypass the ire table, 7338 * and assume that the read queue corresponds to the targetted 7339 * interface. 7340 * 7341 * The effect of this is the same as the IPv4 original code, but is 7342 * much cleaner I think. See ip_rput for how that was done. 7343 */ 7344 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7345 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7346 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7347 /* 7348 * XXX TODO Give to mrouted to for multicast forwarding. 7349 */ 7350 ILM_WALKER_HOLD(ill); 7351 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7352 ILM_WALKER_RELE(ill); 7353 if (ilm == NULL) { 7354 if (ip_debug > 3) { 7355 /* ip2dbg */ 7356 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7357 " which is not for us: %s\n", AF_INET6, 7358 &ip6h->ip6_dst); 7359 } 7360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7361 freemsg(hada_mp); 7362 freemsg(first_mp); 7363 return; 7364 } 7365 if (ip_debug > 3) { 7366 /* ip2dbg */ 7367 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7368 AF_INET6, &ip6h->ip6_dst); 7369 } 7370 zoneid = GLOBAL_ZONEID; 7371 goto ipv6forus; 7372 } 7373 7374 ipif = ill->ill_ipif; 7375 7376 /* 7377 * If a packet was received on an interface that is a 6to4 tunnel, 7378 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7379 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7380 * the 6to4 prefix of the address configured on the receiving interface. 
7381 * Otherwise, the packet was delivered to this interface in error and 7382 * the packet must be dropped. 7383 */ 7384 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7385 7386 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7387 &ip6h->ip6_dst)) { 7388 if (ip_debug > 2) { 7389 /* ip1dbg */ 7390 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7391 "addressed packet which is not for us: " 7392 "%s\n", AF_INET6, &ip6h->ip6_dst); 7393 } 7394 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7395 freemsg(first_mp); 7396 return; 7397 } 7398 } 7399 7400 /* 7401 * Find an ire that matches destination. For link-local addresses 7402 * we have to match the ill. 7403 * TBD for site local addresses. 7404 */ 7405 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7406 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7407 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7408 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7409 } else { 7410 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7411 MBLK_GETLABEL(mp), ipst); 7412 7413 if (ire != NULL && ire->ire_stq != NULL && 7414 ire->ire_zoneid != GLOBAL_ZONEID && 7415 ire->ire_zoneid != ALL_ZONES) { 7416 /* 7417 * Should only use IREs that are visible from the 7418 * global zone for forwarding. 7419 */ 7420 ire_refrele(ire); 7421 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7422 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7423 } 7424 } 7425 7426 if (ire == NULL) { 7427 /* 7428 * No matching IRE found. Mark this packet as having 7429 * originated externally. 
7430 */ 7431 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7432 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7433 if (!(ill->ill_flags & ILLF_ROUTER)) { 7434 BUMP_MIB(ill->ill_ip_mib, 7435 ipIfStatsInAddrErrors); 7436 } 7437 freemsg(hada_mp); 7438 freemsg(first_mp); 7439 return; 7440 } 7441 if (ip6h->ip6_hops <= 1) { 7442 if (hada_mp != NULL) 7443 goto hada_drop; 7444 /* Sent by forwarding path, and router is global zone */ 7445 icmp_time_exceeded_v6(WR(q), first_mp, 7446 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7447 GLOBAL_ZONEID, ipst); 7448 return; 7449 } 7450 /* 7451 * Per RFC 3513 section 2.5.2, we must not forward packets with 7452 * an unspecified source address. 7453 */ 7454 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7455 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7456 freemsg(hada_mp); 7457 freemsg(first_mp); 7458 return; 7459 } 7460 mp->b_prev = (mblk_t *)(uintptr_t) 7461 ill->ill_phyint->phyint_ifindex; 7462 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7463 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7464 GLOBAL_ZONEID, ipst); 7465 return; 7466 } 7467 /* we have a matching IRE */ 7468 if (ire->ire_stq != NULL) { 7469 ill_group_t *ill_group; 7470 ill_group_t *ire_group; 7471 7472 /* 7473 * To be quicker, we may wish not to chase pointers 7474 * (ire->ire_ipif->ipif_ill...) and instead store the 7475 * forwarding policy in the ire. An unfortunate side- 7476 * effect of this would be requiring an ire flush whenever 7477 * the ILLF_ROUTER flag changes. For now, chase pointers 7478 * once and store in the boolean no_forward. 7479 * 7480 * This appears twice to keep it out of the non-forwarding, 7481 * yes-it's-for-us-on-the-right-interface case. 7482 */ 7483 no_forward = ((ill->ill_flags & 7484 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7485 7486 7487 ASSERT(first_mp == mp); 7488 /* 7489 * This ire has a send-to queue - forward the packet. 
7490 */ 7491 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7492 freemsg(hada_mp); 7493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7494 if (no_forward) { 7495 BUMP_MIB(ill->ill_ip_mib, 7496 ipIfStatsInAddrErrors); 7497 } 7498 freemsg(mp); 7499 ire_refrele(ire); 7500 return; 7501 } 7502 /* 7503 * ipIfStatsHCInForwDatagrams should only be increment if there 7504 * will be an attempt to forward the packet, which is why we 7505 * increment after the above condition has been checked. 7506 */ 7507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7508 if (ip6h->ip6_hops <= 1) { 7509 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7510 /* Sent by forwarding path, and router is global zone */ 7511 icmp_time_exceeded_v6(WR(q), mp, 7512 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7513 GLOBAL_ZONEID, ipst); 7514 ire_refrele(ire); 7515 return; 7516 } 7517 /* 7518 * Per RFC 3513 section 2.5.2, we must not forward packets with 7519 * an unspecified source address. 7520 */ 7521 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7522 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7523 freemsg(mp); 7524 ire_refrele(ire); 7525 return; 7526 } 7527 7528 if (is_system_labeled()) { 7529 mblk_t *mp1; 7530 7531 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7532 BUMP_MIB(ill->ill_ip_mib, 7533 ipIfStatsForwProhibits); 7534 freemsg(mp); 7535 ire_refrele(ire); 7536 return; 7537 } 7538 /* Size may have changed */ 7539 mp = mp1; 7540 ip6h = (ip6_t *)mp->b_rptr; 7541 pkt_len = msgdsize(mp); 7542 } 7543 7544 if (pkt_len > ire->ire_max_frag) { 7545 int max_frag = ire->ire_max_frag; 7546 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7547 /* 7548 * Handle labeled packet resizing. 
7549 */ 7550 if (is_system_labeled()) { 7551 max_frag = tsol_pmtu_adjust(mp, max_frag, 7552 pkt_len - old_pkt_len, AF_INET6); 7553 } 7554 7555 /* Sent by forwarding path, and router is global zone */ 7556 icmp_pkt2big_v6(WR(q), mp, max_frag, 7557 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7558 ire_refrele(ire); 7559 return; 7560 } 7561 7562 /* 7563 * Check to see if we're forwarding the packet to a 7564 * different link from which it came. If so, check the 7565 * source and destination addresses since routers must not 7566 * forward any packets with link-local source or 7567 * destination addresses to other links. Otherwise (if 7568 * we're forwarding onto the same link), conditionally send 7569 * a redirect message. 7570 */ 7571 ill_group = ill->ill_group; 7572 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7573 if (ire->ire_rfq != q && (ill_group == NULL || 7574 ill_group != ire_group)) { 7575 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7576 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7577 BUMP_MIB(ill->ill_ip_mib, 7578 ipIfStatsInAddrErrors); 7579 freemsg(mp); 7580 ire_refrele(ire); 7581 return; 7582 } 7583 /* TBD add site-local check at site boundary? */ 7584 } else if (ipst->ips_ipv6_send_redirects) { 7585 in6_addr_t *v6targ; 7586 in6_addr_t gw_addr_v6; 7587 ire_t *src_ire_v6 = NULL; 7588 7589 /* 7590 * Don't send a redirect when forwarding a source 7591 * routed packet. 7592 */ 7593 if (ip_source_routed_v6(ip6h, mp, ipst)) 7594 goto forward; 7595 7596 mutex_enter(&ire->ire_lock); 7597 gw_addr_v6 = ire->ire_gateway_addr_v6; 7598 mutex_exit(&ire->ire_lock); 7599 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7600 v6targ = &gw_addr_v6; 7601 /* 7602 * We won't send redirects to a router 7603 * that doesn't have a link local 7604 * address, but will forward. 
7605 */ 7606 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7607 BUMP_MIB(ill->ill_ip_mib, 7608 ipIfStatsInAddrErrors); 7609 goto forward; 7610 } 7611 } else { 7612 v6targ = &ip6h->ip6_dst; 7613 } 7614 7615 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7616 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7617 GLOBAL_ZONEID, 0, NULL, 7618 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7619 ipst); 7620 7621 if (src_ire_v6 != NULL) { 7622 /* 7623 * The source is directly connected. 7624 */ 7625 mp1 = copymsg(mp); 7626 if (mp1 != NULL) { 7627 icmp_send_redirect_v6(WR(q), 7628 mp1, v6targ, &ip6h->ip6_dst, 7629 ill, B_FALSE); 7630 } 7631 ire_refrele(src_ire_v6); 7632 } 7633 } 7634 7635 forward: 7636 /* Hoplimit verified above */ 7637 ip6h->ip6_hops--; 7638 7639 outill = ire->ire_ipif->ipif_ill; 7640 7641 DTRACE_PROBE4(ip6__forwarding__start, 7642 ill_t *, inill, ill_t *, outill, 7643 ip6_t *, ip6h, mblk_t *, mp); 7644 7645 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7646 ipst->ips_ipv6firewall_forwarding, 7647 inill, outill, ip6h, mp, mp, 0, ipst); 7648 7649 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7650 7651 if (mp != NULL) { 7652 UPDATE_IB_PKT_COUNT(ire); 7653 ire->ire_last_used_time = lbolt; 7654 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7655 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7656 } 7657 IRE_REFRELE(ire); 7658 return; 7659 } 7660 7661 /* 7662 * Need to put on correct queue for reassembly to find it. 7663 * No need to use put() since reassembly has its own locks. 7664 * Note: multicast packets and packets destined to addresses 7665 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7666 * the arriving ill. Unlike the IPv4 case, enabling strict 7667 * destination multihoming will prevent accepting packets 7668 * addressed to an IRE_LOCAL on lo0. 
7669 */ 7670 if (ire->ire_rfq != q) { 7671 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7672 == NULL) { 7673 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7674 freemsg(hada_mp); 7675 freemsg(first_mp); 7676 return; 7677 } 7678 if (ire->ire_rfq != NULL) { 7679 q = ire->ire_rfq; 7680 ill = (ill_t *)q->q_ptr; 7681 ASSERT(ill != NULL); 7682 } 7683 } 7684 7685 zoneid = ire->ire_zoneid; 7686 UPDATE_IB_PKT_COUNT(ire); 7687 ire->ire_last_used_time = lbolt; 7688 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7689 ire_refrele(ire); 7690 ire = NULL; 7691 ipv6forus: 7692 /* 7693 * Looks like this packet is for us one way or another. 7694 * This is where we'll process destination headers etc. 7695 */ 7696 for (; ; ) { 7697 switch (nexthdr) { 7698 case IPPROTO_TCP: { 7699 uint16_t *up; 7700 uint32_t sum; 7701 int offset; 7702 7703 hdr_len = pkt_len - remlen; 7704 7705 if (hada_mp != NULL) { 7706 ip0dbg(("tcp hada drop\n")); 7707 goto hada_drop; 7708 } 7709 7710 7711 /* TCP needs all of the TCP header */ 7712 if (remlen < TCP_MIN_HEADER_LENGTH) 7713 goto pkt_too_short; 7714 if (mp->b_cont != NULL && 7715 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7716 if (!pullupmsg(mp, 7717 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7718 BUMP_MIB(ill->ill_ip_mib, 7719 ipIfStatsInDiscards); 7720 freemsg(first_mp); 7721 return; 7722 } 7723 hck_flags = 0; 7724 ip6h = (ip6_t *)mp->b_rptr; 7725 whereptr = (uint8_t *)ip6h + hdr_len; 7726 } 7727 /* 7728 * Extract the offset field from the TCP header. 7729 */ 7730 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7731 if (offset != 5) { 7732 if (offset < 5) { 7733 ip1dbg(("ip_rput_data_v6: short " 7734 "TCP data offset")); 7735 BUMP_MIB(ill->ill_ip_mib, 7736 ipIfStatsInDiscards); 7737 freemsg(first_mp); 7738 return; 7739 } 7740 /* 7741 * There must be TCP options. 7742 * Make sure we can grab them. 
7743 */ 7744 offset <<= 2; 7745 if (remlen < offset) 7746 goto pkt_too_short; 7747 if (mp->b_cont != NULL && 7748 whereptr + offset > mp->b_wptr) { 7749 if (!pullupmsg(mp, 7750 hdr_len + offset)) { 7751 BUMP_MIB(ill->ill_ip_mib, 7752 ipIfStatsInDiscards); 7753 freemsg(first_mp); 7754 return; 7755 } 7756 hck_flags = 0; 7757 ip6h = (ip6_t *)mp->b_rptr; 7758 whereptr = (uint8_t *)ip6h + hdr_len; 7759 } 7760 } 7761 7762 up = (uint16_t *)&ip6h->ip6_src; 7763 /* 7764 * TCP checksum calculation. First sum up the 7765 * pseudo-header fields: 7766 * - Source IPv6 address 7767 * - Destination IPv6 address 7768 * - TCP payload length 7769 * - TCP protocol ID 7770 */ 7771 sum = htons(IPPROTO_TCP + remlen) + 7772 up[0] + up[1] + up[2] + up[3] + 7773 up[4] + up[5] + up[6] + up[7] + 7774 up[8] + up[9] + up[10] + up[11] + 7775 up[12] + up[13] + up[14] + up[15]; 7776 7777 /* Fold initial sum */ 7778 sum = (sum & 0xffff) + (sum >> 16); 7779 7780 mp1 = mp->b_cont; 7781 7782 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7783 IP6_STAT(ipst, ip6_in_sw_cksum); 7784 7785 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7786 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7787 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7788 mp, mp1, cksum_err); 7789 7790 if (cksum_err) { 7791 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7792 7793 if (hck_flags & HCK_FULLCKSUM) { 7794 IP6_STAT(ipst, 7795 ip6_tcp_in_full_hw_cksum_err); 7796 } else if (hck_flags & HCK_PARTIALCKSUM) { 7797 IP6_STAT(ipst, 7798 ip6_tcp_in_part_hw_cksum_err); 7799 } else { 7800 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7801 } 7802 freemsg(first_mp); 7803 return; 7804 } 7805 tcp_fanout: 7806 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7807 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7808 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7809 return; 7810 } 7811 case IPPROTO_SCTP: 7812 { 7813 sctp_hdr_t *sctph; 7814 uint32_t calcsum, pktsum; 7815 uint_t hdr_len = pkt_len - remlen; 7816 sctp_stack_t *sctps; 7817 7818 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7819 7820 /* SCTP needs all of the SCTP header */ 7821 if (remlen < sizeof (*sctph)) { 7822 goto pkt_too_short; 7823 } 7824 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7825 ASSERT(mp->b_cont != NULL); 7826 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7827 BUMP_MIB(ill->ill_ip_mib, 7828 ipIfStatsInDiscards); 7829 freemsg(mp); 7830 return; 7831 } 7832 ip6h = (ip6_t *)mp->b_rptr; 7833 whereptr = (uint8_t *)ip6h + hdr_len; 7834 } 7835 7836 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7837 /* checksum */ 7838 pktsum = sctph->sh_chksum; 7839 sctph->sh_chksum = 0; 7840 calcsum = sctp_cksum(mp, hdr_len); 7841 if (calcsum != pktsum) { 7842 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7843 freemsg(mp); 7844 return; 7845 } 7846 sctph->sh_chksum = pktsum; 7847 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7848 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7849 ports, zoneid, mp, sctps)) == NULL) { 7850 ip_fanout_sctp_raw(first_mp, ill, 7851 (ipha_t *)ip6h, B_FALSE, ports, 7852 mctl_present, 7853 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7854 B_TRUE, zoneid); 7855 return; 7856 } 7857 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7858 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7859 B_FALSE, mctl_present); 7860 return; 7861 } 7862 case IPPROTO_UDP: { 7863 uint16_t *up; 7864 uint32_t sum; 7865 7866 hdr_len = pkt_len - remlen; 7867 7868 if (hada_mp != NULL) { 7869 ip0dbg(("udp hada drop\n")); 7870 goto hada_drop; 7871 } 7872 7873 /* Verify that at least the ports are present */ 7874 if (remlen < UDPH_SIZE) 7875 goto pkt_too_short; 7876 if (mp->b_cont != NULL && 7877 whereptr + UDPH_SIZE > mp->b_wptr) { 7878 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7879 BUMP_MIB(ill->ill_ip_mib, 7880 ipIfStatsInDiscards); 7881 freemsg(first_mp); 7882 return; 7883 } 7884 hck_flags = 0; 7885 ip6h = (ip6_t *)mp->b_rptr; 7886 whereptr = (uint8_t *)ip6h + hdr_len; 7887 } 7888 7889 /* 7890 * Before going through the regular 
checksum 7891 * calculation, make sure the received checksum 7892 * is non-zero. RFC 2460 says, a 0x0000 checksum 7893 * in a UDP packet (within IPv6 packet) is invalid 7894 * and should be replaced by 0xffff. This makes 7895 * sense as regular checksum calculation will 7896 * pass for both the cases i.e. 0x0000 and 0xffff. 7897 * Removing one of the case makes error detection 7898 * stronger. 7899 */ 7900 7901 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7902 /* 0x0000 checksum is invalid */ 7903 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7904 "checksum value 0x0000\n")); 7905 BUMP_MIB(ill->ill_ip_mib, 7906 udpIfStatsInCksumErrs); 7907 freemsg(first_mp); 7908 return; 7909 } 7910 7911 up = (uint16_t *)&ip6h->ip6_src; 7912 7913 /* 7914 * UDP checksum calculation. First sum up the 7915 * pseudo-header fields: 7916 * - Source IPv6 address 7917 * - Destination IPv6 address 7918 * - UDP payload length 7919 * - UDP protocol ID 7920 */ 7921 7922 sum = htons(IPPROTO_UDP + remlen) + 7923 up[0] + up[1] + up[2] + up[3] + 7924 up[4] + up[5] + up[6] + up[7] + 7925 up[8] + up[9] + up[10] + up[11] + 7926 up[12] + up[13] + up[14] + up[15]; 7927 7928 /* Fold initial sum */ 7929 sum = (sum & 0xffff) + (sum >> 16); 7930 7931 if (reass_hck_flags != 0) { 7932 hck_flags = reass_hck_flags; 7933 7934 IP_CKSUM_RECV_REASS(hck_flags, 7935 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7936 sum, reass_sum, cksum_err); 7937 } else { 7938 mp1 = mp->b_cont; 7939 7940 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7941 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7942 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7943 mp, mp1, cksum_err); 7944 } 7945 7946 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7947 IP6_STAT(ipst, ip6_in_sw_cksum); 7948 7949 if (cksum_err) { 7950 BUMP_MIB(ill->ill_ip_mib, 7951 udpIfStatsInCksumErrs); 7952 7953 if (hck_flags & HCK_FULLCKSUM) 7954 IP6_STAT(ipst, 7955 ip6_udp_in_full_hw_cksum_err); 7956 else if (hck_flags & HCK_PARTIALCKSUM) 7957 IP6_STAT(ipst, 7958 
ip6_udp_in_part_hw_cksum_err); 7959 else 7960 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7961 7962 freemsg(first_mp); 7963 return; 7964 } 7965 goto udp_fanout; 7966 } 7967 case IPPROTO_ICMPV6: { 7968 uint16_t *up; 7969 uint32_t sum; 7970 uint_t hdr_len = pkt_len - remlen; 7971 7972 if (hada_mp != NULL) { 7973 ip0dbg(("icmp hada drop\n")); 7974 goto hada_drop; 7975 } 7976 7977 up = (uint16_t *)&ip6h->ip6_src; 7978 sum = htons(IPPROTO_ICMPV6 + remlen) + 7979 up[0] + up[1] + up[2] + up[3] + 7980 up[4] + up[5] + up[6] + up[7] + 7981 up[8] + up[9] + up[10] + up[11] + 7982 up[12] + up[13] + up[14] + up[15]; 7983 sum = (sum & 0xffff) + (sum >> 16); 7984 sum = IP_CSUM(mp, hdr_len, sum); 7985 if (sum != 0) { 7986 /* IPv6 ICMP checksum failed */ 7987 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7988 "failed %x\n", 7989 sum)); 7990 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7991 BUMP_MIB(ill->ill_icmp6_mib, 7992 ipv6IfIcmpInErrors); 7993 freemsg(first_mp); 7994 return; 7995 } 7996 7997 icmp_fanout: 7998 /* Check variable for testing applications */ 7999 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 8000 freemsg(first_mp); 8001 return; 8002 } 8003 /* 8004 * Assume that there is always at least one conn for 8005 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8006 * where there is no conn. 8007 */ 8008 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8009 ASSERT(!IS_LOOPBACK((ill))); 8010 /* 8011 * In the multicast case, applications may have 8012 * joined the group from different zones, so we 8013 * need to deliver the packet to each of them. 8014 * Loop through the multicast memberships 8015 * structures (ilm) on the receive ill and send 8016 * a copy of the packet up each matching one. 
8017 */ 8018 ILM_WALKER_HOLD(ill); 8019 for (ilm = ill->ill_ilm; ilm != NULL; 8020 ilm = ilm->ilm_next) { 8021 if (ilm->ilm_flags & ILM_DELETED) 8022 continue; 8023 if (!IN6_ARE_ADDR_EQUAL( 8024 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8025 continue; 8026 if (!ipif_lookup_zoneid(ill, 8027 ilm->ilm_zoneid, IPIF_UP, NULL)) 8028 continue; 8029 8030 first_mp1 = ip_copymsg(first_mp); 8031 if (first_mp1 == NULL) 8032 continue; 8033 icmp_inbound_v6(q, first_mp1, ill, 8034 hdr_len, mctl_present, 0, 8035 ilm->ilm_zoneid, dl_mp); 8036 } 8037 ILM_WALKER_RELE(ill); 8038 } else { 8039 first_mp1 = ip_copymsg(first_mp); 8040 if (first_mp1 != NULL) 8041 icmp_inbound_v6(q, first_mp1, ill, 8042 hdr_len, mctl_present, 0, zoneid, 8043 dl_mp); 8044 } 8045 } 8046 /* FALLTHRU */ 8047 default: { 8048 /* 8049 * Handle protocols with which IPv6 is less intimate. 8050 */ 8051 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8052 8053 if (hada_mp != NULL) { 8054 ip0dbg(("default hada drop\n")); 8055 goto hada_drop; 8056 } 8057 8058 /* 8059 * Enable sending ICMP for "Unknown" nexthdr 8060 * case. i.e. where we did not FALLTHRU from 8061 * IPPROTO_ICMPV6 processing case above. 8062 * If we did FALLTHRU, then the packet has already been 8063 * processed for IPPF, don't process it again in 8064 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8065 * flags 8066 */ 8067 if (nexthdr != IPPROTO_ICMPV6) 8068 proto_flags |= IP_FF_SEND_ICMP; 8069 else 8070 proto_flags |= IP6_NO_IPPOLICY; 8071 8072 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8073 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8074 mctl_present, zoneid); 8075 return; 8076 } 8077 8078 case IPPROTO_DSTOPTS: { 8079 uint_t ehdrlen; 8080 uint8_t *optptr; 8081 ip6_dest_t *desthdr; 8082 8083 /* Check if AH is present. 
*/ 8084 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8085 hada_mp, zoneid)) { 8086 ip0dbg(("dst early hada drop\n")); 8087 return; 8088 } 8089 8090 /* 8091 * Reinitialize pointers, as ipsec_early_ah_v6() does 8092 * complete pullups. We don't have to do more pullups 8093 * as a result. 8094 */ 8095 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8096 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8097 ip6h = (ip6_t *)mp->b_rptr; 8098 8099 if (remlen < MIN_EHDR_LEN) 8100 goto pkt_too_short; 8101 8102 desthdr = (ip6_dest_t *)whereptr; 8103 nexthdr = desthdr->ip6d_nxt; 8104 prev_nexthdr_offset = (uint_t)(whereptr - 8105 (uint8_t *)ip6h); 8106 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8107 if (remlen < ehdrlen) 8108 goto pkt_too_short; 8109 optptr = whereptr + 2; 8110 /* 8111 * Note: XXX This code does not seem to make 8112 * distinction between Destination Options Header 8113 * being before/after Routing Header which can 8114 * happen if we are at the end of source route. 8115 * This may become significant in future. 8116 * (No real significant Destination Options are 8117 * defined/implemented yet ). 8118 */ 8119 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8120 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8121 case -1: 8122 /* 8123 * Packet has been consumed and any needed 8124 * ICMP errors sent. 
8125 */ 8126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8127 freemsg(hada_mp); 8128 return; 8129 case 0: 8130 /* No action needed continue */ 8131 break; 8132 case 1: 8133 /* 8134 * Unnexpected return value 8135 * (Router alert is a Hop-by-Hop option) 8136 */ 8137 #ifdef DEBUG 8138 panic("ip_rput_data_v6: router " 8139 "alert hbh opt indication in dest opt"); 8140 /*NOTREACHED*/ 8141 #else 8142 freemsg(hada_mp); 8143 freemsg(first_mp); 8144 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8145 return; 8146 #endif 8147 } 8148 used = ehdrlen; 8149 break; 8150 } 8151 case IPPROTO_FRAGMENT: { 8152 ip6_frag_t *fraghdr; 8153 size_t no_frag_hdr_len; 8154 8155 if (hada_mp != NULL) { 8156 ip0dbg(("frag hada drop\n")); 8157 goto hada_drop; 8158 } 8159 8160 ASSERT(first_mp == mp); 8161 if (remlen < sizeof (ip6_frag_t)) 8162 goto pkt_too_short; 8163 8164 if (mp->b_cont != NULL && 8165 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8166 if (!pullupmsg(mp, 8167 pkt_len - remlen + sizeof (ip6_frag_t))) { 8168 BUMP_MIB(ill->ill_ip_mib, 8169 ipIfStatsInDiscards); 8170 freemsg(mp); 8171 return; 8172 } 8173 hck_flags = 0; 8174 ip6h = (ip6_t *)mp->b_rptr; 8175 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8176 } 8177 8178 fraghdr = (ip6_frag_t *)whereptr; 8179 used = (uint_t)sizeof (ip6_frag_t); 8180 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8181 8182 /* 8183 * Invoke the CGTP (multirouting) filtering module to 8184 * process the incoming packet. Packets identified as 8185 * duplicates must be discarded. Filtering is active 8186 * only if the the ip_cgtp_filter ndd variable is 8187 * non-zero. 
8188 */ 8189 if (ipst->ips_ip_cgtp_filter && 8190 ipst->ips_ip_cgtp_filter_ops != NULL) { 8191 int cgtp_flt_pkt; 8192 netstackid_t stackid; 8193 8194 stackid = ipst->ips_netstack->netstack_stackid; 8195 8196 cgtp_flt_pkt = 8197 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8198 stackid, inill->ill_phyint->phyint_ifindex, 8199 ip6h, fraghdr); 8200 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8201 freemsg(mp); 8202 return; 8203 } 8204 } 8205 8206 /* Restore the flags */ 8207 DB_CKSUMFLAGS(mp) = hck_flags; 8208 8209 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8210 remlen - used, &prev_nexthdr_offset, 8211 &reass_sum, &reass_hck_flags); 8212 if (mp == NULL) { 8213 /* Reassembly is still pending */ 8214 return; 8215 } 8216 /* The first mblk are the headers before the frag hdr */ 8217 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8218 8219 first_mp = mp; /* mp has most likely changed! */ 8220 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8221 ip6h = (ip6_t *)mp->b_rptr; 8222 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8223 whereptr = mp->b_rptr + no_frag_hdr_len; 8224 remlen = ntohs(ip6h->ip6_plen) + 8225 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8226 pkt_len = msgdsize(mp); 8227 used = 0; 8228 break; 8229 } 8230 case IPPROTO_HOPOPTS: { 8231 if (hada_mp != NULL) { 8232 ip0dbg(("hop hada drop\n")); 8233 goto hada_drop; 8234 } 8235 /* 8236 * Illegal header sequence. 8237 * (Hop-by-hop headers are processed above 8238 * and required to immediately follow IPv6 header) 8239 */ 8240 icmp_param_problem_v6(WR(q), first_mp, 8241 ICMP6_PARAMPROB_NEXTHEADER, 8242 prev_nexthdr_offset, 8243 B_FALSE, B_FALSE, zoneid, ipst); 8244 return; 8245 } 8246 case IPPROTO_ROUTING: { 8247 uint_t ehdrlen; 8248 ip6_rthdr_t *rthdr; 8249 8250 /* Check if AH is present. 
*/ 8251 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8252 hada_mp, zoneid)) { 8253 ip0dbg(("routing hada drop\n")); 8254 return; 8255 } 8256 8257 /* 8258 * Reinitialize pointers, as ipsec_early_ah_v6() does 8259 * complete pullups. We don't have to do more pullups 8260 * as a result. 8261 */ 8262 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8263 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8264 ip6h = (ip6_t *)mp->b_rptr; 8265 8266 if (remlen < MIN_EHDR_LEN) 8267 goto pkt_too_short; 8268 rthdr = (ip6_rthdr_t *)whereptr; 8269 nexthdr = rthdr->ip6r_nxt; 8270 prev_nexthdr_offset = (uint_t)(whereptr - 8271 (uint8_t *)ip6h); 8272 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8273 if (remlen < ehdrlen) 8274 goto pkt_too_short; 8275 if (rthdr->ip6r_segleft != 0) { 8276 /* Not end of source route */ 8277 if (ll_multicast) { 8278 BUMP_MIB(ill->ill_ip_mib, 8279 ipIfStatsForwProhibits); 8280 freemsg(hada_mp); 8281 freemsg(mp); 8282 return; 8283 } 8284 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8285 flags, hada_mp, dl_mp); 8286 return; 8287 } 8288 used = ehdrlen; 8289 break; 8290 } 8291 case IPPROTO_AH: 8292 case IPPROTO_ESP: { 8293 /* 8294 * Fast path for AH/ESP. If this is the first time 8295 * we are sending a datagram to AH/ESP, allocate 8296 * a IPSEC_IN message and prepend it. Otherwise, 8297 * just fanout. 8298 */ 8299 8300 ipsec_in_t *ii; 8301 int ipsec_rc; 8302 ipsec_stack_t *ipss; 8303 8304 ipss = ipst->ips_netstack->netstack_ipsec; 8305 if (!mctl_present) { 8306 ASSERT(first_mp == mp); 8307 first_mp = ipsec_in_alloc(B_FALSE, 8308 ipst->ips_netstack); 8309 if (first_mp == NULL) { 8310 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8311 "allocation failure.\n")); 8312 BUMP_MIB(ill->ill_ip_mib, 8313 ipIfStatsInDiscards); 8314 freemsg(mp); 8315 return; 8316 } 8317 /* 8318 * Store the ill_index so that when we come back 8319 * from IPSEC we ride on the same queue. 
8320 */ 8321 ii = (ipsec_in_t *)first_mp->b_rptr; 8322 ii->ipsec_in_ill_index = 8323 ill->ill_phyint->phyint_ifindex; 8324 ii->ipsec_in_rill_index = 8325 ii->ipsec_in_ill_index; 8326 first_mp->b_cont = mp; 8327 /* 8328 * Cache hardware acceleration info. 8329 */ 8330 if (hada_mp != NULL) { 8331 IPSECHW_DEBUG(IPSECHW_PKT, 8332 ("ip_rput_data_v6: " 8333 "caching data attr.\n")); 8334 ii->ipsec_in_accelerated = B_TRUE; 8335 ii->ipsec_in_da = hada_mp; 8336 hada_mp = NULL; 8337 } 8338 } else { 8339 ii = (ipsec_in_t *)first_mp->b_rptr; 8340 } 8341 8342 if (!ipsec_loaded(ipss)) { 8343 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8344 zoneid, ipst); 8345 return; 8346 } 8347 8348 /* select inbound SA and have IPsec process the pkt */ 8349 if (nexthdr == IPPROTO_ESP) { 8350 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8351 ipst->ips_netstack); 8352 if (esph == NULL) 8353 return; 8354 ASSERT(ii->ipsec_in_esp_sa != NULL); 8355 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8356 NULL); 8357 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8358 first_mp, esph); 8359 } else { 8360 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8361 ipst->ips_netstack); 8362 if (ah == NULL) 8363 return; 8364 ASSERT(ii->ipsec_in_ah_sa != NULL); 8365 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8366 NULL); 8367 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8368 first_mp, ah); 8369 } 8370 8371 switch (ipsec_rc) { 8372 case IPSEC_STATUS_SUCCESS: 8373 break; 8374 case IPSEC_STATUS_FAILED: 8375 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8376 /* FALLTHRU */ 8377 case IPSEC_STATUS_PENDING: 8378 return; 8379 } 8380 /* we're done with IPsec processing, send it up */ 8381 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8382 return; 8383 } 8384 case IPPROTO_NONE: 8385 /* All processing is done. Count as "delivered". 
*/ 8386 freemsg(hada_mp); 8387 freemsg(first_mp); 8388 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8389 return; 8390 } 8391 whereptr += used; 8392 ASSERT(remlen >= used); 8393 remlen -= used; 8394 } 8395 /* NOTREACHED */ 8396 8397 pkt_too_short: 8398 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8399 ip6_len, pkt_len, remlen)); 8400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8401 freemsg(hada_mp); 8402 freemsg(first_mp); 8403 return; 8404 udp_fanout: 8405 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8406 connp = NULL; 8407 } else { 8408 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8409 ipst); 8410 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8411 CONN_DEC_REF(connp); 8412 connp = NULL; 8413 } 8414 } 8415 8416 if (connp == NULL) { 8417 uint32_t ports; 8418 8419 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8420 UDP_PORTS_OFFSET); 8421 IP6_STAT(ipst, ip6_udp_slow_path); 8422 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8423 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8424 zoneid); 8425 return; 8426 } 8427 8428 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8429 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8430 freemsg(first_mp); 8431 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8432 CONN_DEC_REF(connp); 8433 return; 8434 } 8435 8436 /* Initiate IPPF processing */ 8437 if (IP6_IN_IPP(flags, ipst)) { 8438 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8439 if (mp == NULL) { 8440 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8441 CONN_DEC_REF(connp); 8442 return; 8443 } 8444 } 8445 8446 if (connp->conn_ip_recvpktinfo || 8447 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8448 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8449 if (mp == NULL) { 8450 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8451 CONN_DEC_REF(connp); 8452 return; 8453 } 8454 } 8455 8456 IP6_STAT(ipst, ip6_udp_fast_path); 8457 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8458 8459 /* Send it upstream */ 8460 (connp->conn_recv)(connp, mp, NULL); 8461 8462 CONN_DEC_REF(connp); 8463 freemsg(hada_mp); 8464 return; 8465 8466 hada_drop: 8467 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8468 /* IPsec kstats: bump counter here */ 8469 freemsg(hada_mp); 8470 freemsg(first_mp); 8471 } 8472 8473 /* 8474 * Reassemble fragment. 8475 * When it returns a completed message the first mblk will only contain 8476 * the headers prior to the fragment header. 8477 * 8478 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8479 * of the preceding header. This is needed to patch the previous header's 8480 * nexthdr field when reassembly completes. 8481 */ 8482 static mblk_t * 8483 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8484 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8485 uint32_t *cksum_val, uint16_t *cksum_flags) 8486 { 8487 ill_t *ill = (ill_t *)q->q_ptr; 8488 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8489 uint16_t offset; 8490 boolean_t more_frags; 8491 uint8_t nexthdr = fraghdr->ip6f_nxt; 8492 in6_addr_t *v6dst_ptr; 8493 in6_addr_t *v6src_ptr; 8494 uint_t end; 8495 uint_t hdr_length; 8496 size_t count; 8497 ipf_t *ipf; 8498 ipf_t **ipfp; 8499 ipfb_t *ipfb; 8500 mblk_t *mp1; 8501 uint8_t ecn_info = 0; 8502 size_t msg_len; 8503 mblk_t *tail_mp; 8504 mblk_t *t_mp; 8505 boolean_t pruned = B_FALSE; 8506 uint32_t sum_val; 8507 uint16_t sum_flags; 8508 ip_stack_t *ipst = ill->ill_ipst; 8509 8510 if (cksum_val != NULL) 8511 *cksum_val = 0; 8512 if (cksum_flags != NULL) 8513 *cksum_flags = 0; 8514 8515 /* 8516 * We utilize hardware computed checksum info only for UDP since 8517 * IP fragmentation is a normal occurence for the protocol. In 8518 * addition, checksum offload support for IP fragments carrying 8519 * UDP payload is commonly implemented across network adapters. 
8520 */ 8521 ASSERT(ill != NULL); 8522 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8523 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8524 mblk_t *mp1 = mp->b_cont; 8525 int32_t len; 8526 8527 /* Record checksum information from the packet */ 8528 sum_val = (uint32_t)DB_CKSUM16(mp); 8529 sum_flags = DB_CKSUMFLAGS(mp); 8530 8531 /* fragmented payload offset from beginning of mblk */ 8532 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8533 8534 if ((sum_flags & HCK_PARTIALCKSUM) && 8535 (mp1 == NULL || mp1->b_cont == NULL) && 8536 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8537 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8538 uint32_t adj; 8539 /* 8540 * Partial checksum has been calculated by hardware 8541 * and attached to the packet; in addition, any 8542 * prepended extraneous data is even byte aligned. 8543 * If any such data exists, we adjust the checksum; 8544 * this would also handle any postpended data. 8545 */ 8546 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8547 mp, mp1, len, adj); 8548 8549 /* One's complement subtract extraneous checksum */ 8550 if (adj >= sum_val) 8551 sum_val = ~(adj - sum_val) & 0xFFFF; 8552 else 8553 sum_val -= adj; 8554 } 8555 } else { 8556 sum_val = 0; 8557 sum_flags = 0; 8558 } 8559 8560 /* Clear hardware checksumming flag */ 8561 DB_CKSUMFLAGS(mp) = 0; 8562 8563 /* 8564 * Note: Fragment offset in header is in 8-octet units. 8565 * Clearing least significant 3 bits not only extracts 8566 * it but also gets it in units of octets. 8567 */ 8568 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8569 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8570 8571 /* 8572 * Is the more frags flag on and the payload length not a multiple 8573 * of eight? 
8574 */ 8575 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8576 zoneid_t zoneid; 8577 8578 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8579 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8580 if (zoneid == ALL_ZONES) { 8581 freemsg(mp); 8582 return (NULL); 8583 } 8584 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8585 (uint32_t)((char *)&ip6h->ip6_plen - 8586 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8587 return (NULL); 8588 } 8589 8590 v6src_ptr = &ip6h->ip6_src; 8591 v6dst_ptr = &ip6h->ip6_dst; 8592 end = remlen; 8593 8594 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8595 end += offset; 8596 8597 /* 8598 * Would fragment cause reassembled packet to have a payload length 8599 * greater than IP_MAXPACKET - the max payload size? 8600 */ 8601 if (end > IP_MAXPACKET) { 8602 zoneid_t zoneid; 8603 8604 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8605 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8606 if (zoneid == ALL_ZONES) { 8607 freemsg(mp); 8608 return (NULL); 8609 } 8610 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8611 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8612 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8613 return (NULL); 8614 } 8615 8616 /* 8617 * This packet just has one fragment. Reassembly not 8618 * needed. 8619 */ 8620 if (!more_frags && offset == 0) { 8621 goto reass_done; 8622 } 8623 8624 /* 8625 * Drop the fragmented as early as possible, if 8626 * we don't have resource(s) to re-assemble. 8627 */ 8628 if (ipst->ips_ip_reass_queue_bytes == 0) { 8629 freemsg(mp); 8630 return (NULL); 8631 } 8632 8633 /* Record the ECN field info. */ 8634 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8635 /* 8636 * If this is not the first fragment, dump the unfragmentable 8637 * portion of the packet. 8638 */ 8639 if (offset) 8640 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8641 8642 /* 8643 * Fragmentation reassembly. 
Each ILL has a hash table for 8644 * queueing packets undergoing reassembly for all IPIFs 8645 * associated with the ILL. The hash is based on the packet 8646 * IP ident field. The ILL frag hash table was allocated 8647 * as a timer block at the time the ILL was created. Whenever 8648 * there is anything on the reassembly queue, the timer will 8649 * be running. 8650 */ 8651 msg_len = MBLKSIZE(mp); 8652 tail_mp = mp; 8653 while (tail_mp->b_cont != NULL) { 8654 tail_mp = tail_mp->b_cont; 8655 msg_len += MBLKSIZE(tail_mp); 8656 } 8657 /* 8658 * If the reassembly list for this ILL will get too big 8659 * prune it. 8660 */ 8661 8662 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8663 ipst->ips_ip_reass_queue_bytes) { 8664 ill_frag_prune(ill, 8665 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8666 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8667 pruned = B_TRUE; 8668 } 8669 8670 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8671 mutex_enter(&ipfb->ipfb_lock); 8672 8673 ipfp = &ipfb->ipfb_ipf; 8674 /* Try to find an existing fragment queue for this packet. */ 8675 for (;;) { 8676 ipf = ipfp[0]; 8677 if (ipf) { 8678 /* 8679 * It has to match on ident, source address, and 8680 * dest address. 8681 */ 8682 if (ipf->ipf_ident == ident && 8683 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8684 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8685 8686 /* 8687 * If we have received too many 8688 * duplicate fragments for this packet 8689 * free it. 8690 */ 8691 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8692 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8693 freemsg(mp); 8694 mutex_exit(&ipfb->ipfb_lock); 8695 return (NULL); 8696 } 8697 8698 break; 8699 } 8700 ipfp = &ipf->ipf_hash_next; 8701 continue; 8702 } 8703 8704 8705 /* 8706 * If we pruned the list, do we want to store this new 8707 * fragment?. We apply an optimization here based on the 8708 * fact that most fragments will be received in order. 
8709 * So if the offset of this incoming fragment is zero, 8710 * it is the first fragment of a new packet. We will 8711 * keep it. Otherwise drop the fragment, as we have 8712 * probably pruned the packet already (since the 8713 * packet cannot be found). 8714 */ 8715 8716 if (pruned && offset != 0) { 8717 mutex_exit(&ipfb->ipfb_lock); 8718 freemsg(mp); 8719 return (NULL); 8720 } 8721 8722 /* New guy. Allocate a frag message. */ 8723 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8724 if (!mp1) { 8725 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8726 freemsg(mp); 8727 partial_reass_done: 8728 mutex_exit(&ipfb->ipfb_lock); 8729 return (NULL); 8730 } 8731 8732 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8733 /* 8734 * Too many fragmented packets in this hash bucket. 8735 * Free the oldest. 8736 */ 8737 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8738 } 8739 8740 mp1->b_cont = mp; 8741 8742 /* Initialize the fragment header. */ 8743 ipf = (ipf_t *)mp1->b_rptr; 8744 ipf->ipf_mp = mp1; 8745 ipf->ipf_ptphn = ipfp; 8746 ipfp[0] = ipf; 8747 ipf->ipf_hash_next = NULL; 8748 ipf->ipf_ident = ident; 8749 ipf->ipf_v6src = *v6src_ptr; 8750 ipf->ipf_v6dst = *v6dst_ptr; 8751 /* Record reassembly start time. 
*/ 8752 ipf->ipf_timestamp = gethrestime_sec(); 8753 /* Record ipf generation and account for frag header */ 8754 ipf->ipf_gen = ill->ill_ipf_gen++; 8755 ipf->ipf_count = MBLKSIZE(mp1); 8756 ipf->ipf_protocol = nexthdr; 8757 ipf->ipf_nf_hdr_len = 0; 8758 ipf->ipf_prev_nexthdr_offset = 0; 8759 ipf->ipf_last_frag_seen = B_FALSE; 8760 ipf->ipf_ecn = ecn_info; 8761 ipf->ipf_num_dups = 0; 8762 ipfb->ipfb_frag_pkts++; 8763 ipf->ipf_checksum = 0; 8764 ipf->ipf_checksum_flags = 0; 8765 8766 /* Store checksum value in fragment header */ 8767 if (sum_flags != 0) { 8768 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8769 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8770 ipf->ipf_checksum = sum_val; 8771 ipf->ipf_checksum_flags = sum_flags; 8772 } 8773 8774 /* 8775 * We handle reassembly two ways. In the easy case, 8776 * where all the fragments show up in order, we do 8777 * minimal bookkeeping, and just clip new pieces on 8778 * the end. If we ever see a hole, then we go off 8779 * to ip_reassemble which has to mark the pieces and 8780 * keep track of the number of holes, etc. Obviously, 8781 * the point of having both mechanisms is so we can 8782 * handle the easy case as efficiently as possible. 8783 */ 8784 if (offset == 0) { 8785 /* Easy case, in-order reassembly so far. */ 8786 /* Update the byte count */ 8787 ipf->ipf_count += msg_len; 8788 ipf->ipf_tail_mp = tail_mp; 8789 /* 8790 * Keep track of next expected offset in 8791 * ipf_end. 8792 */ 8793 ipf->ipf_end = end; 8794 ipf->ipf_nf_hdr_len = hdr_length; 8795 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8796 } else { 8797 /* Hard case, hole at the beginning. */ 8798 ipf->ipf_tail_mp = NULL; 8799 /* 8800 * ipf_end == 0 means that we have given up 8801 * on easy reassembly. 8802 */ 8803 ipf->ipf_end = 0; 8804 8805 /* Forget checksum offload from now on */ 8806 ipf->ipf_checksum_flags = 0; 8807 8808 /* 8809 * ipf_hole_cnt is set by ip_reassemble. 8810 * ipf_count is updated by ip_reassemble. 
8811 * No need to check for return value here 8812 * as we don't expect reassembly to complete or 8813 * fail for the first fragment itself. 8814 */ 8815 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8816 msg_len); 8817 } 8818 /* Update per ipfb and ill byte counts */ 8819 ipfb->ipfb_count += ipf->ipf_count; 8820 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8821 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8822 /* If the frag timer wasn't already going, start it. */ 8823 mutex_enter(&ill->ill_lock); 8824 ill_frag_timer_start(ill); 8825 mutex_exit(&ill->ill_lock); 8826 goto partial_reass_done; 8827 } 8828 8829 /* 8830 * If the packet's flag has changed (it could be coming up 8831 * from an interface different than the previous, therefore 8832 * possibly different checksum capability), then forget about 8833 * any stored checksum states. Otherwise add the value to 8834 * the existing one stored in the fragment header. 8835 */ 8836 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8837 sum_val += ipf->ipf_checksum; 8838 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8839 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8840 ipf->ipf_checksum = sum_val; 8841 } else if (ipf->ipf_checksum_flags != 0) { 8842 /* Forget checksum offload from now on */ 8843 ipf->ipf_checksum_flags = 0; 8844 } 8845 8846 /* 8847 * We have a new piece of a datagram which is already being 8848 * reassembled. Update the ECN info if all IP fragments 8849 * are ECN capable. If there is one which is not, clear 8850 * all the info. If there is at least one which has CE 8851 * code point, IP needs to report that up to transport. 
8852 */ 8853 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8854 if (ecn_info == IPH_ECN_CE) 8855 ipf->ipf_ecn = IPH_ECN_CE; 8856 } else { 8857 ipf->ipf_ecn = IPH_ECN_NECT; 8858 } 8859 8860 if (offset && ipf->ipf_end == offset) { 8861 /* The new fragment fits at the end */ 8862 ipf->ipf_tail_mp->b_cont = mp; 8863 /* Update the byte count */ 8864 ipf->ipf_count += msg_len; 8865 /* Update per ipfb and ill byte counts */ 8866 ipfb->ipfb_count += msg_len; 8867 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8868 atomic_add_32(&ill->ill_frag_count, msg_len); 8869 if (more_frags) { 8870 /* More to come. */ 8871 ipf->ipf_end = end; 8872 ipf->ipf_tail_mp = tail_mp; 8873 goto partial_reass_done; 8874 } 8875 } else { 8876 /* 8877 * Go do the hard cases. 8878 * Call ip_reassemble(). 8879 */ 8880 int ret; 8881 8882 if (offset == 0) { 8883 if (ipf->ipf_prev_nexthdr_offset == 0) { 8884 ipf->ipf_nf_hdr_len = hdr_length; 8885 ipf->ipf_prev_nexthdr_offset = 8886 *prev_nexthdr_offset; 8887 } 8888 } 8889 /* Save current byte count */ 8890 count = ipf->ipf_count; 8891 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8892 8893 /* Count of bytes added and subtracted (freeb()ed) */ 8894 count = ipf->ipf_count - count; 8895 if (count) { 8896 /* Update per ipfb and ill byte counts */ 8897 ipfb->ipfb_count += count; 8898 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8899 atomic_add_32(&ill->ill_frag_count, count); 8900 } 8901 if (ret == IP_REASS_PARTIAL) { 8902 goto partial_reass_done; 8903 } else if (ret == IP_REASS_FAILED) { 8904 /* Reassembly failed. Free up all resources */ 8905 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8906 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8907 IP_REASS_SET_START(t_mp, 0); 8908 IP_REASS_SET_END(t_mp, 0); 8909 } 8910 freemsg(mp); 8911 goto partial_reass_done; 8912 } 8913 8914 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8915 } 8916 /* 8917 * We have completed reassembly. 
Unhook the frag header from 8918 * the reassembly list. 8919 * 8920 * Grab the unfragmentable header length next header value out 8921 * of the first fragment 8922 */ 8923 ASSERT(ipf->ipf_nf_hdr_len != 0); 8924 hdr_length = ipf->ipf_nf_hdr_len; 8925 8926 /* 8927 * Before we free the frag header, record the ECN info 8928 * to report back to the transport. 8929 */ 8930 ecn_info = ipf->ipf_ecn; 8931 8932 /* 8933 * Store the nextheader field in the header preceding the fragment 8934 * header 8935 */ 8936 nexthdr = ipf->ipf_protocol; 8937 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8938 ipfp = ipf->ipf_ptphn; 8939 8940 /* We need to supply these to caller */ 8941 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8942 sum_val = ipf->ipf_checksum; 8943 else 8944 sum_val = 0; 8945 8946 mp1 = ipf->ipf_mp; 8947 count = ipf->ipf_count; 8948 ipf = ipf->ipf_hash_next; 8949 if (ipf) 8950 ipf->ipf_ptphn = ipfp; 8951 ipfp[0] = ipf; 8952 atomic_add_32(&ill->ill_frag_count, -count); 8953 ASSERT(ipfb->ipfb_count >= count); 8954 ipfb->ipfb_count -= count; 8955 ipfb->ipfb_frag_pkts--; 8956 mutex_exit(&ipfb->ipfb_lock); 8957 /* Ditch the frag header. */ 8958 mp = mp1->b_cont; 8959 freeb(mp1); 8960 8961 /* 8962 * Make sure the packet is good by doing some sanity 8963 * check. If bad we can silentely drop the packet. 8964 */ 8965 reass_done: 8966 if (hdr_length < sizeof (ip6_frag_t)) { 8967 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8968 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8969 freemsg(mp); 8970 return (NULL); 8971 } 8972 8973 /* 8974 * Remove the fragment header from the initial header by 8975 * splitting the mblk into the non-fragmentable header and 8976 * everthing after the fragment extension header. This has the 8977 * side effect of putting all the headers that need destination 8978 * processing into the b_cont block-- on return this fact is 8979 * used in order to avoid having to look at the extensions 8980 * already processed. 
8981 * 8982 * Note that this code assumes that the unfragmentable portion 8983 * of the header is in the first mblk and increments 8984 * the read pointer past it. If this assumption is broken 8985 * this code fails badly. 8986 */ 8987 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8988 mblk_t *nmp; 8989 8990 if (!(nmp = dupb(mp))) { 8991 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8992 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8993 freemsg(mp); 8994 return (NULL); 8995 } 8996 nmp->b_cont = mp->b_cont; 8997 mp->b_cont = nmp; 8998 nmp->b_rptr += hdr_length; 8999 } 9000 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9001 9002 ip6h = (ip6_t *)mp->b_rptr; 9003 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9004 9005 /* Restore original IP length in header. */ 9006 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9007 /* Record the ECN info. */ 9008 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9009 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9010 9011 /* Reassembly is successful; return checksum information if needed */ 9012 if (cksum_val != NULL) 9013 *cksum_val = sum_val; 9014 if (cksum_flags != NULL) 9015 *cksum_flags = sum_flags; 9016 9017 return (mp); 9018 } 9019 9020 /* 9021 * Walk through the options to see if there is a routing header. 9022 * If present get the destination which is the last address of 9023 * the option. 
9024 */ 9025 in6_addr_t 9026 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9027 { 9028 uint8_t nexthdr; 9029 uint8_t *whereptr; 9030 ip6_hbh_t *hbhhdr; 9031 ip6_dest_t *dsthdr; 9032 ip6_rthdr0_t *rthdr; 9033 ip6_frag_t *fraghdr; 9034 int ehdrlen; 9035 int left; 9036 in6_addr_t *ap, rv; 9037 9038 if (is_fragment != NULL) 9039 *is_fragment = B_FALSE; 9040 9041 rv = ip6h->ip6_dst; 9042 9043 nexthdr = ip6h->ip6_nxt; 9044 whereptr = (uint8_t *)&ip6h[1]; 9045 for (;;) { 9046 9047 ASSERT(nexthdr != IPPROTO_RAW); 9048 switch (nexthdr) { 9049 case IPPROTO_HOPOPTS: 9050 hbhhdr = (ip6_hbh_t *)whereptr; 9051 nexthdr = hbhhdr->ip6h_nxt; 9052 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9053 break; 9054 case IPPROTO_DSTOPTS: 9055 dsthdr = (ip6_dest_t *)whereptr; 9056 nexthdr = dsthdr->ip6d_nxt; 9057 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9058 break; 9059 case IPPROTO_ROUTING: 9060 rthdr = (ip6_rthdr0_t *)whereptr; 9061 nexthdr = rthdr->ip6r0_nxt; 9062 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9063 9064 left = rthdr->ip6r0_segleft; 9065 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9066 rv = *(ap + left - 1); 9067 /* 9068 * If the caller doesn't care whether the packet 9069 * is a fragment or not, we can stop here since 9070 * we have our destination. 9071 */ 9072 if (is_fragment == NULL) 9073 goto done; 9074 break; 9075 case IPPROTO_FRAGMENT: 9076 fraghdr = (ip6_frag_t *)whereptr; 9077 nexthdr = fraghdr->ip6f_nxt; 9078 ehdrlen = sizeof (ip6_frag_t); 9079 if (is_fragment != NULL) 9080 *is_fragment = B_TRUE; 9081 goto done; 9082 default : 9083 goto done; 9084 } 9085 whereptr += ehdrlen; 9086 } 9087 9088 done: 9089 return (rv); 9090 } 9091 9092 /* 9093 * ip_source_routed_v6: 9094 * This function is called by redirect code in ip_rput_data_v6 to 9095 * know whether this packet is source routed through this node i.e 9096 * whether this node (router) is part of the journey. 
This 9097 * function is called under two cases : 9098 * 9099 * case 1 : Routing header was processed by this node and 9100 * ip_process_rthdr replaced ip6_dst with the next hop 9101 * and we are forwarding the packet to the next hop. 9102 * 9103 * case 2 : Routing header was not processed by this node and we 9104 * are just forwarding the packet. 9105 * 9106 * For case (1) we don't want to send redirects. For case(2) we 9107 * want to send redirects. 9108 */ 9109 static boolean_t 9110 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9111 { 9112 uint8_t nexthdr; 9113 in6_addr_t *addrptr; 9114 ip6_rthdr0_t *rthdr; 9115 uint8_t numaddr; 9116 ip6_hbh_t *hbhhdr; 9117 uint_t ehdrlen; 9118 uint8_t *byteptr; 9119 9120 ip2dbg(("ip_source_routed_v6\n")); 9121 nexthdr = ip6h->ip6_nxt; 9122 ehdrlen = IPV6_HDR_LEN; 9123 9124 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9125 while (nexthdr == IPPROTO_HOPOPTS || 9126 nexthdr == IPPROTO_DSTOPTS) { 9127 byteptr = (uint8_t *)ip6h + ehdrlen; 9128 /* 9129 * Check if we have already processed 9130 * packets or we are just a forwarding 9131 * router which only pulled up msgs up 9132 * to IPV6HDR and one HBH ext header 9133 */ 9134 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9135 ip2dbg(("ip_source_routed_v6: Extension" 9136 " headers not processed\n")); 9137 return (B_FALSE); 9138 } 9139 hbhhdr = (ip6_hbh_t *)byteptr; 9140 nexthdr = hbhhdr->ip6h_nxt; 9141 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9142 } 9143 switch (nexthdr) { 9144 case IPPROTO_ROUTING: 9145 byteptr = (uint8_t *)ip6h + ehdrlen; 9146 /* 9147 * If for some reason, we haven't pulled up 9148 * the routing hdr data mblk, then we must 9149 * not have processed it at all. So for sure 9150 * we are not part of the source routed journey. 
9151 */ 9152 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9153 ip2dbg(("ip_source_routed_v6: Routing" 9154 " header not processed\n")); 9155 return (B_FALSE); 9156 } 9157 rthdr = (ip6_rthdr0_t *)byteptr; 9158 /* 9159 * Either we are an intermediate router or the 9160 * last hop before destination and we have 9161 * already processed the routing header. 9162 * If segment_left is greater than or equal to zero, 9163 * then we must be the (numaddr - segleft) entry 9164 * of the routing header. Although ip6r0_segleft 9165 * is a unit8_t variable, we still check for zero 9166 * or greater value, if in case the data type 9167 * is changed someday in future. 9168 */ 9169 if (rthdr->ip6r0_segleft > 0 || 9170 rthdr->ip6r0_segleft == 0) { 9171 ire_t *ire = NULL; 9172 9173 numaddr = rthdr->ip6r0_len / 2; 9174 addrptr = (in6_addr_t *)((char *)rthdr + 9175 sizeof (*rthdr)); 9176 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9177 if (addrptr != NULL) { 9178 ire = ire_ctable_lookup_v6(addrptr, NULL, 9179 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9180 MATCH_IRE_TYPE, 9181 ipst); 9182 if (ire != NULL) { 9183 ire_refrele(ire); 9184 return (B_TRUE); 9185 } 9186 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9187 } 9188 } 9189 /* FALLTHRU */ 9190 default: 9191 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9192 return (B_FALSE); 9193 } 9194 } 9195 9196 /* 9197 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9198 * Assumes that the following set of headers appear in the first 9199 * mblk: 9200 * ip6i_t (if present) CAN also appear as a separate mblk. 9201 * ip6_t 9202 * Any extension headers 9203 * TCP/UDP/SCTP header (if present) 9204 * The routine can handle an ICMPv6 header that is not in the first mblk. 9205 * 9206 * The order to determine the outgoing interface is as follows: 9207 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9208 * 2. If conn_nofailover_ill is set then use that ill. 9209 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9210 * 4. If q is an ill queue and (link local or multicast destination) then 9211 * use that ill. 9212 * 5. If IPV6_BOUND_IF has been set use that ill. 9213 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9214 * look for the best IRE match for the unspecified group to determine 9215 * the ill. 9216 * 7. For unicast: Just do an IRE lookup for the best match. 9217 * 9218 * arg2 is always a queue_t *. 9219 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9220 * the zoneid. 9221 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9222 */ 9223 void 9224 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9225 { 9226 conn_t *connp = NULL; 9227 queue_t *q = (queue_t *)arg2; 9228 ire_t *ire = NULL; 9229 ire_t *sctp_ire = NULL; 9230 ip6_t *ip6h; 9231 in6_addr_t *v6dstp; 9232 ill_t *ill = NULL; 9233 ipif_t *ipif; 9234 ip6i_t *ip6i; 9235 int cksum_request; /* -1 => normal. */ 9236 /* 1 => Skip TCP/UDP/SCTP checksum */ 9237 /* Otherwise contains insert offset for checksum */ 9238 int unspec_src; 9239 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9240 mib2_ipIfStatsEntry_t *mibptr; 9241 int match_flags = MATCH_IRE_ILL_GROUP; 9242 boolean_t attach_if = B_FALSE; 9243 mblk_t *first_mp; 9244 boolean_t mctl_present; 9245 ipsec_out_t *io; 9246 boolean_t drop_if_delayed = B_FALSE; 9247 boolean_t multirt_need_resolve = B_FALSE; 9248 mblk_t *copy_mp = NULL; 9249 int err = 0; 9250 int ip6i_flags = 0; 9251 zoneid_t zoneid; 9252 ill_t *saved_ill = NULL; 9253 boolean_t conn_lock_held; 9254 boolean_t need_decref = B_FALSE; 9255 ip_stack_t *ipst; 9256 9257 if (q->q_next != NULL) { 9258 ill = (ill_t *)q->q_ptr; 9259 ipst = ill->ill_ipst; 9260 } else { 9261 connp = (conn_t *)arg; 9262 ASSERT(connp != NULL); 9263 ipst = connp->conn_netstack->netstack_ip; 9264 } 9265 9266 /* 9267 * Highest bit in version field is Reachability Confirmation bit 9268 * used by NUD in ip_xmit_v6(). 9269 */ 9270 #ifdef _BIG_ENDIAN 9271 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9272 #else 9273 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9274 #endif 9275 9276 /* 9277 * M_CTL comes from 6 places 9278 * 9279 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9280 * both V4 and V6 datagrams. 9281 * 9282 * 2) AH/ESP sends down M_CTL after doing their job with both 9283 * V4 and V6 datagrams. 9284 * 9285 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9286 * attached. 9287 * 9288 * 4) Notifications from an external resolver (for XRESOLV ifs) 9289 * 9290 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9291 * IPsec hardware acceleration support. 9292 * 9293 * 6) TUN_HELLO. 9294 * 9295 * We need to handle (1)'s IPv6 case and (3) here. For the 9296 * IPv4 case in (1), and (2), IPSEC processing has already 9297 * started. The code in ip_wput() already knows how to handle 9298 * continuing IPSEC processing (for IPv4 and IPv6). All other 9299 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9300 * for handling. 
9301 */ 9302 first_mp = mp; 9303 mctl_present = B_FALSE; 9304 io = NULL; 9305 9306 /* Multidata transmit? */ 9307 if (DB_TYPE(mp) == M_MULTIDATA) { 9308 /* 9309 * We should never get here, since all Multidata messages 9310 * originating from tcp should have been directed over to 9311 * tcp_multisend() in the first place. 9312 */ 9313 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9314 freemsg(mp); 9315 return; 9316 } else if (DB_TYPE(mp) == M_CTL) { 9317 uint32_t mctltype = 0; 9318 uint32_t mlen = MBLKL(first_mp); 9319 9320 mp = mp->b_cont; 9321 mctl_present = B_TRUE; 9322 io = (ipsec_out_t *)first_mp->b_rptr; 9323 9324 /* 9325 * Validate this M_CTL message. The only three types of 9326 * M_CTL messages we expect to see in this code path are 9327 * ipsec_out_t or ipsec_in_t structures (allocated as 9328 * ipsec_info_t unions), or ipsec_ctl_t structures. 9329 * The ipsec_out_type and ipsec_in_type overlap in the two 9330 * data structures, and they are either set to IPSEC_OUT 9331 * or IPSEC_IN depending on which data structure it is. 9332 * ipsec_ctl_t is an IPSEC_CTL. 9333 * 9334 * All other M_CTL messages are sent to ip_wput_nondata() 9335 * for handling. 9336 */ 9337 if (mlen >= sizeof (io->ipsec_out_type)) 9338 mctltype = io->ipsec_out_type; 9339 9340 if ((mlen == sizeof (ipsec_ctl_t)) && 9341 (mctltype == IPSEC_CTL)) { 9342 ip_output(arg, first_mp, arg2, caller); 9343 return; 9344 } 9345 9346 if ((mlen < sizeof (ipsec_info_t)) || 9347 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9348 mp == NULL) { 9349 ip_wput_nondata(NULL, q, first_mp, NULL); 9350 return; 9351 } 9352 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9353 if (q->q_next == NULL) { 9354 ip6h = (ip6_t *)mp->b_rptr; 9355 /* 9356 * For a freshly-generated TCP dgram that needs IPV6 9357 * processing, don't call ip_wput immediately. We can 9358 * tell this by the ipsec_out_proc_begin. 
In-progress 9359 * IPSEC_OUT messages have proc_begin set to TRUE, 9360 * and we want to send all IPSEC_IN messages to 9361 * ip_wput() for IPsec processing or finishing. 9362 */ 9363 if (mctltype == IPSEC_IN || 9364 IPVER(ip6h) != IPV6_VERSION || 9365 io->ipsec_out_proc_begin) { 9366 mibptr = &ipst->ips_ip6_mib; 9367 goto notv6; 9368 } 9369 } 9370 } else if (DB_TYPE(mp) != M_DATA) { 9371 ip_wput_nondata(NULL, q, mp, NULL); 9372 return; 9373 } 9374 9375 ip6h = (ip6_t *)mp->b_rptr; 9376 9377 if (IPVER(ip6h) != IPV6_VERSION) { 9378 mibptr = &ipst->ips_ip6_mib; 9379 goto notv6; 9380 } 9381 9382 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9383 (connp == NULL || !connp->conn_ulp_labeled)) { 9384 if (connp != NULL) { 9385 ASSERT(CONN_CRED(connp) != NULL); 9386 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9387 &mp, connp->conn_mac_exempt, ipst); 9388 } else if (DB_CRED(mp) != NULL) { 9389 err = tsol_check_label_v6(DB_CRED(mp), 9390 &mp, B_FALSE, ipst); 9391 } 9392 if (mctl_present) 9393 first_mp->b_cont = mp; 9394 else 9395 first_mp = mp; 9396 if (err != 0) { 9397 DTRACE_PROBE3( 9398 tsol_ip_log_drop_checklabel_ip6, char *, 9399 "conn(1), failed to check/update mp(2)", 9400 conn_t, connp, mblk_t, mp); 9401 freemsg(first_mp); 9402 return; 9403 } 9404 ip6h = (ip6_t *)mp->b_rptr; 9405 } 9406 if (q->q_next != NULL) { 9407 /* 9408 * We don't know if this ill will be used for IPv6 9409 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9410 * ipif_set_values() sets the ill_isv6 flag to true if 9411 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9412 * just drop the packet. 
9413 */ 9414 if (!ill->ill_isv6) { 9415 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9416 "ILLF_IPV6 was set\n")); 9417 freemsg(first_mp); 9418 return; 9419 } 9420 /* For uniformity do a refhold */ 9421 mutex_enter(&ill->ill_lock); 9422 if (!ILL_CAN_LOOKUP(ill)) { 9423 mutex_exit(&ill->ill_lock); 9424 freemsg(first_mp); 9425 return; 9426 } 9427 ill_refhold_locked(ill); 9428 mutex_exit(&ill->ill_lock); 9429 mibptr = ill->ill_ip_mib; 9430 9431 ASSERT(mibptr != NULL); 9432 unspec_src = 0; 9433 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9434 do_outrequests = B_FALSE; 9435 zoneid = (zoneid_t)(uintptr_t)arg; 9436 } else { 9437 ASSERT(connp != NULL); 9438 zoneid = connp->conn_zoneid; 9439 9440 /* is queue flow controlled? */ 9441 if ((q->q_first || connp->conn_draining) && 9442 (caller == IP_WPUT)) { 9443 /* 9444 * 1) TCP sends down M_CTL for detached connections. 9445 * 2) AH/ESP sends down M_CTL. 9446 * 9447 * We don't flow control either of the above. Only 9448 * UDP and others are flow controlled for which we 9449 * can't have a M_CTL. 9450 */ 9451 ASSERT(first_mp == mp); 9452 (void) putq(q, mp); 9453 return; 9454 } 9455 mibptr = &ipst->ips_ip6_mib; 9456 unspec_src = connp->conn_unspec_src; 9457 do_outrequests = B_TRUE; 9458 if (mp->b_flag & MSGHASREF) { 9459 mp->b_flag &= ~MSGHASREF; 9460 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9461 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9462 need_decref = B_TRUE; 9463 } 9464 9465 /* 9466 * If there is a policy, try to attach an ipsec_out in 9467 * the front. At the end, first_mp either points to a 9468 * M_DATA message or IPSEC_OUT message linked to a 9469 * M_DATA message. We have to do it now as we might 9470 * lose the "conn" if we go through ip_newroute. 9471 */ 9472 if (!mctl_present && 9473 (connp->conn_out_enforce_policy || 9474 connp->conn_latch != NULL)) { 9475 ASSERT(first_mp == mp); 9476 /* XXX Any better way to get the protocol fast ? 
*/ 9477 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9478 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9479 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9480 if (need_decref) 9481 CONN_DEC_REF(connp); 9482 return; 9483 } else { 9484 ASSERT(mp->b_datap->db_type == M_CTL); 9485 first_mp = mp; 9486 mp = mp->b_cont; 9487 mctl_present = B_TRUE; 9488 io = (ipsec_out_t *)first_mp->b_rptr; 9489 } 9490 } 9491 } 9492 9493 /* check for alignment and full IPv6 header */ 9494 if (!OK_32PTR((uchar_t *)ip6h) || 9495 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9496 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9497 if (do_outrequests) 9498 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9499 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9500 freemsg(first_mp); 9501 if (ill != NULL) 9502 ill_refrele(ill); 9503 if (need_decref) 9504 CONN_DEC_REF(connp); 9505 return; 9506 } 9507 v6dstp = &ip6h->ip6_dst; 9508 cksum_request = -1; 9509 ip6i = NULL; 9510 9511 /* 9512 * Once neighbor discovery has completed, ndp_process() will provide 9513 * locally generated packets for which processing can be reattempted. 9514 * In these cases, connp is NULL and the original zone is part of a 9515 * prepended ipsec_out_t. 9516 */ 9517 if (io != NULL) { 9518 /* 9519 * When coming from icmp_input_v6, the zoneid might not match 9520 * for the loopback case, because inside icmp_input_v6 the 9521 * queue_t is a conn queue from the sending side. 9522 */ 9523 zoneid = io->ipsec_out_zoneid; 9524 ASSERT(zoneid != ALL_ZONES); 9525 } 9526 9527 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9528 /* 9529 * This is an ip6i_t header followed by an ip6_hdr. 9530 * Check which fields are set. 9531 * 9532 * When the packet comes from a transport we should have 9533 * all needed headers in the first mblk. However, when 9534 * going through ip_newroute*_v6 the ip6i might be in 9535 * a separate mblk when we return here. 
In that case 9536 * we pullup everything to ensure that extension and transport 9537 * headers "stay" in the first mblk. 9538 */ 9539 ip6i = (ip6i_t *)ip6h; 9540 ip6i_flags = ip6i->ip6i_flags; 9541 9542 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9543 ((mp->b_wptr - (uchar_t *)ip6i) >= 9544 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9545 9546 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9547 if (!pullupmsg(mp, -1)) { 9548 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9549 if (do_outrequests) { 9550 BUMP_MIB(mibptr, 9551 ipIfStatsHCOutRequests); 9552 } 9553 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9554 freemsg(first_mp); 9555 if (ill != NULL) 9556 ill_refrele(ill); 9557 if (need_decref) 9558 CONN_DEC_REF(connp); 9559 return; 9560 } 9561 ip6h = (ip6_t *)mp->b_rptr; 9562 v6dstp = &ip6h->ip6_dst; 9563 ip6i = (ip6i_t *)ip6h; 9564 } 9565 ip6h = (ip6_t *)&ip6i[1]; 9566 9567 /* 9568 * Advance rptr past the ip6i_t to get ready for 9569 * transmitting the packet. However, if the packet gets 9570 * passed to ip_newroute*_v6 then rptr is moved back so 9571 * that the ip6i_t header can be inspected when the 9572 * packet comes back here after passing through 9573 * ire_add_then_send. 9574 */ 9575 mp->b_rptr = (uchar_t *)ip6h; 9576 9577 /* 9578 * IP6I_ATTACH_IF is set in this function when we had a 9579 * conn and it was either bound to the IPFF_NOFAILOVER address 9580 * or IPV6_BOUND_PIF was set. These options override other 9581 * options that set the ifindex. We come here with 9582 * IP6I_ATTACH_IF set when we can't find the ire and 9583 * ip_newroute_v6 is feeding the packet for second time. 
9584 */ 9585 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9586 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9587 ASSERT(ip6i->ip6i_ifindex != 0); 9588 if (ill != NULL) 9589 ill_refrele(ill); 9590 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9591 NULL, NULL, NULL, NULL, ipst); 9592 if (ill == NULL) { 9593 if (do_outrequests) { 9594 BUMP_MIB(mibptr, 9595 ipIfStatsHCOutRequests); 9596 } 9597 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9598 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9599 ip6i->ip6i_ifindex)); 9600 if (need_decref) 9601 CONN_DEC_REF(connp); 9602 freemsg(first_mp); 9603 return; 9604 } 9605 mibptr = ill->ill_ip_mib; 9606 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9607 /* 9608 * Preserve the index so that when we return 9609 * from IPSEC processing, we know where to 9610 * send the packet. 9611 */ 9612 if (mctl_present) { 9613 ASSERT(io != NULL); 9614 io->ipsec_out_ill_index = 9615 ip6i->ip6i_ifindex; 9616 } 9617 } 9618 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9619 /* 9620 * This is a multipathing probe packet that has 9621 * been delayed in ND resolution. Drop the 9622 * packet for the reasons mentioned in 9623 * nce_queue_mp() 9624 */ 9625 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9626 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9627 freemsg(first_mp); 9628 ill_refrele(ill); 9629 if (need_decref) 9630 CONN_DEC_REF(connp); 9631 return; 9632 } 9633 } 9634 } 9635 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9636 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9637 9638 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9639 if (secpolicy_net_rawaccess(cr) != 0) { 9640 /* 9641 * Use IPCL_ZONEID to honor SO_ALLZONES. 9642 */ 9643 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9644 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9645 NULL, connp != NULL ? 
9646 IPCL_ZONEID(connp) : zoneid, NULL, 9647 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9648 if (ire == NULL) { 9649 if (do_outrequests) 9650 BUMP_MIB(mibptr, 9651 ipIfStatsHCOutRequests); 9652 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9653 ip1dbg(("ip_wput_v6: bad source " 9654 "addr\n")); 9655 freemsg(first_mp); 9656 if (ill != NULL) 9657 ill_refrele(ill); 9658 if (need_decref) 9659 CONN_DEC_REF(connp); 9660 return; 9661 } 9662 ire_refrele(ire); 9663 } 9664 /* No need to verify again when using ip_newroute */ 9665 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9666 } 9667 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9668 /* 9669 * Make sure they match since ip_newroute*_v6 etc might 9670 * (unknown to them) inspect ip6i_nexthop when 9671 * they think they access ip6_dst. 9672 */ 9673 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9674 } 9675 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9676 cksum_request = 1; 9677 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9678 cksum_request = ip6i->ip6i_checksum_off; 9679 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9680 unspec_src = 1; 9681 9682 if (do_outrequests && ill != NULL) { 9683 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9684 do_outrequests = B_FALSE; 9685 } 9686 /* 9687 * Store ip6i_t info that we need after we come back 9688 * from IPSEC processing. 9689 */ 9690 if (mctl_present) { 9691 ASSERT(io != NULL); 9692 io->ipsec_out_unspec_src = unspec_src; 9693 } 9694 } 9695 if (connp != NULL && connp->conn_dontroute) 9696 ip6h->ip6_hops = 1; 9697 9698 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9699 goto ipv6multicast; 9700 9701 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9702 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9703 ill_t *conn_outgoing_pill; 9704 9705 conn_outgoing_pill = conn_get_held_ill(connp, 9706 &connp->conn_outgoing_pill, &err); 9707 if (err == ILL_LOOKUP_FAILED) { 9708 if (ill != NULL) 9709 ill_refrele(ill); 9710 if (need_decref) 9711 CONN_DEC_REF(connp); 9712 freemsg(first_mp); 9713 return; 9714 } 9715 if (conn_outgoing_pill != NULL) { 9716 if (ill != NULL) 9717 ill_refrele(ill); 9718 ill = conn_outgoing_pill; 9719 attach_if = B_TRUE; 9720 match_flags = MATCH_IRE_ILL; 9721 mibptr = ill->ill_ip_mib; 9722 9723 /* 9724 * Check if we need an ire that will not be 9725 * looked up by anybody else i.e. HIDDEN. 9726 */ 9727 if (ill_is_probeonly(ill)) 9728 match_flags |= MATCH_IRE_MARK_HIDDEN; 9729 goto send_from_ill; 9730 } 9731 } 9732 9733 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9734 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9735 ill_t *conn_nofailover_ill; 9736 9737 conn_nofailover_ill = conn_get_held_ill(connp, 9738 &connp->conn_nofailover_ill, &err); 9739 if (err == ILL_LOOKUP_FAILED) { 9740 if (ill != NULL) 9741 ill_refrele(ill); 9742 if (need_decref) 9743 CONN_DEC_REF(connp); 9744 freemsg(first_mp); 9745 return; 9746 } 9747 if (conn_nofailover_ill != NULL) { 9748 if (ill != NULL) 9749 ill_refrele(ill); 9750 ill = conn_nofailover_ill; 9751 attach_if = B_TRUE; 9752 /* 9753 * Assumes that ipc_nofailover_ill is used only for 9754 * multipathing probe packets. These packets are better 9755 * dropped, if they are delayed in ND resolution, for 9756 * the reasons described in nce_queue_mp(). 9757 * IP6I_DROP_IFDELAYED will be set later on in this 9758 * function for this packet. 9759 */ 9760 drop_if_delayed = B_TRUE; 9761 match_flags = MATCH_IRE_ILL; 9762 mibptr = ill->ill_ip_mib; 9763 9764 /* 9765 * Check if we need an ire that will not be 9766 * looked up by anybody else i.e. HIDDEN. 
9767 */ 9768 if (ill_is_probeonly(ill)) 9769 match_flags |= MATCH_IRE_MARK_HIDDEN; 9770 goto send_from_ill; 9771 } 9772 } 9773 9774 /* 9775 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9776 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9777 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9778 */ 9779 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9780 ASSERT(ip6i->ip6i_ifindex != 0); 9781 attach_if = B_TRUE; 9782 ASSERT(ill != NULL); 9783 match_flags = MATCH_IRE_ILL; 9784 9785 /* 9786 * Check if we need an ire that will not be 9787 * looked up by anybody else i.e. HIDDEN. 9788 */ 9789 if (ill_is_probeonly(ill)) 9790 match_flags |= MATCH_IRE_MARK_HIDDEN; 9791 goto send_from_ill; 9792 } 9793 9794 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9795 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9796 ASSERT(ill != NULL); 9797 goto send_from_ill; 9798 } 9799 9800 /* 9801 * 4. If q is an ill queue and (link local or multicast destination) 9802 * then use that ill. 9803 */ 9804 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9805 goto send_from_ill; 9806 } 9807 9808 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9809 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9810 ill_t *conn_outgoing_ill; 9811 9812 conn_outgoing_ill = conn_get_held_ill(connp, 9813 &connp->conn_outgoing_ill, &err); 9814 if (err == ILL_LOOKUP_FAILED) { 9815 if (ill != NULL) 9816 ill_refrele(ill); 9817 if (need_decref) 9818 CONN_DEC_REF(connp); 9819 freemsg(first_mp); 9820 return; 9821 } 9822 if (ill != NULL) 9823 ill_refrele(ill); 9824 ill = conn_outgoing_ill; 9825 mibptr = ill->ill_ip_mib; 9826 goto send_from_ill; 9827 } 9828 9829 /* 9830 * 6. For unicast: Just do an IRE lookup for the best match. 9831 * If we get here for a link-local address it is rather random 9832 * what interface we pick on a multihomed host. 
9833 * *If* there is an IRE_CACHE (and the link-local address 9834 * isn't duplicated on multi links) this will find the IRE_CACHE. 9835 * Otherwise it will use one of the matching IRE_INTERFACE routes 9836 * for the link-local prefix. Hence, applications 9837 * *should* be encouraged to specify an outgoing interface when sending 9838 * to a link local address. 9839 */ 9840 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9841 !connp->conn_fully_bound)) { 9842 /* 9843 * We cache IRE_CACHEs to avoid lookups. We don't do 9844 * this for the tcp global queue and listen end point 9845 * as it does not really have a real destination to 9846 * talk to. 9847 */ 9848 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9849 ipst); 9850 } else { 9851 /* 9852 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9853 * grab a lock here to check for CONDEMNED as it is okay 9854 * to send a packet or two with the IRE_CACHE that is going 9855 * away. 9856 */ 9857 mutex_enter(&connp->conn_lock); 9858 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9859 if (ire != NULL && 9860 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9861 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9862 9863 IRE_REFHOLD(ire); 9864 mutex_exit(&connp->conn_lock); 9865 9866 } else { 9867 boolean_t cached = B_FALSE; 9868 9869 connp->conn_ire_cache = NULL; 9870 mutex_exit(&connp->conn_lock); 9871 /* Release the old ire */ 9872 if (ire != NULL && sctp_ire == NULL) 9873 IRE_REFRELE_NOTR(ire); 9874 9875 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9876 MBLK_GETLABEL(mp), ipst); 9877 if (ire != NULL) { 9878 IRE_REFHOLD_NOTR(ire); 9879 9880 mutex_enter(&connp->conn_lock); 9881 if (CONN_CACHE_IRE(connp) && 9882 (connp->conn_ire_cache == NULL)) { 9883 rw_enter(&ire->ire_bucket->irb_lock, 9884 RW_READER); 9885 if (!(ire->ire_marks & 9886 IRE_MARK_CONDEMNED)) { 9887 connp->conn_ire_cache = ire; 9888 cached = B_TRUE; 9889 } 9890 rw_exit(&ire->ire_bucket->irb_lock); 9891 } 9892 mutex_exit(&connp->conn_lock); 9893 9894 /* 9895 * We can continue to use the ire but since it 9896 * was not cached, we should drop the extra 9897 * reference. 9898 */ 9899 if (!cached) 9900 IRE_REFRELE_NOTR(ire); 9901 } 9902 } 9903 } 9904 9905 if (ire != NULL) { 9906 if (do_outrequests) { 9907 /* Handle IRE_LOCAL's that might appear here */ 9908 if (ire->ire_type == IRE_CACHE) { 9909 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9910 ill_ip_mib; 9911 } else { 9912 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9913 } 9914 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9915 } 9916 ASSERT(!attach_if); 9917 9918 /* 9919 * Check if the ire has the RTF_MULTIRT flag, inherited 9920 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9921 */ 9922 if (ire->ire_flags & RTF_MULTIRT) { 9923 /* 9924 * Force hop limit of multirouted packets if required. 9925 * The hop limit of such packets is bounded by the 9926 * ip_multirt_ttl ndd variable. 9927 * NDP packets must have a hop limit of 255; don't 9928 * change the hop limit in that case. 
9929 */ 9930 if ((ipst->ips_ip_multirt_ttl > 0) && 9931 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9932 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9933 if (ip_debug > 3) { 9934 ip2dbg(("ip_wput_v6: forcing multirt " 9935 "hop limit to %d (was %d) ", 9936 ipst->ips_ip_multirt_ttl, 9937 ip6h->ip6_hops)); 9938 pr_addr_dbg("v6dst %s\n", AF_INET6, 9939 &ire->ire_addr_v6); 9940 } 9941 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9942 } 9943 9944 /* 9945 * We look at this point if there are pending 9946 * unresolved routes. ire_multirt_need_resolve_v6() 9947 * checks in O(n) that all IRE_OFFSUBNET ire 9948 * entries for the packet's destination and 9949 * flagged RTF_MULTIRT are currently resolved. 9950 * If some remain unresolved, we do a copy 9951 * of the current message. It will be used 9952 * to initiate additional route resolutions. 9953 */ 9954 multirt_need_resolve = 9955 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9956 MBLK_GETLABEL(first_mp), ipst); 9957 ip2dbg(("ip_wput_v6: ire %p, " 9958 "multirt_need_resolve %d, first_mp %p\n", 9959 (void *)ire, multirt_need_resolve, 9960 (void *)first_mp)); 9961 if (multirt_need_resolve) { 9962 copy_mp = copymsg(first_mp); 9963 if (copy_mp != NULL) { 9964 MULTIRT_DEBUG_TAG(copy_mp); 9965 } 9966 } 9967 } 9968 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9969 connp, caller, 0, ip6i_flags, zoneid); 9970 if (need_decref) { 9971 CONN_DEC_REF(connp); 9972 connp = NULL; 9973 } 9974 IRE_REFRELE(ire); 9975 9976 /* 9977 * Try to resolve another multiroute if 9978 * ire_multirt_need_resolve_v6() deemed it necessary. 9979 * copy_mp will be consumed (sent or freed) by 9980 * ip_newroute_v6(). 
9981 */ 9982 if (copy_mp != NULL) { 9983 if (mctl_present) { 9984 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9985 } else { 9986 ip6h = (ip6_t *)copy_mp->b_rptr; 9987 } 9988 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9989 &ip6h->ip6_src, NULL, zoneid, ipst); 9990 } 9991 if (ill != NULL) 9992 ill_refrele(ill); 9993 return; 9994 } 9995 9996 /* 9997 * No full IRE for this destination. Send it to 9998 * ip_newroute_v6 to see if anything else matches. 9999 * Mark this packet as having originated on this 10000 * machine. 10001 * Update rptr if there was an ip6i_t header. 10002 */ 10003 mp->b_prev = NULL; 10004 mp->b_next = NULL; 10005 if (ip6i != NULL) 10006 mp->b_rptr -= sizeof (ip6i_t); 10007 10008 if (unspec_src) { 10009 if (ip6i == NULL) { 10010 /* 10011 * Add ip6i_t header to carry unspec_src 10012 * until the packet comes back in ip_wput_v6. 10013 */ 10014 mp = ip_add_info_v6(mp, NULL, v6dstp); 10015 if (mp == NULL) { 10016 if (do_outrequests) 10017 BUMP_MIB(mibptr, 10018 ipIfStatsHCOutRequests); 10019 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10020 if (mctl_present) 10021 freeb(first_mp); 10022 if (ill != NULL) 10023 ill_refrele(ill); 10024 if (need_decref) 10025 CONN_DEC_REF(connp); 10026 return; 10027 } 10028 ip6i = (ip6i_t *)mp->b_rptr; 10029 10030 if (mctl_present) { 10031 ASSERT(first_mp != mp); 10032 first_mp->b_cont = mp; 10033 } else { 10034 first_mp = mp; 10035 } 10036 10037 if ((mp->b_wptr - (uchar_t *)ip6i) == 10038 sizeof (ip6i_t)) { 10039 /* 10040 * ndp_resolver called from ip_newroute_v6 10041 * expects pulled up message. 
10042 */ 10043 if (!pullupmsg(mp, -1)) { 10044 ip1dbg(("ip_wput_v6: pullupmsg" 10045 " failed\n")); 10046 if (do_outrequests) { 10047 BUMP_MIB(mibptr, 10048 ipIfStatsHCOutRequests); 10049 } 10050 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10051 freemsg(first_mp); 10052 if (ill != NULL) 10053 ill_refrele(ill); 10054 if (need_decref) 10055 CONN_DEC_REF(connp); 10056 return; 10057 } 10058 ip6i = (ip6i_t *)mp->b_rptr; 10059 } 10060 ip6h = (ip6_t *)&ip6i[1]; 10061 v6dstp = &ip6h->ip6_dst; 10062 } 10063 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10064 if (mctl_present) { 10065 ASSERT(io != NULL); 10066 io->ipsec_out_unspec_src = unspec_src; 10067 } 10068 } 10069 if (do_outrequests) 10070 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10071 if (need_decref) 10072 CONN_DEC_REF(connp); 10073 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10074 if (ill != NULL) 10075 ill_refrele(ill); 10076 return; 10077 10078 10079 /* 10080 * Handle multicast packets with or without an conn. 10081 * Assumes that the transports set ip6_hops taking 10082 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10083 * into account. 10084 */ 10085 ipv6multicast: 10086 ip2dbg(("ip_wput_v6: multicast\n")); 10087 10088 /* 10089 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10090 * 2. If conn_nofailover_ill is set then use that ill. 10091 * 10092 * Hold the conn_lock till we refhold the ill of interest that is 10093 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10094 * while holding any locks, postpone the refrele until after the 10095 * conn_lock is dropped. 
10096 */ 10097 if (connp != NULL) { 10098 mutex_enter(&connp->conn_lock); 10099 conn_lock_held = B_TRUE; 10100 } else { 10101 conn_lock_held = B_FALSE; 10102 } 10103 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10104 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10105 if (err == ILL_LOOKUP_FAILED) { 10106 ip1dbg(("ip_output_v6: multicast" 10107 " conn_outgoing_pill no ipif\n")); 10108 multicast_discard: 10109 ASSERT(saved_ill == NULL); 10110 if (conn_lock_held) 10111 mutex_exit(&connp->conn_lock); 10112 if (ill != NULL) 10113 ill_refrele(ill); 10114 freemsg(first_mp); 10115 if (do_outrequests) 10116 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10117 if (need_decref) 10118 CONN_DEC_REF(connp); 10119 return; 10120 } 10121 saved_ill = ill; 10122 ill = connp->conn_outgoing_pill; 10123 attach_if = B_TRUE; 10124 match_flags = MATCH_IRE_ILL; 10125 mibptr = ill->ill_ip_mib; 10126 10127 /* 10128 * Check if we need an ire that will not be 10129 * looked up by anybody else i.e. HIDDEN. 10130 */ 10131 if (ill_is_probeonly(ill)) 10132 match_flags |= MATCH_IRE_MARK_HIDDEN; 10133 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10134 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10135 if (err == ILL_LOOKUP_FAILED) { 10136 ip1dbg(("ip_output_v6: multicast" 10137 " conn_nofailover_ill no ipif\n")); 10138 goto multicast_discard; 10139 } 10140 saved_ill = ill; 10141 ill = connp->conn_nofailover_ill; 10142 attach_if = B_TRUE; 10143 match_flags = MATCH_IRE_ILL; 10144 10145 /* 10146 * Check if we need an ire that will not be 10147 * looked up by anybody else i.e. HIDDEN. 10148 */ 10149 if (ill_is_probeonly(ill)) 10150 match_flags |= MATCH_IRE_MARK_HIDDEN; 10151 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10152 /* 10153 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10154 * we should have an ip6i_t with IP6I_ATTACH_IF if 10155 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10156 * used on this endpoint. 10157 */ 10158 ASSERT(ip6i->ip6i_ifindex != 0); 10159 attach_if = B_TRUE; 10160 ASSERT(ill != NULL); 10161 match_flags = MATCH_IRE_ILL; 10162 10163 /* 10164 * Check if we need an ire that will not be 10165 * looked up by anybody else i.e. HIDDEN. 10166 */ 10167 if (ill_is_probeonly(ill)) 10168 match_flags |= MATCH_IRE_MARK_HIDDEN; 10169 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10170 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10171 10172 ASSERT(ill != NULL); 10173 } else if (ill != NULL) { 10174 /* 10175 * 4. If q is an ill queue and (link local or multicast 10176 * destination) then use that ill. 10177 * We don't need the ipif initialization here. 10178 * This useless assert below is just to prevent lint from 10179 * reporting a null body if statement. 10180 */ 10181 ASSERT(ill != NULL); 10182 } else if (connp != NULL) { 10183 /* 10184 * 5. If IPV6_BOUND_IF has been set use that ill. 10185 * 10186 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10187 * Otherwise look for the best IRE match for the unspecified 10188 * group to determine the ill. 10189 * 10190 * conn_multicast_ill is used for only IPv6 packets. 10191 * conn_multicast_ipif is used for only IPv4 packets. 10192 * Thus a PF_INET6 socket send both IPv4 and IPv6 10193 * multicast packets using different IP*_MULTICAST_IF 10194 * interfaces. 
10195 */ 10196 if (connp->conn_outgoing_ill != NULL) { 10197 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10198 if (err == ILL_LOOKUP_FAILED) { 10199 ip1dbg(("ip_output_v6: multicast" 10200 " conn_outgoing_ill no ipif\n")); 10201 goto multicast_discard; 10202 } 10203 ill = connp->conn_outgoing_ill; 10204 } else if (connp->conn_multicast_ill != NULL) { 10205 err = ill_check_and_refhold(connp->conn_multicast_ill); 10206 if (err == ILL_LOOKUP_FAILED) { 10207 ip1dbg(("ip_output_v6: multicast" 10208 " conn_multicast_ill no ipif\n")); 10209 goto multicast_discard; 10210 } 10211 ill = connp->conn_multicast_ill; 10212 } else { 10213 mutex_exit(&connp->conn_lock); 10214 conn_lock_held = B_FALSE; 10215 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10216 if (ipif == NULL) { 10217 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10218 goto multicast_discard; 10219 } 10220 /* 10221 * We have a ref to this ipif, so we can safely 10222 * access ipif_ill. 10223 */ 10224 ill = ipif->ipif_ill; 10225 mutex_enter(&ill->ill_lock); 10226 if (!ILL_CAN_LOOKUP(ill)) { 10227 mutex_exit(&ill->ill_lock); 10228 ipif_refrele(ipif); 10229 ill = NULL; 10230 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10231 goto multicast_discard; 10232 } 10233 ill_refhold_locked(ill); 10234 mutex_exit(&ill->ill_lock); 10235 ipif_refrele(ipif); 10236 /* 10237 * Save binding until IPV6_MULTICAST_IF 10238 * changes it 10239 */ 10240 mutex_enter(&connp->conn_lock); 10241 connp->conn_multicast_ill = ill; 10242 connp->conn_orig_multicast_ifindex = 10243 ill->ill_phyint->phyint_ifindex; 10244 mutex_exit(&connp->conn_lock); 10245 } 10246 } 10247 if (conn_lock_held) 10248 mutex_exit(&connp->conn_lock); 10249 10250 if (saved_ill != NULL) 10251 ill_refrele(saved_ill); 10252 10253 ASSERT(ill != NULL); 10254 /* 10255 * For multicast loopback interfaces replace the multicast address 10256 * with a unicast address for the ire lookup. 
10257 */ 10258 if (IS_LOOPBACK(ill)) 10259 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10260 10261 mibptr = ill->ill_ip_mib; 10262 if (do_outrequests) { 10263 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10264 do_outrequests = B_FALSE; 10265 } 10266 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10267 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10268 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10269 10270 /* 10271 * As we may lose the conn by the time we reach ip_wput_ire_v6 10272 * we copy conn_multicast_loop and conn_dontroute on to an 10273 * ipsec_out. In case if this datagram goes out secure, 10274 * we need the ill_index also. Copy that also into the 10275 * ipsec_out. 10276 */ 10277 if (mctl_present) { 10278 io = (ipsec_out_t *)first_mp->b_rptr; 10279 ASSERT(first_mp->b_datap->db_type == M_CTL); 10280 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10281 } else { 10282 ASSERT(mp == first_mp); 10283 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10284 NULL) { 10285 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10286 freemsg(mp); 10287 if (ill != NULL) 10288 ill_refrele(ill); 10289 if (need_decref) 10290 CONN_DEC_REF(connp); 10291 return; 10292 } 10293 io = (ipsec_out_t *)first_mp->b_rptr; 10294 /* This is not a secure packet */ 10295 io->ipsec_out_secure = B_FALSE; 10296 io->ipsec_out_use_global_policy = B_TRUE; 10297 io->ipsec_out_zoneid = 10298 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10299 first_mp->b_cont = mp; 10300 mctl_present = B_TRUE; 10301 } 10302 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10303 io->ipsec_out_unspec_src = unspec_src; 10304 if (connp != NULL) 10305 io->ipsec_out_dontroute = connp->conn_dontroute; 10306 10307 send_from_ill: 10308 ASSERT(ill != NULL); 10309 ASSERT(mibptr == ill->ill_ip_mib); 10310 if (do_outrequests) { 10311 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10312 do_outrequests = B_FALSE; 10313 } 10314 10315 if (io != NULL) 10316 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10317 10318 /* 10319 * When a specific ill is specified (using IPV6_PKTINFO, 10320 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10321 * on routing entries (ftable and ctable) that have a matching 10322 * ire->ire_ipif->ipif_ill. Thus this can only be used 10323 * for destinations that are on-link for the specific ill 10324 * and that can appear on multiple links. Thus it is useful 10325 * for multicast destinations, link-local destinations, and 10326 * at some point perhaps for site-local destinations (if the 10327 * node sits at a site boundary). 10328 * We create the cache entries in the regular ctable since 10329 * it can not "confuse" things for other destinations. 10330 * table. 10331 * 10332 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10333 * It is used only when ire_cache_lookup is used above. 10334 */ 10335 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10336 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10337 if (ire != NULL) { 10338 /* 10339 * Check if the ire has the RTF_MULTIRT flag, inherited 10340 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10341 */ 10342 if (ire->ire_flags & RTF_MULTIRT) { 10343 /* 10344 * Force hop limit of multirouted packets if required. 10345 * The hop limit of such packets is bounded by the 10346 * ip_multirt_ttl ndd variable. 
10347 * NDP packets must have a hop limit of 255; don't 10348 * change the hop limit in that case. 10349 */ 10350 if ((ipst->ips_ip_multirt_ttl > 0) && 10351 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10352 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10353 if (ip_debug > 3) { 10354 ip2dbg(("ip_wput_v6: forcing multirt " 10355 "hop limit to %d (was %d) ", 10356 ipst->ips_ip_multirt_ttl, 10357 ip6h->ip6_hops)); 10358 pr_addr_dbg("v6dst %s\n", AF_INET6, 10359 &ire->ire_addr_v6); 10360 } 10361 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10362 } 10363 10364 /* 10365 * We look at this point if there are pending 10366 * unresolved routes. ire_multirt_need_resolve_v6() 10367 * checks in O(n) that all IRE_OFFSUBNET ire 10368 * entries for the packet's destination and 10369 * flagged RTF_MULTIRT are currently resolved. 10370 * If some remain unresolved, we make a copy 10371 * of the current message. It will be used 10372 * to initiate additional route resolutions. 10373 */ 10374 multirt_need_resolve = 10375 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10376 MBLK_GETLABEL(first_mp), ipst); 10377 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10378 "multirt_need_resolve %d, first_mp %p\n", 10379 (void *)ire, multirt_need_resolve, 10380 (void *)first_mp)); 10381 if (multirt_need_resolve) { 10382 copy_mp = copymsg(first_mp); 10383 if (copy_mp != NULL) { 10384 MULTIRT_DEBUG_TAG(copy_mp); 10385 } 10386 } 10387 } 10388 10389 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10390 ill->ill_name, (void *)ire, 10391 ill->ill_phyint->phyint_ifindex)); 10392 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10393 connp, caller, 10394 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10395 ip6i_flags, zoneid); 10396 ire_refrele(ire); 10397 if (need_decref) { 10398 CONN_DEC_REF(connp); 10399 connp = NULL; 10400 } 10401 10402 /* 10403 * Try to resolve another multiroute if 10404 * ire_multirt_need_resolve_v6() deemed it necessary. 
10405 * copy_mp will be consumed (sent or freed) by 10406 * ip_newroute_[ipif_]v6(). 10407 */ 10408 if (copy_mp != NULL) { 10409 if (mctl_present) { 10410 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10411 } else { 10412 ip6h = (ip6_t *)copy_mp->b_rptr; 10413 } 10414 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10415 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10416 zoneid, ipst); 10417 if (ipif == NULL) { 10418 ip1dbg(("ip_wput_v6: No ipif for " 10419 "multicast\n")); 10420 MULTIRT_DEBUG_UNTAG(copy_mp); 10421 freemsg(copy_mp); 10422 return; 10423 } 10424 ip_newroute_ipif_v6(q, copy_mp, ipif, 10425 ip6h->ip6_dst, unspec_src, zoneid); 10426 ipif_refrele(ipif); 10427 } else { 10428 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10429 &ip6h->ip6_src, ill, zoneid, ipst); 10430 } 10431 } 10432 ill_refrele(ill); 10433 return; 10434 } 10435 if (need_decref) { 10436 CONN_DEC_REF(connp); 10437 connp = NULL; 10438 } 10439 10440 /* Update rptr if there was an ip6i_t header. */ 10441 if (ip6i != NULL) 10442 mp->b_rptr -= sizeof (ip6i_t); 10443 if (unspec_src || attach_if) { 10444 if (ip6i == NULL) { 10445 /* 10446 * Add ip6i_t header to carry unspec_src 10447 * or attach_if until the packet comes back in 10448 * ip_wput_v6. 10449 */ 10450 if (mctl_present) { 10451 first_mp->b_cont = 10452 ip_add_info_v6(mp, NULL, v6dstp); 10453 mp = first_mp->b_cont; 10454 if (mp == NULL) 10455 freeb(first_mp); 10456 } else { 10457 first_mp = mp = ip_add_info_v6(mp, NULL, 10458 v6dstp); 10459 } 10460 if (mp == NULL) { 10461 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10462 ill_refrele(ill); 10463 return; 10464 } 10465 ip6i = (ip6i_t *)mp->b_rptr; 10466 if ((mp->b_wptr - (uchar_t *)ip6i) == 10467 sizeof (ip6i_t)) { 10468 /* 10469 * ndp_resolver called from ip_newroute_v6 10470 * expects a pulled up message. 
10471 */ 10472 if (!pullupmsg(mp, -1)) { 10473 ip1dbg(("ip_wput_v6: pullupmsg" 10474 " failed\n")); 10475 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10476 freemsg(first_mp); 10477 return; 10478 } 10479 ip6i = (ip6i_t *)mp->b_rptr; 10480 } 10481 ip6h = (ip6_t *)&ip6i[1]; 10482 v6dstp = &ip6h->ip6_dst; 10483 } 10484 if (unspec_src) 10485 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10486 if (attach_if) { 10487 /* 10488 * Bind to nofailover/BOUND_PIF overrides ifindex. 10489 */ 10490 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10491 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10492 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10493 if (drop_if_delayed) { 10494 /* This is a multipathing probe packet */ 10495 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10496 } 10497 } 10498 if (mctl_present) { 10499 ASSERT(io != NULL); 10500 io->ipsec_out_unspec_src = unspec_src; 10501 } 10502 } 10503 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10504 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10505 unspec_src, zoneid); 10506 } else { 10507 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10508 zoneid, ipst); 10509 } 10510 ill_refrele(ill); 10511 return; 10512 10513 notv6: 10514 /* FIXME?: assume the caller calls the right version of ip_output? */ 10515 if (q->q_next == NULL) { 10516 connp = Q_TO_CONN(q); 10517 10518 /* 10519 * We can change conn_send for all types of conn, even 10520 * though only TCP uses it right now. 10521 * FIXME: sctp could use conn_send but doesn't currently. 10522 */ 10523 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10524 } 10525 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10526 (void) ip_output(arg, first_mp, arg2, caller); 10527 if (ill != NULL) 10528 ill_refrele(ill); 10529 } 10530 10531 /* 10532 * If this is a conn_t queue, then we pass in the conn. This includes the 10533 * zoneid. 10534 * Otherwise, this is a message for an ill_t queue, 10535 * in which case we use the global zoneid since those are all part of 10536 * the global zone. 
10537 */ 10538 void 10539 ip_wput_v6(queue_t *q, mblk_t *mp) 10540 { 10541 if (CONN_Q(q)) 10542 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10543 else 10544 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10545 } 10546 10547 static void 10548 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10549 { 10550 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10551 io->ipsec_out_attach_if = B_TRUE; 10552 io->ipsec_out_ill_index = attach_index; 10553 } 10554 10555 /* 10556 * NULL send-to queue - packet is to be delivered locally. 10557 */ 10558 void 10559 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10560 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10561 { 10562 uint32_t ports; 10563 mblk_t *mp = first_mp, *first_mp1; 10564 boolean_t mctl_present; 10565 uint8_t nexthdr; 10566 uint16_t hdr_length; 10567 ipsec_out_t *io; 10568 mib2_ipIfStatsEntry_t *mibptr; 10569 ilm_t *ilm; 10570 uint_t nexthdr_offset; 10571 ip_stack_t *ipst = ill->ill_ipst; 10572 10573 if (DB_TYPE(mp) == M_CTL) { 10574 io = (ipsec_out_t *)mp->b_rptr; 10575 if (!io->ipsec_out_secure) { 10576 mp = mp->b_cont; 10577 freeb(first_mp); 10578 first_mp = mp; 10579 mctl_present = B_FALSE; 10580 } else { 10581 mctl_present = B_TRUE; 10582 mp = first_mp->b_cont; 10583 ipsec_out_to_in(first_mp); 10584 } 10585 } else { 10586 mctl_present = B_FALSE; 10587 } 10588 10589 /* 10590 * Remove reachability confirmation bit from version field 10591 * before passing the packet on to any firewall hooks or 10592 * looping back the packet. 
10593 */ 10594 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10595 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10596 10597 DTRACE_PROBE4(ip6__loopback__in__start, 10598 ill_t *, ill, ill_t *, NULL, 10599 ip6_t *, ip6h, mblk_t *, first_mp); 10600 10601 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10602 ipst->ips_ipv6firewall_loopback_in, 10603 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10604 10605 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10606 10607 if (first_mp == NULL) 10608 return; 10609 10610 if (ipst->ips_ipobs_enabled) { 10611 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10612 zoneid_t stackzoneid = netstackid_to_zoneid( 10613 ipst->ips_netstack->netstack_stackid); 10614 10615 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10616 /* 10617 * ::1 is special, as we cannot lookup its zoneid by 10618 * address. For this case, restrict the lookup to the 10619 * source zone. 10620 */ 10621 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10622 lookup_zoneid = zoneid; 10623 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10624 lookup_zoneid); 10625 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10626 IPV6_VERSION, 0, ipst); 10627 } 10628 10629 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10630 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10631 int, 1); 10632 10633 nexthdr = ip6h->ip6_nxt; 10634 mibptr = ill->ill_ip_mib; 10635 10636 /* Fastpath */ 10637 switch (nexthdr) { 10638 case IPPROTO_TCP: 10639 case IPPROTO_UDP: 10640 case IPPROTO_ICMPV6: 10641 case IPPROTO_SCTP: 10642 hdr_length = IPV6_HDR_LEN; 10643 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10644 (uchar_t *)ip6h); 10645 break; 10646 default: { 10647 uint8_t *nexthdrp; 10648 10649 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10650 &hdr_length, &nexthdrp)) { 10651 /* Malformed packet */ 10652 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10653 freemsg(first_mp); 10654 return; 10655 } 10656 nexthdr = *nexthdrp; 10657 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10658 break; 10659 } 10660 } 10661 10662 UPDATE_OB_PKT_COUNT(ire); 10663 ire->ire_last_used_time = lbolt; 10664 10665 switch (nexthdr) { 10666 case IPPROTO_TCP: 10667 if (DB_TYPE(mp) == M_DATA) { 10668 /* 10669 * M_DATA mblk, so init mblk (chain) for 10670 * no struio(). 10671 */ 10672 mblk_t *mp1 = mp; 10673 10674 do { 10675 mp1->b_datap->db_struioflag = 0; 10676 } while ((mp1 = mp1->b_cont) != NULL); 10677 } 10678 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10679 TCP_PORTS_OFFSET); 10680 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10681 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10682 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10683 hdr_length, mctl_present, ire->ire_zoneid); 10684 return; 10685 10686 case IPPROTO_UDP: 10687 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10688 UDP_PORTS_OFFSET); 10689 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10690 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10691 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10692 return; 10693 10694 case IPPROTO_SCTP: 10695 { 10696 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10697 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10698 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10699 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10700 return; 10701 } 10702 case IPPROTO_ICMPV6: { 10703 icmp6_t *icmp6; 10704 10705 /* check for full IPv6+ICMPv6 header */ 10706 if ((mp->b_wptr - mp->b_rptr) < 10707 (hdr_length + ICMP6_MINLEN)) { 10708 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10709 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10710 " failed\n")); 10711 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10712 freemsg(first_mp); 10713 return; 10714 } 10715 ip6h = (ip6_t *)mp->b_rptr; 10716 } 10717 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10718 10719 /* Update output mib stats */ 10720 icmp_update_out_mib_v6(ill, icmp6); 10721 10722 /* Check variable for testing applications */ 10723 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10724 
freemsg(first_mp); 10725 return; 10726 } 10727 /* 10728 * Assume that there is always at least one conn for 10729 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10730 * where there is no conn. 10731 */ 10732 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10733 !IS_LOOPBACK(ill)) { 10734 /* 10735 * In the multicast case, applications may have 10736 * joined the group from different zones, so we 10737 * need to deliver the packet to each of them. 10738 * Loop through the multicast memberships 10739 * structures (ilm) on the receive ill and send 10740 * a copy of the packet up each matching one. 10741 * However, we don't do this for multicasts sent 10742 * on the loopback interface (PHYI_LOOPBACK flag 10743 * set) as they must stay in the sender's zone. 10744 */ 10745 ILM_WALKER_HOLD(ill); 10746 for (ilm = ill->ill_ilm; ilm != NULL; 10747 ilm = ilm->ilm_next) { 10748 if (ilm->ilm_flags & ILM_DELETED) 10749 continue; 10750 if (!IN6_ARE_ADDR_EQUAL( 10751 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10752 continue; 10753 if ((fanout_flags & 10754 IP_FF_NO_MCAST_LOOP) && 10755 ilm->ilm_zoneid == ire->ire_zoneid) 10756 continue; 10757 if (!ipif_lookup_zoneid(ill, 10758 ilm->ilm_zoneid, IPIF_UP, NULL)) 10759 continue; 10760 10761 first_mp1 = ip_copymsg(first_mp); 10762 if (first_mp1 == NULL) 10763 continue; 10764 icmp_inbound_v6(q, first_mp1, ill, 10765 hdr_length, mctl_present, 10766 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10767 NULL); 10768 } 10769 ILM_WALKER_RELE(ill); 10770 } else { 10771 first_mp1 = ip_copymsg(first_mp); 10772 if (first_mp1 != NULL) 10773 icmp_inbound_v6(q, first_mp1, ill, 10774 hdr_length, mctl_present, 10775 IP6_NO_IPPOLICY, ire->ire_zoneid, 10776 NULL); 10777 } 10778 } 10779 /* FALLTHRU */ 10780 default: { 10781 /* 10782 * Handle protocols with which IPv6 is less intimate. 10783 */ 10784 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10785 10786 /* 10787 * Enable sending ICMP for "Unknown" nexthdr 10788 * case. i.e. 
where we did not FALLTHRU from 10789 * IPPROTO_ICMPV6 processing case above. 10790 */ 10791 if (nexthdr != IPPROTO_ICMPV6) 10792 fanout_flags |= IP_FF_SEND_ICMP; 10793 /* 10794 * Note: There can be more than one stream bound 10795 * to a particular protocol. When this is the case, 10796 * each one gets a copy of any incoming packets. 10797 */ 10798 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10799 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10800 mctl_present, ire->ire_zoneid); 10801 return; 10802 } 10803 } 10804 } 10805 10806 /* 10807 * Send packet using IRE. 10808 * Checksumming is controlled by cksum_request: 10809 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10810 * 1 => Skip TCP/UDP/SCTP checksum 10811 * Otherwise => checksum_request contains insert offset for checksum 10812 * 10813 * Assumes that the following set of headers appear in the first 10814 * mblk: 10815 * ip6_t 10816 * Any extension headers 10817 * TCP/UDP/SCTP header (if present) 10818 * The routine can handle an ICMPv6 header that is not in the first mblk. 10819 * 10820 * NOTE : This function does not ire_refrele the ire passed in as the 10821 * argument unlike ip_wput_ire where the REFRELE is done. 10822 * Refer to ip_wput_ire for more on this. 10823 */ 10824 static void 10825 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10826 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10827 zoneid_t zoneid) 10828 { 10829 ip6_t *ip6h; 10830 uint8_t nexthdr; 10831 uint16_t hdr_length; 10832 uint_t reachable = 0x0; 10833 ill_t *ill; 10834 mib2_ipIfStatsEntry_t *mibptr; 10835 mblk_t *first_mp; 10836 boolean_t mctl_present; 10837 ipsec_out_t *io; 10838 boolean_t conn_dontroute; /* conn value for multicast */ 10839 boolean_t conn_multicast_loop; /* conn value for multicast */ 10840 boolean_t multicast_forward; /* Should we forward ? 
*/ 10841 int max_frag; 10842 ip_stack_t *ipst = ire->ire_ipst; 10843 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10844 10845 ill = ire_to_ill(ire); 10846 first_mp = mp; 10847 multicast_forward = B_FALSE; 10848 10849 if (mp->b_datap->db_type != M_CTL) { 10850 ip6h = (ip6_t *)first_mp->b_rptr; 10851 } else { 10852 io = (ipsec_out_t *)first_mp->b_rptr; 10853 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10854 /* 10855 * Grab the zone id now because the M_CTL can be discarded by 10856 * ip_wput_ire_parse_ipsec_out() below. 10857 */ 10858 ASSERT(zoneid == io->ipsec_out_zoneid); 10859 ASSERT(zoneid != ALL_ZONES); 10860 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10861 /* 10862 * For the multicast case, ipsec_out carries conn_dontroute and 10863 * conn_multicast_loop as conn may not be available here. We 10864 * need this for multicast loopback and forwarding which is done 10865 * later in the code. 10866 */ 10867 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10868 conn_dontroute = io->ipsec_out_dontroute; 10869 conn_multicast_loop = io->ipsec_out_multicast_loop; 10870 /* 10871 * If conn_dontroute is not set or conn_multicast_loop 10872 * is set, we need to do forwarding/loopback. For 10873 * datagrams from ip_wput_multicast, conn_dontroute is 10874 * set to B_TRUE and conn_multicast_loop is set to 10875 * B_FALSE so that we neither do forwarding nor 10876 * loopback. 10877 */ 10878 if (!conn_dontroute || conn_multicast_loop) 10879 multicast_forward = B_TRUE; 10880 } 10881 } 10882 10883 /* 10884 * If the sender didn't supply the hop limit and there is a default 10885 * unicast hop limit associated with the output interface, we use 10886 * that if the packet is unicast. Interface specific unicast hop 10887 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10888 */ 10889 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10890 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10891 ip6h->ip6_hops = ill->ill_max_hops; 10892 } 10893 10894 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10895 ire->ire_zoneid != ALL_ZONES) { 10896 /* 10897 * When a zone sends a packet to another zone, we try to deliver 10898 * the packet under the same conditions as if the destination 10899 * was a real node on the network. To do so, we look for a 10900 * matching route in the forwarding table. 10901 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10902 * ip_newroute_v6() does. 10903 * Note that IRE_LOCAL are special, since they are used 10904 * when the zoneid doesn't match in some cases. This means that 10905 * we need to handle ipha_src differently since ire_src_addr 10906 * belongs to the receiving zone instead of the sending zone. 10907 * When ip_restrict_interzone_loopback is set, then 10908 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10909 * for loopback between zones when the logical "Ethernet" would 10910 * have looped them back. 
10911 */ 10912 ire_t *src_ire; 10913 10914 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10915 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10916 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10917 if (src_ire != NULL && 10918 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10919 (!ipst->ips_ip_restrict_interzone_loopback || 10920 ire_local_same_ill_group(ire, src_ire))) { 10921 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10922 !unspec_src) { 10923 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10924 } 10925 ire_refrele(src_ire); 10926 } else { 10927 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10928 if (src_ire != NULL) { 10929 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10930 ire_refrele(src_ire); 10931 freemsg(first_mp); 10932 return; 10933 } 10934 ire_refrele(src_ire); 10935 } 10936 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10937 /* Failed */ 10938 freemsg(first_mp); 10939 return; 10940 } 10941 icmp_unreachable_v6(q, first_mp, 10942 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10943 zoneid, ipst); 10944 return; 10945 } 10946 } 10947 10948 if (mp->b_datap->db_type == M_CTL || 10949 ipss->ipsec_outbound_v6_policy_present) { 10950 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10951 connp, unspec_src, zoneid); 10952 if (mp == NULL) { 10953 return; 10954 } 10955 } 10956 10957 first_mp = mp; 10958 if (mp->b_datap->db_type == M_CTL) { 10959 io = (ipsec_out_t *)mp->b_rptr; 10960 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10961 mp = mp->b_cont; 10962 mctl_present = B_TRUE; 10963 } else { 10964 mctl_present = B_FALSE; 10965 } 10966 10967 ip6h = (ip6_t *)mp->b_rptr; 10968 nexthdr = ip6h->ip6_nxt; 10969 mibptr = ill->ill_ip_mib; 10970 10971 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10972 ipif_t *ipif; 10973 10974 /* 10975 * Select the source address using ipif_select_source_v6. 
10976 */ 10977 if (attach_index != 0) { 10978 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10979 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10980 } else { 10981 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10982 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10983 } 10984 if (ipif == NULL) { 10985 if (ip_debug > 2) { 10986 /* ip1dbg */ 10987 pr_addr_dbg("ip_wput_ire_v6: no src for " 10988 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10989 printf("ip_wput_ire_v6: interface name %s\n", 10990 ill->ill_name); 10991 } 10992 freemsg(first_mp); 10993 return; 10994 } 10995 ip6h->ip6_src = ipif->ipif_v6src_addr; 10996 ipif_refrele(ipif); 10997 } 10998 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10999 if ((connp != NULL && connp->conn_multicast_loop) || 11000 !IS_LOOPBACK(ill)) { 11001 ilm_t *ilm; 11002 11003 ILM_WALKER_HOLD(ill); 11004 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 11005 ILM_WALKER_RELE(ill); 11006 if (ilm != NULL) { 11007 mblk_t *nmp; 11008 int fanout_flags = 0; 11009 11010 if (connp != NULL && 11011 !connp->conn_multicast_loop) { 11012 fanout_flags |= IP_FF_NO_MCAST_LOOP; 11013 } 11014 ip1dbg(("ip_wput_ire_v6: " 11015 "Loopback multicast\n")); 11016 nmp = ip_copymsg(first_mp); 11017 if (nmp != NULL) { 11018 ip6_t *nip6h; 11019 mblk_t *mp_ip6h; 11020 11021 if (mctl_present) { 11022 nip6h = (ip6_t *) 11023 nmp->b_cont->b_rptr; 11024 mp_ip6h = nmp->b_cont; 11025 } else { 11026 nip6h = (ip6_t *)nmp->b_rptr; 11027 mp_ip6h = nmp; 11028 } 11029 11030 DTRACE_PROBE4( 11031 ip6__loopback__out__start, 11032 ill_t *, NULL, 11033 ill_t *, ill, 11034 ip6_t *, nip6h, 11035 mblk_t *, nmp); 11036 11037 FW_HOOKS6( 11038 ipst->ips_ip6_loopback_out_event, 11039 ipst->ips_ipv6firewall_loopback_out, 11040 NULL, ill, nip6h, nmp, mp_ip6h, 11041 0, ipst); 11042 11043 DTRACE_PROBE1( 11044 ip6__loopback__out__end, 11045 mblk_t *, nmp); 11046 11047 /* 11048 * DTrace this as ip:::send. 
A blocked 11049 * packet will fire the send probe, but 11050 * not the receive probe. 11051 */ 11052 DTRACE_IP7(send, mblk_t *, nmp, 11053 conn_t *, NULL, void_ip_t *, nip6h, 11054 __dtrace_ipsr_ill_t *, ill, 11055 ipha_t *, NULL, ip6_t *, nip6h, 11056 int, 1); 11057 11058 if (nmp != NULL) { 11059 /* 11060 * Deliver locally and to 11061 * every local zone, except 11062 * the sending zone when 11063 * IPV6_MULTICAST_LOOP is 11064 * disabled. 11065 */ 11066 ip_wput_local_v6(RD(q), ill, 11067 nip6h, nmp, ire, 11068 fanout_flags, zoneid); 11069 } 11070 } else { 11071 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11072 ip1dbg(("ip_wput_ire_v6: " 11073 "copymsg failed\n")); 11074 } 11075 } 11076 } 11077 if (ip6h->ip6_hops == 0 || 11078 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11079 IS_LOOPBACK(ill)) { 11080 /* 11081 * Local multicast or just loopback on loopback 11082 * interface. 11083 */ 11084 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11085 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11086 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11087 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11088 freemsg(first_mp); 11089 return; 11090 } 11091 } 11092 11093 if (ire->ire_stq != NULL) { 11094 uint32_t sum; 11095 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11096 ill_phyint->phyint_ifindex; 11097 queue_t *dev_q = ire->ire_stq->q_next; 11098 11099 /* 11100 * non-NULL send-to queue - packet is to be sent 11101 * out an interface. 11102 */ 11103 11104 /* Driver is flow-controlling? */ 11105 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11106 DEV_Q_FLOW_BLOCKED(dev_q)) { 11107 /* 11108 * Queue packet if we have an conn to give back 11109 * pressure. We can't queue packets intended for 11110 * hardware acceleration since we've tossed that 11111 * state already. If the packet is being fed back 11112 * from ire_send_v6, we don't know the position in 11113 * the queue to enqueue the packet and we discard 11114 * the packet. 
11115 */ 11116 if (ipst->ips_ip_output_queue && connp != NULL && 11117 !mctl_present && caller != IRE_SEND) { 11118 if (caller == IP_WSRV) { 11119 connp->conn_did_putbq = 1; 11120 (void) putbq(connp->conn_wq, mp); 11121 conn_drain_insert(connp); 11122 /* 11123 * caller == IP_WSRV implies we are 11124 * the service thread, and the 11125 * queue is already noenabled. 11126 * The check for canput and 11127 * the putbq is not atomic. 11128 * So we need to check again. 11129 */ 11130 if (canput(dev_q)) 11131 connp->conn_did_putbq = 0; 11132 } else { 11133 (void) putq(connp->conn_wq, mp); 11134 } 11135 return; 11136 } 11137 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11138 freemsg(first_mp); 11139 return; 11140 } 11141 11142 /* 11143 * Look for reachability confirmations from the transport. 11144 */ 11145 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11146 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11147 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11148 if (mctl_present) 11149 io->ipsec_out_reachable = B_TRUE; 11150 } 11151 /* Fastpath */ 11152 switch (nexthdr) { 11153 case IPPROTO_TCP: 11154 case IPPROTO_UDP: 11155 case IPPROTO_ICMPV6: 11156 case IPPROTO_SCTP: 11157 hdr_length = IPV6_HDR_LEN; 11158 break; 11159 default: { 11160 uint8_t *nexthdrp; 11161 11162 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11163 &hdr_length, &nexthdrp)) { 11164 /* Malformed packet */ 11165 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11166 freemsg(first_mp); 11167 return; 11168 } 11169 nexthdr = *nexthdrp; 11170 break; 11171 } 11172 } 11173 11174 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11175 uint16_t *up; 11176 uint16_t *insp; 11177 11178 /* 11179 * The packet header is processed once for all, even 11180 * in the multirouting case. We disable hardware 11181 * checksum if the packet is multirouted, as it will be 11182 * replicated via several interfaces, and not all of 11183 * them may have this capability. 
11184 */ 11185 if (cksum_request == 1 && 11186 !(ire->ire_flags & RTF_MULTIRT)) { 11187 /* Skip the transport checksum */ 11188 goto cksum_done; 11189 } 11190 /* 11191 * Do user-configured raw checksum. 11192 * Compute checksum and insert at offset "cksum_request" 11193 */ 11194 11195 /* check for enough headers for checksum */ 11196 cksum_request += hdr_length; /* offset from rptr */ 11197 if ((mp->b_wptr - mp->b_rptr) < 11198 (cksum_request + sizeof (int16_t))) { 11199 if (!pullupmsg(mp, 11200 cksum_request + sizeof (int16_t))) { 11201 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11202 " failed\n")); 11203 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11204 freemsg(first_mp); 11205 return; 11206 } 11207 ip6h = (ip6_t *)mp->b_rptr; 11208 } 11209 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11210 ASSERT(((uintptr_t)insp & 0x1) == 0); 11211 up = (uint16_t *)&ip6h->ip6_src; 11212 /* 11213 * icmp has placed length and routing 11214 * header adjustment in *insp. 11215 */ 11216 sum = htons(nexthdr) + 11217 up[0] + up[1] + up[2] + up[3] + 11218 up[4] + up[5] + up[6] + up[7] + 11219 up[8] + up[9] + up[10] + up[11] + 11220 up[12] + up[13] + up[14] + up[15]; 11221 sum = (sum & 0xffff) + (sum >> 16); 11222 *insp = IP_CSUM(mp, hdr_length, sum); 11223 } else if (nexthdr == IPPROTO_TCP) { 11224 uint16_t *up; 11225 11226 /* 11227 * Check for full IPv6 header + enough TCP header 11228 * to get at the checksum field. 
11229 */ 11230 if ((mp->b_wptr - mp->b_rptr) < 11231 (hdr_length + TCP_CHECKSUM_OFFSET + 11232 TCP_CHECKSUM_SIZE)) { 11233 if (!pullupmsg(mp, hdr_length + 11234 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11235 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11236 " failed\n")); 11237 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11238 freemsg(first_mp); 11239 return; 11240 } 11241 ip6h = (ip6_t *)mp->b_rptr; 11242 } 11243 11244 up = (uint16_t *)&ip6h->ip6_src; 11245 /* 11246 * Note: The TCP module has stored the length value 11247 * into the tcp checksum field, so we don't 11248 * need to explicitly sum it in here. 11249 */ 11250 sum = up[0] + up[1] + up[2] + up[3] + 11251 up[4] + up[5] + up[6] + up[7] + 11252 up[8] + up[9] + up[10] + up[11] + 11253 up[12] + up[13] + up[14] + up[15]; 11254 11255 /* Fold the initial sum */ 11256 sum = (sum & 0xffff) + (sum >> 16); 11257 11258 up = (uint16_t *)(((uchar_t *)ip6h) + 11259 hdr_length + TCP_CHECKSUM_OFFSET); 11260 11261 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11262 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11263 ire->ire_max_frag, mctl_present, sum); 11264 11265 /* Software checksum? 
*/ 11266 if (DB_CKSUMFLAGS(mp) == 0) { 11267 IP6_STAT(ipst, ip6_out_sw_cksum); 11268 IP6_STAT_UPDATE(ipst, 11269 ip6_tcp_out_sw_cksum_bytes, 11270 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11271 hdr_length); 11272 } 11273 } else if (nexthdr == IPPROTO_UDP) { 11274 uint16_t *up; 11275 11276 /* 11277 * check for full IPv6 header + enough UDP header 11278 * to get at the UDP checksum field 11279 */ 11280 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11281 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11282 if (!pullupmsg(mp, hdr_length + 11283 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11284 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11285 " failed\n")); 11286 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11287 freemsg(first_mp); 11288 return; 11289 } 11290 ip6h = (ip6_t *)mp->b_rptr; 11291 } 11292 up = (uint16_t *)&ip6h->ip6_src; 11293 /* 11294 * Note: The UDP module has stored the length value 11295 * into the udp checksum field, so we don't 11296 * need to explicitly sum it in here. 11297 */ 11298 sum = up[0] + up[1] + up[2] + up[3] + 11299 up[4] + up[5] + up[6] + up[7] + 11300 up[8] + up[9] + up[10] + up[11] + 11301 up[12] + up[13] + up[14] + up[15]; 11302 11303 /* Fold the initial sum */ 11304 sum = (sum & 0xffff) + (sum >> 16); 11305 11306 up = (uint16_t *)(((uchar_t *)ip6h) + 11307 hdr_length + UDP_CHECKSUM_OFFSET); 11308 11309 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11310 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11311 ire->ire_max_frag, mctl_present, sum); 11312 11313 /* Software checksum? 
*/ 11314 if (DB_CKSUMFLAGS(mp) == 0) { 11315 IP6_STAT(ipst, ip6_out_sw_cksum); 11316 IP6_STAT_UPDATE(ipst, 11317 ip6_udp_out_sw_cksum_bytes, 11318 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11319 hdr_length); 11320 } 11321 } else if (nexthdr == IPPROTO_ICMPV6) { 11322 uint16_t *up; 11323 icmp6_t *icmp6; 11324 11325 /* check for full IPv6+ICMPv6 header */ 11326 if ((mp->b_wptr - mp->b_rptr) < 11327 (hdr_length + ICMP6_MINLEN)) { 11328 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11329 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11330 " failed\n")); 11331 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11332 freemsg(first_mp); 11333 return; 11334 } 11335 ip6h = (ip6_t *)mp->b_rptr; 11336 } 11337 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11338 up = (uint16_t *)&ip6h->ip6_src; 11339 /* 11340 * icmp has placed length and routing 11341 * header adjustment in icmp6_cksum. 11342 */ 11343 sum = htons(IPPROTO_ICMPV6) + 11344 up[0] + up[1] + up[2] + up[3] + 11345 up[4] + up[5] + up[6] + up[7] + 11346 up[8] + up[9] + up[10] + up[11] + 11347 up[12] + up[13] + up[14] + up[15]; 11348 sum = (sum & 0xffff) + (sum >> 16); 11349 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11350 11351 /* Update output mib stats */ 11352 icmp_update_out_mib_v6(ill, icmp6); 11353 } else if (nexthdr == IPPROTO_SCTP) { 11354 sctp_hdr_t *sctph; 11355 11356 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11357 if (!pullupmsg(mp, hdr_length + 11358 sizeof (*sctph))) { 11359 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11360 " failed\n")); 11361 BUMP_MIB(ill->ill_ip_mib, 11362 ipIfStatsOutDiscards); 11363 freemsg(mp); 11364 return; 11365 } 11366 ip6h = (ip6_t *)mp->b_rptr; 11367 } 11368 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11369 sctph->sh_chksum = 0; 11370 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11371 } 11372 11373 cksum_done: 11374 /* 11375 * We force the insertion of a fragment header using the 11376 * IPH_FRAG_HDR flag in two cases: 11377 * - after reception of an ICMPv6 "packet too 
big" message 11378 * with a MTU < 1280 (cf. RFC 2460 section 5) 11379 * - for multirouted IPv6 packets, so that the receiver can 11380 * discard duplicates according to their fragment identifier 11381 * 11382 * Two flags modifed from the API can modify this behavior. 11383 * The first is IPV6_USE_MIN_MTU. With this API the user 11384 * can specify how to manage PMTUD for unicast and multicast. 11385 * 11386 * IPV6_DONTFRAG disallows fragmentation. 11387 */ 11388 max_frag = ire->ire_max_frag; 11389 switch (IP6I_USE_MIN_MTU_API(flags)) { 11390 case IPV6_USE_MIN_MTU_DEFAULT: 11391 case IPV6_USE_MIN_MTU_UNICAST: 11392 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11393 max_frag = IPV6_MIN_MTU; 11394 } 11395 break; 11396 11397 case IPV6_USE_MIN_MTU_NEVER: 11398 max_frag = IPV6_MIN_MTU; 11399 break; 11400 } 11401 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11402 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11403 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11404 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11405 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11406 return; 11407 } 11408 11409 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11410 (mp->b_cont ? 
msgdsize(mp) : 11411 mp->b_wptr - (uchar_t *)ip6h)) { 11412 ip0dbg(("Packet length mismatch: %d, %ld\n", 11413 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11414 msgdsize(mp))); 11415 freemsg(first_mp); 11416 return; 11417 } 11418 /* Do IPSEC processing first */ 11419 if (mctl_present) { 11420 if (attach_index != 0) 11421 ipsec_out_attach_if(io, attach_index); 11422 ipsec_out_process(q, first_mp, ire, ill_index); 11423 return; 11424 } 11425 ASSERT(mp->b_prev == NULL); 11426 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11427 ntohs(ip6h->ip6_plen) + 11428 IPV6_HDR_LEN, max_frag)); 11429 ASSERT(mp == first_mp); 11430 /* Initiate IPPF processing */ 11431 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11432 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11433 if (mp == NULL) { 11434 return; 11435 } 11436 } 11437 ip_wput_frag_v6(mp, ire, reachable, connp, 11438 caller, max_frag); 11439 return; 11440 } 11441 /* Do IPSEC processing first */ 11442 if (mctl_present) { 11443 int extra_len = ipsec_out_extra_length(first_mp); 11444 11445 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11446 max_frag) { 11447 /* 11448 * IPsec headers will push the packet over the 11449 * MTU limit. Issue an ICMPv6 Packet Too Big 11450 * message for this packet if the upper-layer 11451 * that issued this packet will be able to 11452 * react to the icmp_pkt2big_v6() that we'll 11453 * generate. 11454 */ 11455 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11456 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11457 return; 11458 } 11459 if (attach_index != 0) 11460 ipsec_out_attach_if(io, attach_index); 11461 ipsec_out_process(q, first_mp, ire, ill_index); 11462 return; 11463 } 11464 /* 11465 * XXX multicast: add ip_mforward_v6() here. 11466 * Check conn_dontroute 11467 */ 11468 #ifdef lint 11469 /* 11470 * XXX The only purpose of this statement is to avoid lint 11471 * errors. See the above "XXX multicast". When that gets 11472 * fixed, remove this whole #ifdef lint section. 
11473 */ 11474 ip3dbg(("multicast forward is %s.\n", 11475 (multicast_forward ? "TRUE" : "FALSE"))); 11476 #endif 11477 11478 UPDATE_OB_PKT_COUNT(ire); 11479 ire->ire_last_used_time = lbolt; 11480 ASSERT(mp == first_mp); 11481 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11482 } else { 11483 /* 11484 * DTrace this as ip:::send. A blocked packet will fire the 11485 * send probe, but not the receive probe. 11486 */ 11487 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11488 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11489 NULL, ip6_t *, ip6h, int, 1); 11490 DTRACE_PROBE4(ip6__loopback__out__start, 11491 ill_t *, NULL, ill_t *, ill, 11492 ip6_t *, ip6h, mblk_t *, first_mp); 11493 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11494 ipst->ips_ipv6firewall_loopback_out, 11495 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11496 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11497 if (first_mp != NULL) { 11498 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11499 zoneid); 11500 } 11501 } 11502 } 11503 11504 /* 11505 * Outbound IPv6 fragmentation routine using MDT. 11506 */ 11507 static void 11508 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11509 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11510 { 11511 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11512 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11513 mblk_t *hdr_mp, *md_mp = NULL; 11514 int i1; 11515 multidata_t *mmd; 11516 unsigned char *hdr_ptr, *pld_ptr; 11517 ip_pdescinfo_t pdi; 11518 uint32_t ident; 11519 size_t len; 11520 uint16_t offset; 11521 queue_t *stq = ire->ire_stq; 11522 ill_t *ill = (ill_t *)stq->q_ptr; 11523 ip_stack_t *ipst = ill->ill_ipst; 11524 11525 ASSERT(DB_TYPE(mp) == M_DATA); 11526 ASSERT(MBLKL(mp) > unfragmentable_len); 11527 11528 /* 11529 * Move read ptr past unfragmentable portion, we don't want this part 11530 * of the data in our fragments. 
11531 */ 11532 mp->b_rptr += unfragmentable_len; 11533 11534 /* Calculate how many packets we will send out */ 11535 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11536 pkts = (i1 + max_chunk - 1) / max_chunk; 11537 ASSERT(pkts > 1); 11538 11539 /* Allocate a message block which will hold all the IP Headers. */ 11540 wroff = ipst->ips_ip_wroff_extra; 11541 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11542 11543 i1 = pkts * hdr_chunk_len; 11544 /* 11545 * Create the header buffer, Multidata and destination address 11546 * and SAP attribute that should be associated with it. 11547 */ 11548 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11549 ((hdr_mp->b_wptr += i1), 11550 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11551 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11552 freemsg(mp); 11553 if (md_mp == NULL) { 11554 freemsg(hdr_mp); 11555 } else { 11556 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11557 freemsg(md_mp); 11558 } 11559 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11561 return; 11562 } 11563 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11564 11565 /* 11566 * Add a payload buffer to the Multidata; this operation must not 11567 * fail, or otherwise our logic in this routine is broken. There 11568 * is no memory allocation done by the routine, so any returned 11569 * failure simply tells us that we've done something wrong. 11570 * 11571 * A failure tells us that either we're adding the same payload 11572 * buffer more than once, or we're trying to add more buffers than 11573 * allowed. None of the above cases should happen, and we panic 11574 * because either there's horrible heap corruption, and/or 11575 * programming mistake. 
11576 */ 11577 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11578 goto pbuf_panic; 11579 } 11580 11581 hdr_ptr = hdr_mp->b_rptr; 11582 pld_ptr = mp->b_rptr; 11583 11584 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11585 11586 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11587 11588 /* 11589 * len is the total length of the fragmentable data in this 11590 * datagram. For each fragment sent, we will decrement len 11591 * by the amount of fragmentable data sent in that fragment 11592 * until len reaches zero. 11593 */ 11594 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11595 11596 offset = 0; 11597 prev_nexthdr_offset += wroff; 11598 11599 while (len != 0) { 11600 size_t mlen; 11601 ip6_t *fip6h; 11602 ip6_frag_t *fraghdr; 11603 int error; 11604 11605 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11606 mlen = MIN(len, max_chunk); 11607 len -= mlen; 11608 11609 fip6h = (ip6_t *)(hdr_ptr + wroff); 11610 ASSERT(OK_32PTR(fip6h)); 11611 bcopy(ip6h, fip6h, unfragmentable_len); 11612 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11613 11614 fip6h->ip6_plen = htons((uint16_t)(mlen + 11615 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11616 11617 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11618 unfragmentable_len); 11619 fraghdr->ip6f_nxt = nexthdr; 11620 fraghdr->ip6f_reserved = 0; 11621 fraghdr->ip6f_offlg = htons(offset) | 11622 ((len != 0) ? IP6F_MORE_FRAG : 0); 11623 fraghdr->ip6f_ident = ident; 11624 11625 /* 11626 * Record offset and size of header and data of the next packet 11627 * in the multidata message. 
11628 */ 11629 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11630 unfragmentable_len + sizeof (ip6_frag_t), 0); 11631 PDESC_PLD_INIT(&pdi); 11632 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11633 ASSERT(i1 > 0); 11634 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11635 if (i1 == mlen) { 11636 pld_ptr += mlen; 11637 } else { 11638 i1 = mlen - i1; 11639 mp = mp->b_cont; 11640 ASSERT(mp != NULL); 11641 ASSERT(MBLKL(mp) >= i1); 11642 /* 11643 * Attach the next payload message block to the 11644 * multidata message. 11645 */ 11646 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11647 goto pbuf_panic; 11648 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11649 pld_ptr = mp->b_rptr + i1; 11650 } 11651 11652 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11653 KM_NOSLEEP)) == NULL) { 11654 /* 11655 * Any failure other than ENOMEM indicates that we 11656 * have passed in invalid pdesc info or parameters 11657 * to mmd_addpdesc, which must not happen. 11658 * 11659 * EINVAL is a result of failure on boundary checks 11660 * against the pdesc info contents. It should not 11661 * happen, and we panic because either there's 11662 * horrible heap corruption, and/or programming 11663 * mistake. 11664 */ 11665 if (error != ENOMEM) { 11666 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11667 "pdesc logic error detected for " 11668 "mmd %p pinfo %p (%d)\n", 11669 (void *)mmd, (void *)&pdi, error); 11670 /* NOTREACHED */ 11671 } 11672 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11673 /* Free unattached payload message blocks as well */ 11674 md_mp->b_cont = mp->b_cont; 11675 goto free_mmd; 11676 } 11677 11678 /* Advance fragment offset. */ 11679 offset += mlen; 11680 11681 /* Advance to location for next header in the buffer. */ 11682 hdr_ptr += hdr_chunk_len; 11683 11684 /* Did we reach the next payload message block? 
*/ 11685 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11686 mp = mp->b_cont; 11687 /* 11688 * Attach the next message block with payload 11689 * data to the multidata message. 11690 */ 11691 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11692 goto pbuf_panic; 11693 pld_ptr = mp->b_rptr; 11694 } 11695 } 11696 11697 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11698 ASSERT(mp->b_wptr == pld_ptr); 11699 11700 /* Update IP statistics */ 11701 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11702 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11703 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11704 /* 11705 * The ipv6 header len is accounted for in unfragmentable_len so 11706 * when calculating the fragmentation overhead just add the frag 11707 * header len. 11708 */ 11709 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11710 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11711 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11712 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11713 11714 ire->ire_ob_pkt_count += pkts; 11715 if (ire->ire_ipif != NULL) 11716 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11717 11718 ire->ire_last_used_time = lbolt; 11719 /* Send it down */ 11720 putnext(stq, md_mp); 11721 return; 11722 11723 pbuf_panic: 11724 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11725 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11726 pbuf_idx); 11727 /* NOTREACHED */ 11728 } 11729 11730 /* 11731 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11732 * We have not optimized this in terms of number of mblks 11733 * allocated. For instance, for each fragment sent we always allocate a 11734 * mblk to hold the IPv6 header and fragment header. 11735 * 11736 * Assumes that all the extension headers are contained in the first mblk. 
11737 * 11738 * The fragment header is inserted after an hop-by-hop options header 11739 * and after [an optional destinations header followed by] a routing header. 11740 * 11741 * NOTE : This function does not ire_refrele the ire passed in as 11742 * the argument. 11743 */ 11744 void 11745 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11746 int caller, int max_frag) 11747 { 11748 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11749 ip6_t *fip6h; 11750 mblk_t *hmp; 11751 mblk_t *hmp0; 11752 mblk_t *dmp; 11753 ip6_frag_t *fraghdr; 11754 size_t unfragmentable_len; 11755 size_t len; 11756 size_t mlen; 11757 size_t max_chunk; 11758 uint32_t ident; 11759 uint16_t off_flags; 11760 uint16_t offset = 0; 11761 ill_t *ill; 11762 uint8_t nexthdr; 11763 uint_t prev_nexthdr_offset; 11764 uint8_t *ptr; 11765 ip_stack_t *ipst = ire->ire_ipst; 11766 11767 ASSERT(ire->ire_type == IRE_CACHE); 11768 ill = (ill_t *)ire->ire_stq->q_ptr; 11769 11770 if (max_frag <= 0) { 11771 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11772 freemsg(mp); 11773 return; 11774 } 11775 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11776 11777 /* 11778 * Determine the length of the unfragmentable portion of this 11779 * datagram. This consists of the IPv6 header, a potential 11780 * hop-by-hop options header, a potential pre-routing-header 11781 * destination options header, and a potential routing header. 
11782 */ 11783 nexthdr = ip6h->ip6_nxt; 11784 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11785 ptr = (uint8_t *)&ip6h[1]; 11786 11787 if (nexthdr == IPPROTO_HOPOPTS) { 11788 ip6_hbh_t *hbh_hdr; 11789 uint_t hdr_len; 11790 11791 hbh_hdr = (ip6_hbh_t *)ptr; 11792 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11793 nexthdr = hbh_hdr->ip6h_nxt; 11794 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11795 - (uint8_t *)ip6h; 11796 ptr += hdr_len; 11797 } 11798 if (nexthdr == IPPROTO_DSTOPTS) { 11799 ip6_dest_t *dest_hdr; 11800 uint_t hdr_len; 11801 11802 dest_hdr = (ip6_dest_t *)ptr; 11803 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11804 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11805 nexthdr = dest_hdr->ip6d_nxt; 11806 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11807 - (uint8_t *)ip6h; 11808 ptr += hdr_len; 11809 } 11810 } 11811 if (nexthdr == IPPROTO_ROUTING) { 11812 ip6_rthdr_t *rthdr; 11813 uint_t hdr_len; 11814 11815 rthdr = (ip6_rthdr_t *)ptr; 11816 nexthdr = rthdr->ip6r_nxt; 11817 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11818 - (uint8_t *)ip6h; 11819 hdr_len = 8 * (rthdr->ip6r_len + 1); 11820 ptr += hdr_len; 11821 } 11822 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11823 11824 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11825 sizeof (ip6_frag_t)) & ~7; 11826 11827 /* Check if we can use MDT to send out the frags. */ 11828 ASSERT(!IRE_IS_LOCAL(ire)); 11829 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11830 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11831 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11832 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11833 nexthdr, prev_nexthdr_offset); 11834 return; 11835 } 11836 11837 /* 11838 * Allocate an mblk with enough room for the link-layer 11839 * header, the unfragmentable part of the datagram, and the 11840 * fragment header. 
This (or a copy) will be used as the 11841 * first mblk for each fragment we send. 11842 */ 11843 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11844 ipst->ips_ip_wroff_extra, BPRI_HI); 11845 if (hmp == NULL) { 11846 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11847 freemsg(mp); 11848 return; 11849 } 11850 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11851 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11852 11853 fip6h = (ip6_t *)hmp->b_rptr; 11854 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11855 11856 bcopy(ip6h, fip6h, unfragmentable_len); 11857 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11858 11859 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11860 11861 fraghdr->ip6f_nxt = nexthdr; 11862 fraghdr->ip6f_reserved = 0; 11863 fraghdr->ip6f_offlg = 0; 11864 fraghdr->ip6f_ident = htonl(ident); 11865 11866 /* 11867 * len is the total length of the fragmentable data in this 11868 * datagram. For each fragment sent, we will decrement len 11869 * by the amount of fragmentable data sent in that fragment 11870 * until len reaches zero. 11871 */ 11872 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11873 11874 /* 11875 * Move read ptr past unfragmentable portion, we don't want this part 11876 * of the data in our fragments. 
11877 */ 11878 mp->b_rptr += unfragmentable_len; 11879 11880 while (len != 0) { 11881 mlen = MIN(len, max_chunk); 11882 len -= mlen; 11883 if (len != 0) { 11884 /* Not last */ 11885 hmp0 = copyb(hmp); 11886 if (hmp0 == NULL) { 11887 freeb(hmp); 11888 freemsg(mp); 11889 BUMP_MIB(ill->ill_ip_mib, 11890 ipIfStatsOutFragFails); 11891 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11892 return; 11893 } 11894 off_flags = IP6F_MORE_FRAG; 11895 } else { 11896 /* Last fragment */ 11897 hmp0 = hmp; 11898 hmp = NULL; 11899 off_flags = 0; 11900 } 11901 fip6h = (ip6_t *)(hmp0->b_rptr); 11902 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11903 11904 fip6h->ip6_plen = htons((uint16_t)(mlen + 11905 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11906 /* 11907 * Note: Optimization alert. 11908 * In IPv6 (and IPv4) protocol header, Fragment Offset 11909 * ("offset") is 13 bits wide and in 8-octet units. 11910 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11911 * it occupies the most significant 13 bits. 11912 * (least significant 13 bits in IPv4). 11913 * We do not do any shifts here. Not shifting is same effect 11914 * as taking offset value in octet units, dividing by 8 and 11915 * then shifting 3 bits left to line it up in place in proper 11916 * place protocol header. 
11917 */ 11918 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11919 11920 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11921 /* mp has already been freed by ip_carve_mp() */ 11922 if (hmp != NULL) 11923 freeb(hmp); 11924 freeb(hmp0); 11925 ip1dbg(("ip_carve_mp: failed\n")); 11926 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11927 return; 11928 } 11929 hmp0->b_cont = dmp; 11930 /* Get the priority marking, if any */ 11931 hmp0->b_band = dmp->b_band; 11932 UPDATE_OB_PKT_COUNT(ire); 11933 ire->ire_last_used_time = lbolt; 11934 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11935 caller, NULL); 11936 reachable = 0; /* No need to redo state machine in loop */ 11937 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11938 offset += mlen; 11939 } 11940 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11941 } 11942 11943 /* 11944 * Determine if the ill and multicast aspects of that packets 11945 * "matches" the conn. 11946 */ 11947 boolean_t 11948 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11949 zoneid_t zoneid) 11950 { 11951 ill_t *in_ill; 11952 boolean_t wantpacket = B_TRUE; 11953 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11954 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11955 11956 /* 11957 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11958 * unicast and multicast reception to conn_incoming_ill. 11959 * conn_wantpacket_v6 is called both for unicast and 11960 * multicast. 11961 * 11962 * 1) The unicast copy of the packet can come anywhere in 11963 * the ill group if it is part of the group. Thus, we 11964 * need to check to see whether the ill group matches 11965 * if in_ill is part of a group. 11966 * 11967 * 2) ip_rput does not suppress duplicate multicast packets. 
11968 * If there are two interfaces in a ill group and we have 11969 * 2 applications (conns) joined a multicast group G on 11970 * both the interfaces, ilm_lookup_ill filter in ip_rput 11971 * will give us two packets because we join G on both the 11972 * interfaces rather than nominating just one interface 11973 * for receiving multicast like broadcast above. So, 11974 * we have to call ilg_lookup_ill to filter out duplicate 11975 * copies, if ill is part of a group, to supress duplicates. 11976 */ 11977 in_ill = connp->conn_incoming_ill; 11978 if (in_ill != NULL) { 11979 mutex_enter(&connp->conn_lock); 11980 in_ill = connp->conn_incoming_ill; 11981 mutex_enter(&ill->ill_lock); 11982 /* 11983 * No IPMP, and the packet did not arrive on conn_incoming_ill 11984 * OR, IPMP in use and the packet arrived on an IPMP group 11985 * different from the conn_incoming_ill's IPMP group. 11986 * Reject the packet. 11987 */ 11988 if ((in_ill->ill_group == NULL && in_ill != ill) || 11989 (in_ill->ill_group != NULL && 11990 in_ill->ill_group != ill->ill_group)) { 11991 wantpacket = B_FALSE; 11992 } 11993 mutex_exit(&ill->ill_lock); 11994 mutex_exit(&connp->conn_lock); 11995 if (!wantpacket) 11996 return (B_FALSE); 11997 } 11998 11999 if (connp->conn_multi_router) 12000 return (B_TRUE); 12001 12002 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 12003 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 12004 /* 12005 * Unicast case: we match the conn only if it's in the specified 12006 * zone. 12007 */ 12008 return (IPCL_ZONE_MATCH(connp, zoneid)); 12009 } 12010 12011 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 12012 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 12013 /* 12014 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 12015 * disabled, therefore we don't dispatch the multicast packet to 12016 * the sending zone. 
12017 */ 12018 return (B_FALSE); 12019 } 12020 12021 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 12022 zoneid != ALL_ZONES) { 12023 /* 12024 * Multicast packet on the loopback interface: we only match 12025 * conns who joined the group in the specified zone. 12026 */ 12027 return (B_FALSE); 12028 } 12029 12030 mutex_enter(&connp->conn_lock); 12031 wantpacket = 12032 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12033 mutex_exit(&connp->conn_lock); 12034 12035 return (wantpacket); 12036 } 12037 12038 12039 /* 12040 * Transmit a packet and update any NUD state based on the flags 12041 * XXX need to "recover" any ip6i_t when doing putq! 12042 * 12043 * NOTE : This function does not ire_refrele the ire passed in as the 12044 * argument. 12045 */ 12046 void 12047 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12048 int caller, ipsec_out_t *io) 12049 { 12050 mblk_t *mp1; 12051 nce_t *nce = ire->ire_nce; 12052 ill_t *ill; 12053 ill_t *out_ill; 12054 uint64_t delta; 12055 ip6_t *ip6h; 12056 queue_t *stq = ire->ire_stq; 12057 ire_t *ire1 = NULL; 12058 ire_t *save_ire = ire; 12059 boolean_t multirt_send = B_FALSE; 12060 mblk_t *next_mp = NULL; 12061 ip_stack_t *ipst = ire->ire_ipst; 12062 boolean_t fp_prepend = B_FALSE; 12063 uint32_t hlen; 12064 12065 ip6h = (ip6_t *)mp->b_rptr; 12066 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12067 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12068 ASSERT(nce != NULL); 12069 ASSERT(mp->b_datap->db_type == M_DATA); 12070 ASSERT(stq != NULL); 12071 12072 ill = ire_to_ill(ire); 12073 if (!ill) { 12074 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12075 freemsg(mp); 12076 return; 12077 } 12078 12079 /* 12080 * If a packet is to be sent out an interface that is a 6to4 12081 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12082 * destination, must be checked to have a 6to4 prefix 12083 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12084 * address configured on 
the sending interface. Otherwise, 12085 * the packet was delivered to this interface in error and the 12086 * packet must be dropped. 12087 */ 12088 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12089 ipif_t *ipif = ill->ill_ipif; 12090 12091 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12092 &ip6h->ip6_dst)) { 12093 if (ip_debug > 2) { 12094 /* ip1dbg */ 12095 pr_addr_dbg("ip_xmit_v6: attempting to " 12096 "send 6to4 addressed IPv6 " 12097 "destination (%s) out the wrong " 12098 "interface.\n", AF_INET6, 12099 &ip6h->ip6_dst); 12100 } 12101 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12102 freemsg(mp); 12103 return; 12104 } 12105 } 12106 12107 /* Flow-control check has been done in ip_wput_ire_v6 */ 12108 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12109 caller == IP_WSRV || canput(stq->q_next)) { 12110 uint32_t ill_index; 12111 12112 /* 12113 * In most cases, the emission loop below is entered only 12114 * once. Only in the case where the ire holds the 12115 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12116 * flagged ires in the bucket, and send the packet 12117 * through all crossed RTF_MULTIRT routes. 12118 */ 12119 if (ire->ire_flags & RTF_MULTIRT) { 12120 /* 12121 * Multirouting case. The bucket where ire is stored 12122 * probably holds other RTF_MULTIRT flagged ires 12123 * to the destination. In this call to ip_xmit_v6, 12124 * we attempt to send the packet through all 12125 * those ires. Thus, we first ensure that ire is the 12126 * first RTF_MULTIRT ire in the bucket, 12127 * before walking the ire list. 12128 */ 12129 ire_t *first_ire; 12130 irb_t *irb = ire->ire_bucket; 12131 ASSERT(irb != NULL); 12132 multirt_send = B_TRUE; 12133 12134 /* Make sure we do not omit any multiroute ire. 
*/ 12135 IRB_REFHOLD(irb); 12136 for (first_ire = irb->irb_ire; 12137 first_ire != NULL; 12138 first_ire = first_ire->ire_next) { 12139 if ((first_ire->ire_flags & RTF_MULTIRT) && 12140 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12141 &ire->ire_addr_v6)) && 12142 !(first_ire->ire_marks & 12143 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12144 break; 12145 } 12146 12147 if ((first_ire != NULL) && (first_ire != ire)) { 12148 IRE_REFHOLD(first_ire); 12149 /* ire will be released by the caller */ 12150 ire = first_ire; 12151 nce = ire->ire_nce; 12152 stq = ire->ire_stq; 12153 ill = ire_to_ill(ire); 12154 } 12155 IRB_REFRELE(irb); 12156 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12157 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12158 ILL_MDT_USABLE(ill)) { 12159 /* 12160 * This tcp connection was marked as MDT-capable, but 12161 * it has been turned off due changes in the interface. 12162 * Now that the interface support is back, turn it on 12163 * by notifying tcp. We don't directly modify tcp_mdt, 12164 * since we leave all the details to the tcp code that 12165 * knows better. 12166 */ 12167 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12168 12169 if (mdimp == NULL) { 12170 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12171 "connp %p (ENOMEM)\n", (void *)connp)); 12172 } else { 12173 CONN_INC_REF(connp); 12174 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 12175 tcp_input, connp, SQ_FILL, 12176 SQTAG_TCP_INPUT_MCTL); 12177 } 12178 } 12179 12180 do { 12181 mblk_t *mp_ip6h; 12182 12183 if (multirt_send) { 12184 irb_t *irb; 12185 /* 12186 * We are in a multiple send case, need to get 12187 * the next ire and make a duplicate of the 12188 * packet. ire1 holds here the next ire to 12189 * process in the bucket. If multirouting is 12190 * expected, any non-RTF_MULTIRT ire that has 12191 * the right destination address is ignored. 
12192 */ 12193 irb = ire->ire_bucket; 12194 ASSERT(irb != NULL); 12195 12196 IRB_REFHOLD(irb); 12197 for (ire1 = ire->ire_next; 12198 ire1 != NULL; 12199 ire1 = ire1->ire_next) { 12200 if (!(ire1->ire_flags & RTF_MULTIRT)) 12201 continue; 12202 if (!IN6_ARE_ADDR_EQUAL( 12203 &ire1->ire_addr_v6, 12204 &ire->ire_addr_v6)) 12205 continue; 12206 if (ire1->ire_marks & 12207 (IRE_MARK_CONDEMNED| 12208 IRE_MARK_HIDDEN)) 12209 continue; 12210 12211 /* Got one */ 12212 if (ire1 != save_ire) { 12213 IRE_REFHOLD(ire1); 12214 } 12215 break; 12216 } 12217 IRB_REFRELE(irb); 12218 12219 if (ire1 != NULL) { 12220 next_mp = copyb(mp); 12221 if ((next_mp == NULL) || 12222 ((mp->b_cont != NULL) && 12223 ((next_mp->b_cont = 12224 dupmsg(mp->b_cont)) == NULL))) { 12225 freemsg(next_mp); 12226 next_mp = NULL; 12227 ire_refrele(ire1); 12228 ire1 = NULL; 12229 } 12230 } 12231 12232 /* Last multiroute ire; don't loop anymore. */ 12233 if (ire1 == NULL) { 12234 multirt_send = B_FALSE; 12235 } 12236 } 12237 12238 ill_index = 12239 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12240 12241 /* Initiate IPPF processing */ 12242 if (IP6_OUT_IPP(flags, ipst)) { 12243 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12244 if (mp == NULL) { 12245 BUMP_MIB(ill->ill_ip_mib, 12246 ipIfStatsOutDiscards); 12247 if (next_mp != NULL) 12248 freemsg(next_mp); 12249 if (ire != save_ire) { 12250 ire_refrele(ire); 12251 } 12252 return; 12253 } 12254 ip6h = (ip6_t *)mp->b_rptr; 12255 } 12256 mp_ip6h = mp; 12257 12258 /* 12259 * Check for fastpath, we need to hold nce_lock to 12260 * prevent fastpath update from chaining nce_fp_mp. 
12261 */ 12262 12263 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12264 mutex_enter(&nce->nce_lock); 12265 if ((mp1 = nce->nce_fp_mp) != NULL) { 12266 uchar_t *rptr; 12267 12268 hlen = MBLKL(mp1); 12269 rptr = mp->b_rptr - hlen; 12270 /* 12271 * make sure there is room for the fastpath 12272 * datalink header 12273 */ 12274 if (rptr < mp->b_datap->db_base) { 12275 mp1 = copyb(mp1); 12276 mutex_exit(&nce->nce_lock); 12277 if (mp1 == NULL) { 12278 BUMP_MIB(ill->ill_ip_mib, 12279 ipIfStatsOutDiscards); 12280 freemsg(mp); 12281 if (next_mp != NULL) 12282 freemsg(next_mp); 12283 if (ire != save_ire) { 12284 ire_refrele(ire); 12285 } 12286 return; 12287 } 12288 mp1->b_cont = mp; 12289 12290 /* Get the priority marking, if any */ 12291 mp1->b_band = mp->b_band; 12292 mp = mp1; 12293 } else { 12294 mp->b_rptr = rptr; 12295 /* 12296 * fastpath - pre-pend datalink 12297 * header 12298 */ 12299 bcopy(mp1->b_rptr, rptr, hlen); 12300 mutex_exit(&nce->nce_lock); 12301 fp_prepend = B_TRUE; 12302 } 12303 } else { 12304 /* 12305 * Get the DL_UNITDATA_REQ. 12306 */ 12307 mp1 = nce->nce_res_mp; 12308 if (mp1 == NULL) { 12309 mutex_exit(&nce->nce_lock); 12310 ip1dbg(("ip_xmit_v6: No resolution " 12311 "block ire = %p\n", (void *)ire)); 12312 freemsg(mp); 12313 if (next_mp != NULL) 12314 freemsg(next_mp); 12315 if (ire != save_ire) { 12316 ire_refrele(ire); 12317 } 12318 return; 12319 } 12320 /* 12321 * Prepend the DL_UNITDATA_REQ. 
12322 */ 12323 mp1 = copyb(mp1); 12324 mutex_exit(&nce->nce_lock); 12325 if (mp1 == NULL) { 12326 BUMP_MIB(ill->ill_ip_mib, 12327 ipIfStatsOutDiscards); 12328 freemsg(mp); 12329 if (next_mp != NULL) 12330 freemsg(next_mp); 12331 if (ire != save_ire) { 12332 ire_refrele(ire); 12333 } 12334 return; 12335 } 12336 mp1->b_cont = mp; 12337 12338 /* Get the priority marking, if any */ 12339 mp1->b_band = mp->b_band; 12340 mp = mp1; 12341 } 12342 12343 out_ill = (ill_t *)stq->q_ptr; 12344 12345 DTRACE_PROBE4(ip6__physical__out__start, 12346 ill_t *, NULL, ill_t *, out_ill, 12347 ip6_t *, ip6h, mblk_t *, mp); 12348 12349 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12350 ipst->ips_ipv6firewall_physical_out, 12351 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12352 12353 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12354 12355 if (mp == NULL) { 12356 if (multirt_send) { 12357 ASSERT(ire1 != NULL); 12358 if (ire != save_ire) { 12359 ire_refrele(ire); 12360 } 12361 /* 12362 * Proceed with the next RTF_MULTIRT 12363 * ire, also set up the send-to queue 12364 * accordingly. 12365 */ 12366 ire = ire1; 12367 ire1 = NULL; 12368 stq = ire->ire_stq; 12369 nce = ire->ire_nce; 12370 ill = ire_to_ill(ire); 12371 mp = next_mp; 12372 next_mp = NULL; 12373 continue; 12374 } else { 12375 ASSERT(next_mp == NULL); 12376 ASSERT(ire1 == NULL); 12377 break; 12378 } 12379 } 12380 12381 if (ipst->ips_ipobs_enabled) { 12382 zoneid_t szone; 12383 12384 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12385 mp_ip6h, out_ill, ipst, ALL_ZONES); 12386 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12387 ALL_ZONES, out_ill, IPV6_VERSION, 12388 fp_prepend ? hlen : 0, ipst); 12389 } 12390 12391 /* 12392 * Update ire and MIB counters; for save_ire, this has 12393 * been done by the caller. 
12394 */ 12395 if (ire != save_ire) { 12396 UPDATE_OB_PKT_COUNT(ire); 12397 ire->ire_last_used_time = lbolt; 12398 12399 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12400 BUMP_MIB(ill->ill_ip_mib, 12401 ipIfStatsHCOutMcastPkts); 12402 UPDATE_MIB(ill->ill_ip_mib, 12403 ipIfStatsHCOutMcastOctets, 12404 ntohs(ip6h->ip6_plen) + 12405 IPV6_HDR_LEN); 12406 } 12407 } 12408 12409 /* 12410 * Send it down. XXX Do we want to flow control AH/ESP 12411 * packets that carry TCP payloads? We don't flow 12412 * control TCP packets, but we should also not 12413 * flow-control TCP packets that have been protected. 12414 * We don't have an easy way to find out if an AH/ESP 12415 * packet was originally TCP or not currently. 12416 */ 12417 if (io == NULL) { 12418 BUMP_MIB(ill->ill_ip_mib, 12419 ipIfStatsHCOutTransmits); 12420 UPDATE_MIB(ill->ill_ip_mib, 12421 ipIfStatsHCOutOctets, 12422 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12423 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12424 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12425 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12426 int, 0); 12427 12428 putnext(stq, mp); 12429 } else { 12430 /* 12431 * Safety Pup says: make sure this is 12432 * going to the right interface! 
12433 */ 12434 if (io->ipsec_out_capab_ill_index != 12435 ill_index) { 12436 /* IPsec kstats: bump lose counter */ 12437 freemsg(mp1); 12438 } else { 12439 BUMP_MIB(ill->ill_ip_mib, 12440 ipIfStatsHCOutTransmits); 12441 UPDATE_MIB(ill->ill_ip_mib, 12442 ipIfStatsHCOutOctets, 12443 ntohs(ip6h->ip6_plen) + 12444 IPV6_HDR_LEN); 12445 DTRACE_IP7(send, mblk_t *, mp, 12446 conn_t *, NULL, void_ip_t *, ip6h, 12447 __dtrace_ipsr_ill_t *, out_ill, 12448 ipha_t *, NULL, ip6_t *, ip6h, int, 12449 0); 12450 ipsec_hw_putnext(stq, mp); 12451 } 12452 } 12453 12454 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12455 if (ire != save_ire) { 12456 ire_refrele(ire); 12457 } 12458 if (multirt_send) { 12459 ASSERT(ire1 != NULL); 12460 /* 12461 * Proceed with the next RTF_MULTIRT 12462 * ire, also set up the send-to queue 12463 * accordingly. 12464 */ 12465 ire = ire1; 12466 ire1 = NULL; 12467 stq = ire->ire_stq; 12468 nce = ire->ire_nce; 12469 ill = ire_to_ill(ire); 12470 mp = next_mp; 12471 next_mp = NULL; 12472 continue; 12473 } 12474 ASSERT(next_mp == NULL); 12475 ASSERT(ire1 == NULL); 12476 return; 12477 } 12478 12479 ASSERT(nce->nce_state != ND_INCOMPLETE); 12480 12481 /* 12482 * Check for upper layer advice 12483 */ 12484 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12485 /* 12486 * It should be o.k. to check the state without 12487 * a lock here, at most we lose an advice. 
12488 */ 12489 nce->nce_last = TICK_TO_MSEC(lbolt64); 12490 if (nce->nce_state != ND_REACHABLE) { 12491 12492 mutex_enter(&nce->nce_lock); 12493 nce->nce_state = ND_REACHABLE; 12494 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12495 mutex_exit(&nce->nce_lock); 12496 (void) untimeout(nce->nce_timeout_id); 12497 if (ip_debug > 2) { 12498 /* ip1dbg */ 12499 pr_addr_dbg("ip_xmit_v6: state" 12500 " for %s changed to" 12501 " REACHABLE\n", AF_INET6, 12502 &ire->ire_addr_v6); 12503 } 12504 } 12505 if (ire != save_ire) { 12506 ire_refrele(ire); 12507 } 12508 if (multirt_send) { 12509 ASSERT(ire1 != NULL); 12510 /* 12511 * Proceed with the next RTF_MULTIRT 12512 * ire, also set up the send-to queue 12513 * accordingly. 12514 */ 12515 ire = ire1; 12516 ire1 = NULL; 12517 stq = ire->ire_stq; 12518 nce = ire->ire_nce; 12519 ill = ire_to_ill(ire); 12520 mp = next_mp; 12521 next_mp = NULL; 12522 continue; 12523 } 12524 ASSERT(next_mp == NULL); 12525 ASSERT(ire1 == NULL); 12526 return; 12527 } 12528 12529 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12530 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12531 " ill_reachable_time = %d \n", delta, 12532 ill->ill_reachable_time)); 12533 if (delta > (uint64_t)ill->ill_reachable_time) { 12534 nce = ire->ire_nce; 12535 mutex_enter(&nce->nce_lock); 12536 switch (nce->nce_state) { 12537 case ND_REACHABLE: 12538 case ND_STALE: 12539 /* 12540 * ND_REACHABLE is identical to 12541 * ND_STALE in this specific case. If 12542 * reachable time has expired for this 12543 * neighbor (delta is greater than 12544 * reachable time), conceptually, the 12545 * neighbor cache is no longer in 12546 * REACHABLE state, but already in 12547 * STALE state. So the correct 12548 * transition here is to ND_DELAY. 
12549 */ 12550 nce->nce_state = ND_DELAY; 12551 mutex_exit(&nce->nce_lock); 12552 NDP_RESTART_TIMER(nce, 12553 ipst->ips_delay_first_probe_time); 12554 if (ip_debug > 3) { 12555 /* ip2dbg */ 12556 pr_addr_dbg("ip_xmit_v6: state" 12557 " for %s changed to" 12558 " DELAY\n", AF_INET6, 12559 &ire->ire_addr_v6); 12560 } 12561 break; 12562 case ND_DELAY: 12563 case ND_PROBE: 12564 mutex_exit(&nce->nce_lock); 12565 /* Timers have already started */ 12566 break; 12567 case ND_UNREACHABLE: 12568 /* 12569 * ndp timer has detected that this nce 12570 * is unreachable and initiated deleting 12571 * this nce and all its associated IREs. 12572 * This is a race where we found the 12573 * ire before it was deleted and have 12574 * just sent out a packet using this 12575 * unreachable nce. 12576 */ 12577 mutex_exit(&nce->nce_lock); 12578 break; 12579 default: 12580 ASSERT(0); 12581 } 12582 } 12583 12584 if (multirt_send) { 12585 ASSERT(ire1 != NULL); 12586 /* 12587 * Proceed with the next RTF_MULTIRT ire, 12588 * Also set up the send-to queue accordingly. 12589 */ 12590 if (ire != save_ire) { 12591 ire_refrele(ire); 12592 } 12593 ire = ire1; 12594 ire1 = NULL; 12595 stq = ire->ire_stq; 12596 nce = ire->ire_nce; 12597 ill = ire_to_ill(ire); 12598 mp = next_mp; 12599 next_mp = NULL; 12600 } 12601 } while (multirt_send); 12602 /* 12603 * In the multirouting case, release the last ire used for 12604 * emission. save_ire will be released by the caller. 12605 */ 12606 if (ire != save_ire) { 12607 ire_refrele(ire); 12608 } 12609 } else { 12610 /* 12611 * Can't apply backpressure, just discard the packet. 12612 */ 12613 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12614 freemsg(mp); 12615 return; 12616 } 12617 } 12618 12619 /* 12620 * pr_addr_dbg function provides the needed buffer space to call 12621 * inet_ntop() function's 3rd argument. This function should be 12622 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12623 * stack buffer space in it's own stack frame. 
This function uses 12624 * a buffer from it's own stack and prints the information. 12625 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12626 * 12627 * Note: This function can call inet_ntop() once. 12628 */ 12629 void 12630 pr_addr_dbg(char *fmt1, int af, const void *addr) 12631 { 12632 char buf[INET6_ADDRSTRLEN]; 12633 12634 if (fmt1 == NULL) { 12635 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12636 return; 12637 } 12638 12639 /* 12640 * This does not compare debug level and just prints 12641 * out. Thus it is the responsibility of the caller 12642 * to check the appropriate debug-level before calling 12643 * this function. 12644 */ 12645 if (ip_debug > 0) { 12646 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12647 } 12648 12649 12650 } 12651 12652 12653 /* 12654 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12655 * if needed and extension headers) that will be needed based on the 12656 * ip6_pkt_t structure passed by the caller. 12657 * 12658 * The returned length does not include the length of the upper level 12659 * protocol (ULP) header. 
12660 */ 12661 int 12662 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12663 { 12664 int len; 12665 12666 len = IPV6_HDR_LEN; 12667 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12668 len += sizeof (ip6i_t); 12669 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12670 ASSERT(ipp->ipp_hopoptslen != 0); 12671 len += ipp->ipp_hopoptslen; 12672 } 12673 if (ipp->ipp_fields & IPPF_RTHDR) { 12674 ASSERT(ipp->ipp_rthdrlen != 0); 12675 len += ipp->ipp_rthdrlen; 12676 } 12677 /* 12678 * En-route destination options 12679 * Only do them if there's a routing header as well 12680 */ 12681 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12682 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12683 ASSERT(ipp->ipp_rtdstoptslen != 0); 12684 len += ipp->ipp_rtdstoptslen; 12685 } 12686 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12687 ASSERT(ipp->ipp_dstoptslen != 0); 12688 len += ipp->ipp_dstoptslen; 12689 } 12690 return (len); 12691 } 12692 12693 /* 12694 * All-purpose routine to build a header chain of an IPv6 header 12695 * followed by any required extension headers and a proto header, 12696 * preceeded (where necessary) by an ip6i_t private header. 12697 * 12698 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12699 * will be filled in appropriately. 12700 * Thus the caller must fill in the rest of the IPv6 header, such as 12701 * traffic class/flowid, source address (if not set here), hoplimit (if not 12702 * set here) and destination address. 12703 * 12704 * The extension headers and ip6i_t header will all be fully filled in. 12705 */ 12706 void 12707 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12708 ip6_pkt_t *ipp, uint8_t protocol) 12709 { 12710 uint8_t *nxthdr_ptr; 12711 uint8_t *cp; 12712 ip6i_t *ip6i; 12713 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12714 12715 /* 12716 * If sending private ip6i_t header down (checksum info, nexthop, 12717 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12718 * then fill it in. (The checksum info will be filled in by icmp). 
12719 */ 12720 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12721 ip6i = (ip6i_t *)ip6h; 12722 ip6h = (ip6_t *)&ip6i[1]; 12723 12724 ip6i->ip6i_flags = 0; 12725 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12726 if (ipp->ipp_fields & IPPF_IFINDEX || 12727 ipp->ipp_fields & IPPF_SCOPE_ID) { 12728 ASSERT(ipp->ipp_ifindex != 0); 12729 ip6i->ip6i_flags |= IP6I_IFINDEX; 12730 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12731 } 12732 if (ipp->ipp_fields & IPPF_ADDR) { 12733 /* 12734 * Enable per-packet source address verification if 12735 * IPV6_PKTINFO specified the source address. 12736 * ip6_src is set in the transport's _wput function. 12737 */ 12738 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12739 &ipp->ipp_addr)); 12740 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12741 } 12742 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12743 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12744 /* 12745 * We need to set this flag so that IP doesn't 12746 * rewrite the IPv6 header's hoplimit with the 12747 * current default value. 12748 */ 12749 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12750 } 12751 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12752 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12753 &ipp->ipp_nexthop)); 12754 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12755 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12756 } 12757 /* 12758 * tell IP this is an ip6i_t private header 12759 */ 12760 ip6i->ip6i_nxt = IPPROTO_RAW; 12761 } 12762 /* Initialize IPv6 header */ 12763 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12764 if (ipp->ipp_fields & IPPF_TCLASS) { 12765 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12766 (ipp->ipp_tclass << 20); 12767 } 12768 if (ipp->ipp_fields & IPPF_ADDR) 12769 ip6h->ip6_src = ipp->ipp_addr; 12770 12771 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12772 cp = (uint8_t *)&ip6h[1]; 12773 /* 12774 * Here's where we have to start stringing together 12775 * any extension headers in the right order: 12776 * Hop-by-hop, destination, routing, and final destination opts. 
12777 */ 12778 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12779 /* Hop-by-hop options */ 12780 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12781 12782 *nxthdr_ptr = IPPROTO_HOPOPTS; 12783 nxthdr_ptr = &hbh->ip6h_nxt; 12784 12785 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12786 cp += ipp->ipp_hopoptslen; 12787 } 12788 /* 12789 * En-route destination options 12790 * Only do them if there's a routing header as well 12791 */ 12792 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12793 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12794 ip6_dest_t *dst = (ip6_dest_t *)cp; 12795 12796 *nxthdr_ptr = IPPROTO_DSTOPTS; 12797 nxthdr_ptr = &dst->ip6d_nxt; 12798 12799 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12800 cp += ipp->ipp_rtdstoptslen; 12801 } 12802 /* 12803 * Routing header next 12804 */ 12805 if (ipp->ipp_fields & IPPF_RTHDR) { 12806 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12807 12808 *nxthdr_ptr = IPPROTO_ROUTING; 12809 nxthdr_ptr = &rt->ip6r_nxt; 12810 12811 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12812 cp += ipp->ipp_rthdrlen; 12813 } 12814 /* 12815 * Do ultimate destination options 12816 */ 12817 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12818 ip6_dest_t *dest = (ip6_dest_t *)cp; 12819 12820 *nxthdr_ptr = IPPROTO_DSTOPTS; 12821 nxthdr_ptr = &dest->ip6d_nxt; 12822 12823 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12824 cp += ipp->ipp_dstoptslen; 12825 } 12826 /* 12827 * Now set the last header pointer to the proto passed in 12828 */ 12829 *nxthdr_ptr = protocol; 12830 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12831 } 12832 12833 /* 12834 * Return a pointer to the routing header extension header 12835 * in the IPv6 header(s) chain passed in. 
12836 * If none found, return NULL 12837 * Assumes that all extension headers are in same mblk as the v6 header 12838 */ 12839 ip6_rthdr_t * 12840 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12841 { 12842 ip6_dest_t *desthdr; 12843 ip6_frag_t *fraghdr; 12844 uint_t hdrlen; 12845 uint8_t nexthdr; 12846 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12847 12848 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12849 return ((ip6_rthdr_t *)ptr); 12850 12851 /* 12852 * The routing header will precede all extension headers 12853 * other than the hop-by-hop and destination options 12854 * extension headers, so if we see anything other than those, 12855 * we're done and didn't find it. 12856 * We could see a destination options header alone but no 12857 * routing header, in which case we'll return NULL as soon as 12858 * we see anything after that. 12859 * Hop-by-hop and destination option headers are identical, 12860 * so we can use either one we want as a template. 12861 */ 12862 nexthdr = ip6h->ip6_nxt; 12863 while (ptr < endptr) { 12864 /* Is there enough left for len + nexthdr? */ 12865 if (ptr + MIN_EHDR_LEN > endptr) 12866 return (NULL); 12867 12868 switch (nexthdr) { 12869 case IPPROTO_HOPOPTS: 12870 case IPPROTO_DSTOPTS: 12871 /* Assumes the headers are identical for hbh and dst */ 12872 desthdr = (ip6_dest_t *)ptr; 12873 hdrlen = 8 * (desthdr->ip6d_len + 1); 12874 nexthdr = desthdr->ip6d_nxt; 12875 break; 12876 12877 case IPPROTO_ROUTING: 12878 return ((ip6_rthdr_t *)ptr); 12879 12880 case IPPROTO_FRAGMENT: 12881 fraghdr = (ip6_frag_t *)ptr; 12882 hdrlen = sizeof (ip6_frag_t); 12883 nexthdr = fraghdr->ip6f_nxt; 12884 break; 12885 12886 default: 12887 return (NULL); 12888 } 12889 ptr += hdrlen; 12890 } 12891 return (NULL); 12892 } 12893 12894 /* 12895 * Called for source-routed packets originating on this node. 
12896 * Manipulates the original routing header by moving every entry up 12897 * one slot, placing the first entry in the v6 header's v6_dst field, 12898 * and placing the ultimate destination in the routing header's last 12899 * slot. 12900 * 12901 * Returns the checksum diference between the ultimate destination 12902 * (last hop in the routing header when the packet is sent) and 12903 * the first hop (ip6_dst when the packet is sent) 12904 */ 12905 /* ARGSUSED2 */ 12906 uint32_t 12907 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12908 { 12909 uint_t numaddr; 12910 uint_t i; 12911 in6_addr_t *addrptr; 12912 in6_addr_t tmp; 12913 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12914 uint32_t cksm; 12915 uint32_t addrsum = 0; 12916 uint16_t *ptr; 12917 12918 /* 12919 * Perform any processing needed for source routing. 12920 * We know that all extension headers will be in the same mblk 12921 * as the IPv6 header. 12922 */ 12923 12924 /* 12925 * If no segments left in header, or the header length field is zero, 12926 * don't move hop addresses around; 12927 * Checksum difference is zero. 12928 */ 12929 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12930 return (0); 12931 12932 ptr = (uint16_t *)&ip6h->ip6_dst; 12933 cksm = 0; 12934 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12935 cksm += ptr[i]; 12936 } 12937 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12938 12939 /* 12940 * Here's where the fun begins - we have to 12941 * move all addresses up one spot, take the 12942 * first hop and make it our first ip6_dst, 12943 * and place the ultimate destination in the 12944 * newly-opened last slot. 
12945 */ 12946 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12947 numaddr = rthdr->ip6r0_len / 2; 12948 tmp = *addrptr; 12949 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12950 *addrptr = addrptr[1]; 12951 } 12952 *addrptr = ip6h->ip6_dst; 12953 ip6h->ip6_dst = tmp; 12954 12955 /* 12956 * From the checksummed ultimate destination subtract the checksummed 12957 * current ip6_dst (the first hop address). Return that number. 12958 * (In the v4 case, the second part of this is done in each routine 12959 * that calls ip_massage_options(). We do it all in this one place 12960 * for v6). 12961 */ 12962 ptr = (uint16_t *)&ip6h->ip6_dst; 12963 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12964 addrsum += ptr[i]; 12965 } 12966 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12967 if ((int)cksm < 0) 12968 cksm--; 12969 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12970 12971 return (cksm); 12972 } 12973 12974 /* 12975 * Propagate a multicast group membership operation (join/leave) (*fn) on 12976 * all interfaces crossed by the related multirt routes. 12977 * The call is considered successful if the operation succeeds 12978 * on at least one interface. 12979 * The function is called if the destination address in the packet to send 12980 * is multirouted. 
12981 */ 12982 int 12983 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12984 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12985 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12986 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12987 { 12988 ire_t *ire_gw; 12989 irb_t *irb; 12990 int index, error = 0; 12991 opt_restart_t *or; 12992 ip_stack_t *ipst = ire->ire_ipst; 12993 12994 irb = ire->ire_bucket; 12995 ASSERT(irb != NULL); 12996 12997 ASSERT(DB_TYPE(first_mp) == M_CTL); 12998 or = (opt_restart_t *)first_mp->b_rptr; 12999 13000 IRB_REFHOLD(irb); 13001 for (; ire != NULL; ire = ire->ire_next) { 13002 if ((ire->ire_flags & RTF_MULTIRT) == 0) 13003 continue; 13004 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 13005 continue; 13006 13007 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 13008 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 13009 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 13010 /* No resolver exists for the gateway; skip this ire. */ 13011 if (ire_gw == NULL) 13012 continue; 13013 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 13014 /* 13015 * A resolver exists: we can get the interface on which we have 13016 * to apply the operation. 13017 */ 13018 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13019 first_mp); 13020 if (error == 0) 13021 or->or_private = CGTP_MCAST_SUCCESS; 13022 13023 if (ip_debug > 0) { 13024 ulong_t off; 13025 char *ksym; 13026 13027 ksym = kobj_getsymname((uintptr_t)fn, &off); 13028 ip2dbg(("ip_multirt_apply_membership_v6: " 13029 "called %s, multirt group 0x%08x via itf 0x%08x, " 13030 "error %d [success %u]\n", 13031 ksym ? 
ksym : "?", 13032 ntohl(V4_PART_OF_V6((*v6grp))), 13033 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13034 error, or->or_private)); 13035 } 13036 13037 ire_refrele(ire_gw); 13038 if (error == EINPROGRESS) { 13039 IRB_REFRELE(irb); 13040 return (error); 13041 } 13042 } 13043 IRB_REFRELE(irb); 13044 /* 13045 * Consider the call as successful if we succeeded on at least 13046 * one interface. Otherwise, return the last encountered error. 13047 */ 13048 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13049 } 13050 13051 void 13052 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13053 { 13054 kstat_t *ksp; 13055 13056 ip6_stat_t template = { 13057 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13058 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13059 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13060 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13061 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13062 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13063 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13064 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13065 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13066 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13067 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13068 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13069 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13070 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13071 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13072 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13073 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13074 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13075 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13076 }; 13077 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13078 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13079 KSTAT_FLAG_VIRTUAL, stackid); 13080 13081 if (ksp == NULL) 13082 return (NULL); 13083 13084 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13085 ksp->ks_data = (void *)ip6_statisticsp; 13086 ksp->ks_private = (void *)(uintptr_t)stackid; 13087 13088 kstat_install(ksp); 13089 return (ksp); 13090 } 13091 13092 void 13093 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13094 { 13095 if (ksp != NULL) { 13096 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13097 kstat_delete_netstack(ksp, stackid); 13098 } 13099 } 13100 13101 /* 13102 * The following two functions set and get the value for the 13103 * IPV6_SRC_PREFERENCES socket option. 13104 */ 13105 int 13106 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13107 { 13108 /* 13109 * We only support preferences that are covered by 13110 * IPV6_PREFER_SRC_MASK. 13111 */ 13112 if (prefs & ~IPV6_PREFER_SRC_MASK) 13113 return (EINVAL); 13114 13115 /* 13116 * Look for conflicting preferences or default preferences. If 13117 * both bits of a related pair are clear, the application wants the 13118 * system's default value for that pair. Both bits in a pair can't 13119 * be set. 
13120 */ 13121 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13122 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13123 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13124 IPV6_PREFER_SRC_MIPMASK) { 13125 return (EINVAL); 13126 } 13127 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13128 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13129 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13130 IPV6_PREFER_SRC_TMPMASK) { 13131 return (EINVAL); 13132 } 13133 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13134 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13135 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13136 IPV6_PREFER_SRC_CGAMASK) { 13137 return (EINVAL); 13138 } 13139 13140 connp->conn_src_preferences = prefs; 13141 return (0); 13142 } 13143 13144 size_t 13145 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13146 { 13147 *val = connp->conn_src_preferences; 13148 return (sizeof (connp->conn_src_preferences)); 13149 } 13150 13151 int 13152 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13153 { 13154 ill_t *ill; 13155 ire_t *ire; 13156 int error; 13157 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13158 13159 /* 13160 * Verify the source address and ifindex. Privileged users can use 13161 * any source address. For ancillary data the source address is 13162 * checked in ip_wput_v6. 13163 */ 13164 if (pkti->ipi6_ifindex != 0) { 13165 ASSERT(connp != NULL); 13166 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13167 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13168 if (ill == NULL) { 13169 /* 13170 * We just want to know if the interface exists, we 13171 * don't really care about the ill pointer itself. 
13172 */ 13173 if (error != EINPROGRESS) 13174 return (error); 13175 error = 0; /* Ensure we don't use it below */ 13176 } else { 13177 ill_refrele(ill); 13178 } 13179 } 13180 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13181 secpolicy_net_rawaccess(cr) != 0) { 13182 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13183 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13184 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13185 if (ire != NULL) 13186 ire_refrele(ire); 13187 else 13188 return (ENXIO); 13189 } 13190 return (0); 13191 } 13192 13193 /* 13194 * Get the size of the IP options (including the IP headers size) 13195 * without including the AH header's size. If till_ah is B_FALSE, 13196 * and if AH header is present, dest options beyond AH header will 13197 * also be included in the returned size. 13198 */ 13199 int 13200 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13201 { 13202 ip6_t *ip6h; 13203 uint8_t nexthdr; 13204 uint8_t *whereptr; 13205 ip6_hbh_t *hbhhdr; 13206 ip6_dest_t *dsthdr; 13207 ip6_rthdr_t *rthdr; 13208 int ehdrlen; 13209 int size; 13210 ah_t *ah; 13211 13212 ip6h = (ip6_t *)mp->b_rptr; 13213 size = IPV6_HDR_LEN; 13214 nexthdr = ip6h->ip6_nxt; 13215 whereptr = (uint8_t *)&ip6h[1]; 13216 for (;;) { 13217 /* Assume IP has already stripped it */ 13218 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13219 switch (nexthdr) { 13220 case IPPROTO_HOPOPTS: 13221 hbhhdr = (ip6_hbh_t *)whereptr; 13222 nexthdr = hbhhdr->ip6h_nxt; 13223 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13224 break; 13225 case IPPROTO_DSTOPTS: 13226 dsthdr = (ip6_dest_t *)whereptr; 13227 nexthdr = dsthdr->ip6d_nxt; 13228 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13229 break; 13230 case IPPROTO_ROUTING: 13231 rthdr = (ip6_rthdr_t *)whereptr; 13232 nexthdr = rthdr->ip6r_nxt; 13233 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13234 break; 13235 default : 13236 if (till_ah) { 13237 ASSERT(nexthdr == IPPROTO_AH); 13238 return (size); 13239 } 13240 /* 13241 * If we don't have a 
AH header to traverse, 13242 * return now. This happens normally for 13243 * outbound datagrams where we have not inserted 13244 * the AH header. 13245 */ 13246 if (nexthdr != IPPROTO_AH) { 13247 return (size); 13248 } 13249 13250 /* 13251 * We don't include the AH header's size 13252 * to be symmetrical with other cases where 13253 * we either don't have a AH header (outbound) 13254 * or peek into the AH header yet (inbound and 13255 * not pulled up yet). 13256 */ 13257 ah = (ah_t *)whereptr; 13258 nexthdr = ah->ah_nexthdr; 13259 ehdrlen = (ah->ah_length << 2) + 8; 13260 13261 if (nexthdr == IPPROTO_DSTOPTS) { 13262 if (whereptr + ehdrlen >= mp->b_wptr) { 13263 /* 13264 * The destination options header 13265 * is not part of the first mblk. 13266 */ 13267 whereptr = mp->b_cont->b_rptr; 13268 } else { 13269 whereptr += ehdrlen; 13270 } 13271 13272 dsthdr = (ip6_dest_t *)whereptr; 13273 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13274 size += ehdrlen; 13275 } 13276 return (size); 13277 } 13278 whereptr += ehdrlen; 13279 size += ehdrlen; 13280 } 13281 } 13282