1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/optcom.h> 73 #include <inet/mib2.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/tcp_impl.h> 83 #include <inet/udp_impl.h> 84 #include <inet/ipp_common.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <inet/rawip_impl.h> 102 #include <inet/rts_impl.h> 103 #include <sys/squeue.h> 104 105 #include <sys/tsol/label.h> 106 
#include <sys/tsol/tnet.h> 107 108 #include <rpc/pmap_prot.h> 109 110 /* Temporary; for CR 6451644 work-around */ 111 #include <sys/ethernet.h> 112 113 extern squeue_func_t ip_input_proc; 114 115 /* 116 * Naming conventions: 117 * These rules should be judiciously applied 118 * if there is a need to identify something as IPv6 versus IPv4 119 * IPv6 functions will end with _v6 in the ip module. 120 * IPv6 functions will end with _ipv6 in the transport modules. 121 * IPv6 macros: 122 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 123 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 124 * And then there are ..V4_PART_OF_V6. 125 * The intent is that macros in the ip module end with _V6. 126 * IPv6 global variables will start with ipv6_ 127 * IPv6 structures will start with ipv6 128 * IPv6 defined constants should start with IPV6_ 129 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 130 */ 131 132 /* 133 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 134 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 135 * from IANA. This mechanism will remain in effect until an official 136 * number is obtained. 
137 */ 138 uchar_t ip6opt_ls; 139 140 const in6_addr_t ipv6_all_ones = 141 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 142 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 143 144 #ifdef _BIG_ENDIAN 145 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 146 #else /* _BIG_ENDIAN */ 147 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 148 #endif /* _BIG_ENDIAN */ 149 150 #ifdef _BIG_ENDIAN 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 152 #else /* _BIG_ENDIAN */ 153 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 154 #endif /* _BIG_ENDIAN */ 155 156 #ifdef _BIG_ENDIAN 157 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 158 #else /* _BIG_ENDIAN */ 159 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 160 #endif /* _BIG_ENDIAN */ 161 162 #ifdef _BIG_ENDIAN 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 164 #else /* _BIG_ENDIAN */ 165 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 166 #endif /* _BIG_ENDIAN */ 167 168 #ifdef _BIG_ENDIAN 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 170 #else /* _BIG_ENDIAN */ 171 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 172 #endif /* _BIG_ENDIAN */ 173 174 #ifdef _BIG_ENDIAN 175 const in6_addr_t ipv6_solicited_node_mcast = 176 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 177 #else /* _BIG_ENDIAN */ 178 const in6_addr_t ipv6_solicited_node_mcast = 179 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 180 #endif /* _BIG_ENDIAN */ 181 182 /* Leave room for ip_newroute to tack on the src and target addresses */ 183 #define OK_RESOLVER_MP_V6(mp) \ 184 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 185 186 #define IP6_MBLK_OK 0 187 #define IP6_MBLK_HDR_ERR 1 188 #define IP6_MBLK_LEN_ERR 2 189 190 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 191 
boolean_t, zoneid_t); 192 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 193 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 194 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 195 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 196 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 197 boolean_t, boolean_t, boolean_t, boolean_t); 198 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 199 iulp_t *, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 201 uint16_t, boolean_t, boolean_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, int, zoneid_t); 215 216 /* 217 * A template for an IPv6 AR_ENTRY_QUERY 218 */ 219 static areq_t ipv6_areq_template = { 220 AR_ENTRY_QUERY, /* cmd */ 221 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 222 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 223 IP6_DL_SAP, /* protocol, from arps perspective */ 224 sizeof (areq_t), /* target addr offset */ 225 IPV6_ADDR_LEN, /* target addr_length */ 226 0, /* flags */ 227 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 228 IPV6_ADDR_LEN, /* sender addr length */ 229 6, /* xmit_count */ 230 1000, /* 
(re)xmit_interval in milliseconds */ 231 4 /* max # of requests to buffer */ 232 /* anything else filled in by the code */ 233 }; 234 235 /* 236 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 237 * The message has already been checksummed and if needed, 238 * a copy has been made to be sent any interested ICMP client (conn) 239 * Note that this is different than icmp_inbound() which does the fanout 240 * to conn's as well as local processing of the ICMP packets. 241 * 242 * All error messages are passed to the matching transport stream. 243 * 244 * Zones notes: 245 * The packet is only processed in the context of the specified zone: typically 246 * only this zone will reply to an echo request. This means that the caller must 247 * call icmp_inbound_v6() for each relevant zone. 248 */ 249 static void 250 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 251 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 252 { 253 icmp6_t *icmp6; 254 ip6_t *ip6h; 255 boolean_t interested; 256 ip6i_t *ip6i; 257 in6_addr_t origsrc; 258 ire_t *ire; 259 mblk_t *first_mp; 260 ipsec_in_t *ii; 261 ip_stack_t *ipst = ill->ill_ipst; 262 263 ASSERT(ill != NULL); 264 first_mp = mp; 265 if (mctl_present) { 266 mp = first_mp->b_cont; 267 ASSERT(mp != NULL); 268 269 ii = (ipsec_in_t *)first_mp->b_rptr; 270 ASSERT(ii->ipsec_in_type == IPSEC_IN); 271 } 272 273 ip6h = (ip6_t *)mp->b_rptr; 274 275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 276 277 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 278 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 279 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 280 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 281 freemsg(first_mp); 282 return; 283 } 284 ip6h = (ip6_t *)mp->b_rptr; 285 } 286 if (ipst->ips_icmp_accept_clear_messages == 0) { 287 first_mp = ipsec_check_global_policy(first_mp, NULL, 288 NULL, ip6h, mctl_present, ipst->ips_netstack); 289 if (first_mp == NULL) 290 
return; 291 } 292 293 /* 294 * On a labeled system, we have to check whether the zone itself is 295 * permitted to receive raw traffic. 296 */ 297 if (is_system_labeled()) { 298 if (zoneid == ALL_ZONES) 299 zoneid = tsol_packet_to_zoneid(mp); 300 if (!tsol_can_accept_raw(mp, B_FALSE)) { 301 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 302 zoneid)); 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 304 freemsg(first_mp); 305 return; 306 } 307 } 308 309 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 310 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 311 icmp6->icmp6_code)); 312 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 313 314 /* Initiate IPPF processing here */ 315 if (IP6_IN_IPP(flags, ipst)) { 316 317 /* 318 * If the ifindex changes due to SIOCSLIFINDEX 319 * packet may return to IP on the wrong ill. 320 */ 321 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 322 if (mp == NULL) { 323 if (mctl_present) { 324 freeb(first_mp); 325 } 326 return; 327 } 328 } 329 330 switch (icmp6->icmp6_type) { 331 case ICMP6_DST_UNREACH: 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 333 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 335 break; 336 337 case ICMP6_TIME_EXCEEDED: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 339 break; 340 341 case ICMP6_PARAM_PROB: 342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 343 break; 344 345 case ICMP6_PACKET_TOO_BIG: 346 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 347 zoneid); 348 return; 349 case ICMP6_ECHO_REQUEST: 350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 351 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 352 !ipst->ips_ipv6_resp_echo_mcast) 353 break; 354 355 /* 356 * We must have exclusive use of the mblk to convert it to 357 * a response. 358 * If not, we copy it. 
359 */ 360 if (mp->b_datap->db_ref > 1) { 361 mblk_t *mp1; 362 363 mp1 = copymsg(mp); 364 freemsg(mp); 365 if (mp1 == NULL) { 366 BUMP_MIB(ill->ill_icmp6_mib, 367 ipv6IfIcmpInErrors); 368 if (mctl_present) 369 freeb(first_mp); 370 return; 371 } 372 mp = mp1; 373 ip6h = (ip6_t *)mp->b_rptr; 374 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 375 if (mctl_present) 376 first_mp->b_cont = mp; 377 else 378 first_mp = mp; 379 } 380 381 /* 382 * Turn the echo into an echo reply. 383 * Remove any extension headers (do not reverse a source route) 384 * and clear the flow id (keep traffic class for now). 385 */ 386 if (hdr_length != IPV6_HDR_LEN) { 387 int i; 388 389 for (i = 0; i < IPV6_HDR_LEN; i++) 390 mp->b_rptr[hdr_length - i - 1] = 391 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 392 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 393 ip6h = (ip6_t *)mp->b_rptr; 394 ip6h->ip6_nxt = IPPROTO_ICMPV6; 395 hdr_length = IPV6_HDR_LEN; 396 } 397 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 398 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 399 400 ip6h->ip6_plen = 401 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 402 origsrc = ip6h->ip6_src; 403 /* 404 * Reverse the source and destination addresses. 405 * If the return address is a multicast, zero out the source 406 * (ip_wput_v6 will set an address). 407 */ 408 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 409 ip6h->ip6_src = ipv6_all_zeros; 410 ip6h->ip6_dst = origsrc; 411 } else { 412 ip6h->ip6_src = ip6h->ip6_dst; 413 ip6h->ip6_dst = origsrc; 414 } 415 416 /* set the hop limit */ 417 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 418 419 /* 420 * Prepare for checksum by putting icmp length in the icmp 421 * checksum field. The checksum is calculated in ip_wput_v6. 422 */ 423 icmp6->icmp6_cksum = ip6h->ip6_plen; 424 /* 425 * ICMP echo replies should go out on the same interface 426 * the request came on as probes used by in.mpathd for 427 * detecting NIC failures are ECHO packets. 
We turn-off load 428 * spreading by allocating a ip6i and setting ip6i_attach_if 429 * to B_TRUE which is handled both by ip_wput_v6 and 430 * ip_newroute_v6. If we don't turnoff load spreading, 431 * the packets might get dropped if there are no 432 * non-FAILED/INACTIVE interfaces for it to go out on and 433 * in.mpathd would wrongly detect a failure or mis-detect 434 * a NIC failure as a link failure. As load spreading can 435 * happen only if ill_group is not NULL, we do only for 436 * that case and this does not affect the normal case. 437 * 438 * We force this only on echo packets that came from on-link 439 * hosts. We restrict this to link-local addresses which 440 * is used by in.mpathd for probing. In the IPv6 case, 441 * default routes typically have an ire_ipif pointer and 442 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 443 * might work. As a default route out of this interface 444 * may not be present, enforcing this packet to go out in 445 * this case may not work. 446 */ 447 if (ill->ill_group != NULL && 448 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 449 /* 450 * If we are sending replies to ourselves, don't 451 * set ATTACH_IF as we may not be able to find 452 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 453 * causes ip_wput_v6 to look for an IRE_LOCAL on 454 * "ill" which it may not find and will try to 455 * create an IRE_CACHE for our local address. Once 456 * we do this, we will try to forward all packets 457 * meant to our LOCAL address. 
458 */ 459 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 460 NULL, ipst); 461 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 462 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 463 if (mp == NULL) { 464 BUMP_MIB(ill->ill_icmp6_mib, 465 ipv6IfIcmpInErrors); 466 if (ire != NULL) 467 ire_refrele(ire); 468 if (mctl_present) 469 freeb(first_mp); 470 return; 471 } else if (mctl_present) { 472 first_mp->b_cont = mp; 473 } else { 474 first_mp = mp; 475 } 476 ip6i = (ip6i_t *)mp->b_rptr; 477 ip6i->ip6i_flags = IP6I_ATTACH_IF; 478 ip6i->ip6i_ifindex = 479 ill->ill_phyint->phyint_ifindex; 480 } 481 if (ire != NULL) 482 ire_refrele(ire); 483 } 484 485 if (!mctl_present) { 486 /* 487 * This packet should go out the same way as it 488 * came in i.e in clear. To make sure that global 489 * policy will not be applied to this in ip_wput, 490 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 491 */ 492 ASSERT(first_mp == mp); 493 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 494 if (first_mp == NULL) { 495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 496 freemsg(mp); 497 return; 498 } 499 ii = (ipsec_in_t *)first_mp->b_rptr; 500 501 /* This is not a secure packet */ 502 ii->ipsec_in_secure = B_FALSE; 503 first_mp->b_cont = mp; 504 } 505 ii->ipsec_in_zoneid = zoneid; 506 ASSERT(zoneid != ALL_ZONES); 507 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 508 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 509 return; 510 } 511 put(WR(q), first_mp); 512 return; 513 514 case ICMP6_ECHO_REPLY: 515 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 516 break; 517 518 case ND_ROUTER_SOLICIT: 519 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 520 break; 521 522 case ND_ROUTER_ADVERT: 523 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 524 break; 525 526 case ND_NEIGHBOR_SOLICIT: 527 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 528 if (mctl_present) 529 freeb(first_mp); 530 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 531 ndp_input(ill, mp, dl_mp); 532 return; 533 534 case ND_NEIGHBOR_ADVERT: 535 BUMP_MIB(ill->ill_icmp6_mib, 536 ipv6IfIcmpInNeighborAdvertisements); 537 if (mctl_present) 538 freeb(first_mp); 539 /* XXX may wish to pass first_mp up to ndp_input someday. */ 540 ndp_input(ill, mp, dl_mp); 541 return; 542 543 case ND_REDIRECT: { 544 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 545 546 if (ipst->ips_ipv6_ignore_redirect) 547 break; 548 549 /* 550 * As there is no upper client to deliver, we don't 551 * need the first_mp any more. 552 */ 553 if (mctl_present) 554 freeb(first_mp); 555 if (!pullupmsg(mp, -1)) { 556 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 557 break; 558 } 559 icmp_redirect_v6(q, mp, ill); 560 return; 561 } 562 563 /* 564 * The next three icmp messages will be handled by MLD. 565 * Pass all valid MLD packets up to any process(es) 566 * listening on a raw ICMP socket. MLD messages are 567 * freed by mld_input function. 568 */ 569 case MLD_LISTENER_QUERY: 570 case MLD_LISTENER_REPORT: 571 case MLD_LISTENER_REDUCTION: 572 if (mctl_present) 573 freeb(first_mp); 574 mld_input(q, mp, ill); 575 return; 576 default: 577 break; 578 } 579 if (interested) { 580 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 581 mctl_present, zoneid); 582 } else { 583 freemsg(first_mp); 584 } 585 } 586 587 /* 588 * Process received IPv6 ICMP Packet too big. 589 * After updating any IRE it does the fanout to any matching transport streams. 590 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
591 */ 592 /* ARGSUSED */ 593 static void 594 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 595 boolean_t mctl_present, zoneid_t zoneid) 596 { 597 ip6_t *ip6h; 598 ip6_t *inner_ip6h; 599 icmp6_t *icmp6; 600 uint16_t hdr_length; 601 uint32_t mtu; 602 ire_t *ire, *first_ire; 603 mblk_t *first_mp; 604 ip_stack_t *ipst = ill->ill_ipst; 605 606 first_mp = mp; 607 if (mctl_present) 608 mp = first_mp->b_cont; 609 /* 610 * We must have exclusive use of the mblk to update the MTU 611 * in the packet. 612 * If not, we copy it. 613 * 614 * If there's an M_CTL present, we know that allocated first_mp 615 * earlier in this function, so we know first_mp has refcnt of one. 616 */ 617 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 618 if (mp->b_datap->db_ref > 1) { 619 mblk_t *mp1; 620 621 mp1 = copymsg(mp); 622 freemsg(mp); 623 if (mp1 == NULL) { 624 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 625 if (mctl_present) 626 freeb(first_mp); 627 return; 628 } 629 mp = mp1; 630 if (mctl_present) 631 first_mp->b_cont = mp; 632 else 633 first_mp = mp; 634 } 635 ip6h = (ip6_t *)mp->b_rptr; 636 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 637 hdr_length = ip_hdr_length_v6(mp, ip6h); 638 else 639 hdr_length = IPV6_HDR_LEN; 640 641 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 642 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 643 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 644 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 645 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 647 freemsg(first_mp); 648 return; 649 } 650 ip6h = (ip6_t *)mp->b_rptr; 651 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 652 inner_ip6h = (ip6_t *)&icmp6[1]; 653 } 654 655 /* 656 * For link local destinations matching simply on IRE type is not 657 * sufficient. Same link local addresses for different ILL's is 658 * possible. 
659 */ 660 661 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 662 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 663 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 664 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 665 666 if (first_ire == NULL) { 667 if (ip_debug > 2) { 668 /* ip1dbg */ 669 pr_addr_dbg("icmp_inbound_too_big_v6:" 670 "no ire for dst %s\n", AF_INET6, 671 &inner_ip6h->ip6_dst); 672 } 673 freemsg(first_mp); 674 return; 675 } 676 677 mtu = ntohl(icmp6->icmp6_mtu); 678 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 679 for (ire = first_ire; ire != NULL && 680 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 681 ire = ire->ire_next) { 682 mutex_enter(&ire->ire_lock); 683 if (mtu < IPV6_MIN_MTU) { 684 ip1dbg(("Received mtu less than IPv6 " 685 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 686 mtu = IPV6_MIN_MTU; 687 /* 688 * If an mtu less than IPv6 min mtu is received, 689 * we must include a fragment header in 690 * subsequent packets. 691 */ 692 ire->ire_frag_flag |= IPH_FRAG_HDR; 693 } 694 ip1dbg(("Received mtu from router: %d\n", mtu)); 695 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 696 /* Record the new max frag size for the ULP. */ 697 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 698 /* 699 * If we need a fragment header in every packet 700 * (above case or multirouting), make sure the 701 * ULP takes it into account when computing the 702 * payload size. 
703 */ 704 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 705 sizeof (ip6_frag_t)); 706 } else { 707 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 708 } 709 mutex_exit(&ire->ire_lock); 710 } 711 rw_exit(&first_ire->ire_bucket->irb_lock); 712 ire_refrele(first_ire); 713 } else { 714 irb_t *irb = NULL; 715 /* 716 * for non-link local destinations we match only on the IRE type 717 */ 718 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 719 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 720 ipst); 721 if (ire == NULL) { 722 if (ip_debug > 2) { 723 /* ip1dbg */ 724 pr_addr_dbg("icmp_inbound_too_big_v6:" 725 "no ire for dst %s\n", 726 AF_INET6, &inner_ip6h->ip6_dst); 727 } 728 freemsg(first_mp); 729 return; 730 } 731 irb = ire->ire_bucket; 732 ire_refrele(ire); 733 rw_enter(&irb->irb_lock, RW_READER); 734 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 735 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 736 &inner_ip6h->ip6_dst)) { 737 mtu = ntohl(icmp6->icmp6_mtu); 738 mutex_enter(&ire->ire_lock); 739 if (mtu < IPV6_MIN_MTU) { 740 ip1dbg(("Received mtu less than IPv6" 741 "min mtu %d: %d\n", 742 IPV6_MIN_MTU, mtu)); 743 mtu = IPV6_MIN_MTU; 744 /* 745 * If an mtu less than IPv6 min mtu is 746 * received, we must include a fragment 747 * header in subsequent packets. 748 */ 749 ire->ire_frag_flag |= IPH_FRAG_HDR; 750 } 751 752 ip1dbg(("Received mtu from router: %d\n", mtu)); 753 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 754 /* Record the new max frag size for the ULP. */ 755 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 756 /* 757 * If we need a fragment header in 758 * every packet (above case or 759 * multirouting), make sure the ULP 760 * takes it into account when computing 761 * the payload size. 
762 */ 763 icmp6->icmp6_mtu = 764 htonl(ire->ire_max_frag - 765 sizeof (ip6_frag_t)); 766 } else { 767 icmp6->icmp6_mtu = 768 htonl(ire->ire_max_frag); 769 } 770 mutex_exit(&ire->ire_lock); 771 } 772 } 773 rw_exit(&irb->irb_lock); 774 } 775 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 776 mctl_present, zoneid); 777 } 778 779 /* 780 * Fanout received ICMPv6 error packets to the transports. 781 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 782 */ 783 void 784 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 785 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 786 { 787 uint16_t *up; /* Pointer to ports in ULP header */ 788 uint32_t ports; /* reversed ports for fanout */ 789 ip6_t rip6h; /* With reversed addresses */ 790 uint16_t hdr_length; 791 uint8_t *nexthdrp; 792 uint8_t nexthdr; 793 mblk_t *first_mp; 794 ipsec_in_t *ii; 795 tcpha_t *tcpha; 796 conn_t *connp; 797 ip_stack_t *ipst = ill->ill_ipst; 798 799 first_mp = mp; 800 if (mctl_present) { 801 mp = first_mp->b_cont; 802 ASSERT(mp != NULL); 803 804 ii = (ipsec_in_t *)first_mp->b_rptr; 805 ASSERT(ii->ipsec_in_type == IPSEC_IN); 806 } else { 807 ii = NULL; 808 } 809 810 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 811 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 812 813 /* 814 * Need to pullup everything in order to use 815 * ip_hdr_length_nexthdr_v6() 816 */ 817 if (mp->b_cont != NULL) { 818 if (!pullupmsg(mp, -1)) { 819 ip1dbg(("icmp_inbound_error_fanout_v6: " 820 "pullupmsg failed\n")); 821 goto drop_pkt; 822 } 823 ip6h = (ip6_t *)mp->b_rptr; 824 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 825 } 826 827 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 828 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 829 goto drop_pkt; 830 831 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 832 goto drop_pkt; 833 nexthdr = *nexthdrp; 834 835 /* Set message type, must be done after pullups */ 836 
mp->b_datap->db_type = M_CTL; 837 838 /* Try to pass the ICMP message to clients who need it */ 839 switch (nexthdr) { 840 case IPPROTO_UDP: { 841 /* 842 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 843 * UDP header to get the port information. 844 */ 845 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 846 mp->b_wptr) { 847 break; 848 } 849 /* 850 * Attempt to find a client stream based on port. 851 * Note that we do a reverse lookup since the header is 852 * in the form we sent it out. 853 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 854 * and we only set the src and dst addresses and nexthdr. 855 */ 856 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 857 rip6h.ip6_src = ip6h->ip6_dst; 858 rip6h.ip6_dst = ip6h->ip6_src; 859 rip6h.ip6_nxt = nexthdr; 860 ((uint16_t *)&ports)[0] = up[1]; 861 ((uint16_t *)&ports)[1] = up[0]; 862 863 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 864 IP6_NO_IPPOLICY, mctl_present, zoneid); 865 return; 866 } 867 case IPPROTO_TCP: { 868 /* 869 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 870 * the TCP header to get the port information. 871 */ 872 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 873 mp->b_wptr) { 874 break; 875 } 876 877 /* 878 * Attempt to find a client stream based on port. 879 * Note that we do a reverse lookup since the header is 880 * in the form we sent it out. 881 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 882 * we only set the src and dst addresses and nexthdr. 883 */ 884 885 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 886 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 887 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 888 if (connp == NULL) { 889 goto drop_pkt; 890 } 891 892 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 893 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 894 return; 895 896 } 897 case IPPROTO_SCTP: 898 /* 899 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 900 * the SCTP header to get the port information. 
901 */ 902 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 903 mp->b_wptr) { 904 break; 905 } 906 907 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 908 ((uint16_t *)&ports)[0] = up[1]; 909 ((uint16_t *)&ports)[1] = up[0]; 910 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0, 911 mctl_present, IP6_NO_IPPOLICY, zoneid); 912 return; 913 case IPPROTO_ESP: 914 case IPPROTO_AH: { 915 int ipsec_rc; 916 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 917 918 /* 919 * We need a IPSEC_IN in the front to fanout to AH/ESP. 920 * We will re-use the IPSEC_IN if it is already present as 921 * AH/ESP will not affect any fields in the IPSEC_IN for 922 * ICMP errors. If there is no IPSEC_IN, allocate a new 923 * one and attach it in the front. 924 */ 925 if (ii != NULL) { 926 /* 927 * ip_fanout_proto_again converts the ICMP errors 928 * that come back from AH/ESP to M_DATA so that 929 * if it is non-AH/ESP and we do a pullupmsg in 930 * this function, it would work. Convert it back 931 * to M_CTL before we send up as this is a ICMP 932 * error. This could have been generated locally or 933 * by some router. Validate the inner IPSEC 934 * headers. 935 * 936 * NOTE : ill_index is used by ip_fanout_proto_again 937 * to locate the ill. 938 */ 939 ASSERT(ill != NULL); 940 ii->ipsec_in_ill_index = 941 ill->ill_phyint->phyint_ifindex; 942 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 943 first_mp->b_cont->b_datap->db_type = M_CTL; 944 } else { 945 /* 946 * IPSEC_IN is not present. We attach a ipsec_in 947 * message and send up to IPSEC for validating 948 * and removing the IPSEC headers. Clear 949 * ipsec_in_secure so that when we return 950 * from IPSEC, we don't mistakenly think that this 951 * is a secure packet came from the network. 952 * 953 * NOTE : ill_index is used by ip_fanout_proto_again 954 * to locate the ill. 
955 */ 956 ASSERT(first_mp == mp); 957 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 958 ASSERT(ill != NULL); 959 if (first_mp == NULL) { 960 freemsg(mp); 961 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 962 return; 963 } 964 ii = (ipsec_in_t *)first_mp->b_rptr; 965 966 /* This is not a secure packet */ 967 ii->ipsec_in_secure = B_FALSE; 968 first_mp->b_cont = mp; 969 mp->b_datap->db_type = M_CTL; 970 ii->ipsec_in_ill_index = 971 ill->ill_phyint->phyint_ifindex; 972 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 973 } 974 975 if (!ipsec_loaded(ipss)) { 976 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 977 return; 978 } 979 980 if (nexthdr == IPPROTO_ESP) 981 ipsec_rc = ipsecesp_icmp_error(first_mp); 982 else 983 ipsec_rc = ipsecah_icmp_error(first_mp); 984 if (ipsec_rc == IPSEC_STATUS_FAILED) 985 return; 986 987 ip_fanout_proto_again(first_mp, ill, ill, NULL); 988 return; 989 } 990 case IPPROTO_ENCAP: 991 case IPPROTO_IPV6: 992 if ((uint8_t *)ip6h + hdr_length + 993 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 994 sizeof (ip6_t)) > mp->b_wptr) { 995 goto drop_pkt; 996 } 997 998 if (nexthdr == IPPROTO_ENCAP || 999 !IN6_ARE_ADDR_EQUAL( 1000 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1001 &ip6h->ip6_src) || 1002 !IN6_ARE_ADDR_EQUAL( 1003 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1004 &ip6h->ip6_dst)) { 1005 /* 1006 * For tunnels that have used IPsec protection, 1007 * we need to adjust the MTU to take into account 1008 * the IPsec overhead. 1009 */ 1010 if (ii != NULL) 1011 icmp6->icmp6_mtu = htonl( 1012 ntohl(icmp6->icmp6_mtu) - 1013 ipsec_in_extra_length(first_mp)); 1014 } else { 1015 /* 1016 * Self-encapsulated case. As in the ipv4 case, 1017 * we need to strip the 2nd IP header. Since mp 1018 * is already pulled-up, we can simply bcopy 1019 * the 3rd header + data over the 2nd header. 
1020 */ 1021 uint16_t unused_len; 1022 ip6_t *inner_ip6h = (ip6_t *) 1023 ((uchar_t *)ip6h + hdr_length); 1024 1025 /* 1026 * Make sure we don't do recursion more than once. 1027 */ 1028 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1029 &unused_len, &nexthdrp) || 1030 *nexthdrp == IPPROTO_IPV6) { 1031 goto drop_pkt; 1032 } 1033 1034 /* 1035 * We are about to modify the packet. Make a copy if 1036 * someone else has a reference to it. 1037 */ 1038 if (DB_REF(mp) > 1) { 1039 mblk_t *mp1; 1040 uint16_t icmp6_offset; 1041 1042 mp1 = copymsg(mp); 1043 if (mp1 == NULL) { 1044 goto drop_pkt; 1045 } 1046 icmp6_offset = (uint16_t) 1047 ((uchar_t *)icmp6 - mp->b_rptr); 1048 freemsg(mp); 1049 mp = mp1; 1050 1051 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1052 ip6h = (ip6_t *)&icmp6[1]; 1053 inner_ip6h = (ip6_t *) 1054 ((uchar_t *)ip6h + hdr_length); 1055 1056 if (mctl_present) 1057 first_mp->b_cont = mp; 1058 else 1059 first_mp = mp; 1060 } 1061 1062 /* 1063 * Need to set db_type back to M_DATA before 1064 * refeeding mp into this function. 1065 */ 1066 DB_TYPE(mp) = M_DATA; 1067 1068 /* 1069 * Copy the 3rd header + remaining data on top 1070 * of the 2nd header. 1071 */ 1072 bcopy(inner_ip6h, ip6h, 1073 mp->b_wptr - (uchar_t *)inner_ip6h); 1074 1075 /* 1076 * Subtract length of the 2nd header. 1077 */ 1078 mp->b_wptr -= hdr_length; 1079 1080 /* 1081 * Now recurse, and see what I _really_ should be 1082 * doing here. 1083 */ 1084 icmp_inbound_error_fanout_v6(q, first_mp, 1085 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1086 zoneid); 1087 return; 1088 } 1089 /* FALLTHRU */ 1090 default: 1091 /* 1092 * The rip6h header is only used for the lookup and we 1093 * only set the src and dst addresses and nexthdr. 
1094 */ 1095 rip6h.ip6_src = ip6h->ip6_dst; 1096 rip6h.ip6_dst = ip6h->ip6_src; 1097 rip6h.ip6_nxt = nexthdr; 1098 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1099 IP6_NO_IPPOLICY, mctl_present, zoneid); 1100 return; 1101 } 1102 /* NOTREACHED */ 1103 drop_pkt: 1104 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1105 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1106 freemsg(first_mp); 1107 } 1108 1109 /* 1110 * Process received IPv6 ICMP Redirect messages. 1111 */ 1112 /* ARGSUSED */ 1113 static void 1114 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1115 { 1116 ip6_t *ip6h; 1117 uint16_t hdr_length; 1118 nd_redirect_t *rd; 1119 ire_t *ire; 1120 ire_t *prev_ire; 1121 ire_t *redir_ire; 1122 in6_addr_t *src, *dst, *gateway; 1123 nd_opt_hdr_t *opt; 1124 nce_t *nce; 1125 int nce_flags = 0; 1126 int err = 0; 1127 boolean_t redirect_to_router = B_FALSE; 1128 int len; 1129 int optlen; 1130 iulp_t ulp_info = { 0 }; 1131 ill_t *prev_ire_ill; 1132 ipif_t *ipif; 1133 ip_stack_t *ipst = ill->ill_ipst; 1134 1135 ip6h = (ip6_t *)mp->b_rptr; 1136 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1137 hdr_length = ip_hdr_length_v6(mp, ip6h); 1138 else 1139 hdr_length = IPV6_HDR_LEN; 1140 1141 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1142 len = mp->b_wptr - mp->b_rptr - hdr_length; 1143 src = &ip6h->ip6_src; 1144 dst = &rd->nd_rd_dst; 1145 gateway = &rd->nd_rd_target; 1146 1147 /* Verify if it is a valid redirect */ 1148 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1149 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1150 (rd->nd_rd_code != 0) || 1151 (len < sizeof (nd_redirect_t)) || 1152 (IN6_IS_ADDR_V4MAPPED(dst)) || 1153 (IN6_IS_ADDR_MULTICAST(dst))) { 1154 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1155 freemsg(mp); 1156 return; 1157 } 1158 1159 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1160 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1161 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1162 freemsg(mp); 1163 return; 1164 } 1165 1166 if (len > sizeof 
(nd_redirect_t)) { 1167 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1168 len - sizeof (nd_redirect_t))) { 1169 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1170 freemsg(mp); 1171 return; 1172 } 1173 } 1174 1175 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1176 redirect_to_router = B_TRUE; 1177 nce_flags |= NCE_F_ISROUTER; 1178 } 1179 1180 /* ipif will be refreleased afterwards */ 1181 ipif = ipif_get_next_ipif(NULL, ill); 1182 if (ipif == NULL) { 1183 freemsg(mp); 1184 return; 1185 } 1186 1187 /* 1188 * Verify that the IP source address of the redirect is 1189 * the same as the current first-hop router for the specified 1190 * ICMP destination address. 1191 * Also, Make sure we had a route for the dest in question and 1192 * that route was pointing to the old gateway (the source of the 1193 * redirect packet.) 1194 */ 1195 1196 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1197 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1198 MATCH_IRE_DEFAULT, ipst); 1199 1200 /* 1201 * Check that 1202 * the redirect was not from ourselves 1203 * old gateway is still directly reachable 1204 */ 1205 if (prev_ire == NULL || 1206 prev_ire->ire_type == IRE_LOCAL) { 1207 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1208 ipif_refrele(ipif); 1209 goto fail_redirect; 1210 } 1211 prev_ire_ill = ire_to_ill(prev_ire); 1212 ASSERT(prev_ire_ill != NULL); 1213 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1214 nce_flags |= NCE_F_NONUD; 1215 1216 /* 1217 * Should we use the old ULP info to create the new gateway? From 1218 * a user's perspective, we should inherit the info so that it 1219 * is a "smooth" transition. If we do not do that, then new 1220 * connections going thru the new gateway will have no route metrics, 1221 * which is counter-intuitive to user. From a network point of 1222 * view, this may or may not make sense even though the new gateway 1223 * is still directly connected to us so the route metrics should not 1224 * change much. 
1225 * 1226 * But if the old ire_uinfo is not initialized, we do another 1227 * recursive lookup on the dest using the new gateway. There may 1228 * be a route to that. If so, use it to initialize the redirect 1229 * route. 1230 */ 1231 if (prev_ire->ire_uinfo.iulp_set) { 1232 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1233 } else if (redirect_to_router) { 1234 /* 1235 * Only do the following if the redirection is really to 1236 * a router. 1237 */ 1238 ire_t *tmp_ire; 1239 ire_t *sire; 1240 1241 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1242 ALL_ZONES, 0, NULL, 1243 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1244 ipst); 1245 if (sire != NULL) { 1246 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1247 ASSERT(tmp_ire != NULL); 1248 ire_refrele(tmp_ire); 1249 ire_refrele(sire); 1250 } else if (tmp_ire != NULL) { 1251 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1252 sizeof (iulp_t)); 1253 ire_refrele(tmp_ire); 1254 } 1255 } 1256 1257 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1258 opt = (nd_opt_hdr_t *)&rd[1]; 1259 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1260 if (opt != NULL) { 1261 err = ndp_lookup_then_add_v6(ill, 1262 (uchar_t *)&opt[1], /* Link layer address */ 1263 gateway, 1264 &ipv6_all_ones, /* prefix mask */ 1265 &ipv6_all_zeros, /* Mapping mask */ 1266 0, 1267 nce_flags, 1268 ND_STALE, 1269 &nce); 1270 switch (err) { 1271 case 0: 1272 NCE_REFRELE(nce); 1273 break; 1274 case EEXIST: 1275 /* 1276 * Check to see if link layer address has changed and 1277 * process the nce_state accordingly. 
1278 */ 1279 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1280 NCE_REFRELE(nce); 1281 break; 1282 default: 1283 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1284 err)); 1285 ipif_refrele(ipif); 1286 goto fail_redirect; 1287 } 1288 } 1289 if (redirect_to_router) { 1290 /* icmp_redirect_ok_v6() must have already verified this */ 1291 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1292 1293 /* 1294 * Create a Route Association. This will allow us to remember 1295 * a router told us to use the particular gateway. 1296 */ 1297 ire = ire_create_v6( 1298 dst, 1299 &ipv6_all_ones, /* mask */ 1300 &prev_ire->ire_src_addr_v6, /* source addr */ 1301 gateway, /* gateway addr */ 1302 &prev_ire->ire_max_frag, /* max frag */ 1303 NULL, /* no src nce */ 1304 NULL, /* no rfq */ 1305 NULL, /* no stq */ 1306 IRE_HOST, 1307 prev_ire->ire_ipif, 1308 NULL, 1309 0, 1310 0, 1311 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1312 &ulp_info, 1313 NULL, 1314 NULL, 1315 ipst); 1316 } else { 1317 queue_t *stq; 1318 1319 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1320 ? ipif->ipif_rq : ipif->ipif_wq; 1321 1322 /* 1323 * Just create an on link entry, i.e. interface route. 
1324 */ 1325 ire = ire_create_v6( 1326 dst, /* gateway == dst */ 1327 &ipv6_all_ones, /* mask */ 1328 &prev_ire->ire_src_addr_v6, /* source addr */ 1329 &ipv6_all_zeros, /* gateway addr */ 1330 &prev_ire->ire_max_frag, /* max frag */ 1331 NULL, /* no src nce */ 1332 NULL, /* ire rfq */ 1333 stq, /* ire stq */ 1334 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1335 prev_ire->ire_ipif, 1336 &ipv6_all_ones, 1337 0, 1338 0, 1339 (RTF_DYNAMIC | RTF_HOST), 1340 &ulp_info, 1341 NULL, 1342 NULL, 1343 ipst); 1344 } 1345 1346 /* Release reference from earlier ipif_get_next_ipif() */ 1347 ipif_refrele(ipif); 1348 1349 if (ire == NULL) 1350 goto fail_redirect; 1351 1352 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1353 1354 /* tell routing sockets that we received a redirect */ 1355 ip_rts_change_v6(RTM_REDIRECT, 1356 &rd->nd_rd_dst, 1357 &rd->nd_rd_target, 1358 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1359 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1360 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1361 1362 /* 1363 * Delete any existing IRE_HOST type ires for this destination. 1364 * This together with the added IRE has the effect of 1365 * modifying an existing redirect. 
1366 */ 1367 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1368 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1369 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1370 ipst); 1371 1372 ire_refrele(ire); /* Held in ire_add_v6 */ 1373 1374 if (redir_ire != NULL) { 1375 if (redir_ire->ire_flags & RTF_DYNAMIC) 1376 ire_delete(redir_ire); 1377 ire_refrele(redir_ire); 1378 } 1379 } 1380 1381 if (prev_ire->ire_type == IRE_CACHE) 1382 ire_delete(prev_ire); 1383 ire_refrele(prev_ire); 1384 prev_ire = NULL; 1385 1386 fail_redirect: 1387 if (prev_ire != NULL) 1388 ire_refrele(prev_ire); 1389 freemsg(mp); 1390 } 1391 1392 static ill_t * 1393 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1394 { 1395 ill_t *ill; 1396 1397 ASSERT(WR(q) == q); 1398 1399 if (q->q_next != NULL) { 1400 ill = (ill_t *)q->q_ptr; 1401 if (ILL_CAN_LOOKUP(ill)) 1402 ill_refhold(ill); 1403 else 1404 ill = NULL; 1405 } else { 1406 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1407 NULL, NULL, NULL, NULL, NULL, ipst); 1408 } 1409 if (ill == NULL) 1410 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1411 return (ill); 1412 } 1413 1414 /* 1415 * Assigns an appropriate source address to the packet. 1416 * If origdst is one of our IP addresses that use it as the source. 1417 * If the queue is an ill queue then select a source from that ill. 1418 * Otherwise pick a source based on a route lookup back to the origsrc. 1419 * 1420 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1421 */ 1422 static in6_addr_t * 1423 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1424 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1425 { 1426 ill_t *ill; 1427 ire_t *ire; 1428 ipif_t *ipif; 1429 1430 ASSERT(!(wq->q_flag & QREADR)); 1431 if (wq->q_next != NULL) { 1432 ill = (ill_t *)wq->q_ptr; 1433 } else { 1434 ill = NULL; 1435 } 1436 1437 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1438 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1439 ipst); 1440 if (ire != NULL) { 1441 /* Destined to one of our addresses */ 1442 *src = *origdst; 1443 ire_refrele(ire); 1444 return (src); 1445 } 1446 if (ire != NULL) { 1447 ire_refrele(ire); 1448 ire = NULL; 1449 } 1450 if (ill == NULL) { 1451 /* What is the route back to the original source? */ 1452 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1453 NULL, NULL, zoneid, NULL, 1454 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1455 if (ire == NULL) { 1456 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1457 return (NULL); 1458 } 1459 /* 1460 * Does not matter whether we use ire_stq or ire_ipif here. 1461 * Just pick an ill for ICMP replies. 1462 */ 1463 ASSERT(ire->ire_ipif != NULL); 1464 ill = ire->ire_ipif->ipif_ill; 1465 ire_refrele(ire); 1466 } 1467 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1468 IPV6_PREFER_SRC_DEFAULT, zoneid); 1469 if (ipif != NULL) { 1470 *src = ipif->ipif_v6src_addr; 1471 ipif_refrele(ipif); 1472 return (src); 1473 } 1474 /* 1475 * Unusual case - can't find a usable source address to reach the 1476 * original source. Use what in the route to the source. 
1477 */ 1478 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1479 NULL, NULL, zoneid, NULL, 1480 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1481 if (ire == NULL) { 1482 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1483 return (NULL); 1484 } 1485 ASSERT(ire != NULL); 1486 *src = ire->ire_src_addr_v6; 1487 ire_refrele(ire); 1488 return (src); 1489 } 1490 1491 /* 1492 * Build and ship an IPv6 ICMP message using the packet data in mp, 1493 * and the ICMP header pointed to by "stuff". (May be called as 1494 * writer.) 1495 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1496 * verify that an icmp error packet can be sent. 1497 * 1498 * If q is an ill write side queue (which is the case when packets 1499 * arrive from ip_rput) then ip_wput code will ensure that packets to 1500 * link-local destinations are sent out that ill. 1501 * 1502 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1503 * source address (see above function). 1504 */ 1505 static void 1506 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1507 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1508 ip_stack_t *ipst) 1509 { 1510 ip6_t *ip6h; 1511 in6_addr_t v6dst; 1512 size_t len_needed; 1513 size_t msg_len; 1514 mblk_t *mp1; 1515 icmp6_t *icmp6; 1516 ill_t *ill; 1517 in6_addr_t v6src; 1518 mblk_t *ipsec_mp; 1519 ipsec_out_t *io; 1520 1521 ill = ip_queue_to_ill_v6(q, ipst); 1522 if (ill == NULL) { 1523 freemsg(mp); 1524 return; 1525 } 1526 1527 if (mctl_present) { 1528 /* 1529 * If it is : 1530 * 1531 * 1) a IPSEC_OUT, then this is caused by outbound 1532 * datagram originating on this host. IPSEC processing 1533 * may or may not have been done. Refer to comments above 1534 * icmp_inbound_error_fanout for details. 1535 * 1536 * 2) a IPSEC_IN if we are generating a icmp_message 1537 * for an incoming datagram destined for us i.e called 1538 * from ip_fanout_send_icmp. 
1539 */ 1540 ipsec_info_t *in; 1541 1542 ipsec_mp = mp; 1543 mp = ipsec_mp->b_cont; 1544 1545 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1546 ip6h = (ip6_t *)mp->b_rptr; 1547 1548 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1549 in->ipsec_info_type == IPSEC_IN); 1550 1551 if (in->ipsec_info_type == IPSEC_IN) { 1552 /* 1553 * Convert the IPSEC_IN to IPSEC_OUT. 1554 */ 1555 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1557 ill_refrele(ill); 1558 return; 1559 } 1560 } else { 1561 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1562 io = (ipsec_out_t *)in; 1563 /* 1564 * Clear out ipsec_out_proc_begin, so we do a fresh 1565 * ire lookup. 1566 */ 1567 io->ipsec_out_proc_begin = B_FALSE; 1568 } 1569 } else { 1570 /* 1571 * This is in clear. The icmp message we are building 1572 * here should go out in clear. 1573 */ 1574 ipsec_in_t *ii; 1575 ASSERT(mp->b_datap->db_type == M_DATA); 1576 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1577 if (ipsec_mp == NULL) { 1578 freemsg(mp); 1579 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1580 ill_refrele(ill); 1581 return; 1582 } 1583 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1584 1585 /* This is not a secure packet */ 1586 ii->ipsec_in_secure = B_FALSE; 1587 /* 1588 * For trusted extensions using a shared IP address we can 1589 * send using any zoneid. 1590 */ 1591 if (zoneid == ALL_ZONES) 1592 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1593 else 1594 ii->ipsec_in_zoneid = zoneid; 1595 ipsec_mp->b_cont = mp; 1596 ip6h = (ip6_t *)mp->b_rptr; 1597 /* 1598 * Convert the IPSEC_IN to IPSEC_OUT. 
1599 */ 1600 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1601 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1602 ill_refrele(ill); 1603 return; 1604 } 1605 } 1606 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1607 1608 if (v6src_ptr != NULL) { 1609 v6src = *v6src_ptr; 1610 } else { 1611 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1612 &v6src, zoneid, ipst) == NULL) { 1613 freemsg(ipsec_mp); 1614 ill_refrele(ill); 1615 return; 1616 } 1617 } 1618 v6dst = ip6h->ip6_src; 1619 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1620 msg_len = msgdsize(mp); 1621 if (msg_len > len_needed) { 1622 if (!adjmsg(mp, len_needed - msg_len)) { 1623 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1624 freemsg(ipsec_mp); 1625 ill_refrele(ill); 1626 return; 1627 } 1628 msg_len = len_needed; 1629 } 1630 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1631 if (mp1 == NULL) { 1632 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1633 freemsg(ipsec_mp); 1634 ill_refrele(ill); 1635 return; 1636 } 1637 ill_refrele(ill); 1638 mp1->b_cont = mp; 1639 mp = mp1; 1640 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1641 io->ipsec_out_type == IPSEC_OUT); 1642 ipsec_mp->b_cont = mp; 1643 1644 /* 1645 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1646 * node generates be accepted in peace by all on-host destinations. 1647 * If we do NOT assume that all on-host destinations trust 1648 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1649 * (Look for ipsec_out_icmp_loopback). 
1650 */ 1651 io->ipsec_out_icmp_loopback = B_TRUE; 1652 1653 ip6h = (ip6_t *)mp->b_rptr; 1654 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1655 1656 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1657 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1658 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1659 ip6h->ip6_dst = v6dst; 1660 ip6h->ip6_src = v6src; 1661 msg_len += IPV6_HDR_LEN + len; 1662 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1663 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1664 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1665 } 1666 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1667 icmp6 = (icmp6_t *)&ip6h[1]; 1668 bcopy(stuff, (char *)icmp6, len); 1669 /* 1670 * Prepare for checksum by putting icmp length in the icmp 1671 * checksum field. The checksum is calculated in ip_wput_v6. 1672 */ 1673 icmp6->icmp6_cksum = ip6h->ip6_plen; 1674 if (icmp6->icmp6_type == ND_REDIRECT) { 1675 ip6h->ip6_hops = IPV6_MAX_HOPS; 1676 } 1677 /* Send to V6 writeside put routine */ 1678 put(q, ipsec_mp); 1679 } 1680 1681 /* 1682 * Update the output mib when ICMPv6 packets are sent. 
1683 */ 1684 static void 1685 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1686 { 1687 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1688 1689 switch (icmp6->icmp6_type) { 1690 case ICMP6_DST_UNREACH: 1691 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1692 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1694 break; 1695 1696 case ICMP6_TIME_EXCEEDED: 1697 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1698 break; 1699 1700 case ICMP6_PARAM_PROB: 1701 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1702 break; 1703 1704 case ICMP6_PACKET_TOO_BIG: 1705 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1706 break; 1707 1708 case ICMP6_ECHO_REQUEST: 1709 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1710 break; 1711 1712 case ICMP6_ECHO_REPLY: 1713 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1714 break; 1715 1716 case ND_ROUTER_SOLICIT: 1717 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1718 break; 1719 1720 case ND_ROUTER_ADVERT: 1721 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1722 break; 1723 1724 case ND_NEIGHBOR_SOLICIT: 1725 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1726 break; 1727 1728 case ND_NEIGHBOR_ADVERT: 1729 BUMP_MIB(ill->ill_icmp6_mib, 1730 ipv6IfIcmpOutNeighborAdvertisements); 1731 break; 1732 1733 case ND_REDIRECT: 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1735 break; 1736 1737 case MLD_LISTENER_QUERY: 1738 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1739 break; 1740 1741 case MLD_LISTENER_REPORT: 1742 case MLD_V2_LISTENER_REPORT: 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1744 break; 1745 1746 case MLD_LISTENER_REDUCTION: 1747 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1748 break; 1749 } 1750 } 1751 1752 /* 1753 * Check if it is ok to send an ICMPv6 error packet in 1754 * response to the IP packet in 
mp. 1755 * Free the message and return null if no 1756 * ICMP error packet should be sent. 1757 */ 1758 static mblk_t * 1759 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1760 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1761 { 1762 ip6_t *ip6h; 1763 1764 if (!mp) 1765 return (NULL); 1766 1767 ip6h = (ip6_t *)mp->b_rptr; 1768 1769 /* Check if source address uniquely identifies the host */ 1770 1771 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1772 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1773 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1774 freemsg(mp); 1775 return (NULL); 1776 } 1777 1778 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1779 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1780 icmp6_t *icmp6; 1781 1782 if (mp->b_wptr - mp->b_rptr < len_needed) { 1783 if (!pullupmsg(mp, len_needed)) { 1784 ill_t *ill; 1785 1786 ill = ip_queue_to_ill_v6(q, ipst); 1787 if (ill == NULL) { 1788 BUMP_MIB(&ipst->ips_icmp6_mib, 1789 ipv6IfIcmpInErrors); 1790 } else { 1791 BUMP_MIB(ill->ill_icmp6_mib, 1792 ipv6IfIcmpInErrors); 1793 ill_refrele(ill); 1794 } 1795 freemsg(mp); 1796 return (NULL); 1797 } 1798 ip6h = (ip6_t *)mp->b_rptr; 1799 } 1800 icmp6 = (icmp6_t *)&ip6h[1]; 1801 /* Explicitly do not generate errors in response to redirects */ 1802 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1803 icmp6->icmp6_type == ND_REDIRECT) { 1804 freemsg(mp); 1805 return (NULL); 1806 } 1807 } 1808 /* 1809 * Check that the destination is not multicast and that the packet 1810 * was not sent on link layer broadcast or multicast. (Exception 1811 * is Packet too big message as per the draft - when mcast_ok is set.) 1812 */ 1813 if (!mcast_ok && 1814 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1815 freemsg(mp); 1816 return (NULL); 1817 } 1818 if (icmp_err_rate_limit(ipst)) { 1819 /* 1820 * Only send ICMP error packets every so often. 1821 * This should be done on a per port/source basis, 1822 * but for now this will suffice. 
1823 */ 1824 freemsg(mp); 1825 return (NULL); 1826 } 1827 return (mp); 1828 } 1829 1830 /* 1831 * Generate an ICMPv6 redirect message. 1832 * Include target link layer address option if it exits. 1833 * Always include redirect header. 1834 */ 1835 static void 1836 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1837 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1838 { 1839 nd_redirect_t *rd; 1840 nd_opt_rd_hdr_t *rdh; 1841 uchar_t *buf; 1842 nce_t *nce = NULL; 1843 nd_opt_hdr_t *opt; 1844 int len; 1845 int ll_opt_len = 0; 1846 int max_redir_hdr_data_len; 1847 int pkt_len; 1848 in6_addr_t *srcp; 1849 ip_stack_t *ipst = ill->ill_ipst; 1850 1851 /* 1852 * We are called from ip_rput where we could 1853 * not have attached an IPSEC_IN. 1854 */ 1855 ASSERT(mp->b_datap->db_type == M_DATA); 1856 1857 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1858 if (mp == NULL) 1859 return; 1860 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1861 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1862 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1863 ill->ill_phys_addr_length + 7)/8 * 8; 1864 } 1865 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1866 ASSERT(len % 4 == 0); 1867 buf = kmem_alloc(len, KM_NOSLEEP); 1868 if (buf == NULL) { 1869 if (nce != NULL) 1870 NCE_REFRELE(nce); 1871 freemsg(mp); 1872 return; 1873 } 1874 1875 rd = (nd_redirect_t *)buf; 1876 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1877 rd->nd_rd_code = 0; 1878 rd->nd_rd_reserved = 0; 1879 rd->nd_rd_target = *targetp; 1880 rd->nd_rd_dst = *dest; 1881 1882 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1883 if (nce != NULL && ll_opt_len != 0) { 1884 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1885 opt->nd_opt_len = ll_opt_len/8; 1886 bcopy((char *)nce->nce_res_mp->b_rptr + 1887 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1888 ill->ill_phys_addr_length); 1889 } 1890 if (nce != NULL) 1891 NCE_REFRELE(nce); 1892 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1893 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1894 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1895 max_redir_hdr_data_len = 1896 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1897 pkt_len = msgdsize(mp); 1898 /* Make sure mp is 8 byte aligned */ 1899 if (pkt_len > max_redir_hdr_data_len) { 1900 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1901 sizeof (nd_opt_rd_hdr_t))/8; 1902 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1903 } else { 1904 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1905 (void) adjmsg(mp, -(pkt_len % 8)); 1906 } 1907 rdh->nd_opt_rh_reserved1 = 0; 1908 rdh->nd_opt_rh_reserved2 = 0; 1909 /* ipif_v6src_addr contains the link-local source address */ 1910 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1911 if (ill->ill_group != NULL) { 1912 /* 1913 * The receiver of the redirect will verify whether it 1914 * had a route through us (srcp that we will use in 1915 * the redirect) or not. As we load spread even link-locals, 1916 * we don't know which source address the receiver of 1917 * redirect has in its route for communicating with us. 1918 * Thus we randomly choose a source here and finally we 1919 * should get to the right one and it will eventually 1920 * accept the redirect from us. We can't call 1921 * ip_lookup_scope_v6 because we don't have the right 1922 * link-local address here. Thus we randomly choose one. 
1923 */ 1924 int cnt = ill->ill_group->illgrp_ill_count; 1925 1926 ill = ill->ill_group->illgrp_ill; 1927 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1928 while (cnt--) 1929 ill = ill->ill_group_next; 1930 srcp = &ill->ill_ipif->ipif_v6src_addr; 1931 } else { 1932 srcp = &ill->ill_ipif->ipif_v6src_addr; 1933 } 1934 rw_exit(&ipst->ips_ill_g_lock); 1935 /* Redirects sent by router, and router is global zone */ 1936 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1937 kmem_free(buf, len); 1938 } 1939 1940 1941 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1942 void 1943 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1944 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1945 ip_stack_t *ipst) 1946 { 1947 icmp6_t icmp6; 1948 boolean_t mctl_present; 1949 mblk_t *first_mp; 1950 1951 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1952 1953 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1954 if (mp == NULL) { 1955 if (mctl_present) 1956 freeb(first_mp); 1957 return; 1958 } 1959 bzero(&icmp6, sizeof (icmp6_t)); 1960 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1961 icmp6.icmp6_code = code; 1962 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1963 zoneid, ipst); 1964 } 1965 1966 /* 1967 * Generate an ICMP unreachable message. 
1968 */ 1969 void 1970 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1971 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1972 ip_stack_t *ipst) 1973 { 1974 icmp6_t icmp6; 1975 boolean_t mctl_present; 1976 mblk_t *first_mp; 1977 1978 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1979 1980 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1981 if (mp == NULL) { 1982 if (mctl_present) 1983 freeb(first_mp); 1984 return; 1985 } 1986 bzero(&icmp6, sizeof (icmp6_t)); 1987 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1988 icmp6.icmp6_code = code; 1989 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1990 zoneid, ipst); 1991 } 1992 1993 /* 1994 * Generate an ICMP pkt too big message. 1995 */ 1996 static void 1997 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1998 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1999 { 2000 icmp6_t icmp6; 2001 mblk_t *first_mp; 2002 boolean_t mctl_present; 2003 2004 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2005 2006 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2007 if (mp == NULL) { 2008 if (mctl_present) 2009 freeb(first_mp); 2010 return; 2011 } 2012 bzero(&icmp6, sizeof (icmp6_t)); 2013 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2014 icmp6.icmp6_code = 0; 2015 icmp6.icmp6_mtu = htonl(mtu); 2016 2017 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2018 zoneid, ipst); 2019 } 2020 2021 /* 2022 * Generate an ICMP parameter problem message. (May be called as writer.) 2023 * 'offset' is the offset from the beginning of the packet in error. 
2024 */ 2025 static void 2026 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2027 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2028 ip_stack_t *ipst) 2029 { 2030 icmp6_t icmp6; 2031 boolean_t mctl_present; 2032 mblk_t *first_mp; 2033 2034 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2035 2036 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2037 if (mp == NULL) { 2038 if (mctl_present) 2039 freeb(first_mp); 2040 return; 2041 } 2042 bzero((char *)&icmp6, sizeof (icmp6_t)); 2043 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2044 icmp6.icmp6_code = code; 2045 icmp6.icmp6_pptr = htonl(offset); 2046 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2047 zoneid, ipst); 2048 } 2049 2050 /* 2051 * This code will need to take into account the possibility of binding 2052 * to a link local address on a multi-homed host, in which case the 2053 * outgoing interface (from the conn) will need to be used when getting 2054 * an ire for the dst. Going through proper outgoing interface and 2055 * choosing the source address corresponding to the outgoing interface 2056 * is necessary when the destination address is a link-local address and 2057 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2058 * This can happen when active connection is setup; thus ipp pointer 2059 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2060 * pointer is passed as ipp pointer. 
2061 */ 2062 mblk_t * 2063 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2064 { 2065 ssize_t len; 2066 int protocol; 2067 struct T_bind_req *tbr; 2068 sin6_t *sin6; 2069 ipa6_conn_t *ac6; 2070 in6_addr_t *v6srcp; 2071 in6_addr_t *v6dstp; 2072 uint16_t lport; 2073 uint16_t fport; 2074 uchar_t *ucp; 2075 mblk_t *mp1; 2076 boolean_t ire_requested; 2077 boolean_t ipsec_policy_set; 2078 int error = 0; 2079 boolean_t local_bind; 2080 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2081 ipa6_conn_x_t *acx6; 2082 boolean_t verify_dst; 2083 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2084 2085 ASSERT(connp->conn_af_isv6); 2086 len = mp->b_wptr - mp->b_rptr; 2087 if (len < (sizeof (*tbr) + 1)) { 2088 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2089 "ip_bind_v6: bogus msg, len %ld", len); 2090 goto bad_addr; 2091 } 2092 /* Back up and extract the protocol identifier. */ 2093 mp->b_wptr--; 2094 tbr = (struct T_bind_req *)mp->b_rptr; 2095 /* Reset the message type in preparation for shipping it back. */ 2096 mp->b_datap->db_type = M_PCPROTO; 2097 2098 protocol = *mp->b_wptr & 0xFF; 2099 connp->conn_ulp = (uint8_t)protocol; 2100 2101 /* 2102 * Check for a zero length address. This is from a protocol that 2103 * wants to register to receive all packets of its type. 2104 */ 2105 if (tbr->ADDR_length == 0) { 2106 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2107 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2108 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2109 NULL) { 2110 /* 2111 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2112 * Do not allow others to bind to these. 2113 */ 2114 goto bad_addr; 2115 } 2116 2117 /* 2118 * 2119 * The udp module never sends down a zero-length address, 2120 * and allowing this on a labeled system will break MLP 2121 * functionality. 
2122 */ 2123 if (is_system_labeled() && protocol == IPPROTO_UDP) 2124 goto bad_addr; 2125 2126 /* Allow ipsec plumbing */ 2127 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2128 protocol != IPPROTO_ESP) 2129 goto bad_addr; 2130 2131 connp->conn_srcv6 = ipv6_all_zeros; 2132 ipcl_proto_insert_v6(connp, protocol); 2133 2134 tbr->PRIM_type = T_BIND_ACK; 2135 return (mp); 2136 } 2137 2138 /* Extract the address pointer from the message. */ 2139 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2140 tbr->ADDR_length); 2141 if (ucp == NULL) { 2142 ip1dbg(("ip_bind_v6: no address\n")); 2143 goto bad_addr; 2144 } 2145 if (!OK_32PTR(ucp)) { 2146 ip1dbg(("ip_bind_v6: unaligned address\n")); 2147 goto bad_addr; 2148 } 2149 mp1 = mp->b_cont; /* trailing mp if any */ 2150 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2151 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2152 2153 switch (tbr->ADDR_length) { 2154 default: 2155 ip1dbg(("ip_bind_v6: bad address length %d\n", 2156 (int)tbr->ADDR_length)); 2157 goto bad_addr; 2158 2159 case IPV6_ADDR_LEN: 2160 /* Verification of local address only */ 2161 v6srcp = (in6_addr_t *)ucp; 2162 lport = 0; 2163 local_bind = B_TRUE; 2164 break; 2165 2166 case sizeof (sin6_t): 2167 sin6 = (sin6_t *)ucp; 2168 v6srcp = &sin6->sin6_addr; 2169 lport = sin6->sin6_port; 2170 local_bind = B_TRUE; 2171 break; 2172 2173 case sizeof (ipa6_conn_t): 2174 /* 2175 * Verify that both the source and destination addresses 2176 * are valid. 2177 * Note that we allow connect to broadcast and multicast 2178 * addresses when ire_requested is set. Thus the ULP 2179 * has to check for IRE_BROADCAST and multicast. 2180 */ 2181 ac6 = (ipa6_conn_t *)ucp; 2182 v6srcp = &ac6->ac6_laddr; 2183 v6dstp = &ac6->ac6_faddr; 2184 fport = ac6->ac6_fport; 2185 /* For raw socket, the local port is not set. */ 2186 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2187 connp->conn_lport; 2188 local_bind = B_FALSE; 2189 /* Always verify destination reachability. */ 2190 verify_dst = B_TRUE; 2191 break; 2192 2193 case sizeof (ipa6_conn_x_t): 2194 /* 2195 * Verify that the source address is valid. 2196 * Note that we allow connect to broadcast and multicast 2197 * addresses when ire_requested is set. Thus the ULP 2198 * has to check for IRE_BROADCAST and multicast. 2199 */ 2200 acx6 = (ipa6_conn_x_t *)ucp; 2201 ac6 = &acx6->ac6x_conn; 2202 v6srcp = &ac6->ac6_laddr; 2203 v6dstp = &ac6->ac6_faddr; 2204 fport = ac6->ac6_fport; 2205 lport = ac6->ac6_lport; 2206 local_bind = B_FALSE; 2207 /* 2208 * Client that passed ipa6_conn_x_t to us specifies whether to 2209 * verify destination reachability. 2210 */ 2211 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2212 break; 2213 } 2214 if (local_bind) { 2215 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2216 /* Bind to IPv4 address */ 2217 ipaddr_t v4src; 2218 2219 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2220 2221 error = ip_bind_laddr(connp, mp, v4src, lport, 2222 ire_requested, ipsec_policy_set, 2223 tbr->ADDR_length != IPV6_ADDR_LEN); 2224 if (error != 0) 2225 goto bad_addr; 2226 connp->conn_pkt_isv6 = B_FALSE; 2227 } else { 2228 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2229 error = 0; 2230 goto bad_addr; 2231 } 2232 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2233 ire_requested, ipsec_policy_set, 2234 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2235 if (error != 0) 2236 goto bad_addr; 2237 connp->conn_pkt_isv6 = B_TRUE; 2238 } 2239 } else { 2240 /* 2241 * Bind to local and remote address. Local might be 2242 * unspecified in which case it will be extracted from 2243 * ire_src_addr_v6 2244 */ 2245 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2246 /* Connect to IPv4 address */ 2247 ipaddr_t v4src; 2248 ipaddr_t v4dst; 2249 2250 /* Is the source unspecified or mapped? 
*/ 2251 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2252 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2253 ip1dbg(("ip_bind_v6: " 2254 "dst is mapped, but not the src\n")); 2255 goto bad_addr; 2256 } 2257 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2258 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2259 2260 /* 2261 * XXX Fix needed. Need to pass ipsec_policy_set 2262 * instead of B_FALSE. 2263 */ 2264 2265 /* Always verify destination reachability. */ 2266 error = ip_bind_connected(connp, mp, &v4src, lport, 2267 v4dst, fport, ire_requested, ipsec_policy_set, 2268 B_TRUE, B_TRUE); 2269 if (error != 0) 2270 goto bad_addr; 2271 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2272 connp->conn_pkt_isv6 = B_FALSE; 2273 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2274 ip1dbg(("ip_bind_v6: " 2275 "src is mapped, but not the dst\n")); 2276 goto bad_addr; 2277 } else { 2278 error = ip_bind_connected_v6(connp, mp, v6srcp, 2279 lport, v6dstp, ipp, fport, ire_requested, 2280 ipsec_policy_set, B_TRUE, verify_dst); 2281 if (error != 0) 2282 goto bad_addr; 2283 connp->conn_pkt_isv6 = B_TRUE; 2284 } 2285 } 2286 2287 /* Update conn_send and pktversion if v4/v6 changed */ 2288 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2289 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2290 } 2291 /* 2292 * Pass the IPSEC headers size in ire_ipsec_overhead. 2293 * We can't do this in ip_bind_insert_ire because the policy 2294 * may not have been inherited at that point in time and hence 2295 * conn_out_enforce_policy may not be set. 2296 */ 2297 mp1 = mp->b_cont; 2298 if (ire_requested && connp->conn_out_enforce_policy && 2299 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2300 ire_t *ire = (ire_t *)mp1->b_rptr; 2301 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2302 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2303 } 2304 2305 /* Send it home. 
*/ 2306 mp->b_datap->db_type = M_PCPROTO; 2307 tbr->PRIM_type = T_BIND_ACK; 2308 return (mp); 2309 2310 bad_addr: 2311 if (error == EINPROGRESS) 2312 return (NULL); 2313 if (error > 0) 2314 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2315 else 2316 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2317 return (mp); 2318 } 2319 2320 /* 2321 * Here address is verified to be a valid local address. 2322 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2323 * address is also considered a valid local address. 2324 * In the case of a multicast address, however, the 2325 * upper protocol is expected to reset the src address 2326 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2327 * no packets are emitted with multicast address as 2328 * source address. 2329 * The addresses valid for bind are: 2330 * (1) - in6addr_any 2331 * (2) - IP address of an UP interface 2332 * (3) - IP address of a DOWN interface 2333 * (4) - a multicast address. In this case 2334 * the conn will only receive packets destined to 2335 * the specified multicast address. Note: the 2336 * application still has to issue an 2337 * IPV6_JOIN_GROUP socket option. 2338 * 2339 * In all the above cases, the bound address must be valid in the current zone. 2340 * When the address is loopback or multicast, there might be many matching IREs 2341 * so bind has to look up based on the zone. 2342 */ 2343 static int 2344 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2345 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2346 boolean_t fanout_insert) 2347 { 2348 int error = 0; 2349 ire_t *src_ire = NULL; 2350 ipif_t *ipif = NULL; 2351 mblk_t *policy_mp; 2352 zoneid_t zoneid; 2353 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2354 2355 if (ipsec_policy_set) 2356 policy_mp = mp->b_cont; 2357 2358 /* 2359 * If it was previously connected, conn_fully_bound would have 2360 * been set. 
2361 */ 2362 connp->conn_fully_bound = B_FALSE; 2363 2364 zoneid = connp->conn_zoneid; 2365 2366 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2367 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2368 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2369 /* 2370 * If an address other than in6addr_any is requested, 2371 * we verify that it is a valid address for bind 2372 * Note: Following code is in if-else-if form for 2373 * readability compared to a condition check. 2374 */ 2375 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2376 if (IRE_IS_LOCAL(src_ire)) { 2377 /* 2378 * (2) Bind to address of local UP interface 2379 */ 2380 ipif = src_ire->ire_ipif; 2381 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2382 ipif_t *multi_ipif = NULL; 2383 ire_t *save_ire; 2384 /* 2385 * (4) bind to multicast address. 2386 * Fake out the IRE returned to upper 2387 * layer to be a broadcast IRE in 2388 * ip_bind_insert_ire_v6(). 2389 * Pass other information that matches 2390 * the ipif (e.g. the source address). 
2391 * conn_multicast_ill is only used for 2392 * IPv6 packets 2393 */ 2394 mutex_enter(&connp->conn_lock); 2395 if (connp->conn_multicast_ill != NULL) { 2396 (void) ipif_lookup_zoneid( 2397 connp->conn_multicast_ill, zoneid, 0, 2398 &multi_ipif); 2399 } else { 2400 /* 2401 * Look for default like 2402 * ip_wput_v6 2403 */ 2404 multi_ipif = ipif_lookup_group_v6( 2405 &ipv6_unspecified_group, zoneid, ipst); 2406 } 2407 mutex_exit(&connp->conn_lock); 2408 save_ire = src_ire; 2409 src_ire = NULL; 2410 if (multi_ipif == NULL || !ire_requested || 2411 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2412 src_ire = save_ire; 2413 error = EADDRNOTAVAIL; 2414 } else { 2415 ASSERT(src_ire != NULL); 2416 if (save_ire != NULL) 2417 ire_refrele(save_ire); 2418 } 2419 if (multi_ipif != NULL) 2420 ipif_refrele(multi_ipif); 2421 } else { 2422 *mp->b_wptr++ = (char)connp->conn_ulp; 2423 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2424 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2425 ipst); 2426 if (ipif == NULL) { 2427 if (error == EINPROGRESS) { 2428 if (src_ire != NULL) 2429 ire_refrele(src_ire); 2430 return (error); 2431 } 2432 /* 2433 * Not a valid address for bind 2434 */ 2435 error = EADDRNOTAVAIL; 2436 } else { 2437 ipif_refrele(ipif); 2438 } 2439 /* 2440 * Just to keep it consistent with the processing in 2441 * ip_bind_v6(). 2442 */ 2443 mp->b_wptr--; 2444 } 2445 2446 if (error != 0) { 2447 /* Red Alert! Attempting to be a bogon! */ 2448 if (ip_debug > 2) { 2449 /* ip1dbg */ 2450 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2451 " address %s\n", AF_INET6, v6src); 2452 } 2453 goto bad_addr; 2454 } 2455 } 2456 2457 /* 2458 * Allow setting new policies. For example, disconnects come 2459 * down as ipa_t bind. As we would have set conn_policy_cached 2460 * to B_TRUE before, we should set it to B_FALSE, so that policy 2461 * can change after the disconnect. 
2462 */ 2463 connp->conn_policy_cached = B_FALSE; 2464 2465 /* If not fanout_insert this was just an address verification */ 2466 if (fanout_insert) { 2467 /* 2468 * The addresses have been verified. Time to insert in 2469 * the correct fanout list. 2470 */ 2471 connp->conn_srcv6 = *v6src; 2472 connp->conn_remv6 = ipv6_all_zeros; 2473 connp->conn_lport = lport; 2474 connp->conn_fport = 0; 2475 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2476 } 2477 if (error == 0) { 2478 if (ire_requested) { 2479 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2480 ipst)) { 2481 error = -1; 2482 goto bad_addr; 2483 } 2484 } else if (ipsec_policy_set) { 2485 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2486 error = -1; 2487 goto bad_addr; 2488 } 2489 } 2490 } 2491 bad_addr: 2492 if (error != 0) { 2493 if (connp->conn_anon_port) { 2494 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2495 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2496 B_FALSE); 2497 } 2498 connp->conn_mlp_type = mlptSingle; 2499 } 2500 2501 if (src_ire != NULL) 2502 ire_refrele(src_ire); 2503 2504 if (ipsec_policy_set) { 2505 ASSERT(policy_mp != NULL); 2506 freeb(policy_mp); 2507 /* 2508 * As of now assume that nothing else accompanies 2509 * IPSEC_POLICY_SET. 
2510 */ 2511 mp->b_cont = NULL; 2512 } 2513 return (error); 2514 } 2515 2516 /* ARGSUSED */ 2517 static void 2518 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2519 void *dummy_arg) 2520 { 2521 conn_t *connp = NULL; 2522 t_scalar_t prim; 2523 2524 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2525 2526 if (CONN_Q(q)) 2527 connp = Q_TO_CONN(q); 2528 ASSERT(connp != NULL); 2529 2530 prim = ((union T_primitives *)mp->b_rptr)->type; 2531 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2532 2533 if (IPCL_IS_TCP(connp)) { 2534 /* Pass sticky_ipp for scope_id and pktinfo */ 2535 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2536 } else { 2537 /* For UDP and ICMP */ 2538 mp = ip_bind_v6(q, mp, connp, NULL); 2539 } 2540 if (mp != NULL) { 2541 if (IPCL_IS_TCP(connp)) { 2542 CONN_INC_REF(connp); 2543 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2544 connp, SQTAG_TCP_RPUTOTHER); 2545 } else if (IPCL_IS_UDP(connp)) { 2546 udp_resume_bind(connp, mp); 2547 } else { 2548 ASSERT(IPCL_IS_RAWIP(connp)); 2549 rawip_resume_bind(connp, mp); 2550 } 2551 } 2552 } 2553 2554 /* 2555 * Verify that both the source and destination addresses 2556 * are valid. If verify_dst, then destination address must also be reachable, 2557 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2558 * It takes ip6_pkt_t * as one of the arguments to determine correct 2559 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2560 * destination address. Note that parameter ipp is only useful for TCP connect 2561 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2562 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2563 * 2564 */ 2565 static int 2566 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2567 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2568 boolean_t ire_requested, boolean_t ipsec_policy_set, 2569 boolean_t fanout_insert, boolean_t verify_dst) 2570 { 2571 ire_t *src_ire; 2572 ire_t *dst_ire; 2573 int error = 0; 2574 int protocol; 2575 mblk_t *policy_mp; 2576 ire_t *sire = NULL; 2577 ire_t *md_dst_ire = NULL; 2578 ill_t *md_ill = NULL; 2579 ill_t *dst_ill = NULL; 2580 ipif_t *src_ipif = NULL; 2581 zoneid_t zoneid; 2582 boolean_t ill_held = B_FALSE; 2583 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2584 2585 src_ire = dst_ire = NULL; 2586 /* 2587 * NOTE: The protocol is beyond the wptr because that's how 2588 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2589 */ 2590 protocol = *mp->b_wptr & 0xFF; 2591 2592 /* 2593 * If we never got a disconnect before, clear it now. 2594 */ 2595 connp->conn_fully_bound = B_FALSE; 2596 2597 if (ipsec_policy_set) { 2598 policy_mp = mp->b_cont; 2599 } 2600 2601 zoneid = connp->conn_zoneid; 2602 2603 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2604 ipif_t *ipif; 2605 2606 /* 2607 * Use an "emulated" IRE_BROADCAST to tell the transport it 2608 * is a multicast. 2609 * Pass other information that matches 2610 * the ipif (e.g. the source address). 
2611 * 2612 * conn_multicast_ill is only used for IPv6 packets 2613 */ 2614 mutex_enter(&connp->conn_lock); 2615 if (connp->conn_multicast_ill != NULL) { 2616 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2617 zoneid, 0, &ipif); 2618 } else { 2619 /* Look for default like ip_wput_v6 */ 2620 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2621 } 2622 mutex_exit(&connp->conn_lock); 2623 if (ipif == NULL || !ire_requested || 2624 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2625 if (ipif != NULL) 2626 ipif_refrele(ipif); 2627 if (ip_debug > 2) { 2628 /* ip1dbg */ 2629 pr_addr_dbg("ip_bind_connected_v6: bad " 2630 "connected multicast %s\n", AF_INET6, 2631 v6dst); 2632 } 2633 error = ENETUNREACH; 2634 goto bad_addr; 2635 } 2636 if (ipif != NULL) 2637 ipif_refrele(ipif); 2638 } else { 2639 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2640 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2641 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2642 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2643 ipst); 2644 /* 2645 * We also prevent ire's with src address INADDR_ANY to 2646 * be used, which are created temporarily for 2647 * sending out packets from endpoints that have 2648 * conn_unspec_src set. 2649 */ 2650 if (dst_ire == NULL || 2651 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2652 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2653 /* 2654 * When verifying destination reachability, we always 2655 * complain. 2656 * 2657 * When not verifying destination reachability but we 2658 * found an IRE, i.e. the destination is reachable, 2659 * then the other tests still apply and we complain. 
2660 */ 2661 if (verify_dst || (dst_ire != NULL)) { 2662 if (ip_debug > 2) { 2663 /* ip1dbg */ 2664 pr_addr_dbg("ip_bind_connected_v6: bad" 2665 " connected dst %s\n", AF_INET6, 2666 v6dst); 2667 } 2668 if (dst_ire == NULL || 2669 !(dst_ire->ire_type & IRE_HOST)) { 2670 error = ENETUNREACH; 2671 } else { 2672 error = EHOSTUNREACH; 2673 } 2674 goto bad_addr; 2675 } 2676 } 2677 } 2678 2679 /* 2680 * We now know that routing will allow us to reach the destination. 2681 * Check whether Trusted Solaris policy allows communication with this 2682 * host, and pretend that the destination is unreachable if not. 2683 * 2684 * This is never a problem for TCP, since that transport is known to 2685 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2686 * handling. If the remote is unreachable, it will be detected at that 2687 * point, so there's no reason to check it here. 2688 * 2689 * Note that for sendto (and other datagram-oriented friends), this 2690 * check is done as part of the data path label computation instead. 2691 * The check here is just to make non-TCP connect() report the right 2692 * error. 2693 */ 2694 if (dst_ire != NULL && is_system_labeled() && 2695 !IPCL_IS_TCP(connp) && 2696 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2697 connp->conn_mac_exempt, ipst) != 0) { 2698 error = EHOSTUNREACH; 2699 if (ip_debug > 2) { 2700 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2701 AF_INET6, v6dst); 2702 } 2703 goto bad_addr; 2704 } 2705 2706 /* 2707 * If the app does a connect(), it means that it will most likely 2708 * send more than 1 packet to the destination. It makes sense 2709 * to clear the temporary flag. 
2710 */ 2711 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2712 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2713 irb_t *irb = dst_ire->ire_bucket; 2714 2715 rw_enter(&irb->irb_lock, RW_WRITER); 2716 /* 2717 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2718 * the lock in order to guarantee irb_tmp_ire_cnt. 2719 */ 2720 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2721 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2722 irb->irb_tmp_ire_cnt--; 2723 } 2724 rw_exit(&irb->irb_lock); 2725 } 2726 2727 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2728 2729 /* 2730 * See if we should notify ULP about MDT; we do this whether or not 2731 * ire_requested is TRUE, in order to handle active connects; MDT 2732 * eligibility tests for passive connects are handled separately 2733 * through tcp_adapt_ire(). We do this before the source address 2734 * selection, because dst_ire may change after a call to 2735 * ipif_select_source_v6(). This is a best-effort check, as the 2736 * packet for this connection may not actually go through 2737 * dst_ire->ire_stq, and the exact IRE can only be known after 2738 * calling ip_newroute_v6(). This is why we further check on the 2739 * IRE during Multidata packet transmission in tcp_multisend(). 
2740 */ 2741 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2742 dst_ire != NULL && 2743 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2744 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2745 ILL_MDT_CAPABLE(md_ill)) { 2746 md_dst_ire = dst_ire; 2747 IRE_REFHOLD(md_dst_ire); 2748 } 2749 2750 if (dst_ire != NULL && 2751 dst_ire->ire_type == IRE_LOCAL && 2752 dst_ire->ire_zoneid != zoneid && 2753 dst_ire->ire_zoneid != ALL_ZONES) { 2754 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2755 zoneid, 0, NULL, 2756 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2757 MATCH_IRE_RJ_BHOLE, ipst); 2758 if (src_ire == NULL) { 2759 error = EHOSTUNREACH; 2760 goto bad_addr; 2761 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2762 if (!(src_ire->ire_type & IRE_HOST)) 2763 error = ENETUNREACH; 2764 else 2765 error = EHOSTUNREACH; 2766 goto bad_addr; 2767 } 2768 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2769 src_ipif = src_ire->ire_ipif; 2770 ipif_refhold(src_ipif); 2771 *v6src = src_ipif->ipif_v6lcl_addr; 2772 } 2773 ire_refrele(src_ire); 2774 src_ire = NULL; 2775 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2776 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2777 *v6src = sire->ire_src_addr_v6; 2778 ire_refrele(dst_ire); 2779 dst_ire = sire; 2780 sire = NULL; 2781 } else if (dst_ire->ire_type == IRE_CACHE && 2782 (dst_ire->ire_flags & RTF_SETSRC)) { 2783 ASSERT(dst_ire->ire_zoneid == zoneid || 2784 dst_ire->ire_zoneid == ALL_ZONES); 2785 *v6src = dst_ire->ire_src_addr_v6; 2786 } else { 2787 /* 2788 * Pick a source address so that a proper inbound load 2789 * spreading would happen. Use dst_ill specified by the 2790 * app. when socket option or scopeid is set. 
2791 */ 2792 int err; 2793 2794 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2795 uint_t if_index; 2796 2797 /* 2798 * Scope id or IPV6_PKTINFO 2799 */ 2800 2801 if_index = ipp->ipp_ifindex; 2802 dst_ill = ill_lookup_on_ifindex( 2803 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2804 ipst); 2805 if (dst_ill == NULL) { 2806 ip1dbg(("ip_bind_connected_v6:" 2807 " bad ifindex %d\n", if_index)); 2808 error = EADDRNOTAVAIL; 2809 goto bad_addr; 2810 } 2811 ill_held = B_TRUE; 2812 } else if (connp->conn_outgoing_ill != NULL) { 2813 /* 2814 * For IPV6_BOUND_IF socket option, 2815 * conn_outgoing_ill should be set 2816 * already in TCP or UDP/ICMP. 2817 */ 2818 dst_ill = conn_get_held_ill(connp, 2819 &connp->conn_outgoing_ill, &err); 2820 if (err == ILL_LOOKUP_FAILED) { 2821 ip1dbg(("ip_bind_connected_v6:" 2822 "no ill for bound_if\n")); 2823 error = EADDRNOTAVAIL; 2824 goto bad_addr; 2825 } 2826 ill_held = B_TRUE; 2827 } else if (dst_ire->ire_stq != NULL) { 2828 /* No need to hold ill here */ 2829 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2830 } else { 2831 /* No need to hold ill here */ 2832 dst_ill = dst_ire->ire_ipif->ipif_ill; 2833 } 2834 if (!ip6_asp_can_lookup(ipst)) { 2835 *mp->b_wptr++ = (char)protocol; 2836 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2837 ip_bind_connected_resume_v6); 2838 error = EINPROGRESS; 2839 goto refrele_and_quit; 2840 } 2841 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2842 RESTRICT_TO_NONE, connp->conn_src_preferences, 2843 zoneid); 2844 ip6_asp_table_refrele(ipst); 2845 if (src_ipif == NULL) { 2846 pr_addr_dbg("ip_bind_connected_v6: " 2847 "no usable source address for " 2848 "connection to %s\n", AF_INET6, v6dst); 2849 error = EADDRNOTAVAIL; 2850 goto bad_addr; 2851 } 2852 *v6src = src_ipif->ipif_v6lcl_addr; 2853 } 2854 } 2855 2856 /* 2857 * We do ire_route_lookup_v6() here (and not an interface lookup) 2858 * as we assert that v6src should only come from an 2859 * UP interface for hard binding. 
2860 */ 2861 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2862 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2863 2864 /* src_ire must be a local|loopback */ 2865 if (!IRE_IS_LOCAL(src_ire)) { 2866 if (ip_debug > 2) { 2867 /* ip1dbg */ 2868 pr_addr_dbg("ip_bind_connected_v6: bad " 2869 "connected src %s\n", AF_INET6, v6src); 2870 } 2871 error = EADDRNOTAVAIL; 2872 goto bad_addr; 2873 } 2874 2875 /* 2876 * If the source address is a loopback address, the 2877 * destination had best be local or multicast. 2878 * The transports that can't handle multicast will reject 2879 * those addresses. 2880 */ 2881 if (src_ire->ire_type == IRE_LOOPBACK && 2882 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2883 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2884 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2885 error = -1; 2886 goto bad_addr; 2887 } 2888 /* 2889 * Allow setting new policies. For example, disconnects come 2890 * down as ipa_t bind. As we would have set conn_policy_cached 2891 * to B_TRUE before, we should set it to B_FALSE, so that policy 2892 * can change after the disconnect. 2893 */ 2894 connp->conn_policy_cached = B_FALSE; 2895 2896 /* 2897 * The addresses have been verified. Initialize the conn 2898 * before calling the policy as they expect the conns 2899 * initialized. 2900 */ 2901 connp->conn_srcv6 = *v6src; 2902 connp->conn_remv6 = *v6dst; 2903 connp->conn_lport = lport; 2904 connp->conn_fport = fport; 2905 2906 ASSERT(!(ipsec_policy_set && ire_requested)); 2907 if (ire_requested) { 2908 iulp_t *ulp_info = NULL; 2909 2910 /* 2911 * Note that sire will not be NULL if this is an off-link 2912 * connection and there is not cache for that dest yet. 2913 * 2914 * XXX Because of an existing bug, if there are multiple 2915 * default routes, the IRE returned now may not be the actual 2916 * default route used (default routes are chosen in a 2917 * round robin fashion). 
So if the metrics for different 2918 * default routes are different, we may return the wrong 2919 * metrics. This will not be a problem if the existing 2920 * bug is fixed. 2921 */ 2922 if (sire != NULL) 2923 ulp_info = &(sire->ire_uinfo); 2924 2925 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2926 ipst)) { 2927 error = -1; 2928 goto bad_addr; 2929 } 2930 } else if (ipsec_policy_set) { 2931 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2932 error = -1; 2933 goto bad_addr; 2934 } 2935 } 2936 2937 /* 2938 * Cache IPsec policy in this conn. If we have per-socket policy, 2939 * we'll cache that. If we don't, we'll inherit global policy. 2940 * 2941 * We can't insert until the conn reflects the policy. Note that 2942 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2943 * connections where we don't have a policy. This is to prevent 2944 * global policy lookups in the inbound path. 2945 * 2946 * If we insert before we set conn_policy_cached, 2947 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2948 * because global policy cound be non-empty. We normally call 2949 * ipsec_check_policy() for conn_policy_cached connections only if 2950 * conn_in_enforce_policy is set. But in this case, 2951 * conn_policy_cached can get set anytime since we made the 2952 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2953 * is called, which will make the above assumption false. Thus, we 2954 * need to insert after we set conn_policy_cached. 2955 */ 2956 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2957 goto bad_addr; 2958 2959 /* If not fanout_insert this was just an address verification */ 2960 if (fanout_insert) { 2961 /* 2962 * The addresses have been verified. Time to insert in 2963 * the correct fanout list. 2964 */ 2965 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2966 connp->conn_ports, 2967 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2968 } 2969 if (error == 0) { 2970 connp->conn_fully_bound = B_TRUE; 2971 /* 2972 * Our initial checks for MDT have passed; the IRE is not 2973 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2974 * be supporting MDT. Pass the IRE, IPC and ILL into 2975 * ip_mdinfo_return(), which performs further checks 2976 * against them and upon success, returns the MDT info 2977 * mblk which we will attach to the bind acknowledgment. 2978 */ 2979 if (md_dst_ire != NULL) { 2980 mblk_t *mdinfo_mp; 2981 2982 ASSERT(md_ill != NULL); 2983 ASSERT(md_ill->ill_mdt_capab != NULL); 2984 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2985 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2986 linkb(mp, mdinfo_mp); 2987 } 2988 } 2989 bad_addr: 2990 if (ipsec_policy_set) { 2991 ASSERT(policy_mp != NULL); 2992 freeb(policy_mp); 2993 /* 2994 * As of now assume that nothing else accompanies 2995 * IPSEC_POLICY_SET. 2996 */ 2997 mp->b_cont = NULL; 2998 } 2999 refrele_and_quit: 3000 if (src_ire != NULL) 3001 IRE_REFRELE(src_ire); 3002 if (dst_ire != NULL) 3003 IRE_REFRELE(dst_ire); 3004 if (sire != NULL) 3005 IRE_REFRELE(sire); 3006 if (src_ipif != NULL) 3007 ipif_refrele(src_ipif); 3008 if (md_dst_ire != NULL) 3009 IRE_REFRELE(md_dst_ire); 3010 if (ill_held && dst_ill != NULL) 3011 ill_refrele(dst_ill); 3012 return (error); 3013 } 3014 3015 /* 3016 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3017 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3018 */ 3019 /* ARGSUSED4 */ 3020 static boolean_t 3021 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3022 iulp_t *ulp_info, ip_stack_t *ipst) 3023 { 3024 mblk_t *mp1; 3025 ire_t *ret_ire; 3026 3027 mp1 = mp->b_cont; 3028 ASSERT(mp1 != NULL); 3029 3030 if (ire != NULL) { 3031 /* 3032 * mp1 initialized above to IRE_DB_REQ_TYPE 3033 * appended mblk. Its <upper protocol>'s 3034 * job to make sure there is room. 
3035 */ 3036 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3037 return (B_FALSE); 3038 3039 mp1->b_datap->db_type = IRE_DB_TYPE; 3040 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3041 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3042 ret_ire = (ire_t *)mp1->b_rptr; 3043 if (IN6_IS_ADDR_MULTICAST(dst) || 3044 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3045 ret_ire->ire_type = IRE_BROADCAST; 3046 ret_ire->ire_addr_v6 = *dst; 3047 } 3048 if (ulp_info != NULL) { 3049 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3050 sizeof (iulp_t)); 3051 } 3052 ret_ire->ire_mp = mp1; 3053 } else { 3054 /* 3055 * No IRE was found. Remove IRE mblk. 3056 */ 3057 mp->b_cont = mp1->b_cont; 3058 freeb(mp1); 3059 } 3060 return (B_TRUE); 3061 } 3062 3063 /* 3064 * Add an ip6i_t header to the front of the mblk. 3065 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3066 * Returns NULL if allocation fails (and frees original message). 3067 * Used in outgoing path when going through ip_newroute_*v6(). 3068 * Used in incoming path to pass ifindex to transports. 
3069 */ 3070 mblk_t * 3071 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3072 { 3073 mblk_t *mp1; 3074 ip6i_t *ip6i; 3075 ip6_t *ip6h; 3076 3077 ip6h = (ip6_t *)mp->b_rptr; 3078 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3079 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3080 mp->b_datap->db_ref > 1) { 3081 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3082 if (mp1 == NULL) { 3083 freemsg(mp); 3084 return (NULL); 3085 } 3086 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3087 mp1->b_cont = mp; 3088 mp = mp1; 3089 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3090 } 3091 mp->b_rptr = (uchar_t *)ip6i; 3092 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3093 ip6i->ip6i_nxt = IPPROTO_RAW; 3094 if (ill != NULL) { 3095 ip6i->ip6i_flags = IP6I_IFINDEX; 3096 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3097 } else { 3098 ip6i->ip6i_flags = 0; 3099 } 3100 ip6i->ip6i_nexthop = *dst; 3101 return (mp); 3102 } 3103 3104 /* 3105 * Handle protocols with which IP is less intimate. There 3106 * can be more than one stream bound to a particular 3107 * protocol. When this is the case, normally each one gets a copy 3108 * of any incoming packets. 3109 * However, if the packet was tunneled and not multicast we only send to it 3110 * the first match. 3111 * 3112 * Zones notes: 3113 * Packets will be distributed to streams in all zones. This is really only 3114 * useful for ICMPv6 as only applications in the global zone can create raw 3115 * sockets for other protocols. 
3116 */ 3117 static void 3118 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3119 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3120 boolean_t mctl_present, zoneid_t zoneid) 3121 { 3122 queue_t *rq; 3123 mblk_t *mp1, *first_mp1; 3124 in6_addr_t dst = ip6h->ip6_dst; 3125 in6_addr_t src = ip6h->ip6_src; 3126 boolean_t one_only; 3127 mblk_t *first_mp = mp; 3128 boolean_t secure, shared_addr; 3129 conn_t *connp, *first_connp, *next_connp; 3130 connf_t *connfp; 3131 ip_stack_t *ipst = inill->ill_ipst; 3132 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3133 3134 if (mctl_present) { 3135 mp = first_mp->b_cont; 3136 secure = ipsec_in_is_secure(first_mp); 3137 ASSERT(mp != NULL); 3138 } else { 3139 secure = B_FALSE; 3140 } 3141 3142 /* 3143 * If the packet was tunneled and not multicast we only send to it 3144 * the first match. 3145 */ 3146 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3147 !IN6_IS_ADDR_MULTICAST(&dst)); 3148 3149 shared_addr = (zoneid == ALL_ZONES); 3150 if (shared_addr) { 3151 /* 3152 * We don't allow multilevel ports for raw IP, so no need to 3153 * check for that here. 3154 */ 3155 zoneid = tsol_packet_to_zoneid(mp); 3156 } 3157 3158 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3159 mutex_enter(&connfp->connf_lock); 3160 connp = connfp->connf_head; 3161 for (connp = connfp->connf_head; connp != NULL; 3162 connp = connp->conn_next) { 3163 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3164 zoneid) && 3165 (!is_system_labeled() || 3166 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3167 connp))) 3168 break; 3169 } 3170 3171 if (connp == NULL || connp->conn_upq == NULL) { 3172 /* 3173 * No one bound to this port. Is 3174 * there a client that wants all 3175 * unclaimed datagrams? 
3176 */ 3177 mutex_exit(&connfp->connf_lock); 3178 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3179 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3180 nexthdr_offset, mctl_present, zoneid, ipst)) { 3181 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3182 } 3183 3184 return; 3185 } 3186 3187 CONN_INC_REF(connp); 3188 first_connp = connp; 3189 3190 /* 3191 * XXX: Fix the multiple protocol listeners case. We should not 3192 * be walking the conn->next list here. 3193 */ 3194 if (one_only) { 3195 /* 3196 * Only send message to one tunnel driver by immediately 3197 * terminating the loop. 3198 */ 3199 connp = NULL; 3200 } else { 3201 connp = connp->conn_next; 3202 3203 } 3204 for (;;) { 3205 while (connp != NULL) { 3206 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3207 flags, zoneid) && 3208 (!is_system_labeled() || 3209 tsol_receive_local(mp, &dst, IPV6_VERSION, 3210 shared_addr, connp))) 3211 break; 3212 connp = connp->conn_next; 3213 } 3214 3215 /* 3216 * Just copy the data part alone. The mctl part is 3217 * needed just for verifying policy and it is never 3218 * sent up. 3219 */ 3220 if (connp == NULL || connp->conn_upq == NULL || 3221 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3222 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3223 /* 3224 * No more intested clients or memory 3225 * allocation failed 3226 */ 3227 connp = first_connp; 3228 break; 3229 } 3230 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3231 CONN_INC_REF(connp); 3232 mutex_exit(&connfp->connf_lock); 3233 rq = connp->conn_rq; 3234 /* 3235 * For link-local always add ifindex so that transport can set 3236 * sin6_scope_id. Avoid it for ICMP error fanout. 
3237 */ 3238 if ((connp->conn_ip_recvpktinfo || 3239 IN6_IS_ADDR_LINKLOCAL(&src)) && 3240 (flags & IP_FF_IPINFO)) { 3241 /* Add header */ 3242 mp1 = ip_add_info_v6(mp1, inill, &dst); 3243 } 3244 if (mp1 == NULL) { 3245 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3246 } else if (!canputnext(rq)) { 3247 if (flags & IP_FF_RAWIP) { 3248 BUMP_MIB(ill->ill_ip_mib, 3249 rawipIfStatsInOverflows); 3250 } else { 3251 BUMP_MIB(ill->ill_icmp6_mib, 3252 ipv6IfIcmpInOverflows); 3253 } 3254 3255 freemsg(mp1); 3256 } else { 3257 /* 3258 * Don't enforce here if we're a tunnel - let "tun" do 3259 * it instead. 3260 */ 3261 if (!IPCL_IS_IPTUN(connp) && 3262 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3263 secure)) { 3264 first_mp1 = ipsec_check_inbound_policy 3265 (first_mp1, connp, NULL, ip6h, 3266 mctl_present); 3267 } 3268 if (first_mp1 != NULL) { 3269 if (mctl_present) 3270 freeb(first_mp1); 3271 BUMP_MIB(ill->ill_ip_mib, 3272 ipIfStatsHCInDelivers); 3273 (connp->conn_recv)(connp, mp1, NULL); 3274 } 3275 } 3276 mutex_enter(&connfp->connf_lock); 3277 /* Follow the next pointer before releasing the conn. */ 3278 next_connp = connp->conn_next; 3279 CONN_DEC_REF(connp); 3280 connp = next_connp; 3281 } 3282 3283 /* Last one. Send it upstream. */ 3284 mutex_exit(&connfp->connf_lock); 3285 3286 /* Initiate IPPF processing */ 3287 if (IP6_IN_IPP(flags, ipst)) { 3288 uint_t ifindex; 3289 3290 mutex_enter(&ill->ill_lock); 3291 ifindex = ill->ill_phyint->phyint_ifindex; 3292 mutex_exit(&ill->ill_lock); 3293 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3294 if (mp == NULL) { 3295 CONN_DEC_REF(connp); 3296 if (mctl_present) 3297 freeb(first_mp); 3298 return; 3299 } 3300 } 3301 3302 /* 3303 * For link-local always add ifindex so that transport can set 3304 * sin6_scope_id. Avoid it for ICMP error fanout. 
3305 */ 3306 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3307 (flags & IP_FF_IPINFO)) { 3308 /* Add header */ 3309 mp = ip_add_info_v6(mp, inill, &dst); 3310 if (mp == NULL) { 3311 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3312 CONN_DEC_REF(connp); 3313 if (mctl_present) 3314 freeb(first_mp); 3315 return; 3316 } else if (mctl_present) { 3317 first_mp->b_cont = mp; 3318 } else { 3319 first_mp = mp; 3320 } 3321 } 3322 3323 rq = connp->conn_rq; 3324 if (!canputnext(rq)) { 3325 if (flags & IP_FF_RAWIP) { 3326 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3327 } else { 3328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3329 } 3330 3331 freemsg(first_mp); 3332 } else { 3333 if (IPCL_IS_IPTUN(connp)) { 3334 /* 3335 * Tunneled packet. We enforce policy in the tunnel 3336 * module itself. 3337 * 3338 * Send the WHOLE packet up (incl. IPSEC_IN) without 3339 * a policy check. 3340 */ 3341 putnext(rq, first_mp); 3342 CONN_DEC_REF(connp); 3343 return; 3344 } 3345 /* 3346 * Don't enforce here if we're a tunnel - let "tun" do 3347 * it instead. 3348 */ 3349 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3350 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3351 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3352 NULL, ip6h, mctl_present); 3353 if (first_mp == NULL) { 3354 CONN_DEC_REF(connp); 3355 return; 3356 } 3357 } 3358 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3359 (connp->conn_recv)(connp, mp, NULL); 3360 if (mctl_present) 3361 freeb(first_mp); 3362 } 3363 CONN_DEC_REF(connp); 3364 } 3365 3366 /* 3367 * Send an ICMP error after patching up the packet appropriately. Returns 3368 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3369 */ 3370 int 3371 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3372 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3373 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3374 { 3375 ip6_t *ip6h; 3376 mblk_t *first_mp; 3377 boolean_t secure; 3378 unsigned char db_type; 3379 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3380 3381 first_mp = mp; 3382 if (mctl_present) { 3383 mp = mp->b_cont; 3384 secure = ipsec_in_is_secure(first_mp); 3385 ASSERT(mp != NULL); 3386 } else { 3387 /* 3388 * If this is an ICMP error being reported - which goes 3389 * up as M_CTLs, we need to convert them to M_DATA till 3390 * we finish checking with global policy because 3391 * ipsec_check_global_policy() assumes M_DATA as clear 3392 * and M_CTL as secure. 3393 */ 3394 db_type = mp->b_datap->db_type; 3395 mp->b_datap->db_type = M_DATA; 3396 secure = B_FALSE; 3397 } 3398 /* 3399 * We are generating an icmp error for some inbound packet. 3400 * Called from all ip_fanout_(udp, tcp, proto) functions. 3401 * Before we generate an error, check with global policy 3402 * to see whether this is allowed to enter the system. As 3403 * there is no "conn", we are checking with global policy. 
3404 */ 3405 ip6h = (ip6_t *)mp->b_rptr; 3406 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3407 first_mp = ipsec_check_global_policy(first_mp, NULL, 3408 NULL, ip6h, mctl_present, ipst->ips_netstack); 3409 if (first_mp == NULL) 3410 return (0); 3411 } 3412 3413 if (!mctl_present) 3414 mp->b_datap->db_type = db_type; 3415 3416 if (flags & IP_FF_SEND_ICMP) { 3417 if (flags & IP_FF_HDR_COMPLETE) { 3418 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3419 freemsg(first_mp); 3420 return (1); 3421 } 3422 } 3423 switch (icmp_type) { 3424 case ICMP6_DST_UNREACH: 3425 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3426 B_FALSE, B_FALSE, zoneid, ipst); 3427 break; 3428 case ICMP6_PARAM_PROB: 3429 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3430 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3431 break; 3432 default: 3433 #ifdef DEBUG 3434 panic("ip_fanout_send_icmp_v6: wrong type"); 3435 /*NOTREACHED*/ 3436 #else 3437 freemsg(first_mp); 3438 break; 3439 #endif 3440 } 3441 } else { 3442 freemsg(first_mp); 3443 return (0); 3444 } 3445 3446 return (1); 3447 } 3448 3449 3450 /* 3451 * Fanout for TCP packets 3452 * The caller puts <fport, lport> in the ports parameter. 
3453 */ 3454 static void 3455 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3456 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3457 { 3458 mblk_t *first_mp; 3459 boolean_t secure; 3460 conn_t *connp; 3461 tcph_t *tcph; 3462 boolean_t syn_present = B_FALSE; 3463 ip_stack_t *ipst = inill->ill_ipst; 3464 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3465 3466 first_mp = mp; 3467 if (mctl_present) { 3468 mp = first_mp->b_cont; 3469 secure = ipsec_in_is_secure(first_mp); 3470 ASSERT(mp != NULL); 3471 } else { 3472 secure = B_FALSE; 3473 } 3474 3475 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3476 3477 if (connp == NULL || 3478 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3479 /* 3480 * No hard-bound match. Send Reset. 3481 */ 3482 dblk_t *dp = mp->b_datap; 3483 uint32_t ill_index; 3484 3485 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3486 3487 /* Initiate IPPf processing, if needed. */ 3488 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3489 (flags & IP6_NO_IPPOLICY)) { 3490 ill_index = ill->ill_phyint->phyint_ifindex; 3491 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3492 if (first_mp == NULL) { 3493 if (connp != NULL) 3494 CONN_DEC_REF(connp); 3495 return; 3496 } 3497 } 3498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3499 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3500 ipst->ips_netstack->netstack_tcp, connp); 3501 if (connp != NULL) 3502 CONN_DEC_REF(connp); 3503 return; 3504 } 3505 3506 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3507 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3508 if (connp->conn_flags & IPCL_TCP) { 3509 squeue_t *sqp; 3510 3511 /* 3512 * For fused tcp loopback, assign the eager's 3513 * squeue to be that of the active connect's. 
3514 */ 3515 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3516 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3517 !secure && 3518 !IP6_IN_IPP(flags, ipst)) { 3519 ASSERT(Q_TO_CONN(q) != NULL); 3520 sqp = Q_TO_CONN(q)->conn_sqp; 3521 } else { 3522 sqp = IP_SQUEUE_GET(lbolt); 3523 } 3524 3525 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3526 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3527 3528 /* 3529 * db_cksumstuff is unused in the incoming 3530 * path; Thus store the ifindex here. It will 3531 * be cleared in tcp_conn_create_v6(). 3532 */ 3533 DB_CKSUMSTUFF(mp) = 3534 (intptr_t)ill->ill_phyint->phyint_ifindex; 3535 syn_present = B_TRUE; 3536 } 3537 } 3538 3539 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3540 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3541 if ((flags & TH_RST) || (flags & TH_URG)) { 3542 CONN_DEC_REF(connp); 3543 freemsg(first_mp); 3544 return; 3545 } 3546 if (flags & TH_ACK) { 3547 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3548 ipst->ips_netstack->netstack_tcp, connp); 3549 CONN_DEC_REF(connp); 3550 return; 3551 } 3552 3553 CONN_DEC_REF(connp); 3554 freemsg(first_mp); 3555 return; 3556 } 3557 3558 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3559 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3560 NULL, ip6h, mctl_present); 3561 if (first_mp == NULL) { 3562 CONN_DEC_REF(connp); 3563 return; 3564 } 3565 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3566 ASSERT(syn_present); 3567 if (mctl_present) { 3568 ASSERT(first_mp != mp); 3569 first_mp->b_datap->db_struioflag |= 3570 STRUIO_POLICY; 3571 } else { 3572 ASSERT(first_mp == mp); 3573 mp->b_datap->db_struioflag &= 3574 ~STRUIO_EAGER; 3575 mp->b_datap->db_struioflag |= 3576 STRUIO_POLICY; 3577 } 3578 } else { 3579 /* 3580 * Discard first_mp early since we're dealing with a 3581 * fully-connected conn_t and tcp doesn't do policy in 3582 * this case. 
Also, if someone is bound to IPPROTO_TCP 3583 * over raw IP, they don't expect to see a M_CTL. 3584 */ 3585 if (mctl_present) { 3586 freeb(first_mp); 3587 mctl_present = B_FALSE; 3588 } 3589 first_mp = mp; 3590 } 3591 } 3592 3593 /* Initiate IPPF processing */ 3594 if (IP6_IN_IPP(flags, ipst)) { 3595 uint_t ifindex; 3596 3597 mutex_enter(&ill->ill_lock); 3598 ifindex = ill->ill_phyint->phyint_ifindex; 3599 mutex_exit(&ill->ill_lock); 3600 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3601 if (mp == NULL) { 3602 CONN_DEC_REF(connp); 3603 if (mctl_present) { 3604 freeb(first_mp); 3605 } 3606 return; 3607 } else if (mctl_present) { 3608 /* 3609 * ip_add_info_v6 might return a new mp. 3610 */ 3611 ASSERT(first_mp != mp); 3612 first_mp->b_cont = mp; 3613 } else { 3614 first_mp = mp; 3615 } 3616 } 3617 3618 /* 3619 * For link-local always add ifindex so that TCP can bind to that 3620 * interface. Avoid it for ICMP error fanout. 3621 */ 3622 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3623 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3624 (flags & IP_FF_IPINFO))) { 3625 /* Add header */ 3626 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3627 if (mp == NULL) { 3628 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3629 CONN_DEC_REF(connp); 3630 if (mctl_present) 3631 freeb(first_mp); 3632 return; 3633 } else if (mctl_present) { 3634 ASSERT(first_mp != mp); 3635 first_mp->b_cont = mp; 3636 } else { 3637 first_mp = mp; 3638 } 3639 } 3640 3641 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3642 if (IPCL_IS_TCP(connp)) { 3643 (*ip_input_proc)(connp->conn_sqp, first_mp, 3644 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3645 } else { 3646 /* SOCK_RAW, IPPROTO_TCP case */ 3647 (connp->conn_recv)(connp, first_mp, NULL); 3648 CONN_DEC_REF(connp); 3649 } 3650 } 3651 3652 /* 3653 * Fanout for UDP packets. 3654 * The caller puts <fport, lport> in the ports parameter. 3655 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3656 * 3657 * If SO_REUSEADDR is set all multicast and broadcast packets 3658 * will be delivered to all streams bound to the same port. 3659 * 3660 * Zones notes: 3661 * Multicast packets will be distributed to streams in all zones. 3662 */ 3663 static void 3664 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3665 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3666 zoneid_t zoneid) 3667 { 3668 uint32_t dstport, srcport; 3669 in6_addr_t dst; 3670 mblk_t *first_mp; 3671 boolean_t secure; 3672 conn_t *connp; 3673 connf_t *connfp; 3674 conn_t *first_conn; 3675 conn_t *next_conn; 3676 mblk_t *mp1, *first_mp1; 3677 in6_addr_t src; 3678 boolean_t shared_addr; 3679 ip_stack_t *ipst = inill->ill_ipst; 3680 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3681 3682 first_mp = mp; 3683 if (mctl_present) { 3684 mp = first_mp->b_cont; 3685 secure = ipsec_in_is_secure(first_mp); 3686 ASSERT(mp != NULL); 3687 } else { 3688 secure = B_FALSE; 3689 } 3690 3691 /* Extract ports in net byte order */ 3692 dstport = htons(ntohl(ports) & 0xFFFF); 3693 srcport = htons(ntohl(ports) >> 16); 3694 dst = ip6h->ip6_dst; 3695 src = ip6h->ip6_src; 3696 3697 shared_addr = (zoneid == ALL_ZONES); 3698 if (shared_addr) { 3699 /* 3700 * No need to handle exclusive-stack zones since ALL_ZONES 3701 * only applies to the shared stack. 3702 */ 3703 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3704 /* 3705 * If no shared MLP is found, tsol_mlp_findzone returns 3706 * ALL_ZONES. In that case, we assume it's SLP, and 3707 * search for the zone based on the packet label. 3708 * That will also return ALL_ZONES on failure, but 3709 * we never allow conn_zoneid to be set to ALL_ZONES. 3710 */ 3711 if (zoneid == ALL_ZONES) 3712 zoneid = tsol_packet_to_zoneid(mp); 3713 } 3714 3715 /* Attempt to find a client stream based on destination port. 
*/ 3716 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3717 mutex_enter(&connfp->connf_lock); 3718 connp = connfp->connf_head; 3719 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3720 /* 3721 * Not multicast. Send to the one (first) client we find. 3722 */ 3723 while (connp != NULL) { 3724 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3725 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3726 conn_wantpacket_v6(connp, ill, ip6h, 3727 flags, zoneid)) { 3728 break; 3729 } 3730 connp = connp->conn_next; 3731 } 3732 if (connp == NULL || connp->conn_upq == NULL) 3733 goto notfound; 3734 3735 if (is_system_labeled() && 3736 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3737 connp)) 3738 goto notfound; 3739 3740 /* Found a client */ 3741 CONN_INC_REF(connp); 3742 mutex_exit(&connfp->connf_lock); 3743 3744 if (CONN_UDP_FLOWCTLD(connp)) { 3745 freemsg(first_mp); 3746 CONN_DEC_REF(connp); 3747 return; 3748 } 3749 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3750 first_mp = ipsec_check_inbound_policy(first_mp, 3751 connp, NULL, ip6h, mctl_present); 3752 if (first_mp == NULL) { 3753 CONN_DEC_REF(connp); 3754 return; 3755 } 3756 } 3757 /* Initiate IPPF processing */ 3758 if (IP6_IN_IPP(flags, ipst)) { 3759 uint_t ifindex; 3760 3761 mutex_enter(&ill->ill_lock); 3762 ifindex = ill->ill_phyint->phyint_ifindex; 3763 mutex_exit(&ill->ill_lock); 3764 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3765 if (mp == NULL) { 3766 CONN_DEC_REF(connp); 3767 if (mctl_present) 3768 freeb(first_mp); 3769 return; 3770 } 3771 } 3772 /* 3773 * For link-local always add ifindex so that 3774 * transport can set sin6_scope_id. Avoid it for 3775 * ICMP error fanout. 
3776 */ 3777 if ((connp->conn_ip_recvpktinfo || 3778 IN6_IS_ADDR_LINKLOCAL(&src)) && 3779 (flags & IP_FF_IPINFO)) { 3780 /* Add header */ 3781 mp = ip_add_info_v6(mp, inill, &dst); 3782 if (mp == NULL) { 3783 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3784 CONN_DEC_REF(connp); 3785 if (mctl_present) 3786 freeb(first_mp); 3787 return; 3788 } else if (mctl_present) { 3789 first_mp->b_cont = mp; 3790 } else { 3791 first_mp = mp; 3792 } 3793 } 3794 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3795 3796 /* Send it upstream */ 3797 (connp->conn_recv)(connp, mp, NULL); 3798 3799 IP6_STAT(ipst, ip6_udp_fannorm); 3800 CONN_DEC_REF(connp); 3801 if (mctl_present) 3802 freeb(first_mp); 3803 return; 3804 } 3805 3806 while (connp != NULL) { 3807 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3808 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3809 (!is_system_labeled() || 3810 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3811 connp))) 3812 break; 3813 connp = connp->conn_next; 3814 } 3815 3816 if (connp == NULL || connp->conn_upq == NULL) 3817 goto notfound; 3818 3819 first_conn = connp; 3820 3821 CONN_INC_REF(connp); 3822 connp = connp->conn_next; 3823 for (;;) { 3824 while (connp != NULL) { 3825 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3826 src) && conn_wantpacket_v6(connp, ill, ip6h, 3827 flags, zoneid) && 3828 (!is_system_labeled() || 3829 tsol_receive_local(mp, &dst, IPV6_VERSION, 3830 shared_addr, connp))) 3831 break; 3832 connp = connp->conn_next; 3833 } 3834 /* 3835 * Just copy the data part alone. The mctl part is 3836 * needed just for verifying policy and it is never 3837 * sent up. 3838 */ 3839 if (connp == NULL || 3840 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3841 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3842 /* 3843 * No more interested clients or memory 3844 * allocation failed 3845 */ 3846 connp = first_conn; 3847 break; 3848 } 3849 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3850 CONN_INC_REF(connp); 3851 mutex_exit(&connfp->connf_lock); 3852 /* 3853 * For link-local always add ifindex so that transport 3854 * can set sin6_scope_id. Avoid it for ICMP error 3855 * fanout. 3856 */ 3857 if ((connp->conn_ip_recvpktinfo || 3858 IN6_IS_ADDR_LINKLOCAL(&src)) && 3859 (flags & IP_FF_IPINFO)) { 3860 /* Add header */ 3861 mp1 = ip_add_info_v6(mp1, inill, &dst); 3862 } 3863 /* mp1 could have changed */ 3864 if (mctl_present) 3865 first_mp1->b_cont = mp1; 3866 else 3867 first_mp1 = mp1; 3868 if (mp1 == NULL) { 3869 if (mctl_present) 3870 freeb(first_mp1); 3871 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3872 goto next_one; 3873 } 3874 if (CONN_UDP_FLOWCTLD(connp)) { 3875 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3876 freemsg(first_mp1); 3877 goto next_one; 3878 } 3879 3880 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3881 first_mp1 = ipsec_check_inbound_policy 3882 (first_mp1, connp, NULL, ip6h, 3883 mctl_present); 3884 } 3885 if (first_mp1 != NULL) { 3886 if (mctl_present) 3887 freeb(first_mp1); 3888 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3889 3890 /* Send it upstream */ 3891 (connp->conn_recv)(connp, mp1, NULL); 3892 } 3893 next_one: 3894 mutex_enter(&connfp->connf_lock); 3895 /* Follow the next pointer before releasing the conn. */ 3896 next_conn = connp->conn_next; 3897 IP6_STAT(ipst, ip6_udp_fanmb); 3898 CONN_DEC_REF(connp); 3899 connp = next_conn; 3900 } 3901 3902 /* Last one. Send it upstream. 
*/ 3903 mutex_exit(&connfp->connf_lock); 3904 3905 /* Initiate IPPF processing */ 3906 if (IP6_IN_IPP(flags, ipst)) { 3907 uint_t ifindex; 3908 3909 mutex_enter(&ill->ill_lock); 3910 ifindex = ill->ill_phyint->phyint_ifindex; 3911 mutex_exit(&ill->ill_lock); 3912 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3913 if (mp == NULL) { 3914 CONN_DEC_REF(connp); 3915 if (mctl_present) { 3916 freeb(first_mp); 3917 } 3918 return; 3919 } 3920 } 3921 3922 /* 3923 * For link-local always add ifindex so that transport can set 3924 * sin6_scope_id. Avoid it for ICMP error fanout. 3925 */ 3926 if ((connp->conn_ip_recvpktinfo || 3927 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3928 /* Add header */ 3929 mp = ip_add_info_v6(mp, inill, &dst); 3930 if (mp == NULL) { 3931 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3932 CONN_DEC_REF(connp); 3933 if (mctl_present) 3934 freeb(first_mp); 3935 return; 3936 } else if (mctl_present) { 3937 first_mp->b_cont = mp; 3938 } else { 3939 first_mp = mp; 3940 } 3941 } 3942 if (CONN_UDP_FLOWCTLD(connp)) { 3943 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3944 freemsg(mp); 3945 } else { 3946 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3947 first_mp = ipsec_check_inbound_policy(first_mp, 3948 connp, NULL, ip6h, mctl_present); 3949 if (first_mp == NULL) { 3950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3951 CONN_DEC_REF(connp); 3952 return; 3953 } 3954 } 3955 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3956 3957 /* Send it upstream */ 3958 (connp->conn_recv)(connp, mp, NULL); 3959 } 3960 IP6_STAT(ipst, ip6_udp_fanmb); 3961 CONN_DEC_REF(connp); 3962 if (mctl_present) 3963 freeb(first_mp); 3964 return; 3965 3966 notfound: 3967 mutex_exit(&connfp->connf_lock); 3968 /* 3969 * No one bound to this port. Is 3970 * there a client that wants all 3971 * unclaimed datagrams? 
3972 */ 3973 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3974 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3975 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3976 zoneid); 3977 } else { 3978 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3979 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3980 mctl_present, zoneid, ipst)) { 3981 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3982 } 3983 } 3984 } 3985 3986 /* 3987 * int ip_find_hdr_v6() 3988 * 3989 * This routine is used by the upper layer protocols and the IP tunnel 3990 * module to: 3991 * - Set extension header pointers to appropriate locations 3992 * - Determine IPv6 header length and return it 3993 * - Return a pointer to the last nexthdr value 3994 * 3995 * The caller must initialize ipp_fields. 3996 * 3997 * NOTE: If multiple extension headers of the same type are present, 3998 * ip_find_hdr_v6() will set the respective extension header pointers 3999 * to the first one that it encounters in the IPv6 header. It also 4000 * skips fragment headers. This routine deals with malformed packets 4001 * of various sorts in which case the returned length is up to the 4002 * malformed part. 4003 */ 4004 int 4005 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4006 { 4007 uint_t length, ehdrlen; 4008 uint8_t nexthdr; 4009 uint8_t *whereptr, *endptr; 4010 ip6_dest_t *tmpdstopts; 4011 ip6_rthdr_t *tmprthdr; 4012 ip6_hbh_t *tmphopopts; 4013 ip6_frag_t *tmpfraghdr; 4014 4015 length = IPV6_HDR_LEN; 4016 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4017 endptr = mp->b_wptr; 4018 4019 nexthdr = ip6h->ip6_nxt; 4020 while (whereptr < endptr) { 4021 /* Is there enough left for len + nexthdr? 
*/ 4022 if (whereptr + MIN_EHDR_LEN > endptr) 4023 goto done; 4024 4025 switch (nexthdr) { 4026 case IPPROTO_HOPOPTS: 4027 tmphopopts = (ip6_hbh_t *)whereptr; 4028 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4029 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4030 goto done; 4031 nexthdr = tmphopopts->ip6h_nxt; 4032 /* return only 1st hbh */ 4033 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4034 ipp->ipp_fields |= IPPF_HOPOPTS; 4035 ipp->ipp_hopopts = tmphopopts; 4036 ipp->ipp_hopoptslen = ehdrlen; 4037 } 4038 break; 4039 case IPPROTO_DSTOPTS: 4040 tmpdstopts = (ip6_dest_t *)whereptr; 4041 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4042 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4043 goto done; 4044 nexthdr = tmpdstopts->ip6d_nxt; 4045 /* 4046 * ipp_dstopts is set to the destination header after a 4047 * routing header. 4048 * Assume it is a post-rthdr destination header 4049 * and adjust when we find an rthdr. 4050 */ 4051 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4052 ipp->ipp_fields |= IPPF_DSTOPTS; 4053 ipp->ipp_dstopts = tmpdstopts; 4054 ipp->ipp_dstoptslen = ehdrlen; 4055 } 4056 break; 4057 case IPPROTO_ROUTING: 4058 tmprthdr = (ip6_rthdr_t *)whereptr; 4059 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4060 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4061 goto done; 4062 nexthdr = tmprthdr->ip6r_nxt; 4063 /* return only 1st rthdr */ 4064 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4065 ipp->ipp_fields |= IPPF_RTHDR; 4066 ipp->ipp_rthdr = tmprthdr; 4067 ipp->ipp_rthdrlen = ehdrlen; 4068 } 4069 /* 4070 * Make any destination header we've seen be a 4071 * pre-rthdr destination header. 
4072 */ 4073 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4074 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4075 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4076 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4077 ipp->ipp_dstopts = NULL; 4078 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4079 ipp->ipp_dstoptslen = 0; 4080 } 4081 break; 4082 case IPPROTO_FRAGMENT: 4083 tmpfraghdr = (ip6_frag_t *)whereptr; 4084 ehdrlen = sizeof (ip6_frag_t); 4085 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4086 goto done; 4087 nexthdr = tmpfraghdr->ip6f_nxt; 4088 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4089 ipp->ipp_fields |= IPPF_FRAGHDR; 4090 ipp->ipp_fraghdr = tmpfraghdr; 4091 ipp->ipp_fraghdrlen = ehdrlen; 4092 } 4093 break; 4094 case IPPROTO_NONE: 4095 default: 4096 goto done; 4097 } 4098 length += ehdrlen; 4099 whereptr += ehdrlen; 4100 } 4101 done: 4102 if (nexthdrp != NULL) 4103 *nexthdrp = nexthdr; 4104 return (length); 4105 } 4106 4107 int 4108 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4109 { 4110 ire_t *ire; 4111 4112 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4113 ire = ire_lookup_local_v6(zoneid, ipst); 4114 if (ire == NULL) { 4115 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4116 return (1); 4117 } 4118 ip6h->ip6_src = ire->ire_addr_v6; 4119 ire_refrele(ire); 4120 } 4121 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4122 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4123 return (0); 4124 } 4125 4126 /* 4127 * Try to determine where and what are the IPv6 header length and 4128 * pointer to nexthdr value for the upper layer protocol (or an 4129 * unknown next hdr). 4130 * 4131 * Parameters returns a pointer to the nexthdr value; 4132 * Must handle malformed packets of various sorts. 4133 * Function returns failure for malformed cases. 
4134 */ 4135 boolean_t 4136 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4137 uint8_t **nexthdrpp) 4138 { 4139 uint16_t length; 4140 uint_t ehdrlen; 4141 uint8_t *nexthdrp; 4142 uint8_t *whereptr; 4143 uint8_t *endptr; 4144 ip6_dest_t *desthdr; 4145 ip6_rthdr_t *rthdr; 4146 ip6_frag_t *fraghdr; 4147 4148 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4149 length = IPV6_HDR_LEN; 4150 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4151 endptr = mp->b_wptr; 4152 4153 nexthdrp = &ip6h->ip6_nxt; 4154 while (whereptr < endptr) { 4155 /* Is there enough left for len + nexthdr? */ 4156 if (whereptr + MIN_EHDR_LEN > endptr) 4157 break; 4158 4159 switch (*nexthdrp) { 4160 case IPPROTO_HOPOPTS: 4161 case IPPROTO_DSTOPTS: 4162 /* Assumes the headers are identical for hbh and dst */ 4163 desthdr = (ip6_dest_t *)whereptr; 4164 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4165 if ((uchar_t *)desthdr + ehdrlen > endptr) 4166 return (B_FALSE); 4167 nexthdrp = &desthdr->ip6d_nxt; 4168 break; 4169 case IPPROTO_ROUTING: 4170 rthdr = (ip6_rthdr_t *)whereptr; 4171 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4172 if ((uchar_t *)rthdr + ehdrlen > endptr) 4173 return (B_FALSE); 4174 nexthdrp = &rthdr->ip6r_nxt; 4175 break; 4176 case IPPROTO_FRAGMENT: 4177 fraghdr = (ip6_frag_t *)whereptr; 4178 ehdrlen = sizeof (ip6_frag_t); 4179 if ((uchar_t *)&fraghdr[1] > endptr) 4180 return (B_FALSE); 4181 nexthdrp = &fraghdr->ip6f_nxt; 4182 break; 4183 case IPPROTO_NONE: 4184 /* No next header means we're finished */ 4185 default: 4186 *hdr_length_ptr = length; 4187 *nexthdrpp = nexthdrp; 4188 return (B_TRUE); 4189 } 4190 length += ehdrlen; 4191 whereptr += ehdrlen; 4192 *hdr_length_ptr = length; 4193 *nexthdrpp = nexthdrp; 4194 } 4195 switch (*nexthdrp) { 4196 case IPPROTO_HOPOPTS: 4197 case IPPROTO_DSTOPTS: 4198 case IPPROTO_ROUTING: 4199 case IPPROTO_FRAGMENT: 4200 /* 4201 * If any know extension headers are still to be processed, 4202 * the 
packet's malformed (or at least all the IP header(s) are 4203 * not in the same mblk - and that should never happen. 4204 */ 4205 return (B_FALSE); 4206 4207 default: 4208 /* 4209 * If we get here, we know that all of the IP headers were in 4210 * the same mblk, even if the ULP header is in the next mblk. 4211 */ 4212 *hdr_length_ptr = length; 4213 *nexthdrpp = nexthdrp; 4214 return (B_TRUE); 4215 } 4216 } 4217 4218 /* 4219 * Return the length of the IPv6 related headers (including extension headers) 4220 * Returns a length even if the packet is malformed. 4221 */ 4222 int 4223 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4224 { 4225 uint16_t hdr_len; 4226 uint8_t *nexthdrp; 4227 4228 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4229 return (hdr_len); 4230 } 4231 4232 /* 4233 * Select an ill for the packet by considering load spreading across 4234 * a different ill in the group if dst_ill is part of some group. 4235 */ 4236 static ill_t * 4237 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4238 { 4239 ill_t *ill; 4240 4241 /* 4242 * We schedule irrespective of whether the source address is 4243 * INADDR_UNSPECIED or not. 4244 */ 4245 ill = illgrp_scheduler(dst_ill); 4246 if (ill == NULL) 4247 return (NULL); 4248 4249 /* 4250 * For groups with names ip_sioctl_groupname ensures that all 4251 * ills are of same type. For groups without names, ifgrp_insert 4252 * ensures this. 4253 */ 4254 ASSERT(dst_ill->ill_type == ill->ill_type); 4255 4256 return (ill); 4257 } 4258 4259 /* 4260 * IPv6 - 4261 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4262 * to send out a packet to a destination address for which we do not have 4263 * specific routing information. 4264 * 4265 * Handle non-multicast packets. If ill is non-NULL the match is done 4266 * for that ill. 
*
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4305 */ 4306 /* ARGSUSED */ 4307 void 4308 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4309 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4310 { 4311 in6_addr_t v6gw; 4312 in6_addr_t dst; 4313 ire_t *ire = NULL; 4314 ipif_t *src_ipif = NULL; 4315 ill_t *dst_ill = NULL; 4316 ire_t *sire = NULL; 4317 ire_t *save_ire; 4318 ip6_t *ip6h; 4319 int err = 0; 4320 mblk_t *first_mp; 4321 ipsec_out_t *io; 4322 ill_t *attach_ill = NULL; 4323 ushort_t ire_marks = 0; 4324 int match_flags; 4325 boolean_t ip6i_present; 4326 ire_t *first_sire = NULL; 4327 mblk_t *copy_mp = NULL; 4328 mblk_t *xmit_mp = NULL; 4329 in6_addr_t save_dst; 4330 uint32_t multirt_flags = 4331 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4332 boolean_t multirt_is_resolvable; 4333 boolean_t multirt_resolve_next; 4334 boolean_t need_rele = B_FALSE; 4335 boolean_t do_attach_ill = B_FALSE; 4336 boolean_t ip6_asp_table_held = B_FALSE; 4337 tsol_ire_gw_secattr_t *attrp = NULL; 4338 tsol_gcgrp_t *gcgrp = NULL; 4339 tsol_gcgrp_addr_t ga; 4340 4341 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4342 4343 first_mp = mp; 4344 if (mp->b_datap->db_type == M_CTL) { 4345 mp = mp->b_cont; 4346 io = (ipsec_out_t *)first_mp->b_rptr; 4347 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4348 } else { 4349 io = NULL; 4350 } 4351 4352 /* 4353 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4354 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4355 * could be NULL. 4356 * 4357 * This information can appear either in an ip6i_t or an IPSEC_OUT 4358 * message. 4359 */ 4360 ip6h = (ip6_t *)mp->b_rptr; 4361 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4362 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4363 if (!ip6i_present || 4364 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4365 attach_ill = ip_grab_attach_ill(ill, first_mp, 4366 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4367 io->ipsec_out_ill_index), B_TRUE, ipst); 4368 /* Failure case frees things for us. */ 4369 if (attach_ill == NULL) 4370 return; 4371 4372 /* 4373 * Check if we need an ire that will not be 4374 * looked up by anybody else i.e. HIDDEN. 4375 */ 4376 if (ill_is_probeonly(attach_ill)) 4377 ire_marks = IRE_MARK_HIDDEN; 4378 } 4379 } 4380 4381 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4382 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4383 goto icmp_err_ret; 4384 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4385 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4386 goto icmp_err_ret; 4387 } 4388 4389 /* 4390 * If this IRE is created for forwarding or it is not for 4391 * TCP traffic, mark it as temporary. 4392 * 4393 * Is it sufficient just to check the next header?? 4394 */ 4395 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4396 ire_marks |= IRE_MARK_TEMPORARY; 4397 4398 /* 4399 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4400 * chain until it gets the most specific information available. 4401 * For example, we know that there is no IRE_CACHE for this dest, 4402 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4403 * ire_ftable_lookup_v6 will look up the gateway, etc. 4404 */ 4405 4406 if (ill == NULL) { 4407 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4408 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4409 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4410 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4411 match_flags, ipst); 4412 /* 4413 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4414 * in a NULL ill, but the packet could be a neighbor 4415 * solicitation/advertisment and could have a valid attach_ill. 4416 */ 4417 if (attach_ill != NULL) 4418 ill_refrele(attach_ill); 4419 } else { 4420 if (attach_ill != NULL) { 4421 /* 4422 * attach_ill is set only for communicating with 4423 * on-link hosts. 
So, don't look for DEFAULT. 4424 * ip_wput_v6 passes the right ill in this case and 4425 * hence we can assert. 4426 */ 4427 ASSERT(ill == attach_ill); 4428 ill_refrele(attach_ill); 4429 do_attach_ill = B_TRUE; 4430 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4431 } else { 4432 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4433 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4434 } 4435 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4436 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4437 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4438 } 4439 4440 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4441 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4442 4443 /* 4444 * We enter a loop that will be run only once in most cases. 4445 * The loop is re-entered in the case where the destination 4446 * can be reached through multiple RTF_MULTIRT-flagged routes. 4447 * The intention is to compute multiple routes to a single 4448 * destination in a single ip_newroute_v6 call. 4449 * The information is contained in sire->ire_flags. 4450 */ 4451 do { 4452 multirt_resolve_next = B_FALSE; 4453 4454 if (dst_ill != NULL) { 4455 ill_refrele(dst_ill); 4456 dst_ill = NULL; 4457 } 4458 if (src_ipif != NULL) { 4459 ipif_refrele(src_ipif); 4460 src_ipif = NULL; 4461 } 4462 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4463 ip3dbg(("ip_newroute_v6: starting new resolution " 4464 "with first_mp %p, tag %d\n", 4465 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4466 4467 /* 4468 * We check if there are trailing unresolved routes for 4469 * the destination contained in sire. 
4470 */ 4471 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4472 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4473 4474 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4475 "ire %p, sire %p\n", 4476 multirt_is_resolvable, (void *)ire, (void *)sire)); 4477 4478 if (!multirt_is_resolvable) { 4479 /* 4480 * No more multirt routes to resolve; give up 4481 * (all routes resolved or no more resolvable 4482 * routes). 4483 */ 4484 if (ire != NULL) { 4485 ire_refrele(ire); 4486 ire = NULL; 4487 } 4488 } else { 4489 ASSERT(sire != NULL); 4490 ASSERT(ire != NULL); 4491 /* 4492 * We simply use first_sire as a flag that 4493 * indicates if a resolvable multirt route has 4494 * already been found during the preceding 4495 * loops. If it is not the case, we may have 4496 * to send an ICMP error to report that the 4497 * destination is unreachable. We do not 4498 * IRE_REFHOLD first_sire. 4499 */ 4500 if (first_sire == NULL) { 4501 first_sire = sire; 4502 } 4503 } 4504 } 4505 if ((ire == NULL) || (ire == sire)) { 4506 /* 4507 * either ire == NULL (the destination cannot be 4508 * resolved) or ire == sire (the gateway cannot be 4509 * resolved). At this point, there are no more routes 4510 * to resolve for the destination, thus we exit. 4511 */ 4512 if (ip_debug > 3) { 4513 /* ip2dbg */ 4514 pr_addr_dbg("ip_newroute_v6: " 4515 "can't resolve %s\n", AF_INET6, v6dstp); 4516 } 4517 ip3dbg(("ip_newroute_v6: " 4518 "ire %p, sire %p, first_sire %p\n", 4519 (void *)ire, (void *)sire, (void *)first_sire)); 4520 4521 if (sire != NULL) { 4522 ire_refrele(sire); 4523 sire = NULL; 4524 } 4525 4526 if (first_sire != NULL) { 4527 /* 4528 * At least one multirt route has been found 4529 * in the same ip_newroute() call; there is no 4530 * need to report an ICMP error. 4531 * first_sire was not IRE_REFHOLDed. 
4532 */ 4533 MULTIRT_DEBUG_UNTAG(first_mp); 4534 freemsg(first_mp); 4535 return; 4536 } 4537 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4538 RTA_DST, ipst); 4539 goto icmp_err_ret; 4540 } 4541 4542 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4543 4544 /* 4545 * Verify that the returned IRE does not have either the 4546 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4547 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4548 */ 4549 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4550 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4551 goto icmp_err_ret; 4552 4553 /* 4554 * Increment the ire_ob_pkt_count field for ire if it is an 4555 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4556 * increment the same for the parent IRE, sire, if it is some 4557 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4558 */ 4559 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4560 UPDATE_OB_PKT_COUNT(ire); 4561 ire->ire_last_used_time = lbolt; 4562 } 4563 4564 if (sire != NULL) { 4565 mutex_enter(&sire->ire_lock); 4566 v6gw = sire->ire_gateway_addr_v6; 4567 mutex_exit(&sire->ire_lock); 4568 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4569 IRE_INTERFACE)) == 0); 4570 UPDATE_OB_PKT_COUNT(sire); 4571 sire->ire_last_used_time = lbolt; 4572 } else { 4573 v6gw = ipv6_all_zeros; 4574 } 4575 4576 /* 4577 * We have a route to reach the destination. 4578 * 4579 * 1) If the interface is part of ill group, try to get a new 4580 * ill taking load spreading into account. 4581 * 4582 * 2) After selecting the ill, get a source address that might 4583 * create good inbound load spreading and that matches the 4584 * right scope. ipif_select_source_v6 does this for us. 4585 * 4586 * If the application specified the ill (ifindex), we still 4587 * load spread. Only if the packets needs to go out specifically 4588 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4589 * IPV6_BOUND_PIF we don't try to use a different ill for load 4590 * spreading. 4591 */ 4592 if (!do_attach_ill) { 4593 /* 4594 * If the interface belongs to an interface group, 4595 * make sure the next possible interface in the group 4596 * is used. This encourages load spreading among 4597 * peers in an interface group. However, in the case 4598 * of multirouting, load spreading is not used, as we 4599 * actually want to replicate outgoing packets through 4600 * particular interfaces. 4601 * 4602 * Note: While we pick a dst_ill we are really only 4603 * interested in the ill for load spreading. 4604 * The source ipif is determined by source address 4605 * selection below. 4606 */ 4607 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4608 dst_ill = ire->ire_ipif->ipif_ill; 4609 /* For uniformity do a refhold */ 4610 ill_refhold(dst_ill); 4611 } else { 4612 /* 4613 * If we are here trying to create an IRE_CACHE 4614 * for an offlink destination and have the 4615 * IRE_CACHE for the next hop and the latter is 4616 * using virtual IP source address selection i.e 4617 * it's ire->ire_ipif is pointing to a virtual 4618 * network interface (vni) then 4619 * ip_newroute_get_dst_ll() will return the vni 4620 * interface as the dst_ill. Since the vni is 4621 * virtual i.e not associated with any physical 4622 * interface, it cannot be the dst_ill, hence 4623 * in such a case call ip_newroute_get_dst_ll() 4624 * with the stq_ill instead of the ire_ipif ILL. 4625 * The function returns a refheld ill. 
4626 */ 4627 if ((ire->ire_type == IRE_CACHE) && 4628 IS_VNI(ire->ire_ipif->ipif_ill)) 4629 dst_ill = ip_newroute_get_dst_ill_v6( 4630 ire->ire_stq->q_ptr); 4631 else 4632 dst_ill = ip_newroute_get_dst_ill_v6( 4633 ire->ire_ipif->ipif_ill); 4634 } 4635 if (dst_ill == NULL) { 4636 if (ip_debug > 2) { 4637 pr_addr_dbg("ip_newroute_v6 : no dst " 4638 "ill for dst %s\n", 4639 AF_INET6, v6dstp); 4640 } 4641 goto icmp_err_ret; 4642 } else if (dst_ill->ill_group == NULL && ill != NULL && 4643 dst_ill != ill) { 4644 /* 4645 * If "ill" is not part of any group, we should 4646 * have found a route matching "ill" as we 4647 * called ire_ftable_lookup_v6 with 4648 * MATCH_IRE_ILL_GROUP. 4649 * Rather than asserting when there is a 4650 * mismatch, we just drop the packet. 4651 */ 4652 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4653 "dst_ill %s ill %s\n", 4654 dst_ill->ill_name, 4655 ill->ill_name)); 4656 goto icmp_err_ret; 4657 } 4658 } else { 4659 dst_ill = ire->ire_ipif->ipif_ill; 4660 /* For uniformity do refhold */ 4661 ill_refhold(dst_ill); 4662 /* 4663 * We should have found a route matching ill as we 4664 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4665 * Rather than asserting, while there is a mismatch, 4666 * we just drop the packet. 4667 */ 4668 if (dst_ill != ill) { 4669 ip0dbg(("ip_newroute_v6: Packet dropped as " 4670 "IP6I_ATTACH_IF ill is %s, " 4671 "ire->ire_ipif->ipif_ill is %s\n", 4672 ill->ill_name, 4673 dst_ill->ill_name)); 4674 goto icmp_err_ret; 4675 } 4676 } 4677 /* 4678 * Pick a source address which matches the scope of the 4679 * destination address. 4680 * For RTF_SETSRC routes, the source address is imposed by the 4681 * parent ire (sire). 4682 */ 4683 ASSERT(src_ipif == NULL); 4684 if (ire->ire_type == IRE_IF_RESOLVER && 4685 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4686 ip6_asp_can_lookup(ipst)) { 4687 /* 4688 * The ire cache entry we're adding is for the 4689 * gateway itself. 
The source address in this case 4690 * is relative to the gateway's address. 4691 */ 4692 ip6_asp_table_held = B_TRUE; 4693 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4694 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4695 if (src_ipif != NULL) 4696 ire_marks |= IRE_MARK_USESRC_CHECK; 4697 } else { 4698 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4699 /* 4700 * Check that the ipif matching the requested 4701 * source address still exists. 4702 */ 4703 src_ipif = ipif_lookup_addr_v6( 4704 &sire->ire_src_addr_v6, NULL, zoneid, 4705 NULL, NULL, NULL, NULL, ipst); 4706 } 4707 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4708 uint_t restrict_ill = RESTRICT_TO_NONE; 4709 4710 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4711 & IP6I_ATTACH_IF) 4712 restrict_ill = RESTRICT_TO_ILL; 4713 ip6_asp_table_held = B_TRUE; 4714 src_ipif = ipif_select_source_v6(dst_ill, 4715 v6dstp, restrict_ill, 4716 IPV6_PREFER_SRC_DEFAULT, zoneid); 4717 if (src_ipif != NULL) 4718 ire_marks |= IRE_MARK_USESRC_CHECK; 4719 } 4720 } 4721 4722 if (src_ipif == NULL) { 4723 if (ip_debug > 2) { 4724 /* ip1dbg */ 4725 pr_addr_dbg("ip_newroute_v6: no src for " 4726 "dst %s\n, ", AF_INET6, v6dstp); 4727 printf("ip_newroute_v6: interface name %s\n", 4728 dst_ill->ill_name); 4729 } 4730 goto icmp_err_ret; 4731 } 4732 4733 if (ip_debug > 3) { 4734 /* ip2dbg */ 4735 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4736 AF_INET6, &v6gw); 4737 } 4738 ip2dbg(("\tire type %s (%d)\n", 4739 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4740 4741 /* 4742 * At this point in ip_newroute_v6(), ire is either the 4743 * IRE_CACHE of the next-hop gateway for an off-subnet 4744 * destination or an IRE_INTERFACE type that should be used 4745 * to resolve an on-subnet destination or an on-subnet 4746 * next-hop gateway. 4747 * 4748 * In the IRE_CACHE case, we have the following : 4749 * 4750 * 1) src_ipif - used for getting a source address. 
4751 * 4752 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4753 * means packets using this IRE_CACHE will go out on dst_ill. 4754 * 4755 * 3) The IRE sire will point to the prefix that is the longest 4756 * matching route for the destination. These prefix types 4757 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4758 * 4759 * The newly created IRE_CACHE entry for the off-subnet 4760 * destination is tied to both the prefix route and the 4761 * interface route used to resolve the next-hop gateway 4762 * via the ire_phandle and ire_ihandle fields, respectively. 4763 * 4764 * In the IRE_INTERFACE case, we have the following : 4765 * 4766 * 1) src_ipif - used for getting a source address. 4767 * 4768 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4769 * means packets using the IRE_CACHE that we will build 4770 * here will go out on dst_ill. 4771 * 4772 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4773 * to be created will only be tied to the IRE_INTERFACE that 4774 * was derived from the ire_ihandle field. 4775 * 4776 * If sire is non-NULL, it means the destination is off-link 4777 * and we will first create the IRE_CACHE for the gateway. 4778 * Next time through ip_newroute_v6, we will create the 4779 * IRE_CACHE for the final destination as described above. 4780 */ 4781 save_ire = ire; 4782 switch (ire->ire_type) { 4783 case IRE_CACHE: { 4784 ire_t *ipif_ire; 4785 4786 ASSERT(sire != NULL); 4787 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4788 mutex_enter(&ire->ire_lock); 4789 v6gw = ire->ire_gateway_addr_v6; 4790 mutex_exit(&ire->ire_lock); 4791 } 4792 /* 4793 * We need 3 ire's to create a new cache ire for an 4794 * off-link destination from the cache ire of the 4795 * gateway. 4796 * 4797 * 1. The prefix ire 'sire' 4798 * 2. The cache ire of the gateway 'ire' 4799 * 3. The interface ire 'ipif_ire' 4800 * 4801 * We have (1) and (2). We lookup (3) below. 
4802 * 4803 * If there is no interface route to the gateway, 4804 * it is a race condition, where we found the cache 4805 * but the inteface route has been deleted. 4806 */ 4807 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4808 if (ipif_ire == NULL) { 4809 ip1dbg(("ip_newroute_v6:" 4810 "ire_ihandle_lookup_offlink_v6 failed\n")); 4811 goto icmp_err_ret; 4812 } 4813 /* 4814 * Assume DL_UNITDATA_REQ is same for all physical 4815 * interfaces in the ifgrp. If it isn't, this code will 4816 * have to be seriously rewhacked to allow the 4817 * fastpath probing (such that I cache the link 4818 * header in the IRE_CACHE) to work over ifgrps. 4819 * We have what we need to build an IRE_CACHE. 4820 */ 4821 /* 4822 * Note: the new ire inherits RTF_SETSRC 4823 * and RTF_MULTIRT to propagate these flags from prefix 4824 * to cache. 4825 */ 4826 4827 /* 4828 * Check cached gateway IRE for any security 4829 * attributes; if found, associate the gateway 4830 * credentials group to the destination IRE. 
4831 */ 4832 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4833 mutex_enter(&attrp->igsa_lock); 4834 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4835 GCGRP_REFHOLD(gcgrp); 4836 mutex_exit(&attrp->igsa_lock); 4837 } 4838 4839 ire = ire_create_v6( 4840 v6dstp, /* dest address */ 4841 &ipv6_all_ones, /* mask */ 4842 &src_ipif->ipif_v6src_addr, /* source address */ 4843 &v6gw, /* gateway address */ 4844 &save_ire->ire_max_frag, 4845 NULL, /* src nce */ 4846 dst_ill->ill_rq, /* recv-from queue */ 4847 dst_ill->ill_wq, /* send-to queue */ 4848 IRE_CACHE, 4849 src_ipif, 4850 &sire->ire_mask_v6, /* Parent mask */ 4851 sire->ire_phandle, /* Parent handle */ 4852 ipif_ire->ire_ihandle, /* Interface handle */ 4853 sire->ire_flags & /* flags if any */ 4854 (RTF_SETSRC | RTF_MULTIRT), 4855 &(sire->ire_uinfo), 4856 NULL, 4857 gcgrp, 4858 ipst); 4859 4860 if (ire == NULL) { 4861 if (gcgrp != NULL) { 4862 GCGRP_REFRELE(gcgrp); 4863 gcgrp = NULL; 4864 } 4865 ire_refrele(save_ire); 4866 ire_refrele(ipif_ire); 4867 break; 4868 } 4869 4870 /* reference now held by IRE */ 4871 gcgrp = NULL; 4872 4873 ire->ire_marks |= ire_marks; 4874 4875 /* 4876 * Prevent sire and ipif_ire from getting deleted. The 4877 * newly created ire is tied to both of them via the 4878 * phandle and ihandle respectively. 4879 */ 4880 IRB_REFHOLD(sire->ire_bucket); 4881 /* Has it been removed already ? */ 4882 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4883 IRB_REFRELE(sire->ire_bucket); 4884 ire_refrele(ipif_ire); 4885 ire_refrele(save_ire); 4886 break; 4887 } 4888 4889 IRB_REFHOLD(ipif_ire->ire_bucket); 4890 /* Has it been removed already ? 
*/ 4891 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4892 IRB_REFRELE(ipif_ire->ire_bucket); 4893 IRB_REFRELE(sire->ire_bucket); 4894 ire_refrele(ipif_ire); 4895 ire_refrele(save_ire); 4896 break; 4897 } 4898 4899 xmit_mp = first_mp; 4900 if (ire->ire_flags & RTF_MULTIRT) { 4901 copy_mp = copymsg(first_mp); 4902 if (copy_mp != NULL) { 4903 xmit_mp = copy_mp; 4904 MULTIRT_DEBUG_TAG(first_mp); 4905 } 4906 } 4907 ire_add_then_send(q, ire, xmit_mp); 4908 if (ip6_asp_table_held) { 4909 ip6_asp_table_refrele(ipst); 4910 ip6_asp_table_held = B_FALSE; 4911 } 4912 ire_refrele(save_ire); 4913 4914 /* Assert that sire is not deleted yet. */ 4915 ASSERT(sire->ire_ptpn != NULL); 4916 IRB_REFRELE(sire->ire_bucket); 4917 4918 /* Assert that ipif_ire is not deleted yet. */ 4919 ASSERT(ipif_ire->ire_ptpn != NULL); 4920 IRB_REFRELE(ipif_ire->ire_bucket); 4921 ire_refrele(ipif_ire); 4922 4923 if (copy_mp != NULL) { 4924 /* 4925 * Search for the next unresolved 4926 * multirt route. 4927 */ 4928 copy_mp = NULL; 4929 ipif_ire = NULL; 4930 ire = NULL; 4931 /* re-enter the loop */ 4932 multirt_resolve_next = B_TRUE; 4933 continue; 4934 } 4935 ire_refrele(sire); 4936 ill_refrele(dst_ill); 4937 ipif_refrele(src_ipif); 4938 return; 4939 } 4940 case IRE_IF_NORESOLVER: 4941 /* 4942 * We have what we need to build an IRE_CACHE. 4943 * 4944 * handle the Gated case, where we create 4945 * a NORESOLVER route for loopback. 4946 */ 4947 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4948 break; 4949 /* 4950 * TSol note: We are creating the ire cache for the 4951 * destination 'dst'. If 'dst' is offlink, going 4952 * through the first hop 'gw', the security attributes 4953 * of 'dst' must be set to point to the gateway 4954 * credentials of gateway 'gw'. If 'dst' is onlink, it 4955 * is possible that 'dst' is a potential gateway that is 4956 * referenced by some route that has some security 4957 * attributes. 
Thus in the former case, we need to do a 4958 * gcgrp_lookup of 'gw' while in the latter case we 4959 * need to do gcgrp_lookup of 'dst' itself. 4960 */ 4961 ga.ga_af = AF_INET6; 4962 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4963 ga.ga_addr = v6gw; 4964 else 4965 ga.ga_addr = *v6dstp; 4966 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4967 4968 /* 4969 * Note: the new ire inherits sire flags RTF_SETSRC 4970 * and RTF_MULTIRT to propagate those rules from prefix 4971 * to cache. 4972 */ 4973 ire = ire_create_v6( 4974 v6dstp, /* dest address */ 4975 &ipv6_all_ones, /* mask */ 4976 &src_ipif->ipif_v6src_addr, /* source address */ 4977 &v6gw, /* gateway address */ 4978 &save_ire->ire_max_frag, 4979 NULL, /* no src nce */ 4980 dst_ill->ill_rq, /* recv-from queue */ 4981 dst_ill->ill_wq, /* send-to queue */ 4982 IRE_CACHE, 4983 src_ipif, 4984 &save_ire->ire_mask_v6, /* Parent mask */ 4985 (sire != NULL) ? /* Parent handle */ 4986 sire->ire_phandle : 0, 4987 save_ire->ire_ihandle, /* Interface handle */ 4988 (sire != NULL) ? /* flags if any */ 4989 sire->ire_flags & 4990 (RTF_SETSRC | RTF_MULTIRT) : 0, 4991 &(save_ire->ire_uinfo), 4992 NULL, 4993 gcgrp, 4994 ipst); 4995 4996 if (ire == NULL) { 4997 if (gcgrp != NULL) { 4998 GCGRP_REFRELE(gcgrp); 4999 gcgrp = NULL; 5000 } 5001 ire_refrele(save_ire); 5002 break; 5003 } 5004 5005 /* reference now held by IRE */ 5006 gcgrp = NULL; 5007 5008 ire->ire_marks |= ire_marks; 5009 5010 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5011 dst = v6gw; 5012 else 5013 dst = *v6dstp; 5014 err = ndp_noresolver(dst_ill, &dst); 5015 if (err != 0) { 5016 ire_refrele(save_ire); 5017 break; 5018 } 5019 5020 /* Prevent save_ire from getting deleted */ 5021 IRB_REFHOLD(save_ire->ire_bucket); 5022 /* Has it been removed already ? 
*/ 5023 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5024 IRB_REFRELE(save_ire->ire_bucket); 5025 ire_refrele(save_ire); 5026 break; 5027 } 5028 5029 xmit_mp = first_mp; 5030 /* 5031 * In case of MULTIRT, a copy of the current packet 5032 * to send is made to further re-enter the 5033 * loop and attempt another route resolution 5034 */ 5035 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5036 copy_mp = copymsg(first_mp); 5037 if (copy_mp != NULL) { 5038 xmit_mp = copy_mp; 5039 MULTIRT_DEBUG_TAG(first_mp); 5040 } 5041 } 5042 ire_add_then_send(q, ire, xmit_mp); 5043 if (ip6_asp_table_held) { 5044 ip6_asp_table_refrele(ipst); 5045 ip6_asp_table_held = B_FALSE; 5046 } 5047 5048 /* Assert that it is not deleted yet. */ 5049 ASSERT(save_ire->ire_ptpn != NULL); 5050 IRB_REFRELE(save_ire->ire_bucket); 5051 ire_refrele(save_ire); 5052 5053 if (copy_mp != NULL) { 5054 /* 5055 * If we found a (no)resolver, we ignore any 5056 * trailing top priority IRE_CACHE in 5057 * further loops. This ensures that we do not 5058 * omit any (no)resolver despite the priority 5059 * in this call. 5060 * IRE_CACHE, if any, will be processed 5061 * by another thread entering ip_newroute(), 5062 * (on resolver response, for example). 5063 * We use this to force multiple parallel 5064 * resolution as soon as a packet needs to be 5065 * sent. The result is, after one packet 5066 * emission all reachable routes are generally 5067 * resolved. 5068 * Otherwise, complete resolution of MULTIRT 5069 * routes would require several emissions as 5070 * side effect. 5071 */ 5072 multirt_flags &= ~MULTIRT_CACHEGW; 5073 5074 /* 5075 * Search for the next unresolved multirt 5076 * route. 
5077 */ 5078 copy_mp = NULL; 5079 save_ire = NULL; 5080 ire = NULL; 5081 /* re-enter the loop */ 5082 multirt_resolve_next = B_TRUE; 5083 continue; 5084 } 5085 5086 /* Don't need sire anymore */ 5087 if (sire != NULL) 5088 ire_refrele(sire); 5089 ill_refrele(dst_ill); 5090 ipif_refrele(src_ipif); 5091 return; 5092 5093 case IRE_IF_RESOLVER: 5094 /* 5095 * We can't build an IRE_CACHE yet, but at least we 5096 * found a resolver that can help. 5097 */ 5098 dst = *v6dstp; 5099 5100 /* 5101 * To be at this point in the code with a non-zero gw 5102 * means that dst is reachable through a gateway that 5103 * we have never resolved. By changing dst to the gw 5104 * addr we resolve the gateway first. When 5105 * ire_add_then_send() tries to put the IP dg to dst, 5106 * it will reenter ip_newroute() at which time we will 5107 * find the IRE_CACHE for the gw and create another 5108 * IRE_CACHE above (for dst itself). 5109 */ 5110 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5111 save_dst = dst; 5112 dst = v6gw; 5113 v6gw = ipv6_all_zeros; 5114 } 5115 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5116 /* 5117 * Ask the external resolver to do its thing. 
5118 * Make an mblk chain in the following form: 5119 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5120 */ 5121 mblk_t *ire_mp; 5122 mblk_t *areq_mp; 5123 areq_t *areq; 5124 in6_addr_t *addrp; 5125 5126 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5127 if (ip6_asp_table_held) { 5128 ip6_asp_table_refrele(ipst); 5129 ip6_asp_table_held = B_FALSE; 5130 } 5131 ire = ire_create_mp_v6( 5132 &dst, /* dest address */ 5133 &ipv6_all_ones, /* mask */ 5134 &src_ipif->ipif_v6src_addr, 5135 /* source address */ 5136 &v6gw, /* gateway address */ 5137 NULL, /* no src nce */ 5138 dst_ill->ill_rq, /* recv-from queue */ 5139 dst_ill->ill_wq, /* send-to queue */ 5140 IRE_CACHE, 5141 src_ipif, 5142 &save_ire->ire_mask_v6, /* Parent mask */ 5143 0, 5144 save_ire->ire_ihandle, 5145 /* Interface handle */ 5146 0, /* flags if any */ 5147 &(save_ire->ire_uinfo), 5148 NULL, 5149 NULL, 5150 ipst); 5151 5152 ire_refrele(save_ire); 5153 if (ire == NULL) { 5154 ip1dbg(("ip_newroute_v6:" 5155 "ire is NULL\n")); 5156 break; 5157 } 5158 5159 if ((sire != NULL) && 5160 (sire->ire_flags & RTF_MULTIRT)) { 5161 /* 5162 * processing a copy of the packet to 5163 * send for further resolution loops 5164 */ 5165 copy_mp = copymsg(first_mp); 5166 if (copy_mp != NULL) 5167 MULTIRT_DEBUG_TAG(copy_mp); 5168 } 5169 ire->ire_marks |= ire_marks; 5170 ire_mp = ire->ire_mp; 5171 /* 5172 * Now create or find an nce for this interface. 5173 * The hw addr will need to to be set from 5174 * the reply to the AR_ENTRY_QUERY that 5175 * we're about to send. This will be done in 5176 * ire_add_v6(). 5177 */ 5178 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5179 switch (err) { 5180 case 0: 5181 /* 5182 * New cache entry created. 5183 * Break, then ask the external 5184 * resolver. 5185 */ 5186 break; 5187 case EINPROGRESS: 5188 /* 5189 * Resolution in progress; 5190 * packet has been queued by 5191 * ndp_resolver(). 5192 */ 5193 ire_delete(ire); 5194 ire = NULL; 5195 /* 5196 * Check if another multirt 5197 * route must be resolved. 
5198 */ 5199 if (copy_mp != NULL) { 5200 /* 5201 * If we found a resolver, we 5202 * ignore any trailing top 5203 * priority IRE_CACHE in 5204 * further loops. The reason is 5205 * the same as for noresolver. 5206 */ 5207 multirt_flags &= 5208 ~MULTIRT_CACHEGW; 5209 /* 5210 * Search for the next 5211 * unresolved multirt route. 5212 */ 5213 first_mp = copy_mp; 5214 copy_mp = NULL; 5215 mp = first_mp; 5216 if (mp->b_datap->db_type == 5217 M_CTL) { 5218 mp = mp->b_cont; 5219 } 5220 ASSERT(sire != NULL); 5221 dst = save_dst; 5222 /* 5223 * re-enter the loop 5224 */ 5225 multirt_resolve_next = 5226 B_TRUE; 5227 continue; 5228 } 5229 5230 if (sire != NULL) 5231 ire_refrele(sire); 5232 ill_refrele(dst_ill); 5233 ipif_refrele(src_ipif); 5234 return; 5235 default: 5236 /* 5237 * Transient error; packet will be 5238 * freed. 5239 */ 5240 ire_delete(ire); 5241 ire = NULL; 5242 break; 5243 } 5244 if (err != 0) 5245 break; 5246 /* 5247 * Now set up the AR_ENTRY_QUERY and send it. 5248 */ 5249 areq_mp = ill_arp_alloc(dst_ill, 5250 (uchar_t *)&ipv6_areq_template, 5251 (caddr_t)&dst); 5252 if (areq_mp == NULL) { 5253 ip1dbg(("ip_newroute_v6:" 5254 "areq_mp is NULL\n")); 5255 freemsg(ire_mp); 5256 break; 5257 } 5258 areq = (areq_t *)areq_mp->b_rptr; 5259 addrp = (in6_addr_t *)((char *)areq + 5260 areq->areq_target_addr_offset); 5261 *addrp = dst; 5262 addrp = (in6_addr_t *)((char *)areq + 5263 areq->areq_sender_addr_offset); 5264 *addrp = src_ipif->ipif_v6src_addr; 5265 /* 5266 * link the chain, then send up to the resolver. 5267 */ 5268 linkb(areq_mp, ire_mp); 5269 linkb(areq_mp, mp); 5270 ip1dbg(("ip_newroute_v6:" 5271 "putnext to resolver\n")); 5272 putnext(dst_ill->ill_rq, areq_mp); 5273 /* 5274 * Check if another multirt route 5275 * must be resolved. 5276 */ 5277 ire = NULL; 5278 if (copy_mp != NULL) { 5279 /* 5280 * If we find a resolver, we ignore any 5281 * trailing top priority IRE_CACHE in 5282 * further loops. The reason is the 5283 * same as for noresolver. 
5284 */ 5285 multirt_flags &= ~MULTIRT_CACHEGW; 5286 /* 5287 * Search for the next unresolved 5288 * multirt route. 5289 */ 5290 first_mp = copy_mp; 5291 copy_mp = NULL; 5292 mp = first_mp; 5293 if (mp->b_datap->db_type == M_CTL) { 5294 mp = mp->b_cont; 5295 } 5296 ASSERT(sire != NULL); 5297 dst = save_dst; 5298 /* 5299 * re-enter the loop 5300 */ 5301 multirt_resolve_next = B_TRUE; 5302 continue; 5303 } 5304 5305 if (sire != NULL) 5306 ire_refrele(sire); 5307 ill_refrele(dst_ill); 5308 ipif_refrele(src_ipif); 5309 return; 5310 } 5311 /* 5312 * Non-external resolver case. 5313 * 5314 * TSol note: Please see the note above the 5315 * IRE_IF_NORESOLVER case. 5316 */ 5317 ga.ga_af = AF_INET6; 5318 ga.ga_addr = dst; 5319 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5320 5321 ire = ire_create_v6( 5322 &dst, /* dest address */ 5323 &ipv6_all_ones, /* mask */ 5324 &src_ipif->ipif_v6src_addr, /* source address */ 5325 &v6gw, /* gateway address */ 5326 &save_ire->ire_max_frag, 5327 NULL, /* no src nce */ 5328 dst_ill->ill_rq, /* recv-from queue */ 5329 dst_ill->ill_wq, /* send-to queue */ 5330 IRE_CACHE, 5331 src_ipif, 5332 &save_ire->ire_mask_v6, /* Parent mask */ 5333 0, 5334 save_ire->ire_ihandle, /* Interface handle */ 5335 0, /* flags if any */ 5336 &(save_ire->ire_uinfo), 5337 NULL, 5338 gcgrp, 5339 ipst); 5340 5341 if (ire == NULL) { 5342 if (gcgrp != NULL) { 5343 GCGRP_REFRELE(gcgrp); 5344 gcgrp = NULL; 5345 } 5346 ire_refrele(save_ire); 5347 break; 5348 } 5349 5350 /* reference now held by IRE */ 5351 gcgrp = NULL; 5352 5353 if ((sire != NULL) && 5354 (sire->ire_flags & RTF_MULTIRT)) { 5355 copy_mp = copymsg(first_mp); 5356 if (copy_mp != NULL) 5357 MULTIRT_DEBUG_TAG(copy_mp); 5358 } 5359 5360 ire->ire_marks |= ire_marks; 5361 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5362 switch (err) { 5363 case 0: 5364 /* Prevent save_ire from getting deleted */ 5365 IRB_REFHOLD(save_ire->ire_bucket); 5366 /* Has it been removed already ? 
*/ 5367 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5368 IRB_REFRELE(save_ire->ire_bucket); 5369 ire_refrele(save_ire); 5370 break; 5371 } 5372 5373 /* 5374 * We have a resolved cache entry, 5375 * add in the IRE. 5376 */ 5377 ire_add_then_send(q, ire, first_mp); 5378 if (ip6_asp_table_held) { 5379 ip6_asp_table_refrele(ipst); 5380 ip6_asp_table_held = B_FALSE; 5381 } 5382 5383 /* Assert that it is not deleted yet. */ 5384 ASSERT(save_ire->ire_ptpn != NULL); 5385 IRB_REFRELE(save_ire->ire_bucket); 5386 ire_refrele(save_ire); 5387 /* 5388 * Check if another multirt route 5389 * must be resolved. 5390 */ 5391 ire = NULL; 5392 if (copy_mp != NULL) { 5393 /* 5394 * If we find a resolver, we ignore any 5395 * trailing top priority IRE_CACHE in 5396 * further loops. The reason is the 5397 * same as for noresolver. 5398 */ 5399 multirt_flags &= ~MULTIRT_CACHEGW; 5400 /* 5401 * Search for the next unresolved 5402 * multirt route. 5403 */ 5404 first_mp = copy_mp; 5405 copy_mp = NULL; 5406 mp = first_mp; 5407 if (mp->b_datap->db_type == M_CTL) { 5408 mp = mp->b_cont; 5409 } 5410 ASSERT(sire != NULL); 5411 dst = save_dst; 5412 /* 5413 * re-enter the loop 5414 */ 5415 multirt_resolve_next = B_TRUE; 5416 continue; 5417 } 5418 5419 if (sire != NULL) 5420 ire_refrele(sire); 5421 ill_refrele(dst_ill); 5422 ipif_refrele(src_ipif); 5423 return; 5424 5425 case EINPROGRESS: 5426 /* 5427 * mp was consumed - presumably queued. 5428 * No need for ire, presumably resolution is 5429 * in progress, and ire will be added when the 5430 * address is resolved. 5431 */ 5432 if (ip6_asp_table_held) { 5433 ip6_asp_table_refrele(ipst); 5434 ip6_asp_table_held = B_FALSE; 5435 } 5436 ASSERT(ire->ire_nce == NULL); 5437 ire_delete(ire); 5438 ire_refrele(save_ire); 5439 /* 5440 * Check if another multirt route 5441 * must be resolved. 
5442 */ 5443 ire = NULL; 5444 if (copy_mp != NULL) { 5445 /* 5446 * If we find a resolver, we ignore any 5447 * trailing top priority IRE_CACHE in 5448 * further loops. The reason is the 5449 * same as for noresolver. 5450 */ 5451 multirt_flags &= ~MULTIRT_CACHEGW; 5452 /* 5453 * Search for the next unresolved 5454 * multirt route. 5455 */ 5456 first_mp = copy_mp; 5457 copy_mp = NULL; 5458 mp = first_mp; 5459 if (mp->b_datap->db_type == M_CTL) { 5460 mp = mp->b_cont; 5461 } 5462 ASSERT(sire != NULL); 5463 dst = save_dst; 5464 /* 5465 * re-enter the loop 5466 */ 5467 multirt_resolve_next = B_TRUE; 5468 continue; 5469 } 5470 if (sire != NULL) 5471 ire_refrele(sire); 5472 ill_refrele(dst_ill); 5473 ipif_refrele(src_ipif); 5474 return; 5475 default: 5476 /* Some transient error */ 5477 ASSERT(ire->ire_nce == NULL); 5478 ire_refrele(save_ire); 5479 break; 5480 } 5481 break; 5482 default: 5483 break; 5484 } 5485 if (ip6_asp_table_held) { 5486 ip6_asp_table_refrele(ipst); 5487 ip6_asp_table_held = B_FALSE; 5488 } 5489 } while (multirt_resolve_next); 5490 5491 err_ret: 5492 ip1dbg(("ip_newroute_v6: dropped\n")); 5493 if (src_ipif != NULL) 5494 ipif_refrele(src_ipif); 5495 if (dst_ill != NULL) { 5496 need_rele = B_TRUE; 5497 ill = dst_ill; 5498 } 5499 if (ill != NULL) { 5500 if (mp->b_prev != NULL) { 5501 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5502 } else { 5503 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5504 } 5505 5506 if (need_rele) 5507 ill_refrele(ill); 5508 } else { 5509 if (mp->b_prev != NULL) { 5510 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5511 } else { 5512 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5513 } 5514 } 5515 /* Did this packet originate externally? 
*/ 5516 if (mp->b_prev) { 5517 mp->b_next = NULL; 5518 mp->b_prev = NULL; 5519 } 5520 if (copy_mp != NULL) { 5521 MULTIRT_DEBUG_UNTAG(copy_mp); 5522 freemsg(copy_mp); 5523 } 5524 MULTIRT_DEBUG_UNTAG(first_mp); 5525 freemsg(first_mp); 5526 if (ire != NULL) 5527 ire_refrele(ire); 5528 if (sire != NULL) 5529 ire_refrele(sire); 5530 return; 5531 5532 icmp_err_ret: 5533 if (ip6_asp_table_held) 5534 ip6_asp_table_refrele(ipst); 5535 if (src_ipif != NULL) 5536 ipif_refrele(src_ipif); 5537 if (dst_ill != NULL) { 5538 need_rele = B_TRUE; 5539 ill = dst_ill; 5540 } 5541 ip1dbg(("ip_newroute_v6: no route\n")); 5542 if (sire != NULL) 5543 ire_refrele(sire); 5544 /* 5545 * We need to set sire to NULL to avoid double freeing if we 5546 * ever goto err_ret from below. 5547 */ 5548 sire = NULL; 5549 ip6h = (ip6_t *)mp->b_rptr; 5550 /* Skip ip6i_t header if present */ 5551 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5552 /* Make sure the IPv6 header is present */ 5553 if ((mp->b_wptr - (uchar_t *)ip6h) < 5554 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5555 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5556 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5557 goto err_ret; 5558 } 5559 } 5560 mp->b_rptr += sizeof (ip6i_t); 5561 ip6h = (ip6_t *)mp->b_rptr; 5562 } 5563 /* Did this packet originate externally? 
*/ 5564 if (mp->b_prev) { 5565 if (ill != NULL) { 5566 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5567 } else { 5568 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5569 } 5570 mp->b_next = NULL; 5571 mp->b_prev = NULL; 5572 q = WR(q); 5573 } else { 5574 if (ill != NULL) { 5575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5576 } else { 5577 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5578 } 5579 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5580 /* Failed */ 5581 if (copy_mp != NULL) { 5582 MULTIRT_DEBUG_UNTAG(copy_mp); 5583 freemsg(copy_mp); 5584 } 5585 MULTIRT_DEBUG_UNTAG(first_mp); 5586 freemsg(first_mp); 5587 if (ire != NULL) 5588 ire_refrele(ire); 5589 if (need_rele) 5590 ill_refrele(ill); 5591 return; 5592 } 5593 } 5594 5595 if (need_rele) 5596 ill_refrele(ill); 5597 5598 /* 5599 * At this point we will have ire only if RTF_BLACKHOLE 5600 * or RTF_REJECT flags are set on the IRE. It will not 5601 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5602 */ 5603 if (ire != NULL) { 5604 if (ire->ire_flags & RTF_BLACKHOLE) { 5605 ire_refrele(ire); 5606 if (copy_mp != NULL) { 5607 MULTIRT_DEBUG_UNTAG(copy_mp); 5608 freemsg(copy_mp); 5609 } 5610 MULTIRT_DEBUG_UNTAG(first_mp); 5611 freemsg(first_mp); 5612 return; 5613 } 5614 ire_refrele(ire); 5615 } 5616 if (ip_debug > 3) { 5617 /* ip2dbg */ 5618 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5619 AF_INET6, v6dstp); 5620 } 5621 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5622 B_FALSE, B_FALSE, zoneid, ipst); 5623 } 5624 5625 /* 5626 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5627 * we need to send out a packet to a destination address for which we do not 5628 * have specific routing information. It is only used for multicast packets. 5629 * 5630 * If unspec_src we allow creating an IRE with source address zero. 5631 * ire_send_v6() will delete it after the packet is sent. 
5632 */ 5633 void 5634 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5635 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5636 { 5637 ire_t *ire = NULL; 5638 ipif_t *src_ipif = NULL; 5639 int err = 0; 5640 ill_t *dst_ill = NULL; 5641 ire_t *save_ire; 5642 ushort_t ire_marks = 0; 5643 ipsec_out_t *io; 5644 ill_t *attach_ill = NULL; 5645 ill_t *ill; 5646 ip6_t *ip6h; 5647 mblk_t *first_mp; 5648 boolean_t ip6i_present; 5649 ire_t *fire = NULL; 5650 mblk_t *copy_mp = NULL; 5651 boolean_t multirt_resolve_next; 5652 in6_addr_t *v6dstp = &v6dst; 5653 boolean_t ipif_held = B_FALSE; 5654 boolean_t ill_held = B_FALSE; 5655 boolean_t ip6_asp_table_held = B_FALSE; 5656 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5657 5658 /* 5659 * This loop is run only once in most cases. 5660 * We loop to resolve further routes only when the destination 5661 * can be reached through multiple RTF_MULTIRT-flagged ires. 5662 */ 5663 do { 5664 multirt_resolve_next = B_FALSE; 5665 if (dst_ill != NULL) { 5666 ill_refrele(dst_ill); 5667 dst_ill = NULL; 5668 } 5669 5670 if (src_ipif != NULL) { 5671 ipif_refrele(src_ipif); 5672 src_ipif = NULL; 5673 } 5674 ASSERT(ipif != NULL); 5675 ill = ipif->ipif_ill; 5676 5677 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5678 if (ip_debug > 2) { 5679 /* ip1dbg */ 5680 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5681 AF_INET6, v6dstp); 5682 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5683 ill->ill_name, ipif->ipif_isv6); 5684 } 5685 5686 first_mp = mp; 5687 if (mp->b_datap->db_type == M_CTL) { 5688 mp = mp->b_cont; 5689 io = (ipsec_out_t *)first_mp->b_rptr; 5690 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5691 } else { 5692 io = NULL; 5693 } 5694 5695 /* 5696 * If the interface is a pt-pt interface we look for an 5697 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5698 * local_address and the pt-pt destination address. 5699 * Otherwise we just match the local address. 
5700 */ 5701 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5702 goto err_ret; 5703 } 5704 /* 5705 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5706 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5707 * as it could be NULL. 5708 * 5709 * This information can appear either in an ip6i_t or an 5710 * IPSEC_OUT message. 5711 */ 5712 ip6h = (ip6_t *)mp->b_rptr; 5713 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5714 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5715 if (!ip6i_present || 5716 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5717 attach_ill = ip_grab_attach_ill(ill, first_mp, 5718 (ip6i_present ? 5719 ((ip6i_t *)ip6h)->ip6i_ifindex : 5720 io->ipsec_out_ill_index), B_TRUE, ipst); 5721 /* Failure case frees things for us. */ 5722 if (attach_ill == NULL) 5723 return; 5724 5725 /* 5726 * Check if we need an ire that will not be 5727 * looked up by anybody else i.e. HIDDEN. 5728 */ 5729 if (ill_is_probeonly(attach_ill)) 5730 ire_marks = IRE_MARK_HIDDEN; 5731 } 5732 } 5733 5734 /* 5735 * We check if an IRE_OFFSUBNET for the addr that goes through 5736 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5737 * RTF_MULTIRT flags must be honored. 5738 */ 5739 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5740 ip2dbg(("ip_newroute_ipif_v6: " 5741 "ipif_lookup_multi_ire_v6(" 5742 "ipif %p, dst %08x) = fire %p\n", 5743 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5744 (void *)fire)); 5745 5746 /* 5747 * If the application specified the ill (ifindex), we still 5748 * load spread. Only if the packets needs to go out specifically 5749 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5750 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5751 * multirouting, then we don't try to use a different ill for 5752 * load spreading. 
5753 */ 5754 if (attach_ill == NULL) { 5755 /* 5756 * If the interface belongs to an interface group, 5757 * make sure the next possible interface in the group 5758 * is used. This encourages load spreading among peers 5759 * in an interface group. 5760 * 5761 * Note: While we pick a dst_ill we are really only 5762 * interested in the ill for load spreading. The source 5763 * ipif is determined by source address selection below. 5764 */ 5765 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5766 dst_ill = ipif->ipif_ill; 5767 /* For uniformity do a refhold */ 5768 ill_refhold(dst_ill); 5769 } else { 5770 /* refheld by ip_newroute_get_dst_ill_v6 */ 5771 dst_ill = 5772 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5773 } 5774 if (dst_ill == NULL) { 5775 if (ip_debug > 2) { 5776 pr_addr_dbg("ip_newroute_ipif_v6: " 5777 "no dst ill for dst %s\n", 5778 AF_INET6, v6dstp); 5779 } 5780 goto err_ret; 5781 } 5782 } else { 5783 dst_ill = ipif->ipif_ill; 5784 /* 5785 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5786 * and IPV6_BOUND_PIF case. 5787 */ 5788 ASSERT(dst_ill == attach_ill); 5789 /* attach_ill is already refheld */ 5790 } 5791 /* 5792 * Pick a source address which matches the scope of the 5793 * destination address. 5794 * For RTF_SETSRC routes, the source address is imposed by the 5795 * parent ire (fire). 5796 */ 5797 ASSERT(src_ipif == NULL); 5798 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5799 /* 5800 * Check that the ipif matching the requested source 5801 * address still exists. 
5802 */ 5803 src_ipif = 5804 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5805 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5806 } 5807 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5808 ip6_asp_table_held = B_TRUE; 5809 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5810 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5811 } 5812 5813 if (src_ipif == NULL) { 5814 if (!unspec_src) { 5815 if (ip_debug > 2) { 5816 /* ip1dbg */ 5817 pr_addr_dbg("ip_newroute_ipif_v6: " 5818 "no src for dst %s\n,", 5819 AF_INET6, v6dstp); 5820 printf(" through interface %s\n", 5821 dst_ill->ill_name); 5822 } 5823 goto err_ret; 5824 } 5825 src_ipif = ipif; 5826 ipif_refhold(src_ipif); 5827 } 5828 ire = ipif_to_ire_v6(ipif); 5829 if (ire == NULL) { 5830 if (ip_debug > 2) { 5831 /* ip1dbg */ 5832 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5833 AF_INET6, &ipif->ipif_v6lcl_addr); 5834 printf("ip_newroute_ipif_v6: " 5835 "if %s\n", dst_ill->ill_name); 5836 } 5837 goto err_ret; 5838 } 5839 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5840 goto err_ret; 5841 5842 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5843 5844 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5845 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5846 if (ip_debug > 2) { 5847 /* ip1dbg */ 5848 pr_addr_dbg(" address %s\n", 5849 AF_INET6, &ire->ire_src_addr_v6); 5850 } 5851 save_ire = ire; 5852 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5853 (void *)ire, (void *)ipif)); 5854 5855 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5856 /* 5857 * an IRE_OFFSUBET was looked up 5858 * on that interface. 5859 * this ire has RTF_MULTIRT flag, 5860 * so the resolution loop 5861 * will be re-entered to resolve 5862 * additional routes on other 5863 * interfaces. For that purpose, 5864 * a copy of the packet is 5865 * made at this point. 
5866 */ 5867 fire->ire_last_used_time = lbolt; 5868 copy_mp = copymsg(first_mp); 5869 if (copy_mp) { 5870 MULTIRT_DEBUG_TAG(copy_mp); 5871 } 5872 } 5873 5874 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5875 switch (ire->ire_type) { 5876 case IRE_IF_NORESOLVER: { 5877 /* 5878 * We have what we need to build an IRE_CACHE. 5879 * 5880 * handle the Gated case, where we create 5881 * a NORESOLVER route for loopback. 5882 */ 5883 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5884 break; 5885 /* 5886 * The newly created ire will inherit the flags of the 5887 * parent ire, if any. 5888 */ 5889 ire = ire_create_v6( 5890 v6dstp, /* dest address */ 5891 &ipv6_all_ones, /* mask */ 5892 &src_ipif->ipif_v6src_addr, /* source address */ 5893 NULL, /* gateway address */ 5894 &save_ire->ire_max_frag, 5895 NULL, /* no src nce */ 5896 dst_ill->ill_rq, /* recv-from queue */ 5897 dst_ill->ill_wq, /* send-to queue */ 5898 IRE_CACHE, 5899 src_ipif, 5900 NULL, 5901 (fire != NULL) ? /* Parent handle */ 5902 fire->ire_phandle : 0, 5903 save_ire->ire_ihandle, /* Interface handle */ 5904 (fire != NULL) ? 5905 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5906 0, 5907 &ire_uinfo_null, 5908 NULL, 5909 NULL, 5910 ipst); 5911 5912 if (ire == NULL) { 5913 ire_refrele(save_ire); 5914 break; 5915 } 5916 5917 ire->ire_marks |= ire_marks; 5918 5919 err = ndp_noresolver(dst_ill, v6dstp); 5920 if (err != 0) { 5921 ire_refrele(save_ire); 5922 break; 5923 } 5924 5925 /* Prevent save_ire from getting deleted */ 5926 IRB_REFHOLD(save_ire->ire_bucket); 5927 /* Has it been removed already ? */ 5928 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5929 IRB_REFRELE(save_ire->ire_bucket); 5930 ire_refrele(save_ire); 5931 break; 5932 } 5933 5934 ire_add_then_send(q, ire, first_mp); 5935 if (ip6_asp_table_held) { 5936 ip6_asp_table_refrele(ipst); 5937 ip6_asp_table_held = B_FALSE; 5938 } 5939 5940 /* Assert that it is not deleted yet. 
*/ 5941 ASSERT(save_ire->ire_ptpn != NULL); 5942 IRB_REFRELE(save_ire->ire_bucket); 5943 ire_refrele(save_ire); 5944 if (fire != NULL) { 5945 ire_refrele(fire); 5946 fire = NULL; 5947 } 5948 5949 /* 5950 * The resolution loop is re-entered if we 5951 * actually are in a multirouting case. 5952 */ 5953 if (copy_mp != NULL) { 5954 boolean_t need_resolve = 5955 ire_multirt_need_resolve_v6(v6dstp, 5956 MBLK_GETLABEL(copy_mp), ipst); 5957 if (!need_resolve) { 5958 MULTIRT_DEBUG_UNTAG(copy_mp); 5959 freemsg(copy_mp); 5960 copy_mp = NULL; 5961 } else { 5962 /* 5963 * ipif_lookup_group_v6() calls 5964 * ire_lookup_multi_v6() that uses 5965 * ire_ftable_lookup_v6() to find 5966 * an IRE_INTERFACE for the group. 5967 * In the multirt case, 5968 * ire_lookup_multi_v6() then invokes 5969 * ire_multirt_lookup_v6() to find 5970 * the next resolvable ire. 5971 * As a result, we obtain a new 5972 * interface, derived from the 5973 * next ire. 5974 */ 5975 if (ipif_held) { 5976 ipif_refrele(ipif); 5977 ipif_held = B_FALSE; 5978 } 5979 ipif = ipif_lookup_group_v6(v6dstp, 5980 zoneid, ipst); 5981 ip2dbg(("ip_newroute_ipif: " 5982 "multirt dst %08x, ipif %p\n", 5983 ntohl(V4_PART_OF_V6((*v6dstp))), 5984 (void *)ipif)); 5985 if (ipif != NULL) { 5986 ipif_held = B_TRUE; 5987 mp = copy_mp; 5988 copy_mp = NULL; 5989 multirt_resolve_next = 5990 B_TRUE; 5991 continue; 5992 } else { 5993 freemsg(copy_mp); 5994 } 5995 } 5996 } 5997 ill_refrele(dst_ill); 5998 if (ipif_held) { 5999 ipif_refrele(ipif); 6000 ipif_held = B_FALSE; 6001 } 6002 if (src_ipif != NULL) 6003 ipif_refrele(src_ipif); 6004 return; 6005 } 6006 case IRE_IF_RESOLVER: { 6007 6008 ASSERT(dst_ill->ill_isv6); 6009 6010 /* 6011 * We obtain a partial IRE_CACHE which we will pass 6012 * along with the resolver query. When the response 6013 * comes back it will be there ready for us to add. 6014 */ 6015 /* 6016 * the newly created ire will inherit the flags of the 6017 * parent ire, if any. 
6018 */ 6019 ire = ire_create_v6( 6020 v6dstp, /* dest address */ 6021 &ipv6_all_ones, /* mask */ 6022 &src_ipif->ipif_v6src_addr, /* source address */ 6023 NULL, /* gateway address */ 6024 &save_ire->ire_max_frag, 6025 NULL, /* src nce */ 6026 dst_ill->ill_rq, /* recv-from queue */ 6027 dst_ill->ill_wq, /* send-to queue */ 6028 IRE_CACHE, 6029 src_ipif, 6030 NULL, 6031 (fire != NULL) ? /* Parent handle */ 6032 fire->ire_phandle : 0, 6033 save_ire->ire_ihandle, /* Interface handle */ 6034 (fire != NULL) ? 6035 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6036 0, 6037 &ire_uinfo_null, 6038 NULL, 6039 NULL, 6040 ipst); 6041 6042 if (ire == NULL) { 6043 ire_refrele(save_ire); 6044 break; 6045 } 6046 6047 ire->ire_marks |= ire_marks; 6048 6049 /* Resolve and add ire to the ctable */ 6050 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6051 switch (err) { 6052 case 0: 6053 /* Prevent save_ire from getting deleted */ 6054 IRB_REFHOLD(save_ire->ire_bucket); 6055 /* Has it been removed already ? */ 6056 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6057 IRB_REFRELE(save_ire->ire_bucket); 6058 ire_refrele(save_ire); 6059 break; 6060 } 6061 /* 6062 * We have a resolved cache entry, 6063 * add in the IRE. 6064 */ 6065 ire_add_then_send(q, ire, first_mp); 6066 if (ip6_asp_table_held) { 6067 ip6_asp_table_refrele(ipst); 6068 ip6_asp_table_held = B_FALSE; 6069 } 6070 6071 /* Assert that it is not deleted yet. */ 6072 ASSERT(save_ire->ire_ptpn != NULL); 6073 IRB_REFRELE(save_ire->ire_bucket); 6074 ire_refrele(save_ire); 6075 if (fire != NULL) { 6076 ire_refrele(fire); 6077 fire = NULL; 6078 } 6079 6080 /* 6081 * The resolution loop is re-entered if we 6082 * actually are in a multirouting case. 
6083 */ 6084 if (copy_mp != NULL) { 6085 boolean_t need_resolve = 6086 ire_multirt_need_resolve_v6(v6dstp, 6087 MBLK_GETLABEL(copy_mp), ipst); 6088 if (!need_resolve) { 6089 MULTIRT_DEBUG_UNTAG(copy_mp); 6090 freemsg(copy_mp); 6091 copy_mp = NULL; 6092 } else { 6093 /* 6094 * ipif_lookup_group_v6() calls 6095 * ire_lookup_multi_v6() that 6096 * uses ire_ftable_lookup_v6() 6097 * to find an IRE_INTERFACE for 6098 * the group. In the multirt 6099 * case, ire_lookup_multi_v6() 6100 * then invokes 6101 * ire_multirt_lookup_v6() to 6102 * find the next resolvable ire. 6103 * As a result, we obtain a new 6104 * interface, derived from the 6105 * next ire. 6106 */ 6107 if (ipif_held) { 6108 ipif_refrele(ipif); 6109 ipif_held = B_FALSE; 6110 } 6111 ipif = ipif_lookup_group_v6( 6112 v6dstp, zoneid, ipst); 6113 ip2dbg(("ip_newroute_ipif: " 6114 "multirt dst %08x, " 6115 "ipif %p\n", 6116 ntohl(V4_PART_OF_V6( 6117 (*v6dstp))), 6118 (void *)ipif)); 6119 if (ipif != NULL) { 6120 ipif_held = B_TRUE; 6121 mp = copy_mp; 6122 copy_mp = NULL; 6123 multirt_resolve_next = 6124 B_TRUE; 6125 continue; 6126 } else { 6127 freemsg(copy_mp); 6128 } 6129 } 6130 } 6131 ill_refrele(dst_ill); 6132 if (ipif_held) { 6133 ipif_refrele(ipif); 6134 ipif_held = B_FALSE; 6135 } 6136 if (src_ipif != NULL) 6137 ipif_refrele(src_ipif); 6138 return; 6139 6140 case EINPROGRESS: 6141 /* 6142 * mp was consumed - presumably queued. 6143 * No need for ire, presumably resolution is 6144 * in progress, and ire will be added when the 6145 * address is resolved. 6146 */ 6147 if (ip6_asp_table_held) { 6148 ip6_asp_table_refrele(ipst); 6149 ip6_asp_table_held = B_FALSE; 6150 } 6151 ire_delete(ire); 6152 ire_refrele(save_ire); 6153 if (fire != NULL) { 6154 ire_refrele(fire); 6155 fire = NULL; 6156 } 6157 6158 /* 6159 * The resolution loop is re-entered if we 6160 * actually are in a multirouting case. 
6161 */ 6162 if (copy_mp != NULL) { 6163 boolean_t need_resolve = 6164 ire_multirt_need_resolve_v6(v6dstp, 6165 MBLK_GETLABEL(copy_mp), ipst); 6166 if (!need_resolve) { 6167 MULTIRT_DEBUG_UNTAG(copy_mp); 6168 freemsg(copy_mp); 6169 copy_mp = NULL; 6170 } else { 6171 /* 6172 * ipif_lookup_group_v6() calls 6173 * ire_lookup_multi_v6() that 6174 * uses ire_ftable_lookup_v6() 6175 * to find an IRE_INTERFACE for 6176 * the group. In the multirt 6177 * case, ire_lookup_multi_v6() 6178 * then invokes 6179 * ire_multirt_lookup_v6() to 6180 * find the next resolvable ire. 6181 * As a result, we obtain a new 6182 * interface, derived from the 6183 * next ire. 6184 */ 6185 if (ipif_held) { 6186 ipif_refrele(ipif); 6187 ipif_held = B_FALSE; 6188 } 6189 ipif = ipif_lookup_group_v6( 6190 v6dstp, zoneid, ipst); 6191 ip2dbg(("ip_newroute_ipif: " 6192 "multirt dst %08x, " 6193 "ipif %p\n", 6194 ntohl(V4_PART_OF_V6( 6195 (*v6dstp))), 6196 (void *)ipif)); 6197 if (ipif != NULL) { 6198 ipif_held = B_TRUE; 6199 mp = copy_mp; 6200 copy_mp = NULL; 6201 multirt_resolve_next = 6202 B_TRUE; 6203 continue; 6204 } else { 6205 freemsg(copy_mp); 6206 } 6207 } 6208 } 6209 ill_refrele(dst_ill); 6210 if (ipif_held) { 6211 ipif_refrele(ipif); 6212 ipif_held = B_FALSE; 6213 } 6214 if (src_ipif != NULL) 6215 ipif_refrele(src_ipif); 6216 return; 6217 default: 6218 /* Some transient error */ 6219 ire_refrele(save_ire); 6220 break; 6221 } 6222 break; 6223 } 6224 default: 6225 break; 6226 } 6227 if (ip6_asp_table_held) { 6228 ip6_asp_table_refrele(ipst); 6229 ip6_asp_table_held = B_FALSE; 6230 } 6231 } while (multirt_resolve_next); 6232 6233 err_ret: 6234 if (ip6_asp_table_held) 6235 ip6_asp_table_refrele(ipst); 6236 if (ire != NULL) 6237 ire_refrele(ire); 6238 if (fire != NULL) 6239 ire_refrele(fire); 6240 if (ipif != NULL && ipif_held) 6241 ipif_refrele(ipif); 6242 if (src_ipif != NULL) 6243 ipif_refrele(src_ipif); 6244 /* Multicast - no point in trying to generate ICMP error */ 6245 ASSERT((attach_ill 
== NULL) || (dst_ill == attach_ill)); 6246 if (dst_ill != NULL) { 6247 ill = dst_ill; 6248 ill_held = B_TRUE; 6249 } 6250 if (mp->b_prev || mp->b_next) { 6251 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6252 } else { 6253 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6254 } 6255 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6256 mp->b_next = NULL; 6257 mp->b_prev = NULL; 6258 freemsg(first_mp); 6259 if (ill_held) 6260 ill_refrele(ill); 6261 } 6262 6263 /* 6264 * Parse and process any hop-by-hop or destination options. 6265 * 6266 * Assumes that q is an ill read queue so that ICMP errors for link-local 6267 * destinations are sent out the correct interface. 6268 * 6269 * Returns -1 if there was an error and mp has been consumed. 6270 * Returns 0 if no special action is needed. 6271 * Returns 1 if the packet contained a router alert option for this node 6272 * which is verified to be "interesting/known" for our implementation. 6273 * 6274 * XXX Note: In future as more hbh or dest options are defined, 6275 * it may be better to have different routines for hbh and dest 6276 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6277 * may have same value in different namespaces. Or is it same namespace ?? 
6278 * Current code checks for each opt_type (other than pads) if it is in 6279 * the expected nexthdr (hbh or dest) 6280 */ 6281 static int 6282 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6283 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6284 { 6285 uint8_t opt_type; 6286 uint_t optused; 6287 int ret = 0; 6288 mblk_t *first_mp; 6289 const char *errtype; 6290 zoneid_t zoneid; 6291 ill_t *ill = q->q_ptr; 6292 6293 first_mp = mp; 6294 if (mp->b_datap->db_type == M_CTL) { 6295 mp = mp->b_cont; 6296 } 6297 6298 while (optlen != 0) { 6299 opt_type = *optptr; 6300 if (opt_type == IP6OPT_PAD1) { 6301 optused = 1; 6302 } else { 6303 if (optlen < 2) 6304 goto bad_opt; 6305 errtype = "malformed"; 6306 if (opt_type == ip6opt_ls) { 6307 optused = 2 + optptr[1]; 6308 if (optused > optlen) 6309 goto bad_opt; 6310 } else switch (opt_type) { 6311 case IP6OPT_PADN: 6312 /* 6313 * Note:We don't verify that (N-2) pad octets 6314 * are zero as required by spec. Adhere to 6315 * "be liberal in what you accept..." part of 6316 * implementation philosophy (RFC791,RFC1122) 6317 */ 6318 optused = 2 + optptr[1]; 6319 if (optused > optlen) 6320 goto bad_opt; 6321 break; 6322 6323 case IP6OPT_JUMBO: 6324 if (hdr_type != IPPROTO_HOPOPTS) 6325 goto opt_error; 6326 goto opt_error; /* XXX Not implemented! 
*/ 6327 6328 case IP6OPT_ROUTER_ALERT: { 6329 struct ip6_opt_router *or; 6330 6331 if (hdr_type != IPPROTO_HOPOPTS) 6332 goto opt_error; 6333 optused = 2 + optptr[1]; 6334 if (optused > optlen) 6335 goto bad_opt; 6336 or = (struct ip6_opt_router *)optptr; 6337 /* Check total length and alignment */ 6338 if (optused != sizeof (*or) || 6339 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6340 goto opt_error; 6341 /* Check value */ 6342 switch (*((uint16_t *)or->ip6or_value)) { 6343 case IP6_ALERT_MLD: 6344 case IP6_ALERT_RSVP: 6345 ret = 1; 6346 } 6347 break; 6348 } 6349 case IP6OPT_HOME_ADDRESS: { 6350 /* 6351 * Minimal support for the home address option 6352 * (which is required by all IPv6 nodes). 6353 * Implement by just swapping the home address 6354 * and source address. 6355 * XXX Note: this has IPsec implications since 6356 * AH needs to take this into account. 6357 * Also, when IPsec is used we need to ensure 6358 * that this is only processed once 6359 * in the received packet (to avoid swapping 6360 * back and forth). 6361 * NOTE:This option processing is considered 6362 * to be unsafe and prone to a denial of 6363 * service attack. 6364 * The current processing is not safe even with 6365 * IPsec secured IP packets. Since the home 6366 * address option processing requirement still 6367 * is in the IETF draft and in the process of 6368 * being redefined for its usage, it has been 6369 * decided to turn off the option by default. 6370 * If this section of code needs to be executed, 6371 * ndd variable ip6_ignore_home_address_opt 6372 * should be set to 0 at the user's own risk. 6373 */ 6374 struct ip6_opt_home_address *oh; 6375 in6_addr_t tmp; 6376 6377 if (ipst->ips_ipv6_ignore_home_address_opt) 6378 goto opt_error; 6379 6380 if (hdr_type != IPPROTO_DSTOPTS) 6381 goto opt_error; 6382 optused = 2 + optptr[1]; 6383 if (optused > optlen) 6384 goto bad_opt; 6385 6386 /* 6387 * We did this dest. opt the first time 6388 * around (i.e. before AH processing). 
6389 * If we've done AH... stop now. 6390 */ 6391 if (first_mp != mp) { 6392 ipsec_in_t *ii; 6393 6394 ii = (ipsec_in_t *)first_mp->b_rptr; 6395 if (ii->ipsec_in_ah_sa != NULL) 6396 break; 6397 } 6398 6399 oh = (struct ip6_opt_home_address *)optptr; 6400 /* Check total length and alignment */ 6401 if (optused < sizeof (*oh) || 6402 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6403 goto opt_error; 6404 /* Swap ip6_src and the home address */ 6405 tmp = ip6h->ip6_src; 6406 /* XXX Note: only 8 byte alignment option */ 6407 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6408 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6409 break; 6410 } 6411 6412 case IP6OPT_TUNNEL_LIMIT: 6413 if (hdr_type != IPPROTO_DSTOPTS) { 6414 goto opt_error; 6415 } 6416 optused = 2 + optptr[1]; 6417 if (optused > optlen) { 6418 goto bad_opt; 6419 } 6420 if (optused != 3) { 6421 goto opt_error; 6422 } 6423 break; 6424 6425 default: 6426 errtype = "unknown"; 6427 /* FALLTHROUGH */ 6428 opt_error: 6429 /* Determine which zone should send error */ 6430 zoneid = ipif_lookup_addr_zoneid_v6( 6431 &ip6h->ip6_dst, ill, ipst); 6432 switch (IP6OPT_TYPE(opt_type)) { 6433 case IP6OPT_TYPE_SKIP: 6434 optused = 2 + optptr[1]; 6435 if (optused > optlen) 6436 goto bad_opt; 6437 ip1dbg(("ip_process_options_v6: %s " 6438 "opt 0x%x skipped\n", 6439 errtype, opt_type)); 6440 break; 6441 case IP6OPT_TYPE_DISCARD: 6442 ip1dbg(("ip_process_options_v6: %s " 6443 "opt 0x%x; packet dropped\n", 6444 errtype, opt_type)); 6445 freemsg(first_mp); 6446 return (-1); 6447 case IP6OPT_TYPE_ICMP: 6448 if (zoneid == ALL_ZONES) { 6449 freemsg(first_mp); 6450 return (-1); 6451 } 6452 icmp_param_problem_v6(WR(q), first_mp, 6453 ICMP6_PARAMPROB_OPTION, 6454 (uint32_t)(optptr - 6455 (uint8_t *)ip6h), 6456 B_FALSE, B_FALSE, zoneid, ipst); 6457 return (-1); 6458 case IP6OPT_TYPE_FORCEICMP: 6459 if (zoneid == ALL_ZONES) { 6460 freemsg(first_mp); 6461 return (-1); 6462 } 6463 icmp_param_problem_v6(WR(q), first_mp, 6464 ICMP6_PARAMPROB_OPTION, 6465 
(uint32_t)(optptr - 6466 (uint8_t *)ip6h), 6467 B_FALSE, B_TRUE, zoneid, ipst); 6468 return (-1); 6469 default: 6470 ASSERT(0); 6471 } 6472 } 6473 } 6474 optlen -= optused; 6475 optptr += optused; 6476 } 6477 return (ret); 6478 6479 bad_opt: 6480 /* Determine which zone should send error */ 6481 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6482 if (zoneid == ALL_ZONES) { 6483 freemsg(first_mp); 6484 } else { 6485 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6486 (uint32_t)(optptr - (uint8_t *)ip6h), 6487 B_FALSE, B_FALSE, zoneid, ipst); 6488 } 6489 return (-1); 6490 } 6491 6492 /* 6493 * Process a routing header that is not yet empty. 6494 * Only handles type 0 routing headers. 6495 */ 6496 static void 6497 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6498 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6499 { 6500 ip6_rthdr0_t *rthdr; 6501 uint_t ehdrlen; 6502 uint_t numaddr; 6503 in6_addr_t *addrptr; 6504 in6_addr_t tmp; 6505 ip_stack_t *ipst = ill->ill_ipst; 6506 6507 ASSERT(rth->ip6r_segleft != 0); 6508 6509 if (!ipst->ips_ipv6_forward_src_routed) { 6510 /* XXX Check for source routed out same interface? 
*/ 6511 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6512 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6513 freemsg(hada_mp); 6514 freemsg(mp); 6515 return; 6516 } 6517 6518 if (rth->ip6r_type != 0) { 6519 if (hada_mp != NULL) 6520 goto hada_drop; 6521 /* Sent by forwarding path, and router is global zone */ 6522 icmp_param_problem_v6(WR(q), mp, 6523 ICMP6_PARAMPROB_HEADER, 6524 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6525 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6526 return; 6527 } 6528 rthdr = (ip6_rthdr0_t *)rth; 6529 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6530 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6531 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6532 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6533 if (rthdr->ip6r0_len & 0x1) { 6534 /* An odd length is impossible */ 6535 if (hada_mp != NULL) 6536 goto hada_drop; 6537 /* Sent by forwarding path, and router is global zone */ 6538 icmp_param_problem_v6(WR(q), mp, 6539 ICMP6_PARAMPROB_HEADER, 6540 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6541 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6542 return; 6543 } 6544 numaddr = rthdr->ip6r0_len / 2; 6545 if (rthdr->ip6r0_segleft > numaddr) { 6546 /* segleft exceeds number of addresses in routing header */ 6547 if (hada_mp != NULL) 6548 goto hada_drop; 6549 /* Sent by forwarding path, and router is global zone */ 6550 icmp_param_problem_v6(WR(q), mp, 6551 ICMP6_PARAMPROB_HEADER, 6552 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6553 (uchar_t *)ip6h), 6554 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6555 return; 6556 } 6557 addrptr += (numaddr - rthdr->ip6r0_segleft); 6558 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6559 IN6_IS_ADDR_MULTICAST(addrptr)) { 6560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6561 freemsg(hada_mp); 6562 freemsg(mp); 6563 return; 6564 } 6565 /* Swap */ 6566 tmp = *addrptr; 6567 *addrptr = ip6h->ip6_dst; 6568 ip6h->ip6_dst = tmp; 6569 rthdr->ip6r0_segleft--; 
6570 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6571 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6572 if (hada_mp != NULL) 6573 goto hada_drop; 6574 /* Sent by forwarding path, and router is global zone */ 6575 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6576 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6577 return; 6578 } 6579 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6580 ip6h = (ip6_t *)mp->b_rptr; 6581 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6582 } else { 6583 freemsg(mp); 6584 } 6585 return; 6586 hada_drop: 6587 /* IPsec kstats: bean counter? */ 6588 freemsg(hada_mp); 6589 freemsg(mp); 6590 } 6591 6592 /* 6593 * Read side put procedure for IPv6 module. 6594 */ 6595 void 6596 ip_rput_v6(queue_t *q, mblk_t *mp) 6597 { 6598 mblk_t *first_mp; 6599 mblk_t *hada_mp = NULL; 6600 ip6_t *ip6h; 6601 boolean_t ll_multicast = B_FALSE; 6602 boolean_t mctl_present = B_FALSE; 6603 ill_t *ill; 6604 struct iocblk *iocp; 6605 uint_t flags = 0; 6606 mblk_t *dl_mp; 6607 ip_stack_t *ipst; 6608 int check; 6609 6610 ill = (ill_t *)q->q_ptr; 6611 ipst = ill->ill_ipst; 6612 if (ill->ill_state_flags & ILL_CONDEMNED) { 6613 union DL_primitives *dl; 6614 6615 dl = (union DL_primitives *)mp->b_rptr; 6616 /* 6617 * Things are opening or closing - only accept DLPI 6618 * ack messages. If the stream is closing and ip_wsrv 6619 * has completed, ip_close is out of the qwait, but has 6620 * not yet completed qprocsoff. Don't proceed any further 6621 * because the ill has been cleaned up and things hanging 6622 * off the ill have been freed. 6623 */ 6624 if ((mp->b_datap->db_type != M_PCPROTO) || 6625 (dl->dl_primitive == DL_UNITDATA_IND)) { 6626 inet_freemsg(mp); 6627 return; 6628 } 6629 } 6630 6631 dl_mp = NULL; 6632 switch (mp->b_datap->db_type) { 6633 case M_DATA: { 6634 int hlen; 6635 uchar_t *ucp; 6636 struct ether_header *eh; 6637 dl_unitdata_ind_t *dui; 6638 6639 /* 6640 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6641 * should be removed when that problem is fixed. 6642 */ 6643 if (ill->ill_mactype == DL_ETHER && 6644 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6645 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6646 ucp[-2] == (IP6_DL_SAP >> 8)) { 6647 if (hlen >= sizeof (struct ether_vlan_header) && 6648 ucp[-5] == 0 && ucp[-6] == 0x81) 6649 ucp -= sizeof (struct ether_vlan_header); 6650 else 6651 ucp -= sizeof (struct ether_header); 6652 /* 6653 * If it's a group address, then fabricate a 6654 * DL_UNITDATA_IND message. 6655 */ 6656 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6657 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6658 BPRI_HI)) != NULL) { 6659 eh = (struct ether_header *)ucp; 6660 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6661 DB_TYPE(dl_mp) = M_PROTO; 6662 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6663 dui->dl_primitive = DL_UNITDATA_IND; 6664 dui->dl_dest_addr_length = 8; 6665 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6666 dui->dl_src_addr_length = 8; 6667 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6668 8; 6669 dui->dl_group_address = 1; 6670 ucp = (uchar_t *)(dui + 1); 6671 if (ill->ill_sap_length > 0) 6672 ucp += ill->ill_sap_length; 6673 bcopy(&eh->ether_dhost, ucp, 6); 6674 bcopy(&eh->ether_shost, ucp + 8, 6); 6675 ucp = (uchar_t *)(dui + 1); 6676 if (ill->ill_sap_length < 0) 6677 ucp += 8 + ill->ill_sap_length; 6678 bcopy(&eh->ether_type, ucp, 2); 6679 bcopy(&eh->ether_type, ucp + 8, 2); 6680 } 6681 } 6682 break; 6683 } 6684 6685 case M_PROTO: 6686 case M_PCPROTO: 6687 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6688 DL_UNITDATA_IND) { 6689 /* Go handle anything other than data elsewhere. */ 6690 ip_rput_dlpi(q, mp); 6691 return; 6692 } 6693 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6694 6695 /* Save the DLPI header. 
*/ 6696 dl_mp = mp; 6697 mp = mp->b_cont; 6698 dl_mp->b_cont = NULL; 6699 break; 6700 case M_BREAK: 6701 panic("ip_rput_v6: got an M_BREAK"); 6702 /*NOTREACHED*/ 6703 case M_IOCACK: 6704 iocp = (struct iocblk *)mp->b_rptr; 6705 switch (iocp->ioc_cmd) { 6706 case DL_IOC_HDR_INFO: 6707 ill = (ill_t *)q->q_ptr; 6708 ill_fastpath_ack(ill, mp); 6709 return; 6710 6711 case SIOCGTUNPARAM: 6712 case OSIOCGTUNPARAM: 6713 ip_rput_other(NULL, q, mp, NULL); 6714 return; 6715 6716 case SIOCSTUNPARAM: 6717 case OSIOCSTUNPARAM: 6718 /* Go through qwriter */ 6719 break; 6720 default: 6721 putnext(q, mp); 6722 return; 6723 } 6724 /* FALLTHRU */ 6725 case M_ERROR: 6726 case M_HANGUP: 6727 mutex_enter(&ill->ill_lock); 6728 if (ill->ill_state_flags & ILL_CONDEMNED) { 6729 mutex_exit(&ill->ill_lock); 6730 freemsg(mp); 6731 return; 6732 } 6733 ill_refhold_locked(ill); 6734 mutex_exit(&ill->ill_lock); 6735 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6736 return; 6737 case M_CTL: 6738 if ((MBLKL(mp) > sizeof (int)) && 6739 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6740 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6741 mctl_present = B_TRUE; 6742 break; 6743 } 6744 putnext(q, mp); 6745 return; 6746 case M_IOCNAK: 6747 iocp = (struct iocblk *)mp->b_rptr; 6748 switch (iocp->ioc_cmd) { 6749 case DL_IOC_HDR_INFO: 6750 case SIOCGTUNPARAM: 6751 case OSIOCGTUNPARAM: 6752 ip_rput_other(NULL, q, mp, NULL); 6753 return; 6754 6755 case SIOCSTUNPARAM: 6756 case OSIOCSTUNPARAM: 6757 mutex_enter(&ill->ill_lock); 6758 if (ill->ill_state_flags & ILL_CONDEMNED) { 6759 mutex_exit(&ill->ill_lock); 6760 freemsg(mp); 6761 return; 6762 } 6763 ill_refhold_locked(ill); 6764 mutex_exit(&ill->ill_lock); 6765 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6766 return; 6767 default: 6768 break; 6769 } 6770 /* FALLTHRU */ 6771 default: 6772 putnext(q, mp); 6773 return; 6774 } 6775 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6776 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6777 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6778 /* 6779 * if db_ref > 1 then copymsg and free original. Packet may be 6780 * changed and do not want other entity who has a reference to this 6781 * message to trip over the changes. This is a blind change because 6782 * trying to catch all places that might change packet is too 6783 * difficult (since it may be a module above this one). 6784 */ 6785 if (mp->b_datap->db_ref > 1) { 6786 mblk_t *mp1; 6787 6788 mp1 = copymsg(mp); 6789 freemsg(mp); 6790 if (mp1 == NULL) { 6791 first_mp = NULL; 6792 goto discard; 6793 } 6794 mp = mp1; 6795 } 6796 first_mp = mp; 6797 if (mctl_present) { 6798 hada_mp = first_mp; 6799 mp = first_mp->b_cont; 6800 } 6801 6802 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6803 freemsg(mp); 6804 return; 6805 } 6806 6807 ip6h = (ip6_t *)mp->b_rptr; 6808 6809 /* 6810 * ip:::receive must see ipv6 packets with a full header, 6811 * and so is placed after the IP6_MBLK_HDR_ERR check. 6812 */ 6813 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6814 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6815 int, 0); 6816 6817 if (check != IP6_MBLK_OK) { 6818 freemsg(mp); 6819 return; 6820 } 6821 6822 DTRACE_PROBE4(ip6__physical__in__start, 6823 ill_t *, ill, ill_t *, NULL, 6824 ip6_t *, ip6h, mblk_t *, first_mp); 6825 6826 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6827 ipst->ips_ipv6firewall_physical_in, 6828 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6829 6830 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6831 6832 if (first_mp == NULL) 6833 return; 6834 6835 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6836 IPV6_DEFAULT_VERS_AND_FLOW) { 6837 /* 6838 * It may be a bit too expensive to do this mapped address 6839 * check here, but in the interest of robustness, it seems 6840 * like the correct place. 6841 * TODO: Avoid this check for e.g. 
connected TCP sockets 6842 */ 6843 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6844 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6845 goto discard; 6846 } 6847 6848 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6849 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6850 goto discard; 6851 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6852 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6853 goto discard; 6854 } 6855 6856 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6857 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6858 } else { 6859 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6860 goto discard; 6861 } 6862 freemsg(dl_mp); 6863 return; 6864 6865 discard: 6866 if (dl_mp != NULL) 6867 freeb(dl_mp); 6868 freemsg(first_mp); 6869 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6870 } 6871 6872 /* 6873 * Walk through the IPv6 packet in mp and see if there's an AH header 6874 * in it. See if the AH header needs to get done before other headers in 6875 * the packet. (Worker function for ipsec_early_ah_v6().) 6876 */ 6877 #define IPSEC_HDR_DONT_PROCESS 0 6878 #define IPSEC_HDR_PROCESS 1 6879 #define IPSEC_MEMORY_ERROR 2 6880 static int 6881 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6882 { 6883 uint_t length; 6884 uint_t ehdrlen; 6885 uint8_t *whereptr; 6886 uint8_t *endptr; 6887 uint8_t *nexthdrp; 6888 ip6_dest_t *desthdr; 6889 ip6_rthdr_t *rthdr; 6890 ip6_t *ip6h; 6891 6892 /* 6893 * For now just pullup everything. In general, the less pullups, 6894 * the better, but there's so much squirrelling through anyway, 6895 * it's just easier this way. 
6896 */ 6897 if (!pullupmsg(mp, -1)) { 6898 return (IPSEC_MEMORY_ERROR); 6899 } 6900 6901 ip6h = (ip6_t *)mp->b_rptr; 6902 length = IPV6_HDR_LEN; 6903 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6904 endptr = mp->b_wptr; 6905 6906 /* 6907 * We can't just use the argument nexthdr in the place 6908 * of nexthdrp becaue we don't dereference nexthdrp 6909 * till we confirm whether it is a valid address. 6910 */ 6911 nexthdrp = &ip6h->ip6_nxt; 6912 while (whereptr < endptr) { 6913 /* Is there enough left for len + nexthdr? */ 6914 if (whereptr + MIN_EHDR_LEN > endptr) 6915 return (IPSEC_MEMORY_ERROR); 6916 6917 switch (*nexthdrp) { 6918 case IPPROTO_HOPOPTS: 6919 case IPPROTO_DSTOPTS: 6920 /* Assumes the headers are identical for hbh and dst */ 6921 desthdr = (ip6_dest_t *)whereptr; 6922 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6923 if ((uchar_t *)desthdr + ehdrlen > endptr) 6924 return (IPSEC_MEMORY_ERROR); 6925 /* 6926 * Return DONT_PROCESS because the destination 6927 * options header may be for each hop in a 6928 * routing-header, and we only want AH if we're 6929 * finished with routing headers. 6930 */ 6931 if (*nexthdrp == IPPROTO_DSTOPTS) 6932 return (IPSEC_HDR_DONT_PROCESS); 6933 nexthdrp = &desthdr->ip6d_nxt; 6934 break; 6935 case IPPROTO_ROUTING: 6936 rthdr = (ip6_rthdr_t *)whereptr; 6937 6938 /* 6939 * If there's more hops left on the routing header, 6940 * return now with DON'T PROCESS. 
6941 */ 6942 if (rthdr->ip6r_segleft > 0) 6943 return (IPSEC_HDR_DONT_PROCESS); 6944 6945 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6946 if ((uchar_t *)rthdr + ehdrlen > endptr) 6947 return (IPSEC_MEMORY_ERROR); 6948 nexthdrp = &rthdr->ip6r_nxt; 6949 break; 6950 case IPPROTO_FRAGMENT: 6951 /* Wait for reassembly */ 6952 return (IPSEC_HDR_DONT_PROCESS); 6953 case IPPROTO_AH: 6954 *nexthdr = IPPROTO_AH; 6955 return (IPSEC_HDR_PROCESS); 6956 case IPPROTO_NONE: 6957 /* No next header means we're finished */ 6958 default: 6959 return (IPSEC_HDR_DONT_PROCESS); 6960 } 6961 length += ehdrlen; 6962 whereptr += ehdrlen; 6963 } 6964 panic("ipsec_needs_processing_v6"); 6965 /*NOTREACHED*/ 6966 } 6967 6968 /* 6969 * Path for AH if options are present. If this is the first time we are 6970 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6971 * Otherwise, just fanout. Return value answers the boolean question: 6972 * "Did I consume the mblk you sent me?" 6973 * 6974 * Sometimes AH needs to be done before other IPv6 headers for security 6975 * reasons. This function (and its ipsec_needs_processing_v6() above) 6976 * indicates if that is so, and fans out to the appropriate IPsec protocol 6977 * for the datagram passed in. 6978 */ 6979 static boolean_t 6980 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6981 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 6982 { 6983 mblk_t *mp; 6984 uint8_t nexthdr; 6985 ipsec_in_t *ii = NULL; 6986 ah_t *ah; 6987 ipsec_status_t ipsec_rc; 6988 ip_stack_t *ipst = ill->ill_ipst; 6989 netstack_t *ns = ipst->ips_netstack; 6990 ipsec_stack_t *ipss = ns->netstack_ipsec; 6991 6992 ASSERT((hada_mp == NULL) || (!mctl_present)); 6993 6994 switch (ipsec_needs_processing_v6( 6995 (mctl_present ? 
first_mp->b_cont : first_mp), &nexthdr)) { 6996 case IPSEC_MEMORY_ERROR: 6997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6998 freemsg(hada_mp); 6999 freemsg(first_mp); 7000 return (B_TRUE); 7001 case IPSEC_HDR_DONT_PROCESS: 7002 return (B_FALSE); 7003 } 7004 7005 /* Default means send it to AH! */ 7006 ASSERT(nexthdr == IPPROTO_AH); 7007 if (!mctl_present) { 7008 mp = first_mp; 7009 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7010 if (first_mp == NULL) { 7011 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7012 "allocation failure.\n")); 7013 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7014 freemsg(hada_mp); 7015 freemsg(mp); 7016 return (B_TRUE); 7017 } 7018 /* 7019 * Store the ill_index so that when we come back 7020 * from IPSEC we ride on the same queue. 7021 */ 7022 ii = (ipsec_in_t *)first_mp->b_rptr; 7023 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7024 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7025 first_mp->b_cont = mp; 7026 } 7027 /* 7028 * Cache hardware acceleration info. 
7029 */ 7030 if (hada_mp != NULL) { 7031 ASSERT(ii != NULL); 7032 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7033 "caching data attr.\n")); 7034 ii->ipsec_in_accelerated = B_TRUE; 7035 ii->ipsec_in_da = hada_mp; 7036 } 7037 7038 if (!ipsec_loaded(ipss)) { 7039 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7040 return (B_TRUE); 7041 } 7042 7043 ah = ipsec_inbound_ah_sa(first_mp, ns); 7044 if (ah == NULL) 7045 return (B_TRUE); 7046 ASSERT(ii->ipsec_in_ah_sa != NULL); 7047 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7048 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7049 7050 switch (ipsec_rc) { 7051 case IPSEC_STATUS_SUCCESS: 7052 /* we're done with IPsec processing, send it up */ 7053 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7054 break; 7055 case IPSEC_STATUS_FAILED: 7056 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7057 break; 7058 case IPSEC_STATUS_PENDING: 7059 /* no action needed */ 7060 break; 7061 } 7062 return (B_TRUE); 7063 } 7064 7065 /* 7066 * Validate the IPv6 mblk for alignment. 7067 */ 7068 int 7069 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7070 { 7071 int pkt_len, ip6_len; 7072 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7073 7074 /* check for alignment and full IPv6 header */ 7075 if (!OK_32PTR((uchar_t *)ip6h) || 7076 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7077 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7078 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7079 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7080 return (IP6_MBLK_HDR_ERR); 7081 } 7082 ip6h = (ip6_t *)mp->b_rptr; 7083 } 7084 7085 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7086 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7087 7088 if (mp->b_cont == NULL) 7089 pkt_len = mp->b_wptr - mp->b_rptr; 7090 else 7091 pkt_len = msgdsize(mp); 7092 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7093 7094 /* 7095 * Check for bogus (too short packet) and packet which 7096 * was padded by the link layer. 
7097 */ 7098 if (ip6_len != pkt_len) { 7099 ssize_t diff; 7100 7101 if (ip6_len > pkt_len) { 7102 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7103 ip6_len, pkt_len)); 7104 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7105 return (IP6_MBLK_LEN_ERR); 7106 } 7107 diff = (ssize_t)(pkt_len - ip6_len); 7108 7109 if (!adjmsg(mp, -diff)) { 7110 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7111 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7112 return (IP6_MBLK_LEN_ERR); 7113 } 7114 } 7115 return (IP6_MBLK_OK); 7116 } 7117 7118 /* 7119 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7120 * ip_rput_v6 has already verified alignment, the min length, the version, 7121 * and db_ref = 1. 7122 * 7123 * The ill passed in (the arg named inill) is the ill that the packet 7124 * actually arrived on. We need to remember this when saving the 7125 * input interface index into potential IPV6_PKTINFO data in 7126 * ip_add_info_v6(). 7127 * 7128 * This routine doesn't free dl_mp; that's the caller's responsibility on 7129 * return. (Note that the callers are complex enough that there's no tail 7130 * recursion here anyway.) 
7131 */ 7132 void 7133 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7134 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7135 { 7136 ire_t *ire = NULL; 7137 ill_t *ill = inill; 7138 ill_t *outill; 7139 ipif_t *ipif; 7140 uint8_t *whereptr; 7141 uint8_t nexthdr; 7142 uint16_t remlen; 7143 uint_t prev_nexthdr_offset; 7144 uint_t used; 7145 size_t old_pkt_len; 7146 size_t pkt_len; 7147 uint16_t ip6_len; 7148 uint_t hdr_len; 7149 boolean_t mctl_present; 7150 mblk_t *first_mp; 7151 mblk_t *first_mp1; 7152 boolean_t no_forward; 7153 ip6_hbh_t *hbhhdr; 7154 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7155 conn_t *connp; 7156 ilm_t *ilm; 7157 uint32_t ports; 7158 zoneid_t zoneid = GLOBAL_ZONEID; 7159 uint16_t hck_flags, reass_hck_flags; 7160 uint32_t reass_sum; 7161 boolean_t cksum_err; 7162 mblk_t *mp1; 7163 ip_stack_t *ipst = inill->ill_ipst; 7164 7165 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7166 7167 if (hada_mp != NULL) { 7168 /* 7169 * It's an IPsec accelerated packet. 7170 * Keep a pointer to the data attributes around until 7171 * we allocate the ipsecinfo structure. 7172 */ 7173 IPSECHW_DEBUG(IPSECHW_PKT, 7174 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7175 hada_mp->b_cont = NULL; 7176 /* 7177 * Since it is accelerated, it came directly from 7178 * the ill. 
7179 */ 7180 ASSERT(mctl_present == B_FALSE); 7181 ASSERT(mp->b_datap->db_type != M_CTL); 7182 } 7183 7184 ip6h = (ip6_t *)mp->b_rptr; 7185 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7186 old_pkt_len = pkt_len = ip6_len; 7187 7188 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7189 hck_flags = DB_CKSUMFLAGS(mp); 7190 else 7191 hck_flags = 0; 7192 7193 /* Clear checksum flags in case we need to forward */ 7194 DB_CKSUMFLAGS(mp) = 0; 7195 reass_sum = reass_hck_flags = 0; 7196 7197 nexthdr = ip6h->ip6_nxt; 7198 7199 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7200 (uchar_t *)ip6h); 7201 whereptr = (uint8_t *)&ip6h[1]; 7202 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7203 7204 /* Process hop by hop header options */ 7205 if (nexthdr == IPPROTO_HOPOPTS) { 7206 uint_t ehdrlen; 7207 uint8_t *optptr; 7208 7209 if (remlen < MIN_EHDR_LEN) 7210 goto pkt_too_short; 7211 if (mp->b_cont != NULL && 7212 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7213 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7214 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7215 freemsg(hada_mp); 7216 freemsg(first_mp); 7217 return; 7218 } 7219 ip6h = (ip6_t *)mp->b_rptr; 7220 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7221 } 7222 hbhhdr = (ip6_hbh_t *)whereptr; 7223 nexthdr = hbhhdr->ip6h_nxt; 7224 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7225 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7226 7227 if (remlen < ehdrlen) 7228 goto pkt_too_short; 7229 if (mp->b_cont != NULL && 7230 whereptr + ehdrlen > mp->b_wptr) { 7231 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7232 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7233 freemsg(hada_mp); 7234 freemsg(first_mp); 7235 return; 7236 } 7237 ip6h = (ip6_t *)mp->b_rptr; 7238 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7239 hbhhdr = (ip6_hbh_t *)whereptr; 7240 } 7241 7242 optptr = whereptr + 2; 7243 whereptr += ehdrlen; 7244 remlen -= ehdrlen; 7245 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7246 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7247 case -1: 7248 /* 7249 * Packet has been consumed and any 7250 * needed ICMP messages sent. 7251 */ 7252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7253 freemsg(hada_mp); 7254 return; 7255 case 0: 7256 /* no action needed */ 7257 break; 7258 case 1: 7259 /* Known router alert */ 7260 goto ipv6forus; 7261 } 7262 } 7263 7264 /* 7265 * Attach any necessary label information to this packet. 7266 */ 7267 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7268 if (ip6opt_ls != 0) 7269 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7270 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7271 freemsg(hada_mp); 7272 freemsg(first_mp); 7273 return; 7274 } 7275 7276 /* 7277 * On incoming v6 multicast packets we will bypass the ire table, 7278 * and assume that the read queue corresponds to the targetted 7279 * interface. 7280 * 7281 * The effect of this is the same as the IPv4 original code, but is 7282 * much cleaner I think. See ip_rput for how that was done. 7283 */ 7284 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7285 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7286 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7287 /* 7288 * XXX TODO Give to mrouted to for multicast forwarding. 
7289 */ 7290 ILM_WALKER_HOLD(ill); 7291 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7292 ILM_WALKER_RELE(ill); 7293 if (ilm == NULL) { 7294 if (ip_debug > 3) { 7295 /* ip2dbg */ 7296 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7297 " which is not for us: %s\n", AF_INET6, 7298 &ip6h->ip6_dst); 7299 } 7300 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7301 freemsg(hada_mp); 7302 freemsg(first_mp); 7303 return; 7304 } 7305 if (ip_debug > 3) { 7306 /* ip2dbg */ 7307 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7308 AF_INET6, &ip6h->ip6_dst); 7309 } 7310 zoneid = GLOBAL_ZONEID; 7311 goto ipv6forus; 7312 } 7313 7314 ipif = ill->ill_ipif; 7315 7316 /* 7317 * If a packet was received on an interface that is a 6to4 tunnel, 7318 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7319 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7320 * the 6to4 prefix of the address configured on the receiving interface. 7321 * Otherwise, the packet was delivered to this interface in error and 7322 * the packet must be dropped. 7323 */ 7324 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7325 7326 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7327 &ip6h->ip6_dst)) { 7328 if (ip_debug > 2) { 7329 /* ip1dbg */ 7330 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7331 "addressed packet which is not for us: " 7332 "%s\n", AF_INET6, &ip6h->ip6_dst); 7333 } 7334 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7335 freemsg(first_mp); 7336 return; 7337 } 7338 } 7339 7340 /* 7341 * Find an ire that matches destination. For link-local addresses 7342 * we have to match the ill. 7343 * TBD for site local addresses. 
7344 */ 7345 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7346 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7347 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7348 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7349 } else { 7350 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7351 MBLK_GETLABEL(mp), ipst); 7352 7353 if (ire != NULL && ire->ire_stq != NULL && 7354 ire->ire_zoneid != GLOBAL_ZONEID && 7355 ire->ire_zoneid != ALL_ZONES) { 7356 /* 7357 * Should only use IREs that are visible from the 7358 * global zone for forwarding. 7359 */ 7360 ire_refrele(ire); 7361 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7362 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7363 } 7364 } 7365 7366 if (ire == NULL) { 7367 /* 7368 * No matching IRE found. Mark this packet as having 7369 * originated externally. 7370 */ 7371 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7372 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7373 if (!(ill->ill_flags & ILLF_ROUTER)) { 7374 BUMP_MIB(ill->ill_ip_mib, 7375 ipIfStatsInAddrErrors); 7376 } 7377 freemsg(hada_mp); 7378 freemsg(first_mp); 7379 return; 7380 } 7381 if (ip6h->ip6_hops <= 1) { 7382 if (hada_mp != NULL) 7383 goto hada_drop; 7384 /* Sent by forwarding path, and router is global zone */ 7385 icmp_time_exceeded_v6(WR(q), first_mp, 7386 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7387 GLOBAL_ZONEID, ipst); 7388 return; 7389 } 7390 /* 7391 * Per RFC 3513 section 2.5.2, we must not forward packets with 7392 * an unspecified source address. 7393 */ 7394 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7395 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7396 freemsg(hada_mp); 7397 freemsg(first_mp); 7398 return; 7399 } 7400 mp->b_prev = (mblk_t *)(uintptr_t) 7401 ill->ill_phyint->phyint_ifindex; 7402 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7403 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7404 GLOBAL_ZONEID, ipst); 7405 return; 7406 } 7407 /* we have a matching IRE */ 7408 if (ire->ire_stq != NULL) { 7409 ill_group_t *ill_group; 7410 ill_group_t *ire_group; 7411 7412 /* 7413 * To be quicker, we may wish not to chase pointers 7414 * (ire->ire_ipif->ipif_ill...) and instead store the 7415 * forwarding policy in the ire. An unfortunate side- 7416 * effect of this would be requiring an ire flush whenever 7417 * the ILLF_ROUTER flag changes. For now, chase pointers 7418 * once and store in the boolean no_forward. 7419 * 7420 * This appears twice to keep it out of the non-forwarding, 7421 * yes-it's-for-us-on-the-right-interface case. 7422 */ 7423 no_forward = ((ill->ill_flags & 7424 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7425 7426 7427 ASSERT(first_mp == mp); 7428 /* 7429 * This ire has a send-to queue - forward the packet. 7430 */ 7431 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7432 freemsg(hada_mp); 7433 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7434 if (no_forward) { 7435 BUMP_MIB(ill->ill_ip_mib, 7436 ipIfStatsInAddrErrors); 7437 } 7438 freemsg(mp); 7439 ire_refrele(ire); 7440 return; 7441 } 7442 /* 7443 * ipIfStatsHCInForwDatagrams should only be increment if there 7444 * will be an attempt to forward the packet, which is why we 7445 * increment after the above condition has been checked. 7446 */ 7447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7448 if (ip6h->ip6_hops <= 1) { 7449 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7450 /* Sent by forwarding path, and router is global zone */ 7451 icmp_time_exceeded_v6(WR(q), mp, 7452 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7453 GLOBAL_ZONEID, ipst); 7454 ire_refrele(ire); 7455 return; 7456 } 7457 /* 7458 * Per RFC 3513 section 2.5.2, we must not forward packets with 7459 * an unspecified source address. 
7460 */ 7461 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7462 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7463 freemsg(mp); 7464 ire_refrele(ire); 7465 return; 7466 } 7467 7468 if (is_system_labeled()) { 7469 mblk_t *mp1; 7470 7471 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7472 BUMP_MIB(ill->ill_ip_mib, 7473 ipIfStatsForwProhibits); 7474 freemsg(mp); 7475 ire_refrele(ire); 7476 return; 7477 } 7478 /* Size may have changed */ 7479 mp = mp1; 7480 ip6h = (ip6_t *)mp->b_rptr; 7481 pkt_len = msgdsize(mp); 7482 } 7483 7484 if (pkt_len > ire->ire_max_frag) { 7485 int max_frag = ire->ire_max_frag; 7486 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7487 /* 7488 * Handle labeled packet resizing. 7489 */ 7490 if (is_system_labeled()) { 7491 max_frag = tsol_pmtu_adjust(mp, max_frag, 7492 pkt_len - old_pkt_len, AF_INET6); 7493 } 7494 7495 /* Sent by forwarding path, and router is global zone */ 7496 icmp_pkt2big_v6(WR(q), mp, max_frag, 7497 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7498 ire_refrele(ire); 7499 return; 7500 } 7501 7502 /* 7503 * Check to see if we're forwarding the packet to a 7504 * different link from which it came. If so, check the 7505 * source and destination addresses since routers must not 7506 * forward any packets with link-local source or 7507 * destination addresses to other links. Otherwise (if 7508 * we're forwarding onto the same link), conditionally send 7509 * a redirect message. 7510 */ 7511 ill_group = ill->ill_group; 7512 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7513 if (ire->ire_rfq != q && (ill_group == NULL || 7514 ill_group != ire_group)) { 7515 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7516 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7517 BUMP_MIB(ill->ill_ip_mib, 7518 ipIfStatsInAddrErrors); 7519 freemsg(mp); 7520 ire_refrele(ire); 7521 return; 7522 } 7523 /* TBD add site-local check at site boundary? 
*/ 7524 } else if (ipst->ips_ipv6_send_redirects) { 7525 in6_addr_t *v6targ; 7526 in6_addr_t gw_addr_v6; 7527 ire_t *src_ire_v6 = NULL; 7528 7529 /* 7530 * Don't send a redirect when forwarding a source 7531 * routed packet. 7532 */ 7533 if (ip_source_routed_v6(ip6h, mp, ipst)) 7534 goto forward; 7535 7536 mutex_enter(&ire->ire_lock); 7537 gw_addr_v6 = ire->ire_gateway_addr_v6; 7538 mutex_exit(&ire->ire_lock); 7539 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7540 v6targ = &gw_addr_v6; 7541 /* 7542 * We won't send redirects to a router 7543 * that doesn't have a link local 7544 * address, but will forward. 7545 */ 7546 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7547 BUMP_MIB(ill->ill_ip_mib, 7548 ipIfStatsInAddrErrors); 7549 goto forward; 7550 } 7551 } else { 7552 v6targ = &ip6h->ip6_dst; 7553 } 7554 7555 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7556 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7557 GLOBAL_ZONEID, 0, NULL, 7558 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7559 ipst); 7560 7561 if (src_ire_v6 != NULL) { 7562 /* 7563 * The source is directly connected. 
7564 */ 7565 mp1 = copymsg(mp); 7566 if (mp1 != NULL) { 7567 icmp_send_redirect_v6(WR(q), 7568 mp1, v6targ, &ip6h->ip6_dst, 7569 ill, B_FALSE); 7570 } 7571 ire_refrele(src_ire_v6); 7572 } 7573 } 7574 7575 forward: 7576 /* Hoplimit verified above */ 7577 ip6h->ip6_hops--; 7578 7579 outill = ire->ire_ipif->ipif_ill; 7580 7581 DTRACE_PROBE4(ip6__forwarding__start, 7582 ill_t *, inill, ill_t *, outill, 7583 ip6_t *, ip6h, mblk_t *, mp); 7584 7585 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7586 ipst->ips_ipv6firewall_forwarding, 7587 inill, outill, ip6h, mp, mp, 0, ipst); 7588 7589 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7590 7591 if (mp != NULL) { 7592 UPDATE_IB_PKT_COUNT(ire); 7593 ire->ire_last_used_time = lbolt; 7594 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7595 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7596 } 7597 IRE_REFRELE(ire); 7598 return; 7599 } 7600 7601 /* 7602 * Need to put on correct queue for reassembly to find it. 7603 * No need to use put() since reassembly has its own locks. 7604 * Note: multicast packets and packets destined to addresses 7605 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7606 * the arriving ill. Unlike the IPv4 case, enabling strict 7607 * destination multihoming will prevent accepting packets 7608 * addressed to an IRE_LOCAL on lo0. 7609 */ 7610 if (ire->ire_rfq != q) { 7611 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7612 == NULL) { 7613 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7614 freemsg(hada_mp); 7615 freemsg(first_mp); 7616 return; 7617 } 7618 if (ire->ire_rfq != NULL) { 7619 q = ire->ire_rfq; 7620 ill = (ill_t *)q->q_ptr; 7621 ASSERT(ill != NULL); 7622 } 7623 } 7624 7625 zoneid = ire->ire_zoneid; 7626 UPDATE_IB_PKT_COUNT(ire); 7627 ire->ire_last_used_time = lbolt; 7628 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7629 ire_refrele(ire); 7630 ire = NULL; 7631 ipv6forus: 7632 /* 7633 * Looks like this packet is for us one way or another. 7634 * This is where we'll process destination headers etc. 7635 */ 7636 for (; ; ) { 7637 switch (nexthdr) { 7638 case IPPROTO_TCP: { 7639 uint16_t *up; 7640 uint32_t sum; 7641 int offset; 7642 7643 hdr_len = pkt_len - remlen; 7644 7645 if (hada_mp != NULL) { 7646 ip0dbg(("tcp hada drop\n")); 7647 goto hada_drop; 7648 } 7649 7650 7651 /* TCP needs all of the TCP header */ 7652 if (remlen < TCP_MIN_HEADER_LENGTH) 7653 goto pkt_too_short; 7654 if (mp->b_cont != NULL && 7655 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7656 if (!pullupmsg(mp, 7657 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7658 BUMP_MIB(ill->ill_ip_mib, 7659 ipIfStatsInDiscards); 7660 freemsg(first_mp); 7661 return; 7662 } 7663 hck_flags = 0; 7664 ip6h = (ip6_t *)mp->b_rptr; 7665 whereptr = (uint8_t *)ip6h + hdr_len; 7666 } 7667 /* 7668 * Extract the offset field from the TCP header. 7669 */ 7670 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7671 if (offset != 5) { 7672 if (offset < 5) { 7673 ip1dbg(("ip_rput_data_v6: short " 7674 "TCP data offset")); 7675 BUMP_MIB(ill->ill_ip_mib, 7676 ipIfStatsInDiscards); 7677 freemsg(first_mp); 7678 return; 7679 } 7680 /* 7681 * There must be TCP options. 7682 * Make sure we can grab them. 7683 */ 7684 offset <<= 2; 7685 if (remlen < offset) 7686 goto pkt_too_short; 7687 if (mp->b_cont != NULL && 7688 whereptr + offset > mp->b_wptr) { 7689 if (!pullupmsg(mp, 7690 hdr_len + offset)) { 7691 BUMP_MIB(ill->ill_ip_mib, 7692 ipIfStatsInDiscards); 7693 freemsg(first_mp); 7694 return; 7695 } 7696 hck_flags = 0; 7697 ip6h = (ip6_t *)mp->b_rptr; 7698 whereptr = (uint8_t *)ip6h + hdr_len; 7699 } 7700 } 7701 7702 up = (uint16_t *)&ip6h->ip6_src; 7703 /* 7704 * TCP checksum calculation. 
First sum up the 7705 * pseudo-header fields: 7706 * - Source IPv6 address 7707 * - Destination IPv6 address 7708 * - TCP payload length 7709 * - TCP protocol ID 7710 */ 7711 sum = htons(IPPROTO_TCP + remlen) + 7712 up[0] + up[1] + up[2] + up[3] + 7713 up[4] + up[5] + up[6] + up[7] + 7714 up[8] + up[9] + up[10] + up[11] + 7715 up[12] + up[13] + up[14] + up[15]; 7716 7717 /* Fold initial sum */ 7718 sum = (sum & 0xffff) + (sum >> 16); 7719 7720 mp1 = mp->b_cont; 7721 7722 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7723 IP6_STAT(ipst, ip6_in_sw_cksum); 7724 7725 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7726 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7727 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7728 mp, mp1, cksum_err); 7729 7730 if (cksum_err) { 7731 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7732 7733 if (hck_flags & HCK_FULLCKSUM) { 7734 IP6_STAT(ipst, 7735 ip6_tcp_in_full_hw_cksum_err); 7736 } else if (hck_flags & HCK_PARTIALCKSUM) { 7737 IP6_STAT(ipst, 7738 ip6_tcp_in_part_hw_cksum_err); 7739 } else { 7740 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7741 } 7742 freemsg(first_mp); 7743 return; 7744 } 7745 tcp_fanout: 7746 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7747 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7748 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7749 return; 7750 } 7751 case IPPROTO_SCTP: 7752 { 7753 sctp_hdr_t *sctph; 7754 uint32_t calcsum, pktsum; 7755 uint_t hdr_len = pkt_len - remlen; 7756 sctp_stack_t *sctps; 7757 7758 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7759 7760 /* SCTP needs all of the SCTP header */ 7761 if (remlen < sizeof (*sctph)) { 7762 goto pkt_too_short; 7763 } 7764 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7765 ASSERT(mp->b_cont != NULL); 7766 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7767 BUMP_MIB(ill->ill_ip_mib, 7768 ipIfStatsInDiscards); 7769 freemsg(mp); 7770 return; 7771 } 7772 ip6h = (ip6_t *)mp->b_rptr; 7773 whereptr = (uint8_t *)ip6h + hdr_len; 7774 } 7775 7776 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7777 /* checksum */ 7778 pktsum = sctph->sh_chksum; 7779 sctph->sh_chksum = 0; 7780 calcsum = sctp_cksum(mp, hdr_len); 7781 if (calcsum != pktsum) { 7782 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7783 freemsg(mp); 7784 return; 7785 } 7786 sctph->sh_chksum = pktsum; 7787 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7788 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7789 ports, zoneid, mp, sctps)) == NULL) { 7790 ip_fanout_sctp_raw(first_mp, ill, 7791 (ipha_t *)ip6h, B_FALSE, ports, 7792 mctl_present, 7793 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7794 B_TRUE, zoneid); 7795 return; 7796 } 7797 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7798 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7799 B_FALSE, mctl_present); 7800 return; 7801 } 7802 case IPPROTO_UDP: { 7803 uint16_t *up; 7804 uint32_t sum; 7805 7806 hdr_len = pkt_len - remlen; 7807 7808 if (hada_mp != NULL) { 7809 ip0dbg(("udp hada drop\n")); 7810 goto hada_drop; 7811 } 7812 7813 /* Verify that at least the ports are present */ 7814 if (remlen < UDPH_SIZE) 7815 goto pkt_too_short; 7816 if (mp->b_cont != NULL && 7817 whereptr + UDPH_SIZE > mp->b_wptr) { 7818 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7819 BUMP_MIB(ill->ill_ip_mib, 7820 ipIfStatsInDiscards); 7821 freemsg(first_mp); 7822 return; 7823 } 7824 hck_flags = 0; 7825 ip6h = (ip6_t *)mp->b_rptr; 7826 whereptr = (uint8_t *)ip6h + hdr_len; 7827 } 7828 7829 /* 7830 * Before going through the regular checksum 7831 * calculation, make sure the received checksum 7832 * is non-zero. RFC 2460 says, a 0x0000 checksum 7833 * in a UDP packet (within IPv6 packet) is invalid 7834 * and should be replaced by 0xffff. This makes 7835 * sense as regular checksum calculation will 7836 * pass for both the cases i.e. 0x0000 and 0xffff. 7837 * Removing one of the case makes error detection 7838 * stronger. 
7839 */ 7840 7841 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7842 /* 0x0000 checksum is invalid */ 7843 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7844 "checksum value 0x0000\n")); 7845 BUMP_MIB(ill->ill_ip_mib, 7846 udpIfStatsInCksumErrs); 7847 freemsg(first_mp); 7848 return; 7849 } 7850 7851 up = (uint16_t *)&ip6h->ip6_src; 7852 7853 /* 7854 * UDP checksum calculation. First sum up the 7855 * pseudo-header fields: 7856 * - Source IPv6 address 7857 * - Destination IPv6 address 7858 * - UDP payload length 7859 * - UDP protocol ID 7860 */ 7861 7862 sum = htons(IPPROTO_UDP + remlen) + 7863 up[0] + up[1] + up[2] + up[3] + 7864 up[4] + up[5] + up[6] + up[7] + 7865 up[8] + up[9] + up[10] + up[11] + 7866 up[12] + up[13] + up[14] + up[15]; 7867 7868 /* Fold initial sum */ 7869 sum = (sum & 0xffff) + (sum >> 16); 7870 7871 if (reass_hck_flags != 0) { 7872 hck_flags = reass_hck_flags; 7873 7874 IP_CKSUM_RECV_REASS(hck_flags, 7875 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7876 sum, reass_sum, cksum_err); 7877 } else { 7878 mp1 = mp->b_cont; 7879 7880 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7881 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7882 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7883 mp, mp1, cksum_err); 7884 } 7885 7886 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7887 IP6_STAT(ipst, ip6_in_sw_cksum); 7888 7889 if (cksum_err) { 7890 BUMP_MIB(ill->ill_ip_mib, 7891 udpIfStatsInCksumErrs); 7892 7893 if (hck_flags & HCK_FULLCKSUM) 7894 IP6_STAT(ipst, 7895 ip6_udp_in_full_hw_cksum_err); 7896 else if (hck_flags & HCK_PARTIALCKSUM) 7897 IP6_STAT(ipst, 7898 ip6_udp_in_part_hw_cksum_err); 7899 else 7900 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7901 7902 freemsg(first_mp); 7903 return; 7904 } 7905 goto udp_fanout; 7906 } 7907 case IPPROTO_ICMPV6: { 7908 uint16_t *up; 7909 uint32_t sum; 7910 uint_t hdr_len = pkt_len - remlen; 7911 7912 if (hada_mp != NULL) { 7913 ip0dbg(("icmp hada drop\n")); 7914 goto hada_drop; 7915 } 7916 7917 up = (uint16_t 
*)&ip6h->ip6_src; 7918 sum = htons(IPPROTO_ICMPV6 + remlen) + 7919 up[0] + up[1] + up[2] + up[3] + 7920 up[4] + up[5] + up[6] + up[7] + 7921 up[8] + up[9] + up[10] + up[11] + 7922 up[12] + up[13] + up[14] + up[15]; 7923 sum = (sum & 0xffff) + (sum >> 16); 7924 sum = IP_CSUM(mp, hdr_len, sum); 7925 if (sum != 0) { 7926 /* IPv6 ICMP checksum failed */ 7927 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7928 "failed %x\n", 7929 sum)); 7930 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7931 BUMP_MIB(ill->ill_icmp6_mib, 7932 ipv6IfIcmpInErrors); 7933 freemsg(first_mp); 7934 return; 7935 } 7936 7937 icmp_fanout: 7938 /* Check variable for testing applications */ 7939 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7940 freemsg(first_mp); 7941 return; 7942 } 7943 /* 7944 * Assume that there is always at least one conn for 7945 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7946 * where there is no conn. 7947 */ 7948 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7949 ASSERT(!IS_LOOPBACK((ill))); 7950 /* 7951 * In the multicast case, applications may have 7952 * joined the group from different zones, so we 7953 * need to deliver the packet to each of them. 7954 * Loop through the multicast memberships 7955 * structures (ilm) on the receive ill and send 7956 * a copy of the packet up each matching one. 
7957 */ 7958 ILM_WALKER_HOLD(ill); 7959 for (ilm = ill->ill_ilm; ilm != NULL; 7960 ilm = ilm->ilm_next) { 7961 if (ilm->ilm_flags & ILM_DELETED) 7962 continue; 7963 if (!IN6_ARE_ADDR_EQUAL( 7964 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7965 continue; 7966 if (!ipif_lookup_zoneid(ill, 7967 ilm->ilm_zoneid, IPIF_UP, NULL)) 7968 continue; 7969 7970 first_mp1 = ip_copymsg(first_mp); 7971 if (first_mp1 == NULL) 7972 continue; 7973 icmp_inbound_v6(q, first_mp1, ill, 7974 hdr_len, mctl_present, 0, 7975 ilm->ilm_zoneid, dl_mp); 7976 } 7977 ILM_WALKER_RELE(ill); 7978 } else { 7979 first_mp1 = ip_copymsg(first_mp); 7980 if (first_mp1 != NULL) 7981 icmp_inbound_v6(q, first_mp1, ill, 7982 hdr_len, mctl_present, 0, zoneid, 7983 dl_mp); 7984 } 7985 } 7986 /* FALLTHRU */ 7987 default: { 7988 /* 7989 * Handle protocols with which IPv6 is less intimate. 7990 */ 7991 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7992 7993 if (hada_mp != NULL) { 7994 ip0dbg(("default hada drop\n")); 7995 goto hada_drop; 7996 } 7997 7998 /* 7999 * Enable sending ICMP for "Unknown" nexthdr 8000 * case. i.e. where we did not FALLTHRU from 8001 * IPPROTO_ICMPV6 processing case above. 8002 * If we did FALLTHRU, then the packet has already been 8003 * processed for IPPF, don't process it again in 8004 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8005 * flags 8006 */ 8007 if (nexthdr != IPPROTO_ICMPV6) 8008 proto_flags |= IP_FF_SEND_ICMP; 8009 else 8010 proto_flags |= IP6_NO_IPPOLICY; 8011 8012 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8013 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8014 mctl_present, zoneid); 8015 return; 8016 } 8017 8018 case IPPROTO_DSTOPTS: { 8019 uint_t ehdrlen; 8020 uint8_t *optptr; 8021 ip6_dest_t *desthdr; 8022 8023 /* Check if AH is present. 
*/ 8024 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8025 hada_mp, zoneid)) { 8026 ip0dbg(("dst early hada drop\n")); 8027 return; 8028 } 8029 8030 /* 8031 * Reinitialize pointers, as ipsec_early_ah_v6() does 8032 * complete pullups. We don't have to do more pullups 8033 * as a result. 8034 */ 8035 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8036 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8037 ip6h = (ip6_t *)mp->b_rptr; 8038 8039 if (remlen < MIN_EHDR_LEN) 8040 goto pkt_too_short; 8041 8042 desthdr = (ip6_dest_t *)whereptr; 8043 nexthdr = desthdr->ip6d_nxt; 8044 prev_nexthdr_offset = (uint_t)(whereptr - 8045 (uint8_t *)ip6h); 8046 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8047 if (remlen < ehdrlen) 8048 goto pkt_too_short; 8049 optptr = whereptr + 2; 8050 /* 8051 * Note: XXX This code does not seem to make 8052 * distinction between Destination Options Header 8053 * being before/after Routing Header which can 8054 * happen if we are at the end of source route. 8055 * This may become significant in future. 8056 * (No real significant Destination Options are 8057 * defined/implemented yet ). 8058 */ 8059 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8060 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8061 case -1: 8062 /* 8063 * Packet has been consumed and any needed 8064 * ICMP errors sent. 
8065 */ 8066 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8067 freemsg(hada_mp); 8068 return; 8069 case 0: 8070 /* No action needed continue */ 8071 break; 8072 case 1: 8073 /* 8074 * Unnexpected return value 8075 * (Router alert is a Hop-by-Hop option) 8076 */ 8077 #ifdef DEBUG 8078 panic("ip_rput_data_v6: router " 8079 "alert hbh opt indication in dest opt"); 8080 /*NOTREACHED*/ 8081 #else 8082 freemsg(hada_mp); 8083 freemsg(first_mp); 8084 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8085 return; 8086 #endif 8087 } 8088 used = ehdrlen; 8089 break; 8090 } 8091 case IPPROTO_FRAGMENT: { 8092 ip6_frag_t *fraghdr; 8093 size_t no_frag_hdr_len; 8094 8095 if (hada_mp != NULL) { 8096 ip0dbg(("frag hada drop\n")); 8097 goto hada_drop; 8098 } 8099 8100 ASSERT(first_mp == mp); 8101 if (remlen < sizeof (ip6_frag_t)) 8102 goto pkt_too_short; 8103 8104 if (mp->b_cont != NULL && 8105 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8106 if (!pullupmsg(mp, 8107 pkt_len - remlen + sizeof (ip6_frag_t))) { 8108 BUMP_MIB(ill->ill_ip_mib, 8109 ipIfStatsInDiscards); 8110 freemsg(mp); 8111 return; 8112 } 8113 hck_flags = 0; 8114 ip6h = (ip6_t *)mp->b_rptr; 8115 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8116 } 8117 8118 fraghdr = (ip6_frag_t *)whereptr; 8119 used = (uint_t)sizeof (ip6_frag_t); 8120 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8121 8122 /* 8123 * Invoke the CGTP (multirouting) filtering module to 8124 * process the incoming packet. Packets identified as 8125 * duplicates must be discarded. Filtering is active 8126 * only if the the ip_cgtp_filter ndd variable is 8127 * non-zero. 
8128 */ 8129 if (ipst->ips_ip_cgtp_filter && 8130 ipst->ips_ip_cgtp_filter_ops != NULL) { 8131 int cgtp_flt_pkt; 8132 netstackid_t stackid; 8133 8134 stackid = ipst->ips_netstack->netstack_stackid; 8135 8136 cgtp_flt_pkt = 8137 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8138 stackid, inill->ill_phyint->phyint_ifindex, 8139 ip6h, fraghdr); 8140 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8141 freemsg(mp); 8142 return; 8143 } 8144 } 8145 8146 /* Restore the flags */ 8147 DB_CKSUMFLAGS(mp) = hck_flags; 8148 8149 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8150 remlen - used, &prev_nexthdr_offset, 8151 &reass_sum, &reass_hck_flags); 8152 if (mp == NULL) { 8153 /* Reassembly is still pending */ 8154 return; 8155 } 8156 /* The first mblk are the headers before the frag hdr */ 8157 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8158 8159 first_mp = mp; /* mp has most likely changed! */ 8160 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8161 ip6h = (ip6_t *)mp->b_rptr; 8162 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8163 whereptr = mp->b_rptr + no_frag_hdr_len; 8164 remlen = ntohs(ip6h->ip6_plen) + 8165 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8166 pkt_len = msgdsize(mp); 8167 used = 0; 8168 break; 8169 } 8170 case IPPROTO_HOPOPTS: { 8171 if (hada_mp != NULL) { 8172 ip0dbg(("hop hada drop\n")); 8173 goto hada_drop; 8174 } 8175 /* 8176 * Illegal header sequence. 8177 * (Hop-by-hop headers are processed above 8178 * and required to immediately follow IPv6 header) 8179 */ 8180 icmp_param_problem_v6(WR(q), first_mp, 8181 ICMP6_PARAMPROB_NEXTHEADER, 8182 prev_nexthdr_offset, 8183 B_FALSE, B_FALSE, zoneid, ipst); 8184 return; 8185 } 8186 case IPPROTO_ROUTING: { 8187 uint_t ehdrlen; 8188 ip6_rthdr_t *rthdr; 8189 8190 /* Check if AH is present. 
*/ 8191 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8192 hada_mp, zoneid)) { 8193 ip0dbg(("routing hada drop\n")); 8194 return; 8195 } 8196 8197 /* 8198 * Reinitialize pointers, as ipsec_early_ah_v6() does 8199 * complete pullups. We don't have to do more pullups 8200 * as a result. 8201 */ 8202 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8203 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8204 ip6h = (ip6_t *)mp->b_rptr; 8205 8206 if (remlen < MIN_EHDR_LEN) 8207 goto pkt_too_short; 8208 rthdr = (ip6_rthdr_t *)whereptr; 8209 nexthdr = rthdr->ip6r_nxt; 8210 prev_nexthdr_offset = (uint_t)(whereptr - 8211 (uint8_t *)ip6h); 8212 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8213 if (remlen < ehdrlen) 8214 goto pkt_too_short; 8215 if (rthdr->ip6r_segleft != 0) { 8216 /* Not end of source route */ 8217 if (ll_multicast) { 8218 BUMP_MIB(ill->ill_ip_mib, 8219 ipIfStatsForwProhibits); 8220 freemsg(hada_mp); 8221 freemsg(mp); 8222 return; 8223 } 8224 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8225 flags, hada_mp, dl_mp); 8226 return; 8227 } 8228 used = ehdrlen; 8229 break; 8230 } 8231 case IPPROTO_AH: 8232 case IPPROTO_ESP: { 8233 /* 8234 * Fast path for AH/ESP. If this is the first time 8235 * we are sending a datagram to AH/ESP, allocate 8236 * a IPSEC_IN message and prepend it. Otherwise, 8237 * just fanout. 8238 */ 8239 8240 ipsec_in_t *ii; 8241 int ipsec_rc; 8242 ipsec_stack_t *ipss; 8243 8244 ipss = ipst->ips_netstack->netstack_ipsec; 8245 if (!mctl_present) { 8246 ASSERT(first_mp == mp); 8247 first_mp = ipsec_in_alloc(B_FALSE, 8248 ipst->ips_netstack); 8249 if (first_mp == NULL) { 8250 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8251 "allocation failure.\n")); 8252 BUMP_MIB(ill->ill_ip_mib, 8253 ipIfStatsInDiscards); 8254 freemsg(mp); 8255 return; 8256 } 8257 /* 8258 * Store the ill_index so that when we come back 8259 * from IPSEC we ride on the same queue. 
8260 */ 8261 ii = (ipsec_in_t *)first_mp->b_rptr; 8262 ii->ipsec_in_ill_index = 8263 ill->ill_phyint->phyint_ifindex; 8264 ii->ipsec_in_rill_index = 8265 ii->ipsec_in_ill_index; 8266 first_mp->b_cont = mp; 8267 /* 8268 * Cache hardware acceleration info. 8269 */ 8270 if (hada_mp != NULL) { 8271 IPSECHW_DEBUG(IPSECHW_PKT, 8272 ("ip_rput_data_v6: " 8273 "caching data attr.\n")); 8274 ii->ipsec_in_accelerated = B_TRUE; 8275 ii->ipsec_in_da = hada_mp; 8276 hada_mp = NULL; 8277 } 8278 } else { 8279 ii = (ipsec_in_t *)first_mp->b_rptr; 8280 } 8281 8282 if (!ipsec_loaded(ipss)) { 8283 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8284 zoneid, ipst); 8285 return; 8286 } 8287 8288 /* select inbound SA and have IPsec process the pkt */ 8289 if (nexthdr == IPPROTO_ESP) { 8290 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8291 ipst->ips_netstack); 8292 if (esph == NULL) 8293 return; 8294 ASSERT(ii->ipsec_in_esp_sa != NULL); 8295 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8296 NULL); 8297 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8298 first_mp, esph); 8299 } else { 8300 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8301 ipst->ips_netstack); 8302 if (ah == NULL) 8303 return; 8304 ASSERT(ii->ipsec_in_ah_sa != NULL); 8305 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8306 NULL); 8307 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8308 first_mp, ah); 8309 } 8310 8311 switch (ipsec_rc) { 8312 case IPSEC_STATUS_SUCCESS: 8313 break; 8314 case IPSEC_STATUS_FAILED: 8315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8316 /* FALLTHRU */ 8317 case IPSEC_STATUS_PENDING: 8318 return; 8319 } 8320 /* we're done with IPsec processing, send it up */ 8321 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8322 return; 8323 } 8324 case IPPROTO_NONE: 8325 /* All processing is done. Count as "delivered". 
*/ 8326 freemsg(hada_mp); 8327 freemsg(first_mp); 8328 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8329 return; 8330 } 8331 whereptr += used; 8332 ASSERT(remlen >= used); 8333 remlen -= used; 8334 } 8335 /* NOTREACHED */ 8336 8337 pkt_too_short: 8338 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8339 ip6_len, pkt_len, remlen)); 8340 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8341 freemsg(hada_mp); 8342 freemsg(first_mp); 8343 return; 8344 udp_fanout: 8345 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8346 connp = NULL; 8347 } else { 8348 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8349 ipst); 8350 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8351 CONN_DEC_REF(connp); 8352 connp = NULL; 8353 } 8354 } 8355 8356 if (connp == NULL) { 8357 uint32_t ports; 8358 8359 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8360 UDP_PORTS_OFFSET); 8361 IP6_STAT(ipst, ip6_udp_slow_path); 8362 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8363 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8364 zoneid); 8365 return; 8366 } 8367 8368 if (CONN_UDP_FLOWCTLD(connp)) { 8369 freemsg(first_mp); 8370 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8371 CONN_DEC_REF(connp); 8372 return; 8373 } 8374 8375 /* Initiate IPPF processing */ 8376 if (IP6_IN_IPP(flags, ipst)) { 8377 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8378 if (mp == NULL) { 8379 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8380 CONN_DEC_REF(connp); 8381 return; 8382 } 8383 } 8384 8385 if (connp->conn_ip_recvpktinfo || 8386 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8387 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8388 if (mp == NULL) { 8389 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8390 CONN_DEC_REF(connp); 8391 return; 8392 } 8393 } 8394 8395 IP6_STAT(ipst, ip6_udp_fast_path); 8396 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8397 8398 /* Send it upstream */ 8399 (connp->conn_recv)(connp, mp, NULL); 8400 8401 
CONN_DEC_REF(connp); 8402 freemsg(hada_mp); 8403 return; 8404 8405 hada_drop: 8406 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8407 /* IPsec kstats: bump counter here */ 8408 freemsg(hada_mp); 8409 freemsg(first_mp); 8410 } 8411 8412 /* 8413 * Reassemble fragment. 8414 * When it returns a completed message the first mblk will only contain 8415 * the headers prior to the fragment header. 8416 * 8417 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8418 * of the preceding header. This is needed to patch the previous header's 8419 * nexthdr field when reassembly completes. 8420 */ 8421 static mblk_t * 8422 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8423 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8424 uint32_t *cksum_val, uint16_t *cksum_flags) 8425 { 8426 ill_t *ill = (ill_t *)q->q_ptr; 8427 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8428 uint16_t offset; 8429 boolean_t more_frags; 8430 uint8_t nexthdr = fraghdr->ip6f_nxt; 8431 in6_addr_t *v6dst_ptr; 8432 in6_addr_t *v6src_ptr; 8433 uint_t end; 8434 uint_t hdr_length; 8435 size_t count; 8436 ipf_t *ipf; 8437 ipf_t **ipfp; 8438 ipfb_t *ipfb; 8439 mblk_t *mp1; 8440 uint8_t ecn_info = 0; 8441 size_t msg_len; 8442 mblk_t *tail_mp; 8443 mblk_t *t_mp; 8444 boolean_t pruned = B_FALSE; 8445 uint32_t sum_val; 8446 uint16_t sum_flags; 8447 ip_stack_t *ipst = ill->ill_ipst; 8448 8449 if (cksum_val != NULL) 8450 *cksum_val = 0; 8451 if (cksum_flags != NULL) 8452 *cksum_flags = 0; 8453 8454 /* 8455 * We utilize hardware computed checksum info only for UDP since 8456 * IP fragmentation is a normal occurence for the protocol. In 8457 * addition, checksum offload support for IP fragments carrying 8458 * UDP payload is commonly implemented across network adapters. 
8459 */ 8460 ASSERT(ill != NULL); 8461 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8462 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8463 mblk_t *mp1 = mp->b_cont; 8464 int32_t len; 8465 8466 /* Record checksum information from the packet */ 8467 sum_val = (uint32_t)DB_CKSUM16(mp); 8468 sum_flags = DB_CKSUMFLAGS(mp); 8469 8470 /* fragmented payload offset from beginning of mblk */ 8471 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8472 8473 if ((sum_flags & HCK_PARTIALCKSUM) && 8474 (mp1 == NULL || mp1->b_cont == NULL) && 8475 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8476 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8477 uint32_t adj; 8478 /* 8479 * Partial checksum has been calculated by hardware 8480 * and attached to the packet; in addition, any 8481 * prepended extraneous data is even byte aligned. 8482 * If any such data exists, we adjust the checksum; 8483 * this would also handle any postpended data. 8484 */ 8485 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8486 mp, mp1, len, adj); 8487 8488 /* One's complement subtract extraneous checksum */ 8489 if (adj >= sum_val) 8490 sum_val = ~(adj - sum_val) & 0xFFFF; 8491 else 8492 sum_val -= adj; 8493 } 8494 } else { 8495 sum_val = 0; 8496 sum_flags = 0; 8497 } 8498 8499 /* Clear hardware checksumming flag */ 8500 DB_CKSUMFLAGS(mp) = 0; 8501 8502 /* 8503 * Note: Fragment offset in header is in 8-octet units. 8504 * Clearing least significant 3 bits not only extracts 8505 * it but also gets it in units of octets. 8506 */ 8507 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8508 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8509 8510 /* 8511 * Is the more frags flag on and the payload length not a multiple 8512 * of eight? 
8513 */ 8514 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8515 zoneid_t zoneid; 8516 8517 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8518 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8519 if (zoneid == ALL_ZONES) { 8520 freemsg(mp); 8521 return (NULL); 8522 } 8523 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8524 (uint32_t)((char *)&ip6h->ip6_plen - 8525 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8526 return (NULL); 8527 } 8528 8529 v6src_ptr = &ip6h->ip6_src; 8530 v6dst_ptr = &ip6h->ip6_dst; 8531 end = remlen; 8532 8533 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8534 end += offset; 8535 8536 /* 8537 * Would fragment cause reassembled packet to have a payload length 8538 * greater than IP_MAXPACKET - the max payload size? 8539 */ 8540 if (end > IP_MAXPACKET) { 8541 zoneid_t zoneid; 8542 8543 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8544 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8545 if (zoneid == ALL_ZONES) { 8546 freemsg(mp); 8547 return (NULL); 8548 } 8549 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8550 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8551 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8552 return (NULL); 8553 } 8554 8555 /* 8556 * This packet just has one fragment. Reassembly not 8557 * needed. 8558 */ 8559 if (!more_frags && offset == 0) { 8560 goto reass_done; 8561 } 8562 8563 /* 8564 * Drop the fragmented as early as possible, if 8565 * we don't have resource(s) to re-assemble. 8566 */ 8567 if (ipst->ips_ip_reass_queue_bytes == 0) { 8568 freemsg(mp); 8569 return (NULL); 8570 } 8571 8572 /* Record the ECN field info. */ 8573 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8574 /* 8575 * If this is not the first fragment, dump the unfragmentable 8576 * portion of the packet. 8577 */ 8578 if (offset) 8579 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8580 8581 /* 8582 * Fragmentation reassembly. 
Each ILL has a hash table for 8583 * queueing packets undergoing reassembly for all IPIFs 8584 * associated with the ILL. The hash is based on the packet 8585 * IP ident field. The ILL frag hash table was allocated 8586 * as a timer block at the time the ILL was created. Whenever 8587 * there is anything on the reassembly queue, the timer will 8588 * be running. 8589 */ 8590 msg_len = MBLKSIZE(mp); 8591 tail_mp = mp; 8592 while (tail_mp->b_cont != NULL) { 8593 tail_mp = tail_mp->b_cont; 8594 msg_len += MBLKSIZE(tail_mp); 8595 } 8596 /* 8597 * If the reassembly list for this ILL will get too big 8598 * prune it. 8599 */ 8600 8601 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8602 ipst->ips_ip_reass_queue_bytes) { 8603 ill_frag_prune(ill, 8604 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8605 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8606 pruned = B_TRUE; 8607 } 8608 8609 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8610 mutex_enter(&ipfb->ipfb_lock); 8611 8612 ipfp = &ipfb->ipfb_ipf; 8613 /* Try to find an existing fragment queue for this packet. */ 8614 for (;;) { 8615 ipf = ipfp[0]; 8616 if (ipf) { 8617 /* 8618 * It has to match on ident, source address, and 8619 * dest address. 8620 */ 8621 if (ipf->ipf_ident == ident && 8622 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8623 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8624 8625 /* 8626 * If we have received too many 8627 * duplicate fragments for this packet 8628 * free it. 8629 */ 8630 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8631 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8632 freemsg(mp); 8633 mutex_exit(&ipfb->ipfb_lock); 8634 return (NULL); 8635 } 8636 8637 break; 8638 } 8639 ipfp = &ipf->ipf_hash_next; 8640 continue; 8641 } 8642 8643 8644 /* 8645 * If we pruned the list, do we want to store this new 8646 * fragment?. We apply an optimization here based on the 8647 * fact that most fragments will be received in order. 
8648 * So if the offset of this incoming fragment is zero, 8649 * it is the first fragment of a new packet. We will 8650 * keep it. Otherwise drop the fragment, as we have 8651 * probably pruned the packet already (since the 8652 * packet cannot be found). 8653 */ 8654 8655 if (pruned && offset != 0) { 8656 mutex_exit(&ipfb->ipfb_lock); 8657 freemsg(mp); 8658 return (NULL); 8659 } 8660 8661 /* New guy. Allocate a frag message. */ 8662 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8663 if (!mp1) { 8664 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8665 freemsg(mp); 8666 partial_reass_done: 8667 mutex_exit(&ipfb->ipfb_lock); 8668 return (NULL); 8669 } 8670 8671 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8672 /* 8673 * Too many fragmented packets in this hash bucket. 8674 * Free the oldest. 8675 */ 8676 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8677 } 8678 8679 mp1->b_cont = mp; 8680 8681 /* Initialize the fragment header. */ 8682 ipf = (ipf_t *)mp1->b_rptr; 8683 ipf->ipf_mp = mp1; 8684 ipf->ipf_ptphn = ipfp; 8685 ipfp[0] = ipf; 8686 ipf->ipf_hash_next = NULL; 8687 ipf->ipf_ident = ident; 8688 ipf->ipf_v6src = *v6src_ptr; 8689 ipf->ipf_v6dst = *v6dst_ptr; 8690 /* Record reassembly start time. 
*/ 8691 ipf->ipf_timestamp = gethrestime_sec(); 8692 /* Record ipf generation and account for frag header */ 8693 ipf->ipf_gen = ill->ill_ipf_gen++; 8694 ipf->ipf_count = MBLKSIZE(mp1); 8695 ipf->ipf_protocol = nexthdr; 8696 ipf->ipf_nf_hdr_len = 0; 8697 ipf->ipf_prev_nexthdr_offset = 0; 8698 ipf->ipf_last_frag_seen = B_FALSE; 8699 ipf->ipf_ecn = ecn_info; 8700 ipf->ipf_num_dups = 0; 8701 ipfb->ipfb_frag_pkts++; 8702 ipf->ipf_checksum = 0; 8703 ipf->ipf_checksum_flags = 0; 8704 8705 /* Store checksum value in fragment header */ 8706 if (sum_flags != 0) { 8707 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8708 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8709 ipf->ipf_checksum = sum_val; 8710 ipf->ipf_checksum_flags = sum_flags; 8711 } 8712 8713 /* 8714 * We handle reassembly two ways. In the easy case, 8715 * where all the fragments show up in order, we do 8716 * minimal bookkeeping, and just clip new pieces on 8717 * the end. If we ever see a hole, then we go off 8718 * to ip_reassemble which has to mark the pieces and 8719 * keep track of the number of holes, etc. Obviously, 8720 * the point of having both mechanisms is so we can 8721 * handle the easy case as efficiently as possible. 8722 */ 8723 if (offset == 0) { 8724 /* Easy case, in-order reassembly so far. */ 8725 /* Update the byte count */ 8726 ipf->ipf_count += msg_len; 8727 ipf->ipf_tail_mp = tail_mp; 8728 /* 8729 * Keep track of next expected offset in 8730 * ipf_end. 8731 */ 8732 ipf->ipf_end = end; 8733 ipf->ipf_nf_hdr_len = hdr_length; 8734 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8735 } else { 8736 /* Hard case, hole at the beginning. */ 8737 ipf->ipf_tail_mp = NULL; 8738 /* 8739 * ipf_end == 0 means that we have given up 8740 * on easy reassembly. 8741 */ 8742 ipf->ipf_end = 0; 8743 8744 /* Forget checksum offload from now on */ 8745 ipf->ipf_checksum_flags = 0; 8746 8747 /* 8748 * ipf_hole_cnt is set by ip_reassemble. 8749 * ipf_count is updated by ip_reassemble. 
8750 * No need to check for return value here 8751 * as we don't expect reassembly to complete or 8752 * fail for the first fragment itself. 8753 */ 8754 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8755 msg_len); 8756 } 8757 /* Update per ipfb and ill byte counts */ 8758 ipfb->ipfb_count += ipf->ipf_count; 8759 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8760 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8761 /* If the frag timer wasn't already going, start it. */ 8762 mutex_enter(&ill->ill_lock); 8763 ill_frag_timer_start(ill); 8764 mutex_exit(&ill->ill_lock); 8765 goto partial_reass_done; 8766 } 8767 8768 /* 8769 * If the packet's flag has changed (it could be coming up 8770 * from an interface different than the previous, therefore 8771 * possibly different checksum capability), then forget about 8772 * any stored checksum states. Otherwise add the value to 8773 * the existing one stored in the fragment header. 8774 */ 8775 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8776 sum_val += ipf->ipf_checksum; 8777 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8778 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8779 ipf->ipf_checksum = sum_val; 8780 } else if (ipf->ipf_checksum_flags != 0) { 8781 /* Forget checksum offload from now on */ 8782 ipf->ipf_checksum_flags = 0; 8783 } 8784 8785 /* 8786 * We have a new piece of a datagram which is already being 8787 * reassembled. Update the ECN info if all IP fragments 8788 * are ECN capable. If there is one which is not, clear 8789 * all the info. If there is at least one which has CE 8790 * code point, IP needs to report that up to transport. 
8791 */ 8792 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8793 if (ecn_info == IPH_ECN_CE) 8794 ipf->ipf_ecn = IPH_ECN_CE; 8795 } else { 8796 ipf->ipf_ecn = IPH_ECN_NECT; 8797 } 8798 8799 if (offset && ipf->ipf_end == offset) { 8800 /* The new fragment fits at the end */ 8801 ipf->ipf_tail_mp->b_cont = mp; 8802 /* Update the byte count */ 8803 ipf->ipf_count += msg_len; 8804 /* Update per ipfb and ill byte counts */ 8805 ipfb->ipfb_count += msg_len; 8806 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8807 atomic_add_32(&ill->ill_frag_count, msg_len); 8808 if (more_frags) { 8809 /* More to come. */ 8810 ipf->ipf_end = end; 8811 ipf->ipf_tail_mp = tail_mp; 8812 goto partial_reass_done; 8813 } 8814 } else { 8815 /* 8816 * Go do the hard cases. 8817 * Call ip_reassemble(). 8818 */ 8819 int ret; 8820 8821 if (offset == 0) { 8822 if (ipf->ipf_prev_nexthdr_offset == 0) { 8823 ipf->ipf_nf_hdr_len = hdr_length; 8824 ipf->ipf_prev_nexthdr_offset = 8825 *prev_nexthdr_offset; 8826 } 8827 } 8828 /* Save current byte count */ 8829 count = ipf->ipf_count; 8830 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8831 8832 /* Count of bytes added and subtracted (freeb()ed) */ 8833 count = ipf->ipf_count - count; 8834 if (count) { 8835 /* Update per ipfb and ill byte counts */ 8836 ipfb->ipfb_count += count; 8837 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8838 atomic_add_32(&ill->ill_frag_count, count); 8839 } 8840 if (ret == IP_REASS_PARTIAL) { 8841 goto partial_reass_done; 8842 } else if (ret == IP_REASS_FAILED) { 8843 /* Reassembly failed. Free up all resources */ 8844 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8845 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8846 IP_REASS_SET_START(t_mp, 0); 8847 IP_REASS_SET_END(t_mp, 0); 8848 } 8849 freemsg(mp); 8850 goto partial_reass_done; 8851 } 8852 8853 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8854 } 8855 /* 8856 * We have completed reassembly. 
Unhook the frag header from 8857 * the reassembly list. 8858 * 8859 * Grab the unfragmentable header length next header value out 8860 * of the first fragment 8861 */ 8862 ASSERT(ipf->ipf_nf_hdr_len != 0); 8863 hdr_length = ipf->ipf_nf_hdr_len; 8864 8865 /* 8866 * Before we free the frag header, record the ECN info 8867 * to report back to the transport. 8868 */ 8869 ecn_info = ipf->ipf_ecn; 8870 8871 /* 8872 * Store the nextheader field in the header preceding the fragment 8873 * header 8874 */ 8875 nexthdr = ipf->ipf_protocol; 8876 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8877 ipfp = ipf->ipf_ptphn; 8878 8879 /* We need to supply these to caller */ 8880 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8881 sum_val = ipf->ipf_checksum; 8882 else 8883 sum_val = 0; 8884 8885 mp1 = ipf->ipf_mp; 8886 count = ipf->ipf_count; 8887 ipf = ipf->ipf_hash_next; 8888 if (ipf) 8889 ipf->ipf_ptphn = ipfp; 8890 ipfp[0] = ipf; 8891 atomic_add_32(&ill->ill_frag_count, -count); 8892 ASSERT(ipfb->ipfb_count >= count); 8893 ipfb->ipfb_count -= count; 8894 ipfb->ipfb_frag_pkts--; 8895 mutex_exit(&ipfb->ipfb_lock); 8896 /* Ditch the frag header. */ 8897 mp = mp1->b_cont; 8898 freeb(mp1); 8899 8900 /* 8901 * Make sure the packet is good by doing some sanity 8902 * check. If bad we can silentely drop the packet. 8903 */ 8904 reass_done: 8905 if (hdr_length < sizeof (ip6_frag_t)) { 8906 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8907 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8908 freemsg(mp); 8909 return (NULL); 8910 } 8911 8912 /* 8913 * Remove the fragment header from the initial header by 8914 * splitting the mblk into the non-fragmentable header and 8915 * everthing after the fragment extension header. This has the 8916 * side effect of putting all the headers that need destination 8917 * processing into the b_cont block-- on return this fact is 8918 * used in order to avoid having to look at the extensions 8919 * already processed. 
8920 * 8921 * Note that this code assumes that the unfragmentable portion 8922 * of the header is in the first mblk and increments 8923 * the read pointer past it. If this assumption is broken 8924 * this code fails badly. 8925 */ 8926 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8927 mblk_t *nmp; 8928 8929 if (!(nmp = dupb(mp))) { 8930 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8931 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8932 freemsg(mp); 8933 return (NULL); 8934 } 8935 nmp->b_cont = mp->b_cont; 8936 mp->b_cont = nmp; 8937 nmp->b_rptr += hdr_length; 8938 } 8939 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8940 8941 ip6h = (ip6_t *)mp->b_rptr; 8942 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8943 8944 /* Restore original IP length in header. */ 8945 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8946 /* Record the ECN info. */ 8947 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8948 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8949 8950 /* Reassembly is successful; return checksum information if needed */ 8951 if (cksum_val != NULL) 8952 *cksum_val = sum_val; 8953 if (cksum_flags != NULL) 8954 *cksum_flags = sum_flags; 8955 8956 return (mp); 8957 } 8958 8959 /* 8960 * Walk through the options to see if there is a routing header. 8961 * If present get the destination which is the last address of 8962 * the option. 
8963 */ 8964 in6_addr_t 8965 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8966 { 8967 uint8_t nexthdr; 8968 uint8_t *whereptr; 8969 ip6_hbh_t *hbhhdr; 8970 ip6_dest_t *dsthdr; 8971 ip6_rthdr0_t *rthdr; 8972 ip6_frag_t *fraghdr; 8973 int ehdrlen; 8974 int left; 8975 in6_addr_t *ap, rv; 8976 8977 if (is_fragment != NULL) 8978 *is_fragment = B_FALSE; 8979 8980 rv = ip6h->ip6_dst; 8981 8982 nexthdr = ip6h->ip6_nxt; 8983 whereptr = (uint8_t *)&ip6h[1]; 8984 for (;;) { 8985 8986 ASSERT(nexthdr != IPPROTO_RAW); 8987 switch (nexthdr) { 8988 case IPPROTO_HOPOPTS: 8989 hbhhdr = (ip6_hbh_t *)whereptr; 8990 nexthdr = hbhhdr->ip6h_nxt; 8991 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8992 break; 8993 case IPPROTO_DSTOPTS: 8994 dsthdr = (ip6_dest_t *)whereptr; 8995 nexthdr = dsthdr->ip6d_nxt; 8996 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8997 break; 8998 case IPPROTO_ROUTING: 8999 rthdr = (ip6_rthdr0_t *)whereptr; 9000 nexthdr = rthdr->ip6r0_nxt; 9001 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9002 9003 left = rthdr->ip6r0_segleft; 9004 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9005 rv = *(ap + left - 1); 9006 /* 9007 * If the caller doesn't care whether the packet 9008 * is a fragment or not, we can stop here since 9009 * we have our destination. 9010 */ 9011 if (is_fragment == NULL) 9012 goto done; 9013 break; 9014 case IPPROTO_FRAGMENT: 9015 fraghdr = (ip6_frag_t *)whereptr; 9016 nexthdr = fraghdr->ip6f_nxt; 9017 ehdrlen = sizeof (ip6_frag_t); 9018 if (is_fragment != NULL) 9019 *is_fragment = B_TRUE; 9020 goto done; 9021 default : 9022 goto done; 9023 } 9024 whereptr += ehdrlen; 9025 } 9026 9027 done: 9028 return (rv); 9029 } 9030 9031 /* 9032 * ip_source_routed_v6: 9033 * This function is called by redirect code in ip_rput_data_v6 to 9034 * know whether this packet is source routed through this node i.e 9035 * whether this node (router) is part of the journey. 
This 9036 * function is called under two cases : 9037 * 9038 * case 1 : Routing header was processed by this node and 9039 * ip_process_rthdr replaced ip6_dst with the next hop 9040 * and we are forwarding the packet to the next hop. 9041 * 9042 * case 2 : Routing header was not processed by this node and we 9043 * are just forwarding the packet. 9044 * 9045 * For case (1) we don't want to send redirects. For case(2) we 9046 * want to send redirects. 9047 */ 9048 static boolean_t 9049 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9050 { 9051 uint8_t nexthdr; 9052 in6_addr_t *addrptr; 9053 ip6_rthdr0_t *rthdr; 9054 uint8_t numaddr; 9055 ip6_hbh_t *hbhhdr; 9056 uint_t ehdrlen; 9057 uint8_t *byteptr; 9058 9059 ip2dbg(("ip_source_routed_v6\n")); 9060 nexthdr = ip6h->ip6_nxt; 9061 ehdrlen = IPV6_HDR_LEN; 9062 9063 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9064 while (nexthdr == IPPROTO_HOPOPTS || 9065 nexthdr == IPPROTO_DSTOPTS) { 9066 byteptr = (uint8_t *)ip6h + ehdrlen; 9067 /* 9068 * Check if we have already processed 9069 * packets or we are just a forwarding 9070 * router which only pulled up msgs up 9071 * to IPV6HDR and one HBH ext header 9072 */ 9073 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9074 ip2dbg(("ip_source_routed_v6: Extension" 9075 " headers not processed\n")); 9076 return (B_FALSE); 9077 } 9078 hbhhdr = (ip6_hbh_t *)byteptr; 9079 nexthdr = hbhhdr->ip6h_nxt; 9080 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9081 } 9082 switch (nexthdr) { 9083 case IPPROTO_ROUTING: 9084 byteptr = (uint8_t *)ip6h + ehdrlen; 9085 /* 9086 * If for some reason, we haven't pulled up 9087 * the routing hdr data mblk, then we must 9088 * not have processed it at all. So for sure 9089 * we are not part of the source routed journey. 
9090 */ 9091 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9092 ip2dbg(("ip_source_routed_v6: Routing" 9093 " header not processed\n")); 9094 return (B_FALSE); 9095 } 9096 rthdr = (ip6_rthdr0_t *)byteptr; 9097 /* 9098 * Either we are an intermediate router or the 9099 * last hop before destination and we have 9100 * already processed the routing header. 9101 * If segment_left is greater than or equal to zero, 9102 * then we must be the (numaddr - segleft) entry 9103 * of the routing header. Although ip6r0_segleft 9104 * is a unit8_t variable, we still check for zero 9105 * or greater value, if in case the data type 9106 * is changed someday in future. 9107 */ 9108 if (rthdr->ip6r0_segleft > 0 || 9109 rthdr->ip6r0_segleft == 0) { 9110 ire_t *ire = NULL; 9111 9112 numaddr = rthdr->ip6r0_len / 2; 9113 addrptr = (in6_addr_t *)((char *)rthdr + 9114 sizeof (*rthdr)); 9115 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9116 if (addrptr != NULL) { 9117 ire = ire_ctable_lookup_v6(addrptr, NULL, 9118 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9119 MATCH_IRE_TYPE, 9120 ipst); 9121 if (ire != NULL) { 9122 ire_refrele(ire); 9123 return (B_TRUE); 9124 } 9125 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9126 } 9127 } 9128 /* FALLTHRU */ 9129 default: 9130 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9131 return (B_FALSE); 9132 } 9133 } 9134 9135 /* 9136 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9137 * Assumes that the following set of headers appear in the first 9138 * mblk: 9139 * ip6i_t (if present) CAN also appear as a separate mblk. 9140 * ip6_t 9141 * Any extension headers 9142 * TCP/UDP/SCTP header (if present) 9143 * The routine can handle an ICMPv6 header that is not in the first mblk. 9144 * 9145 * The order to determine the outgoing interface is as follows: 9146 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9147 * 2. If conn_nofailover_ill is set then use that ill. 9148 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9149 * 4. If q is an ill queue and (link local or multicast destination) then 9150 * use that ill. 9151 * 5. If IPV6_BOUND_IF has been set use that ill. 9152 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9153 * look for the best IRE match for the unspecified group to determine 9154 * the ill. 9155 * 7. For unicast: Just do an IRE lookup for the best match. 9156 * 9157 * arg2 is always a queue_t *. 9158 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9159 * the zoneid. 9160 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9161 */ 9162 void 9163 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9164 { 9165 conn_t *connp = NULL; 9166 queue_t *q = (queue_t *)arg2; 9167 ire_t *ire = NULL; 9168 ire_t *sctp_ire = NULL; 9169 ip6_t *ip6h; 9170 in6_addr_t *v6dstp; 9171 ill_t *ill = NULL; 9172 ipif_t *ipif; 9173 ip6i_t *ip6i; 9174 int cksum_request; /* -1 => normal. */ 9175 /* 1 => Skip TCP/UDP/SCTP checksum */ 9176 /* Otherwise contains insert offset for checksum */ 9177 int unspec_src; 9178 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9179 mib2_ipIfStatsEntry_t *mibptr; 9180 int match_flags = MATCH_IRE_ILL_GROUP; 9181 boolean_t attach_if = B_FALSE; 9182 mblk_t *first_mp; 9183 boolean_t mctl_present; 9184 ipsec_out_t *io; 9185 boolean_t drop_if_delayed = B_FALSE; 9186 boolean_t multirt_need_resolve = B_FALSE; 9187 mblk_t *copy_mp = NULL; 9188 int err = 0; 9189 int ip6i_flags = 0; 9190 zoneid_t zoneid; 9191 ill_t *saved_ill = NULL; 9192 boolean_t conn_lock_held; 9193 boolean_t need_decref = B_FALSE; 9194 ip_stack_t *ipst; 9195 9196 if (q->q_next != NULL) { 9197 ill = (ill_t *)q->q_ptr; 9198 ipst = ill->ill_ipst; 9199 } else { 9200 connp = (conn_t *)arg; 9201 ASSERT(connp != NULL); 9202 ipst = connp->conn_netstack->netstack_ip; 9203 } 9204 9205 /* 9206 * Highest bit in version field is Reachability Confirmation bit 9207 * used by NUD in ip_xmit_v6(). 9208 */ 9209 #ifdef _BIG_ENDIAN 9210 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9211 #else 9212 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9213 #endif 9214 9215 /* 9216 * M_CTL comes from 6 places 9217 * 9218 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9219 * both V4 and V6 datagrams. 9220 * 9221 * 2) AH/ESP sends down M_CTL after doing their job with both 9222 * V4 and V6 datagrams. 9223 * 9224 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9225 * attached. 9226 * 9227 * 4) Notifications from an external resolver (for XRESOLV ifs) 9228 * 9229 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9230 * IPsec hardware acceleration support. 9231 * 9232 * 6) TUN_HELLO. 9233 * 9234 * We need to handle (1)'s IPv6 case and (3) here. For the 9235 * IPv4 case in (1), and (2), IPSEC processing has already 9236 * started. The code in ip_wput() already knows how to handle 9237 * continuing IPSEC processing (for IPv4 and IPv6). All other 9238 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9239 * for handling. 
9240 */ 9241 first_mp = mp; 9242 mctl_present = B_FALSE; 9243 io = NULL; 9244 9245 /* Multidata transmit? */ 9246 if (DB_TYPE(mp) == M_MULTIDATA) { 9247 /* 9248 * We should never get here, since all Multidata messages 9249 * originating from tcp should have been directed over to 9250 * tcp_multisend() in the first place. 9251 */ 9252 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9253 freemsg(mp); 9254 return; 9255 } else if (DB_TYPE(mp) == M_CTL) { 9256 uint32_t mctltype = 0; 9257 uint32_t mlen = MBLKL(first_mp); 9258 9259 mp = mp->b_cont; 9260 mctl_present = B_TRUE; 9261 io = (ipsec_out_t *)first_mp->b_rptr; 9262 9263 /* 9264 * Validate this M_CTL message. The only three types of 9265 * M_CTL messages we expect to see in this code path are 9266 * ipsec_out_t or ipsec_in_t structures (allocated as 9267 * ipsec_info_t unions), or ipsec_ctl_t structures. 9268 * The ipsec_out_type and ipsec_in_type overlap in the two 9269 * data structures, and they are either set to IPSEC_OUT 9270 * or IPSEC_IN depending on which data structure it is. 9271 * ipsec_ctl_t is an IPSEC_CTL. 9272 * 9273 * All other M_CTL messages are sent to ip_wput_nondata() 9274 * for handling. 9275 */ 9276 if (mlen >= sizeof (io->ipsec_out_type)) 9277 mctltype = io->ipsec_out_type; 9278 9279 if ((mlen == sizeof (ipsec_ctl_t)) && 9280 (mctltype == IPSEC_CTL)) { 9281 ip_output(arg, first_mp, arg2, caller); 9282 return; 9283 } 9284 9285 if ((mlen < sizeof (ipsec_info_t)) || 9286 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9287 mp == NULL) { 9288 ip_wput_nondata(NULL, q, first_mp, NULL); 9289 return; 9290 } 9291 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9292 if (q->q_next == NULL) { 9293 ip6h = (ip6_t *)mp->b_rptr; 9294 /* 9295 * For a freshly-generated TCP dgram that needs IPV6 9296 * processing, don't call ip_wput immediately. We can 9297 * tell this by the ipsec_out_proc_begin. 
In-progress 9298 * IPSEC_OUT messages have proc_begin set to TRUE, 9299 * and we want to send all IPSEC_IN messages to 9300 * ip_wput() for IPsec processing or finishing. 9301 */ 9302 if (mctltype == IPSEC_IN || 9303 IPVER(ip6h) != IPV6_VERSION || 9304 io->ipsec_out_proc_begin) { 9305 mibptr = &ipst->ips_ip6_mib; 9306 goto notv6; 9307 } 9308 } 9309 } else if (DB_TYPE(mp) != M_DATA) { 9310 ip_wput_nondata(NULL, q, mp, NULL); 9311 return; 9312 } 9313 9314 ip6h = (ip6_t *)mp->b_rptr; 9315 9316 if (IPVER(ip6h) != IPV6_VERSION) { 9317 mibptr = &ipst->ips_ip6_mib; 9318 goto notv6; 9319 } 9320 9321 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9322 (connp == NULL || !connp->conn_ulp_labeled)) { 9323 if (connp != NULL) { 9324 ASSERT(CONN_CRED(connp) != NULL); 9325 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9326 &mp, connp->conn_mac_exempt, ipst); 9327 } else if (DB_CRED(mp) != NULL) { 9328 err = tsol_check_label_v6(DB_CRED(mp), 9329 &mp, B_FALSE, ipst); 9330 } 9331 if (mctl_present) 9332 first_mp->b_cont = mp; 9333 else 9334 first_mp = mp; 9335 if (err != 0) { 9336 DTRACE_PROBE3( 9337 tsol_ip_log_drop_checklabel_ip6, char *, 9338 "conn(1), failed to check/update mp(2)", 9339 conn_t, connp, mblk_t, mp); 9340 freemsg(first_mp); 9341 return; 9342 } 9343 ip6h = (ip6_t *)mp->b_rptr; 9344 } 9345 if (q->q_next != NULL) { 9346 /* 9347 * We don't know if this ill will be used for IPv6 9348 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9349 * ipif_set_values() sets the ill_isv6 flag to true if 9350 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9351 * just drop the packet. 
9352 */ 9353 if (!ill->ill_isv6) { 9354 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9355 "ILLF_IPV6 was set\n")); 9356 freemsg(first_mp); 9357 return; 9358 } 9359 /* For uniformity do a refhold */ 9360 mutex_enter(&ill->ill_lock); 9361 if (!ILL_CAN_LOOKUP(ill)) { 9362 mutex_exit(&ill->ill_lock); 9363 freemsg(first_mp); 9364 return; 9365 } 9366 ill_refhold_locked(ill); 9367 mutex_exit(&ill->ill_lock); 9368 mibptr = ill->ill_ip_mib; 9369 9370 ASSERT(mibptr != NULL); 9371 unspec_src = 0; 9372 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9373 do_outrequests = B_FALSE; 9374 zoneid = (zoneid_t)(uintptr_t)arg; 9375 } else { 9376 ASSERT(connp != NULL); 9377 zoneid = connp->conn_zoneid; 9378 9379 /* is queue flow controlled? */ 9380 if ((q->q_first || connp->conn_draining) && 9381 (caller == IP_WPUT)) { 9382 /* 9383 * 1) TCP sends down M_CTL for detached connections. 9384 * 2) AH/ESP sends down M_CTL. 9385 * 9386 * We don't flow control either of the above. Only 9387 * UDP and others are flow controlled for which we 9388 * can't have a M_CTL. 9389 */ 9390 ASSERT(first_mp == mp); 9391 (void) putq(q, mp); 9392 return; 9393 } 9394 mibptr = &ipst->ips_ip6_mib; 9395 unspec_src = connp->conn_unspec_src; 9396 do_outrequests = B_TRUE; 9397 if (mp->b_flag & MSGHASREF) { 9398 mp->b_flag &= ~MSGHASREF; 9399 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9400 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9401 need_decref = B_TRUE; 9402 } 9403 9404 /* 9405 * If there is a policy, try to attach an ipsec_out in 9406 * the front. At the end, first_mp either points to a 9407 * M_DATA message or IPSEC_OUT message linked to a 9408 * M_DATA message. We have to do it now as we might 9409 * lose the "conn" if we go through ip_newroute. 9410 */ 9411 if (!mctl_present && 9412 (connp->conn_out_enforce_policy || 9413 connp->conn_latch != NULL)) { 9414 ASSERT(first_mp == mp); 9415 /* XXX Any better way to get the protocol fast ? 
*/ 9416 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9417 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9418 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9419 if (need_decref) 9420 CONN_DEC_REF(connp); 9421 return; 9422 } else { 9423 ASSERT(mp->b_datap->db_type == M_CTL); 9424 first_mp = mp; 9425 mp = mp->b_cont; 9426 mctl_present = B_TRUE; 9427 io = (ipsec_out_t *)first_mp->b_rptr; 9428 } 9429 } 9430 } 9431 9432 /* check for alignment and full IPv6 header */ 9433 if (!OK_32PTR((uchar_t *)ip6h) || 9434 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9435 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9436 if (do_outrequests) 9437 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9438 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9439 freemsg(first_mp); 9440 if (ill != NULL) 9441 ill_refrele(ill); 9442 if (need_decref) 9443 CONN_DEC_REF(connp); 9444 return; 9445 } 9446 v6dstp = &ip6h->ip6_dst; 9447 cksum_request = -1; 9448 ip6i = NULL; 9449 9450 /* 9451 * Once neighbor discovery has completed, ndp_process() will provide 9452 * locally generated packets for which processing can be reattempted. 9453 * In these cases, connp is NULL and the original zone is part of a 9454 * prepended ipsec_out_t. 9455 */ 9456 if (io != NULL) { 9457 /* 9458 * When coming from icmp_input_v6, the zoneid might not match 9459 * for the loopback case, because inside icmp_input_v6 the 9460 * queue_t is a conn queue from the sending side. 9461 */ 9462 zoneid = io->ipsec_out_zoneid; 9463 ASSERT(zoneid != ALL_ZONES); 9464 } 9465 9466 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9467 /* 9468 * This is an ip6i_t header followed by an ip6_hdr. 9469 * Check which fields are set. 9470 * 9471 * When the packet comes from a transport we should have 9472 * all needed headers in the first mblk. However, when 9473 * going through ip_newroute*_v6 the ip6i might be in 9474 * a separate mblk when we return here. 
In that case 9475 * we pullup everything to ensure that extension and transport 9476 * headers "stay" in the first mblk. 9477 */ 9478 ip6i = (ip6i_t *)ip6h; 9479 ip6i_flags = ip6i->ip6i_flags; 9480 9481 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9482 ((mp->b_wptr - (uchar_t *)ip6i) >= 9483 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9484 9485 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9486 if (!pullupmsg(mp, -1)) { 9487 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9488 if (do_outrequests) { 9489 BUMP_MIB(mibptr, 9490 ipIfStatsHCOutRequests); 9491 } 9492 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9493 freemsg(first_mp); 9494 if (ill != NULL) 9495 ill_refrele(ill); 9496 if (need_decref) 9497 CONN_DEC_REF(connp); 9498 return; 9499 } 9500 ip6h = (ip6_t *)mp->b_rptr; 9501 v6dstp = &ip6h->ip6_dst; 9502 ip6i = (ip6i_t *)ip6h; 9503 } 9504 ip6h = (ip6_t *)&ip6i[1]; 9505 9506 /* 9507 * Advance rptr past the ip6i_t to get ready for 9508 * transmitting the packet. However, if the packet gets 9509 * passed to ip_newroute*_v6 then rptr is moved back so 9510 * that the ip6i_t header can be inspected when the 9511 * packet comes back here after passing through 9512 * ire_add_then_send. 9513 */ 9514 mp->b_rptr = (uchar_t *)ip6h; 9515 9516 /* 9517 * IP6I_ATTACH_IF is set in this function when we had a 9518 * conn and it was either bound to the IPFF_NOFAILOVER address 9519 * or IPV6_BOUND_PIF was set. These options override other 9520 * options that set the ifindex. We come here with 9521 * IP6I_ATTACH_IF set when we can't find the ire and 9522 * ip_newroute_v6 is feeding the packet for second time. 
9523 */ 9524 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9525 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9526 ASSERT(ip6i->ip6i_ifindex != 0); 9527 if (ill != NULL) 9528 ill_refrele(ill); 9529 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9530 NULL, NULL, NULL, NULL, ipst); 9531 if (ill == NULL) { 9532 if (do_outrequests) { 9533 BUMP_MIB(mibptr, 9534 ipIfStatsHCOutRequests); 9535 } 9536 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9537 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9538 ip6i->ip6i_ifindex)); 9539 if (need_decref) 9540 CONN_DEC_REF(connp); 9541 freemsg(first_mp); 9542 return; 9543 } 9544 mibptr = ill->ill_ip_mib; 9545 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9546 /* 9547 * Preserve the index so that when we return 9548 * from IPSEC processing, we know where to 9549 * send the packet. 9550 */ 9551 if (mctl_present) { 9552 ASSERT(io != NULL); 9553 io->ipsec_out_ill_index = 9554 ip6i->ip6i_ifindex; 9555 } 9556 } 9557 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9558 /* 9559 * This is a multipathing probe packet that has 9560 * been delayed in ND resolution. Drop the 9561 * packet for the reasons mentioned in 9562 * nce_queue_mp() 9563 */ 9564 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9565 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9566 freemsg(first_mp); 9567 ill_refrele(ill); 9568 if (need_decref) 9569 CONN_DEC_REF(connp); 9570 return; 9571 } 9572 } 9573 } 9574 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9575 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9576 9577 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9578 if (secpolicy_net_rawaccess(cr) != 0) { 9579 /* 9580 * Use IPCL_ZONEID to honor SO_ALLZONES. 9581 */ 9582 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9583 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9584 NULL, connp != NULL ? 
9585 IPCL_ZONEID(connp) : zoneid, NULL, 9586 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9587 if (ire == NULL) { 9588 if (do_outrequests) 9589 BUMP_MIB(mibptr, 9590 ipIfStatsHCOutRequests); 9591 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9592 ip1dbg(("ip_wput_v6: bad source " 9593 "addr\n")); 9594 freemsg(first_mp); 9595 if (ill != NULL) 9596 ill_refrele(ill); 9597 if (need_decref) 9598 CONN_DEC_REF(connp); 9599 return; 9600 } 9601 ire_refrele(ire); 9602 } 9603 /* No need to verify again when using ip_newroute */ 9604 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9605 } 9606 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9607 /* 9608 * Make sure they match since ip_newroute*_v6 etc might 9609 * (unknown to them) inspect ip6i_nexthop when 9610 * they think they access ip6_dst. 9611 */ 9612 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9613 } 9614 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9615 cksum_request = 1; 9616 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9617 cksum_request = ip6i->ip6i_checksum_off; 9618 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9619 unspec_src = 1; 9620 9621 if (do_outrequests && ill != NULL) { 9622 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9623 do_outrequests = B_FALSE; 9624 } 9625 /* 9626 * Store ip6i_t info that we need after we come back 9627 * from IPSEC processing. 9628 */ 9629 if (mctl_present) { 9630 ASSERT(io != NULL); 9631 io->ipsec_out_unspec_src = unspec_src; 9632 } 9633 } 9634 if (connp != NULL && connp->conn_dontroute) 9635 ip6h->ip6_hops = 1; 9636 9637 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9638 goto ipv6multicast; 9639 9640 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9641 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9642 ill_t *conn_outgoing_pill; 9643 9644 conn_outgoing_pill = conn_get_held_ill(connp, 9645 &connp->conn_outgoing_pill, &err); 9646 if (err == ILL_LOOKUP_FAILED) { 9647 if (ill != NULL) 9648 ill_refrele(ill); 9649 if (need_decref) 9650 CONN_DEC_REF(connp); 9651 freemsg(first_mp); 9652 return; 9653 } 9654 if (conn_outgoing_pill != NULL) { 9655 if (ill != NULL) 9656 ill_refrele(ill); 9657 ill = conn_outgoing_pill; 9658 attach_if = B_TRUE; 9659 match_flags = MATCH_IRE_ILL; 9660 mibptr = ill->ill_ip_mib; 9661 9662 /* 9663 * Check if we need an ire that will not be 9664 * looked up by anybody else i.e. HIDDEN. 9665 */ 9666 if (ill_is_probeonly(ill)) 9667 match_flags |= MATCH_IRE_MARK_HIDDEN; 9668 goto send_from_ill; 9669 } 9670 } 9671 9672 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9673 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9674 ill_t *conn_nofailover_ill; 9675 9676 conn_nofailover_ill = conn_get_held_ill(connp, 9677 &connp->conn_nofailover_ill, &err); 9678 if (err == ILL_LOOKUP_FAILED) { 9679 if (ill != NULL) 9680 ill_refrele(ill); 9681 if (need_decref) 9682 CONN_DEC_REF(connp); 9683 freemsg(first_mp); 9684 return; 9685 } 9686 if (conn_nofailover_ill != NULL) { 9687 if (ill != NULL) 9688 ill_refrele(ill); 9689 ill = conn_nofailover_ill; 9690 attach_if = B_TRUE; 9691 /* 9692 * Assumes that ipc_nofailover_ill is used only for 9693 * multipathing probe packets. These packets are better 9694 * dropped, if they are delayed in ND resolution, for 9695 * the reasons described in nce_queue_mp(). 9696 * IP6I_DROP_IFDELAYED will be set later on in this 9697 * function for this packet. 9698 */ 9699 drop_if_delayed = B_TRUE; 9700 match_flags = MATCH_IRE_ILL; 9701 mibptr = ill->ill_ip_mib; 9702 9703 /* 9704 * Check if we need an ire that will not be 9705 * looked up by anybody else i.e. HIDDEN. 
9706 */ 9707 if (ill_is_probeonly(ill)) 9708 match_flags |= MATCH_IRE_MARK_HIDDEN; 9709 goto send_from_ill; 9710 } 9711 } 9712 9713 /* 9714 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9715 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9716 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9717 */ 9718 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9719 ASSERT(ip6i->ip6i_ifindex != 0); 9720 attach_if = B_TRUE; 9721 ASSERT(ill != NULL); 9722 match_flags = MATCH_IRE_ILL; 9723 9724 /* 9725 * Check if we need an ire that will not be 9726 * looked up by anybody else i.e. HIDDEN. 9727 */ 9728 if (ill_is_probeonly(ill)) 9729 match_flags |= MATCH_IRE_MARK_HIDDEN; 9730 goto send_from_ill; 9731 } 9732 9733 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9734 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9735 ASSERT(ill != NULL); 9736 goto send_from_ill; 9737 } 9738 9739 /* 9740 * 4. If q is an ill queue and (link local or multicast destination) 9741 * then use that ill. 9742 */ 9743 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9744 goto send_from_ill; 9745 } 9746 9747 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9748 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9749 ill_t *conn_outgoing_ill; 9750 9751 conn_outgoing_ill = conn_get_held_ill(connp, 9752 &connp->conn_outgoing_ill, &err); 9753 if (err == ILL_LOOKUP_FAILED) { 9754 if (ill != NULL) 9755 ill_refrele(ill); 9756 if (need_decref) 9757 CONN_DEC_REF(connp); 9758 freemsg(first_mp); 9759 return; 9760 } 9761 if (ill != NULL) 9762 ill_refrele(ill); 9763 ill = conn_outgoing_ill; 9764 mibptr = ill->ill_ip_mib; 9765 goto send_from_ill; 9766 } 9767 9768 /* 9769 * 6. For unicast: Just do an IRE lookup for the best match. 9770 * If we get here for a link-local address it is rather random 9771 * what interface we pick on a multihomed host. 
9772 * *If* there is an IRE_CACHE (and the link-local address 9773 * isn't duplicated on multi links) this will find the IRE_CACHE. 9774 * Otherwise it will use one of the matching IRE_INTERFACE routes 9775 * for the link-local prefix. Hence, applications 9776 * *should* be encouraged to specify an outgoing interface when sending 9777 * to a link local address. 9778 */ 9779 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9780 !connp->conn_fully_bound)) { 9781 /* 9782 * We cache IRE_CACHEs to avoid lookups. We don't do 9783 * this for the tcp global queue and listen end point 9784 * as it does not really have a real destination to 9785 * talk to. 9786 */ 9787 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9788 ipst); 9789 } else { 9790 /* 9791 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9792 * grab a lock here to check for CONDEMNED as it is okay 9793 * to send a packet or two with the IRE_CACHE that is going 9794 * away. 9795 */ 9796 mutex_enter(&connp->conn_lock); 9797 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9798 if (ire != NULL && 9799 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9800 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9801 9802 IRE_REFHOLD(ire); 9803 mutex_exit(&connp->conn_lock); 9804 9805 } else { 9806 boolean_t cached = B_FALSE; 9807 9808 connp->conn_ire_cache = NULL; 9809 mutex_exit(&connp->conn_lock); 9810 /* Release the old ire */ 9811 if (ire != NULL && sctp_ire == NULL) 9812 IRE_REFRELE_NOTR(ire); 9813 9814 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9815 MBLK_GETLABEL(mp), ipst); 9816 if (ire != NULL) { 9817 IRE_REFHOLD_NOTR(ire); 9818 9819 mutex_enter(&connp->conn_lock); 9820 if (CONN_CACHE_IRE(connp) && 9821 (connp->conn_ire_cache == NULL)) { 9822 rw_enter(&ire->ire_bucket->irb_lock, 9823 RW_READER); 9824 if (!(ire->ire_marks & 9825 IRE_MARK_CONDEMNED)) { 9826 connp->conn_ire_cache = ire; 9827 cached = B_TRUE; 9828 } 9829 rw_exit(&ire->ire_bucket->irb_lock); 9830 } 9831 mutex_exit(&connp->conn_lock); 9832 9833 /* 9834 * We can continue to use the ire but since it 9835 * was not cached, we should drop the extra 9836 * reference. 9837 */ 9838 if (!cached) 9839 IRE_REFRELE_NOTR(ire); 9840 } 9841 } 9842 } 9843 9844 if (ire != NULL) { 9845 if (do_outrequests) { 9846 /* Handle IRE_LOCAL's that might appear here */ 9847 if (ire->ire_type == IRE_CACHE) { 9848 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9849 ill_ip_mib; 9850 } else { 9851 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9852 } 9853 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9854 } 9855 ASSERT(!attach_if); 9856 9857 /* 9858 * Check if the ire has the RTF_MULTIRT flag, inherited 9859 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9860 */ 9861 if (ire->ire_flags & RTF_MULTIRT) { 9862 /* 9863 * Force hop limit of multirouted packets if required. 9864 * The hop limit of such packets is bounded by the 9865 * ip_multirt_ttl ndd variable. 9866 * NDP packets must have a hop limit of 255; don't 9867 * change the hop limit in that case. 
9868 */ 9869 if ((ipst->ips_ip_multirt_ttl > 0) && 9870 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9871 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9872 if (ip_debug > 3) { 9873 ip2dbg(("ip_wput_v6: forcing multirt " 9874 "hop limit to %d (was %d) ", 9875 ipst->ips_ip_multirt_ttl, 9876 ip6h->ip6_hops)); 9877 pr_addr_dbg("v6dst %s\n", AF_INET6, 9878 &ire->ire_addr_v6); 9879 } 9880 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9881 } 9882 9883 /* 9884 * We look at this point if there are pending 9885 * unresolved routes. ire_multirt_need_resolve_v6() 9886 * checks in O(n) that all IRE_OFFSUBNET ire 9887 * entries for the packet's destination and 9888 * flagged RTF_MULTIRT are currently resolved. 9889 * If some remain unresolved, we do a copy 9890 * of the current message. It will be used 9891 * to initiate additional route resolutions. 9892 */ 9893 multirt_need_resolve = 9894 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9895 MBLK_GETLABEL(first_mp), ipst); 9896 ip2dbg(("ip_wput_v6: ire %p, " 9897 "multirt_need_resolve %d, first_mp %p\n", 9898 (void *)ire, multirt_need_resolve, 9899 (void *)first_mp)); 9900 if (multirt_need_resolve) { 9901 copy_mp = copymsg(first_mp); 9902 if (copy_mp != NULL) { 9903 MULTIRT_DEBUG_TAG(copy_mp); 9904 } 9905 } 9906 } 9907 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9908 connp, caller, 0, ip6i_flags, zoneid); 9909 if (need_decref) { 9910 CONN_DEC_REF(connp); 9911 connp = NULL; 9912 } 9913 IRE_REFRELE(ire); 9914 9915 /* 9916 * Try to resolve another multiroute if 9917 * ire_multirt_need_resolve_v6() deemed it necessary. 9918 * copy_mp will be consumed (sent or freed) by 9919 * ip_newroute_v6(). 
9920 */ 9921 if (copy_mp != NULL) { 9922 if (mctl_present) { 9923 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9924 } else { 9925 ip6h = (ip6_t *)copy_mp->b_rptr; 9926 } 9927 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9928 &ip6h->ip6_src, NULL, zoneid, ipst); 9929 } 9930 if (ill != NULL) 9931 ill_refrele(ill); 9932 return; 9933 } 9934 9935 /* 9936 * No full IRE for this destination. Send it to 9937 * ip_newroute_v6 to see if anything else matches. 9938 * Mark this packet as having originated on this 9939 * machine. 9940 * Update rptr if there was an ip6i_t header. 9941 */ 9942 mp->b_prev = NULL; 9943 mp->b_next = NULL; 9944 if (ip6i != NULL) 9945 mp->b_rptr -= sizeof (ip6i_t); 9946 9947 if (unspec_src) { 9948 if (ip6i == NULL) { 9949 /* 9950 * Add ip6i_t header to carry unspec_src 9951 * until the packet comes back in ip_wput_v6. 9952 */ 9953 mp = ip_add_info_v6(mp, NULL, v6dstp); 9954 if (mp == NULL) { 9955 if (do_outrequests) 9956 BUMP_MIB(mibptr, 9957 ipIfStatsHCOutRequests); 9958 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9959 if (mctl_present) 9960 freeb(first_mp); 9961 if (ill != NULL) 9962 ill_refrele(ill); 9963 if (need_decref) 9964 CONN_DEC_REF(connp); 9965 return; 9966 } 9967 ip6i = (ip6i_t *)mp->b_rptr; 9968 9969 if (mctl_present) { 9970 ASSERT(first_mp != mp); 9971 first_mp->b_cont = mp; 9972 } else { 9973 first_mp = mp; 9974 } 9975 9976 if ((mp->b_wptr - (uchar_t *)ip6i) == 9977 sizeof (ip6i_t)) { 9978 /* 9979 * ndp_resolver called from ip_newroute_v6 9980 * expects pulled up message. 
9981 */ 9982 if (!pullupmsg(mp, -1)) { 9983 ip1dbg(("ip_wput_v6: pullupmsg" 9984 " failed\n")); 9985 if (do_outrequests) { 9986 BUMP_MIB(mibptr, 9987 ipIfStatsHCOutRequests); 9988 } 9989 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9990 freemsg(first_mp); 9991 if (ill != NULL) 9992 ill_refrele(ill); 9993 if (need_decref) 9994 CONN_DEC_REF(connp); 9995 return; 9996 } 9997 ip6i = (ip6i_t *)mp->b_rptr; 9998 } 9999 ip6h = (ip6_t *)&ip6i[1]; 10000 v6dstp = &ip6h->ip6_dst; 10001 } 10002 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10003 if (mctl_present) { 10004 ASSERT(io != NULL); 10005 io->ipsec_out_unspec_src = unspec_src; 10006 } 10007 } 10008 if (do_outrequests) 10009 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10010 if (need_decref) 10011 CONN_DEC_REF(connp); 10012 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10013 if (ill != NULL) 10014 ill_refrele(ill); 10015 return; 10016 10017 10018 /* 10019 * Handle multicast packets with or without an conn. 10020 * Assumes that the transports set ip6_hops taking 10021 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10022 * into account. 10023 */ 10024 ipv6multicast: 10025 ip2dbg(("ip_wput_v6: multicast\n")); 10026 10027 /* 10028 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10029 * 2. If conn_nofailover_ill is set then use that ill. 10030 * 10031 * Hold the conn_lock till we refhold the ill of interest that is 10032 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10033 * while holding any locks, postpone the refrele until after the 10034 * conn_lock is dropped. 
10035 */ 10036 if (connp != NULL) { 10037 mutex_enter(&connp->conn_lock); 10038 conn_lock_held = B_TRUE; 10039 } else { 10040 conn_lock_held = B_FALSE; 10041 } 10042 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10043 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10044 if (err == ILL_LOOKUP_FAILED) { 10045 ip1dbg(("ip_output_v6: multicast" 10046 " conn_outgoing_pill no ipif\n")); 10047 multicast_discard: 10048 ASSERT(saved_ill == NULL); 10049 if (conn_lock_held) 10050 mutex_exit(&connp->conn_lock); 10051 if (ill != NULL) 10052 ill_refrele(ill); 10053 freemsg(first_mp); 10054 if (do_outrequests) 10055 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10056 if (need_decref) 10057 CONN_DEC_REF(connp); 10058 return; 10059 } 10060 saved_ill = ill; 10061 ill = connp->conn_outgoing_pill; 10062 attach_if = B_TRUE; 10063 match_flags = MATCH_IRE_ILL; 10064 mibptr = ill->ill_ip_mib; 10065 10066 /* 10067 * Check if we need an ire that will not be 10068 * looked up by anybody else i.e. HIDDEN. 10069 */ 10070 if (ill_is_probeonly(ill)) 10071 match_flags |= MATCH_IRE_MARK_HIDDEN; 10072 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10073 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10074 if (err == ILL_LOOKUP_FAILED) { 10075 ip1dbg(("ip_output_v6: multicast" 10076 " conn_nofailover_ill no ipif\n")); 10077 goto multicast_discard; 10078 } 10079 saved_ill = ill; 10080 ill = connp->conn_nofailover_ill; 10081 attach_if = B_TRUE; 10082 match_flags = MATCH_IRE_ILL; 10083 10084 /* 10085 * Check if we need an ire that will not be 10086 * looked up by anybody else i.e. HIDDEN. 10087 */ 10088 if (ill_is_probeonly(ill)) 10089 match_flags |= MATCH_IRE_MARK_HIDDEN; 10090 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10091 /* 10092 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10093 * we should have an ip6i_t with IP6I_ATTACH_IF if 10094 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10095 * used on this endpoint. 10096 */ 10097 ASSERT(ip6i->ip6i_ifindex != 0); 10098 attach_if = B_TRUE; 10099 ASSERT(ill != NULL); 10100 match_flags = MATCH_IRE_ILL; 10101 10102 /* 10103 * Check if we need an ire that will not be 10104 * looked up by anybody else i.e. HIDDEN. 10105 */ 10106 if (ill_is_probeonly(ill)) 10107 match_flags |= MATCH_IRE_MARK_HIDDEN; 10108 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10109 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10110 10111 ASSERT(ill != NULL); 10112 } else if (ill != NULL) { 10113 /* 10114 * 4. If q is an ill queue and (link local or multicast 10115 * destination) then use that ill. 10116 * We don't need the ipif initialization here. 10117 * This useless assert below is just to prevent lint from 10118 * reporting a null body if statement. 10119 */ 10120 ASSERT(ill != NULL); 10121 } else if (connp != NULL) { 10122 /* 10123 * 5. If IPV6_BOUND_IF has been set use that ill. 10124 * 10125 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10126 * Otherwise look for the best IRE match for the unspecified 10127 * group to determine the ill. 10128 * 10129 * conn_multicast_ill is used for only IPv6 packets. 10130 * conn_multicast_ipif is used for only IPv4 packets. 10131 * Thus a PF_INET6 socket send both IPv4 and IPv6 10132 * multicast packets using different IP*_MULTICAST_IF 10133 * interfaces. 
10134 */ 10135 if (connp->conn_outgoing_ill != NULL) { 10136 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10137 if (err == ILL_LOOKUP_FAILED) { 10138 ip1dbg(("ip_output_v6: multicast" 10139 " conn_outgoing_ill no ipif\n")); 10140 goto multicast_discard; 10141 } 10142 ill = connp->conn_outgoing_ill; 10143 } else if (connp->conn_multicast_ill != NULL) { 10144 err = ill_check_and_refhold(connp->conn_multicast_ill); 10145 if (err == ILL_LOOKUP_FAILED) { 10146 ip1dbg(("ip_output_v6: multicast" 10147 " conn_multicast_ill no ipif\n")); 10148 goto multicast_discard; 10149 } 10150 ill = connp->conn_multicast_ill; 10151 } else { 10152 mutex_exit(&connp->conn_lock); 10153 conn_lock_held = B_FALSE; 10154 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10155 if (ipif == NULL) { 10156 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10157 goto multicast_discard; 10158 } 10159 /* 10160 * We have a ref to this ipif, so we can safely 10161 * access ipif_ill. 10162 */ 10163 ill = ipif->ipif_ill; 10164 mutex_enter(&ill->ill_lock); 10165 if (!ILL_CAN_LOOKUP(ill)) { 10166 mutex_exit(&ill->ill_lock); 10167 ipif_refrele(ipif); 10168 ill = NULL; 10169 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10170 goto multicast_discard; 10171 } 10172 ill_refhold_locked(ill); 10173 mutex_exit(&ill->ill_lock); 10174 ipif_refrele(ipif); 10175 /* 10176 * Save binding until IPV6_MULTICAST_IF 10177 * changes it 10178 */ 10179 mutex_enter(&connp->conn_lock); 10180 connp->conn_multicast_ill = ill; 10181 connp->conn_orig_multicast_ifindex = 10182 ill->ill_phyint->phyint_ifindex; 10183 mutex_exit(&connp->conn_lock); 10184 } 10185 } 10186 if (conn_lock_held) 10187 mutex_exit(&connp->conn_lock); 10188 10189 if (saved_ill != NULL) 10190 ill_refrele(saved_ill); 10191 10192 ASSERT(ill != NULL); 10193 /* 10194 * For multicast loopback interfaces replace the multicast address 10195 * with a unicast address for the ire lookup. 
10196 */ 10197 if (IS_LOOPBACK(ill)) 10198 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10199 10200 mibptr = ill->ill_ip_mib; 10201 if (do_outrequests) { 10202 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10203 do_outrequests = B_FALSE; 10204 } 10205 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10206 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10207 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10208 10209 /* 10210 * As we may lose the conn by the time we reach ip_wput_ire_v6 10211 * we copy conn_multicast_loop and conn_dontroute on to an 10212 * ipsec_out. In case if this datagram goes out secure, 10213 * we need the ill_index also. Copy that also into the 10214 * ipsec_out. 10215 */ 10216 if (mctl_present) { 10217 io = (ipsec_out_t *)first_mp->b_rptr; 10218 ASSERT(first_mp->b_datap->db_type == M_CTL); 10219 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10220 } else { 10221 ASSERT(mp == first_mp); 10222 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10223 NULL) { 10224 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10225 freemsg(mp); 10226 if (ill != NULL) 10227 ill_refrele(ill); 10228 if (need_decref) 10229 CONN_DEC_REF(connp); 10230 return; 10231 } 10232 io = (ipsec_out_t *)first_mp->b_rptr; 10233 /* This is not a secure packet */ 10234 io->ipsec_out_secure = B_FALSE; 10235 io->ipsec_out_use_global_policy = B_TRUE; 10236 io->ipsec_out_zoneid = 10237 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10238 first_mp->b_cont = mp; 10239 mctl_present = B_TRUE; 10240 } 10241 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10242 io->ipsec_out_unspec_src = unspec_src; 10243 if (connp != NULL) 10244 io->ipsec_out_dontroute = connp->conn_dontroute; 10245 10246 send_from_ill: 10247 ASSERT(ill != NULL); 10248 ASSERT(mibptr == ill->ill_ip_mib); 10249 if (do_outrequests) { 10250 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10251 do_outrequests = B_FALSE; 10252 } 10253 10254 if (io != NULL) 10255 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10256 10257 /* 10258 * When a specific ill is specified (using IPV6_PKTINFO, 10259 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10260 * on routing entries (ftable and ctable) that have a matching 10261 * ire->ire_ipif->ipif_ill. Thus this can only be used 10262 * for destinations that are on-link for the specific ill 10263 * and that can appear on multiple links. Thus it is useful 10264 * for multicast destinations, link-local destinations, and 10265 * at some point perhaps for site-local destinations (if the 10266 * node sits at a site boundary). 10267 * We create the cache entries in the regular ctable since 10268 * it can not "confuse" things for other destinations. 10269 * table. 10270 * 10271 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10272 * It is used only when ire_cache_lookup is used above. 10273 */ 10274 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10275 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10276 if (ire != NULL) { 10277 /* 10278 * Check if the ire has the RTF_MULTIRT flag, inherited 10279 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10280 */ 10281 if (ire->ire_flags & RTF_MULTIRT) { 10282 /* 10283 * Force hop limit of multirouted packets if required. 10284 * The hop limit of such packets is bounded by the 10285 * ip_multirt_ttl ndd variable. 
10286 * NDP packets must have a hop limit of 255; don't 10287 * change the hop limit in that case. 10288 */ 10289 if ((ipst->ips_ip_multirt_ttl > 0) && 10290 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10291 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10292 if (ip_debug > 3) { 10293 ip2dbg(("ip_wput_v6: forcing multirt " 10294 "hop limit to %d (was %d) ", 10295 ipst->ips_ip_multirt_ttl, 10296 ip6h->ip6_hops)); 10297 pr_addr_dbg("v6dst %s\n", AF_INET6, 10298 &ire->ire_addr_v6); 10299 } 10300 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10301 } 10302 10303 /* 10304 * We look at this point if there are pending 10305 * unresolved routes. ire_multirt_need_resolve_v6() 10306 * checks in O(n) that all IRE_OFFSUBNET ire 10307 * entries for the packet's destination and 10308 * flagged RTF_MULTIRT are currently resolved. 10309 * If some remain unresolved, we make a copy 10310 * of the current message. It will be used 10311 * to initiate additional route resolutions. 10312 */ 10313 multirt_need_resolve = 10314 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10315 MBLK_GETLABEL(first_mp), ipst); 10316 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10317 "multirt_need_resolve %d, first_mp %p\n", 10318 (void *)ire, multirt_need_resolve, 10319 (void *)first_mp)); 10320 if (multirt_need_resolve) { 10321 copy_mp = copymsg(first_mp); 10322 if (copy_mp != NULL) { 10323 MULTIRT_DEBUG_TAG(copy_mp); 10324 } 10325 } 10326 } 10327 10328 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10329 ill->ill_name, (void *)ire, 10330 ill->ill_phyint->phyint_ifindex)); 10331 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10332 connp, caller, 10333 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10334 ip6i_flags, zoneid); 10335 ire_refrele(ire); 10336 if (need_decref) { 10337 CONN_DEC_REF(connp); 10338 connp = NULL; 10339 } 10340 10341 /* 10342 * Try to resolve another multiroute if 10343 * ire_multirt_need_resolve_v6() deemed it necessary. 
10344 * copy_mp will be consumed (sent or freed) by 10345 * ip_newroute_[ipif_]v6(). 10346 */ 10347 if (copy_mp != NULL) { 10348 if (mctl_present) { 10349 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10350 } else { 10351 ip6h = (ip6_t *)copy_mp->b_rptr; 10352 } 10353 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10354 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10355 zoneid, ipst); 10356 if (ipif == NULL) { 10357 ip1dbg(("ip_wput_v6: No ipif for " 10358 "multicast\n")); 10359 MULTIRT_DEBUG_UNTAG(copy_mp); 10360 freemsg(copy_mp); 10361 return; 10362 } 10363 ip_newroute_ipif_v6(q, copy_mp, ipif, 10364 ip6h->ip6_dst, unspec_src, zoneid); 10365 ipif_refrele(ipif); 10366 } else { 10367 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10368 &ip6h->ip6_src, ill, zoneid, ipst); 10369 } 10370 } 10371 ill_refrele(ill); 10372 return; 10373 } 10374 if (need_decref) { 10375 CONN_DEC_REF(connp); 10376 connp = NULL; 10377 } 10378 10379 /* Update rptr if there was an ip6i_t header. */ 10380 if (ip6i != NULL) 10381 mp->b_rptr -= sizeof (ip6i_t); 10382 if (unspec_src || attach_if) { 10383 if (ip6i == NULL) { 10384 /* 10385 * Add ip6i_t header to carry unspec_src 10386 * or attach_if until the packet comes back in 10387 * ip_wput_v6. 10388 */ 10389 if (mctl_present) { 10390 first_mp->b_cont = 10391 ip_add_info_v6(mp, NULL, v6dstp); 10392 mp = first_mp->b_cont; 10393 if (mp == NULL) 10394 freeb(first_mp); 10395 } else { 10396 first_mp = mp = ip_add_info_v6(mp, NULL, 10397 v6dstp); 10398 } 10399 if (mp == NULL) { 10400 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10401 ill_refrele(ill); 10402 return; 10403 } 10404 ip6i = (ip6i_t *)mp->b_rptr; 10405 if ((mp->b_wptr - (uchar_t *)ip6i) == 10406 sizeof (ip6i_t)) { 10407 /* 10408 * ndp_resolver called from ip_newroute_v6 10409 * expects a pulled up message. 
10410 */ 10411 if (!pullupmsg(mp, -1)) { 10412 ip1dbg(("ip_wput_v6: pullupmsg" 10413 " failed\n")); 10414 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10415 freemsg(first_mp); 10416 return; 10417 } 10418 ip6i = (ip6i_t *)mp->b_rptr; 10419 } 10420 ip6h = (ip6_t *)&ip6i[1]; 10421 v6dstp = &ip6h->ip6_dst; 10422 } 10423 if (unspec_src) 10424 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10425 if (attach_if) { 10426 /* 10427 * Bind to nofailover/BOUND_PIF overrides ifindex. 10428 */ 10429 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10430 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10431 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10432 if (drop_if_delayed) { 10433 /* This is a multipathing probe packet */ 10434 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10435 } 10436 } 10437 if (mctl_present) { 10438 ASSERT(io != NULL); 10439 io->ipsec_out_unspec_src = unspec_src; 10440 } 10441 } 10442 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10443 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10444 unspec_src, zoneid); 10445 } else { 10446 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10447 zoneid, ipst); 10448 } 10449 ill_refrele(ill); 10450 return; 10451 10452 notv6: 10453 /* FIXME?: assume the caller calls the right version of ip_output? */ 10454 if (q->q_next == NULL) { 10455 connp = Q_TO_CONN(q); 10456 10457 /* 10458 * We can change conn_send for all types of conn, even 10459 * though only TCP uses it right now. 10460 * FIXME: sctp could use conn_send but doesn't currently. 10461 */ 10462 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10463 } 10464 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10465 (void) ip_output(arg, first_mp, arg2, caller); 10466 if (ill != NULL) 10467 ill_refrele(ill); 10468 } 10469 10470 /* 10471 * If this is a conn_t queue, then we pass in the conn. This includes the 10472 * zoneid. 10473 * Otherwise, this is a message for an ill_t queue, 10474 * in which case we use the global zoneid since those are all part of 10475 * the global zone. 
10476 */ 10477 void 10478 ip_wput_v6(queue_t *q, mblk_t *mp) 10479 { 10480 if (CONN_Q(q)) 10481 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10482 else 10483 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10484 } 10485 10486 static void 10487 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10488 { 10489 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10490 io->ipsec_out_attach_if = B_TRUE; 10491 io->ipsec_out_ill_index = attach_index; 10492 } 10493 10494 /* 10495 * NULL send-to queue - packet is to be delivered locally. 10496 */ 10497 void 10498 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10499 ire_t *ire, int fanout_flags) 10500 { 10501 uint32_t ports; 10502 mblk_t *mp = first_mp, *first_mp1; 10503 boolean_t mctl_present; 10504 uint8_t nexthdr; 10505 uint16_t hdr_length; 10506 ipsec_out_t *io; 10507 mib2_ipIfStatsEntry_t *mibptr; 10508 ilm_t *ilm; 10509 uint_t nexthdr_offset; 10510 ip_stack_t *ipst = ill->ill_ipst; 10511 10512 if (DB_TYPE(mp) == M_CTL) { 10513 io = (ipsec_out_t *)mp->b_rptr; 10514 if (!io->ipsec_out_secure) { 10515 mp = mp->b_cont; 10516 freeb(first_mp); 10517 first_mp = mp; 10518 mctl_present = B_FALSE; 10519 } else { 10520 mctl_present = B_TRUE; 10521 mp = first_mp->b_cont; 10522 ipsec_out_to_in(first_mp); 10523 } 10524 } else { 10525 mctl_present = B_FALSE; 10526 } 10527 10528 /* 10529 * Remove reachability confirmation bit from version field 10530 * before passing the packet on to any firewall hooks or 10531 * looping back the packet. 
10532 */ 10533 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10534 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10535 10536 DTRACE_PROBE4(ip6__loopback__in__start, 10537 ill_t *, ill, ill_t *, NULL, 10538 ip6_t *, ip6h, mblk_t *, first_mp); 10539 10540 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10541 ipst->ips_ipv6firewall_loopback_in, 10542 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10543 10544 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10545 10546 if (first_mp == NULL) 10547 return; 10548 10549 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10550 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10551 int, 1); 10552 10553 nexthdr = ip6h->ip6_nxt; 10554 mibptr = ill->ill_ip_mib; 10555 10556 /* Fastpath */ 10557 switch (nexthdr) { 10558 case IPPROTO_TCP: 10559 case IPPROTO_UDP: 10560 case IPPROTO_ICMPV6: 10561 case IPPROTO_SCTP: 10562 hdr_length = IPV6_HDR_LEN; 10563 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10564 (uchar_t *)ip6h); 10565 break; 10566 default: { 10567 uint8_t *nexthdrp; 10568 10569 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10570 &hdr_length, &nexthdrp)) { 10571 /* Malformed packet */ 10572 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10573 freemsg(first_mp); 10574 return; 10575 } 10576 nexthdr = *nexthdrp; 10577 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10578 break; 10579 } 10580 } 10581 10582 UPDATE_OB_PKT_COUNT(ire); 10583 ire->ire_last_used_time = lbolt; 10584 10585 switch (nexthdr) { 10586 case IPPROTO_TCP: 10587 if (DB_TYPE(mp) == M_DATA) { 10588 /* 10589 * M_DATA mblk, so init mblk (chain) for 10590 * no struio(). 
10591 */ 10592 mblk_t *mp1 = mp; 10593 10594 do { 10595 mp1->b_datap->db_struioflag = 0; 10596 } while ((mp1 = mp1->b_cont) != NULL); 10597 } 10598 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10599 TCP_PORTS_OFFSET); 10600 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10601 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10602 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10603 hdr_length, mctl_present, ire->ire_zoneid); 10604 return; 10605 10606 case IPPROTO_UDP: 10607 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10608 UDP_PORTS_OFFSET); 10609 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10610 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10611 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10612 return; 10613 10614 case IPPROTO_SCTP: 10615 { 10616 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10617 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10618 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10619 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10620 return; 10621 } 10622 case IPPROTO_ICMPV6: { 10623 icmp6_t *icmp6; 10624 10625 /* check for full IPv6+ICMPv6 header */ 10626 if ((mp->b_wptr - mp->b_rptr) < 10627 (hdr_length + ICMP6_MINLEN)) { 10628 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10629 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10630 " failed\n")); 10631 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10632 freemsg(first_mp); 10633 return; 10634 } 10635 ip6h = (ip6_t *)mp->b_rptr; 10636 } 10637 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10638 10639 /* Update output mib stats */ 10640 icmp_update_out_mib_v6(ill, icmp6); 10641 10642 /* Check variable for testing applications */ 10643 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10644 freemsg(first_mp); 10645 return; 10646 } 10647 /* 10648 * Assume that there is always at least one conn for 10649 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10650 * where there is no conn. 
10651 */ 10652 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10653 !IS_LOOPBACK(ill)) { 10654 /* 10655 * In the multicast case, applications may have 10656 * joined the group from different zones, so we 10657 * need to deliver the packet to each of them. 10658 * Loop through the multicast memberships 10659 * structures (ilm) on the receive ill and send 10660 * a copy of the packet up each matching one. 10661 * However, we don't do this for multicasts sent 10662 * on the loopback interface (PHYI_LOOPBACK flag 10663 * set) as they must stay in the sender's zone. 10664 */ 10665 ILM_WALKER_HOLD(ill); 10666 for (ilm = ill->ill_ilm; ilm != NULL; 10667 ilm = ilm->ilm_next) { 10668 if (ilm->ilm_flags & ILM_DELETED) 10669 continue; 10670 if (!IN6_ARE_ADDR_EQUAL( 10671 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10672 continue; 10673 if ((fanout_flags & 10674 IP_FF_NO_MCAST_LOOP) && 10675 ilm->ilm_zoneid == ire->ire_zoneid) 10676 continue; 10677 if (!ipif_lookup_zoneid(ill, 10678 ilm->ilm_zoneid, IPIF_UP, NULL)) 10679 continue; 10680 10681 first_mp1 = ip_copymsg(first_mp); 10682 if (first_mp1 == NULL) 10683 continue; 10684 icmp_inbound_v6(q, first_mp1, ill, 10685 hdr_length, mctl_present, 10686 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10687 NULL); 10688 } 10689 ILM_WALKER_RELE(ill); 10690 } else { 10691 first_mp1 = ip_copymsg(first_mp); 10692 if (first_mp1 != NULL) 10693 icmp_inbound_v6(q, first_mp1, ill, 10694 hdr_length, mctl_present, 10695 IP6_NO_IPPOLICY, ire->ire_zoneid, 10696 NULL); 10697 } 10698 } 10699 /* FALLTHRU */ 10700 default: { 10701 /* 10702 * Handle protocols with which IPv6 is less intimate. 10703 */ 10704 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10705 10706 /* 10707 * Enable sending ICMP for "Unknown" nexthdr 10708 * case. i.e. where we did not FALLTHRU from 10709 * IPPROTO_ICMPV6 processing case above. 
10710 */ 10711 if (nexthdr != IPPROTO_ICMPV6) 10712 fanout_flags |= IP_FF_SEND_ICMP; 10713 /* 10714 * Note: There can be more than one stream bound 10715 * to a particular protocol. When this is the case, 10716 * each one gets a copy of any incoming packets. 10717 */ 10718 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10719 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10720 mctl_present, ire->ire_zoneid); 10721 return; 10722 } 10723 } 10724 } 10725 10726 /* 10727 * Send packet using IRE. 10728 * Checksumming is controlled by cksum_request: 10729 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10730 * 1 => Skip TCP/UDP/SCTP checksum 10731 * Otherwise => checksum_request contains insert offset for checksum 10732 * 10733 * Assumes that the following set of headers appear in the first 10734 * mblk: 10735 * ip6_t 10736 * Any extension headers 10737 * TCP/UDP/SCTP header (if present) 10738 * The routine can handle an ICMPv6 header that is not in the first mblk. 10739 * 10740 * NOTE : This function does not ire_refrele the ire passed in as the 10741 * argument unlike ip_wput_ire where the REFRELE is done. 10742 * Refer to ip_wput_ire for more on this. 10743 */ 10744 static void 10745 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10746 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10747 zoneid_t zoneid) 10748 { 10749 ip6_t *ip6h; 10750 uint8_t nexthdr; 10751 uint16_t hdr_length; 10752 uint_t reachable = 0x0; 10753 ill_t *ill; 10754 mib2_ipIfStatsEntry_t *mibptr; 10755 mblk_t *first_mp; 10756 boolean_t mctl_present; 10757 ipsec_out_t *io; 10758 boolean_t conn_dontroute; /* conn value for multicast */ 10759 boolean_t conn_multicast_loop; /* conn value for multicast */ 10760 boolean_t multicast_forward; /* Should we forward ? 
*/ 10761 int max_frag; 10762 ip_stack_t *ipst = ire->ire_ipst; 10763 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10764 10765 ill = ire_to_ill(ire); 10766 first_mp = mp; 10767 multicast_forward = B_FALSE; 10768 10769 if (mp->b_datap->db_type != M_CTL) { 10770 ip6h = (ip6_t *)first_mp->b_rptr; 10771 } else { 10772 io = (ipsec_out_t *)first_mp->b_rptr; 10773 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10774 /* 10775 * Grab the zone id now because the M_CTL can be discarded by 10776 * ip_wput_ire_parse_ipsec_out() below. 10777 */ 10778 ASSERT(zoneid == io->ipsec_out_zoneid); 10779 ASSERT(zoneid != ALL_ZONES); 10780 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10781 /* 10782 * For the multicast case, ipsec_out carries conn_dontroute and 10783 * conn_multicast_loop as conn may not be available here. We 10784 * need this for multicast loopback and forwarding which is done 10785 * later in the code. 10786 */ 10787 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10788 conn_dontroute = io->ipsec_out_dontroute; 10789 conn_multicast_loop = io->ipsec_out_multicast_loop; 10790 /* 10791 * If conn_dontroute is not set or conn_multicast_loop 10792 * is set, we need to do forwarding/loopback. For 10793 * datagrams from ip_wput_multicast, conn_dontroute is 10794 * set to B_TRUE and conn_multicast_loop is set to 10795 * B_FALSE so that we neither do forwarding nor 10796 * loopback. 10797 */ 10798 if (!conn_dontroute || conn_multicast_loop) 10799 multicast_forward = B_TRUE; 10800 } 10801 } 10802 10803 /* 10804 * If the sender didn't supply the hop limit and there is a default 10805 * unicast hop limit associated with the output interface, we use 10806 * that if the packet is unicast. Interface specific unicast hop 10807 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10808 */ 10809 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10810 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10811 ip6h->ip6_hops = ill->ill_max_hops; 10812 } 10813 10814 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10815 ire->ire_zoneid != ALL_ZONES) { 10816 /* 10817 * When a zone sends a packet to another zone, we try to deliver 10818 * the packet under the same conditions as if the destination 10819 * was a real node on the network. To do so, we look for a 10820 * matching route in the forwarding table. 10821 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10822 * ip_newroute_v6() does. 10823 * Note that IRE_LOCAL are special, since they are used 10824 * when the zoneid doesn't match in some cases. This means that 10825 * we need to handle ipha_src differently since ire_src_addr 10826 * belongs to the receiving zone instead of the sending zone. 10827 * When ip_restrict_interzone_loopback is set, then 10828 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10829 * for loopback between zones when the logical "Ethernet" would 10830 * have looped them back. 
10831 */ 10832 ire_t *src_ire; 10833 10834 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10835 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10836 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10837 if (src_ire != NULL && 10838 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10839 (!ipst->ips_ip_restrict_interzone_loopback || 10840 ire_local_same_ill_group(ire, src_ire))) { 10841 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10842 !unspec_src) { 10843 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10844 } 10845 ire_refrele(src_ire); 10846 } else { 10847 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10848 if (src_ire != NULL) { 10849 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10850 ire_refrele(src_ire); 10851 freemsg(first_mp); 10852 return; 10853 } 10854 ire_refrele(src_ire); 10855 } 10856 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10857 /* Failed */ 10858 freemsg(first_mp); 10859 return; 10860 } 10861 icmp_unreachable_v6(q, first_mp, 10862 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10863 zoneid, ipst); 10864 return; 10865 } 10866 } 10867 10868 if (mp->b_datap->db_type == M_CTL || 10869 ipss->ipsec_outbound_v6_policy_present) { 10870 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10871 connp, unspec_src, zoneid); 10872 if (mp == NULL) { 10873 return; 10874 } 10875 } 10876 10877 first_mp = mp; 10878 if (mp->b_datap->db_type == M_CTL) { 10879 io = (ipsec_out_t *)mp->b_rptr; 10880 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10881 mp = mp->b_cont; 10882 mctl_present = B_TRUE; 10883 } else { 10884 mctl_present = B_FALSE; 10885 } 10886 10887 ip6h = (ip6_t *)mp->b_rptr; 10888 nexthdr = ip6h->ip6_nxt; 10889 mibptr = ill->ill_ip_mib; 10890 10891 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10892 ipif_t *ipif; 10893 10894 /* 10895 * Select the source address using ipif_select_source_v6. 
10896 */ 10897 if (attach_index != 0) { 10898 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10899 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10900 } else { 10901 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10902 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10903 } 10904 if (ipif == NULL) { 10905 if (ip_debug > 2) { 10906 /* ip1dbg */ 10907 pr_addr_dbg("ip_wput_ire_v6: no src for " 10908 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10909 printf("ip_wput_ire_v6: interface name %s\n", 10910 ill->ill_name); 10911 } 10912 freemsg(first_mp); 10913 return; 10914 } 10915 ip6h->ip6_src = ipif->ipif_v6src_addr; 10916 ipif_refrele(ipif); 10917 } 10918 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10919 if ((connp != NULL && connp->conn_multicast_loop) || 10920 !IS_LOOPBACK(ill)) { 10921 ilm_t *ilm; 10922 10923 ILM_WALKER_HOLD(ill); 10924 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10925 ILM_WALKER_RELE(ill); 10926 if (ilm != NULL) { 10927 mblk_t *nmp; 10928 int fanout_flags = 0; 10929 10930 if (connp != NULL && 10931 !connp->conn_multicast_loop) { 10932 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10933 } 10934 ip1dbg(("ip_wput_ire_v6: " 10935 "Loopback multicast\n")); 10936 nmp = ip_copymsg(first_mp); 10937 if (nmp != NULL) { 10938 ip6_t *nip6h; 10939 mblk_t *mp_ip6h; 10940 10941 if (mctl_present) { 10942 nip6h = (ip6_t *) 10943 nmp->b_cont->b_rptr; 10944 mp_ip6h = nmp->b_cont; 10945 } else { 10946 nip6h = (ip6_t *)nmp->b_rptr; 10947 mp_ip6h = nmp; 10948 } 10949 10950 DTRACE_PROBE4( 10951 ip6__loopback__out__start, 10952 ill_t *, NULL, 10953 ill_t *, ill, 10954 ip6_t *, nip6h, 10955 mblk_t *, nmp); 10956 10957 FW_HOOKS6( 10958 ipst->ips_ip6_loopback_out_event, 10959 ipst->ips_ipv6firewall_loopback_out, 10960 NULL, ill, nip6h, nmp, mp_ip6h, 10961 0, ipst); 10962 10963 DTRACE_PROBE1( 10964 ip6__loopback__out__end, 10965 mblk_t *, nmp); 10966 10967 /* 10968 * DTrace this as ip:::send. 
A blocked 10969 * packet will fire the send probe, but 10970 * not the receive probe. 10971 */ 10972 DTRACE_IP7(send, mblk_t *, nmp, 10973 conn_t *, NULL, void_ip_t *, nip6h, 10974 __dtrace_ipsr_ill_t *, ill, 10975 ipha_t *, NULL, ip6_t *, nip6h, 10976 int, 1); 10977 10978 if (nmp != NULL) { 10979 /* 10980 * Deliver locally and to 10981 * every local zone, except 10982 * the sending zone when 10983 * IPV6_MULTICAST_LOOP is 10984 * disabled. 10985 */ 10986 ip_wput_local_v6(RD(q), ill, 10987 nip6h, nmp, 10988 ire, fanout_flags); 10989 } 10990 } else { 10991 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10992 ip1dbg(("ip_wput_ire_v6: " 10993 "copymsg failed\n")); 10994 } 10995 } 10996 } 10997 if (ip6h->ip6_hops == 0 || 10998 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10999 IS_LOOPBACK(ill)) { 11000 /* 11001 * Local multicast or just loopback on loopback 11002 * interface. 11003 */ 11004 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11005 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11006 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11007 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11008 freemsg(first_mp); 11009 return; 11010 } 11011 } 11012 11013 if (ire->ire_stq != NULL) { 11014 uint32_t sum; 11015 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11016 ill_phyint->phyint_ifindex; 11017 queue_t *dev_q = ire->ire_stq->q_next; 11018 11019 /* 11020 * non-NULL send-to queue - packet is to be sent 11021 * out an interface. 11022 */ 11023 11024 /* Driver is flow-controlling? */ 11025 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11026 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11027 /* 11028 * Queue packet if we have an conn to give back 11029 * pressure. We can't queue packets intended for 11030 * hardware acceleration since we've tossed that 11031 * state already. If the packet is being fed back 11032 * from ire_send_v6, we don't know the position in 11033 * the queue to enqueue the packet and we discard 11034 * the packet. 
11035 */ 11036 if (ipst->ips_ip_output_queue && connp != NULL && 11037 !mctl_present && caller != IRE_SEND) { 11038 if (caller == IP_WSRV) { 11039 connp->conn_did_putbq = 1; 11040 (void) putbq(connp->conn_wq, mp); 11041 conn_drain_insert(connp); 11042 /* 11043 * caller == IP_WSRV implies we are 11044 * the service thread, and the 11045 * queue is already noenabled. 11046 * The check for canput and 11047 * the putbq is not atomic. 11048 * So we need to check again. 11049 */ 11050 if (canput(dev_q)) 11051 connp->conn_did_putbq = 0; 11052 } else { 11053 (void) putq(connp->conn_wq, mp); 11054 } 11055 return; 11056 } 11057 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11058 freemsg(first_mp); 11059 return; 11060 } 11061 11062 /* 11063 * Look for reachability confirmations from the transport. 11064 */ 11065 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11066 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11067 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11068 if (mctl_present) 11069 io->ipsec_out_reachable = B_TRUE; 11070 } 11071 /* Fastpath */ 11072 switch (nexthdr) { 11073 case IPPROTO_TCP: 11074 case IPPROTO_UDP: 11075 case IPPROTO_ICMPV6: 11076 case IPPROTO_SCTP: 11077 hdr_length = IPV6_HDR_LEN; 11078 break; 11079 default: { 11080 uint8_t *nexthdrp; 11081 11082 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11083 &hdr_length, &nexthdrp)) { 11084 /* Malformed packet */ 11085 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11086 freemsg(first_mp); 11087 return; 11088 } 11089 nexthdr = *nexthdrp; 11090 break; 11091 } 11092 } 11093 11094 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11095 uint16_t *up; 11096 uint16_t *insp; 11097 11098 /* 11099 * The packet header is processed once for all, even 11100 * in the multirouting case. We disable hardware 11101 * checksum if the packet is multirouted, as it will be 11102 * replicated via several interfaces, and not all of 11103 * them may have this capability. 
11104 */ 11105 if (cksum_request == 1 && 11106 !(ire->ire_flags & RTF_MULTIRT)) { 11107 /* Skip the transport checksum */ 11108 goto cksum_done; 11109 } 11110 /* 11111 * Do user-configured raw checksum. 11112 * Compute checksum and insert at offset "cksum_request" 11113 */ 11114 11115 /* check for enough headers for checksum */ 11116 cksum_request += hdr_length; /* offset from rptr */ 11117 if ((mp->b_wptr - mp->b_rptr) < 11118 (cksum_request + sizeof (int16_t))) { 11119 if (!pullupmsg(mp, 11120 cksum_request + sizeof (int16_t))) { 11121 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11122 " failed\n")); 11123 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11124 freemsg(first_mp); 11125 return; 11126 } 11127 ip6h = (ip6_t *)mp->b_rptr; 11128 } 11129 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11130 ASSERT(((uintptr_t)insp & 0x1) == 0); 11131 up = (uint16_t *)&ip6h->ip6_src; 11132 /* 11133 * icmp has placed length and routing 11134 * header adjustment in *insp. 11135 */ 11136 sum = htons(nexthdr) + 11137 up[0] + up[1] + up[2] + up[3] + 11138 up[4] + up[5] + up[6] + up[7] + 11139 up[8] + up[9] + up[10] + up[11] + 11140 up[12] + up[13] + up[14] + up[15]; 11141 sum = (sum & 0xffff) + (sum >> 16); 11142 *insp = IP_CSUM(mp, hdr_length, sum); 11143 } else if (nexthdr == IPPROTO_TCP) { 11144 uint16_t *up; 11145 11146 /* 11147 * Check for full IPv6 header + enough TCP header 11148 * to get at the checksum field. 
11149 */ 11150 if ((mp->b_wptr - mp->b_rptr) < 11151 (hdr_length + TCP_CHECKSUM_OFFSET + 11152 TCP_CHECKSUM_SIZE)) { 11153 if (!pullupmsg(mp, hdr_length + 11154 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11155 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11156 " failed\n")); 11157 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11158 freemsg(first_mp); 11159 return; 11160 } 11161 ip6h = (ip6_t *)mp->b_rptr; 11162 } 11163 11164 up = (uint16_t *)&ip6h->ip6_src; 11165 /* 11166 * Note: The TCP module has stored the length value 11167 * into the tcp checksum field, so we don't 11168 * need to explicitly sum it in here. 11169 */ 11170 sum = up[0] + up[1] + up[2] + up[3] + 11171 up[4] + up[5] + up[6] + up[7] + 11172 up[8] + up[9] + up[10] + up[11] + 11173 up[12] + up[13] + up[14] + up[15]; 11174 11175 /* Fold the initial sum */ 11176 sum = (sum & 0xffff) + (sum >> 16); 11177 11178 up = (uint16_t *)(((uchar_t *)ip6h) + 11179 hdr_length + TCP_CHECKSUM_OFFSET); 11180 11181 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11182 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11183 ire->ire_max_frag, mctl_present, sum); 11184 11185 /* Software checksum? 
*/ 11186 if (DB_CKSUMFLAGS(mp) == 0) { 11187 IP6_STAT(ipst, ip6_out_sw_cksum); 11188 IP6_STAT_UPDATE(ipst, 11189 ip6_tcp_out_sw_cksum_bytes, 11190 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11191 hdr_length); 11192 } 11193 } else if (nexthdr == IPPROTO_UDP) { 11194 uint16_t *up; 11195 11196 /* 11197 * check for full IPv6 header + enough UDP header 11198 * to get at the UDP checksum field 11199 */ 11200 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11201 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11202 if (!pullupmsg(mp, hdr_length + 11203 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11204 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11205 " failed\n")); 11206 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11207 freemsg(first_mp); 11208 return; 11209 } 11210 ip6h = (ip6_t *)mp->b_rptr; 11211 } 11212 up = (uint16_t *)&ip6h->ip6_src; 11213 /* 11214 * Note: The UDP module has stored the length value 11215 * into the udp checksum field, so we don't 11216 * need to explicitly sum it in here. 11217 */ 11218 sum = up[0] + up[1] + up[2] + up[3] + 11219 up[4] + up[5] + up[6] + up[7] + 11220 up[8] + up[9] + up[10] + up[11] + 11221 up[12] + up[13] + up[14] + up[15]; 11222 11223 /* Fold the initial sum */ 11224 sum = (sum & 0xffff) + (sum >> 16); 11225 11226 up = (uint16_t *)(((uchar_t *)ip6h) + 11227 hdr_length + UDP_CHECKSUM_OFFSET); 11228 11229 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11230 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11231 ire->ire_max_frag, mctl_present, sum); 11232 11233 /* Software checksum? 
*/ 11234 if (DB_CKSUMFLAGS(mp) == 0) { 11235 IP6_STAT(ipst, ip6_out_sw_cksum); 11236 IP6_STAT_UPDATE(ipst, 11237 ip6_udp_out_sw_cksum_bytes, 11238 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11239 hdr_length); 11240 } 11241 } else if (nexthdr == IPPROTO_ICMPV6) { 11242 uint16_t *up; 11243 icmp6_t *icmp6; 11244 11245 /* check for full IPv6+ICMPv6 header */ 11246 if ((mp->b_wptr - mp->b_rptr) < 11247 (hdr_length + ICMP6_MINLEN)) { 11248 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11249 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11250 " failed\n")); 11251 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11252 freemsg(first_mp); 11253 return; 11254 } 11255 ip6h = (ip6_t *)mp->b_rptr; 11256 } 11257 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11258 up = (uint16_t *)&ip6h->ip6_src; 11259 /* 11260 * icmp has placed length and routing 11261 * header adjustment in icmp6_cksum. 11262 */ 11263 sum = htons(IPPROTO_ICMPV6) + 11264 up[0] + up[1] + up[2] + up[3] + 11265 up[4] + up[5] + up[6] + up[7] + 11266 up[8] + up[9] + up[10] + up[11] + 11267 up[12] + up[13] + up[14] + up[15]; 11268 sum = (sum & 0xffff) + (sum >> 16); 11269 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11270 11271 /* Update output mib stats */ 11272 icmp_update_out_mib_v6(ill, icmp6); 11273 } else if (nexthdr == IPPROTO_SCTP) { 11274 sctp_hdr_t *sctph; 11275 11276 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11277 if (!pullupmsg(mp, hdr_length + 11278 sizeof (*sctph))) { 11279 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11280 " failed\n")); 11281 BUMP_MIB(ill->ill_ip_mib, 11282 ipIfStatsOutDiscards); 11283 freemsg(mp); 11284 return; 11285 } 11286 ip6h = (ip6_t *)mp->b_rptr; 11287 } 11288 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11289 sctph->sh_chksum = 0; 11290 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11291 } 11292 11293 cksum_done: 11294 /* 11295 * We force the insertion of a fragment header using the 11296 * IPH_FRAG_HDR flag in two cases: 11297 * - after reception of an ICMPv6 "packet too 
big" message 11298 * with a MTU < 1280 (cf. RFC 2460 section 5) 11299 * - for multirouted IPv6 packets, so that the receiver can 11300 * discard duplicates according to their fragment identifier 11301 * 11302 * Two flags modifed from the API can modify this behavior. 11303 * The first is IPV6_USE_MIN_MTU. With this API the user 11304 * can specify how to manage PMTUD for unicast and multicast. 11305 * 11306 * IPV6_DONTFRAG disallows fragmentation. 11307 */ 11308 max_frag = ire->ire_max_frag; 11309 switch (IP6I_USE_MIN_MTU_API(flags)) { 11310 case IPV6_USE_MIN_MTU_DEFAULT: 11311 case IPV6_USE_MIN_MTU_UNICAST: 11312 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11313 max_frag = IPV6_MIN_MTU; 11314 } 11315 break; 11316 11317 case IPV6_USE_MIN_MTU_NEVER: 11318 max_frag = IPV6_MIN_MTU; 11319 break; 11320 } 11321 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11322 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11323 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11324 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11325 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11326 return; 11327 } 11328 11329 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11330 (mp->b_cont ? 
msgdsize(mp) : 11331 mp->b_wptr - (uchar_t *)ip6h)) { 11332 ip0dbg(("Packet length mismatch: %d, %ld\n", 11333 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11334 msgdsize(mp))); 11335 freemsg(first_mp); 11336 return; 11337 } 11338 /* Do IPSEC processing first */ 11339 if (mctl_present) { 11340 if (attach_index != 0) 11341 ipsec_out_attach_if(io, attach_index); 11342 ipsec_out_process(q, first_mp, ire, ill_index); 11343 return; 11344 } 11345 ASSERT(mp->b_prev == NULL); 11346 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11347 ntohs(ip6h->ip6_plen) + 11348 IPV6_HDR_LEN, max_frag)); 11349 ASSERT(mp == first_mp); 11350 /* Initiate IPPF processing */ 11351 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11352 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11353 if (mp == NULL) { 11354 return; 11355 } 11356 } 11357 ip_wput_frag_v6(mp, ire, reachable, connp, 11358 caller, max_frag); 11359 return; 11360 } 11361 /* Do IPSEC processing first */ 11362 if (mctl_present) { 11363 int extra_len = ipsec_out_extra_length(first_mp); 11364 11365 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11366 max_frag) { 11367 /* 11368 * IPsec headers will push the packet over the 11369 * MTU limit. Issue an ICMPv6 Packet Too Big 11370 * message for this packet if the upper-layer 11371 * that issued this packet will be able to 11372 * react to the icmp_pkt2big_v6() that we'll 11373 * generate. 11374 */ 11375 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11376 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11377 return; 11378 } 11379 if (attach_index != 0) 11380 ipsec_out_attach_if(io, attach_index); 11381 ipsec_out_process(q, first_mp, ire, ill_index); 11382 return; 11383 } 11384 /* 11385 * XXX multicast: add ip_mforward_v6() here. 11386 * Check conn_dontroute 11387 */ 11388 #ifdef lint 11389 /* 11390 * XXX The only purpose of this statement is to avoid lint 11391 * errors. See the above "XXX multicast". When that gets 11392 * fixed, remove this whole #ifdef lint section. 
11393 */ 11394 ip3dbg(("multicast forward is %s.\n", 11395 (multicast_forward ? "TRUE" : "FALSE"))); 11396 #endif 11397 11398 UPDATE_OB_PKT_COUNT(ire); 11399 ire->ire_last_used_time = lbolt; 11400 ASSERT(mp == first_mp); 11401 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11402 } else { 11403 /* 11404 * DTrace this as ip:::send. A blocked packet will fire the 11405 * send probe, but not the receive probe. 11406 */ 11407 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11408 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11409 NULL, ip6_t *, ip6h, int, 1); 11410 DTRACE_PROBE4(ip6__loopback__out__start, 11411 ill_t *, NULL, ill_t *, ill, 11412 ip6_t *, ip6h, mblk_t *, first_mp); 11413 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11414 ipst->ips_ipv6firewall_loopback_out, 11415 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11416 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11417 if (first_mp != NULL) 11418 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11419 } 11420 } 11421 11422 /* 11423 * Outbound IPv6 fragmentation routine using MDT. 11424 */ 11425 static void 11426 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11427 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11428 { 11429 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11430 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11431 mblk_t *hdr_mp, *md_mp = NULL; 11432 int i1; 11433 multidata_t *mmd; 11434 unsigned char *hdr_ptr, *pld_ptr; 11435 ip_pdescinfo_t pdi; 11436 uint32_t ident; 11437 size_t len; 11438 uint16_t offset; 11439 queue_t *stq = ire->ire_stq; 11440 ill_t *ill = (ill_t *)stq->q_ptr; 11441 ip_stack_t *ipst = ill->ill_ipst; 11442 11443 ASSERT(DB_TYPE(mp) == M_DATA); 11444 ASSERT(MBLKL(mp) > unfragmentable_len); 11445 11446 /* 11447 * Move read ptr past unfragmentable portion, we don't want this part 11448 * of the data in our fragments. 
11449 */ 11450 mp->b_rptr += unfragmentable_len; 11451 11452 /* Calculate how many packets we will send out */ 11453 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11454 pkts = (i1 + max_chunk - 1) / max_chunk; 11455 ASSERT(pkts > 1); 11456 11457 /* Allocate a message block which will hold all the IP Headers. */ 11458 wroff = ipst->ips_ip_wroff_extra; 11459 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11460 11461 i1 = pkts * hdr_chunk_len; 11462 /* 11463 * Create the header buffer, Multidata and destination address 11464 * and SAP attribute that should be associated with it. 11465 */ 11466 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11467 ((hdr_mp->b_wptr += i1), 11468 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11469 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11470 freemsg(mp); 11471 if (md_mp == NULL) { 11472 freemsg(hdr_mp); 11473 } else { 11474 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11475 freemsg(md_mp); 11476 } 11477 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11478 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11479 return; 11480 } 11481 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11482 11483 /* 11484 * Add a payload buffer to the Multidata; this operation must not 11485 * fail, or otherwise our logic in this routine is broken. There 11486 * is no memory allocation done by the routine, so any returned 11487 * failure simply tells us that we've done something wrong. 11488 * 11489 * A failure tells us that either we're adding the same payload 11490 * buffer more than once, or we're trying to add more buffers than 11491 * allowed. None of the above cases should happen, and we panic 11492 * because either there's horrible heap corruption, and/or 11493 * programming mistake. 
11494 */ 11495 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11496 goto pbuf_panic; 11497 } 11498 11499 hdr_ptr = hdr_mp->b_rptr; 11500 pld_ptr = mp->b_rptr; 11501 11502 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11503 11504 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11505 11506 /* 11507 * len is the total length of the fragmentable data in this 11508 * datagram. For each fragment sent, we will decrement len 11509 * by the amount of fragmentable data sent in that fragment 11510 * until len reaches zero. 11511 */ 11512 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11513 11514 offset = 0; 11515 prev_nexthdr_offset += wroff; 11516 11517 while (len != 0) { 11518 size_t mlen; 11519 ip6_t *fip6h; 11520 ip6_frag_t *fraghdr; 11521 int error; 11522 11523 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11524 mlen = MIN(len, max_chunk); 11525 len -= mlen; 11526 11527 fip6h = (ip6_t *)(hdr_ptr + wroff); 11528 ASSERT(OK_32PTR(fip6h)); 11529 bcopy(ip6h, fip6h, unfragmentable_len); 11530 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11531 11532 fip6h->ip6_plen = htons((uint16_t)(mlen + 11533 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11534 11535 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11536 unfragmentable_len); 11537 fraghdr->ip6f_nxt = nexthdr; 11538 fraghdr->ip6f_reserved = 0; 11539 fraghdr->ip6f_offlg = htons(offset) | 11540 ((len != 0) ? IP6F_MORE_FRAG : 0); 11541 fraghdr->ip6f_ident = ident; 11542 11543 /* 11544 * Record offset and size of header and data of the next packet 11545 * in the multidata message. 
11546 */ 11547 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11548 unfragmentable_len + sizeof (ip6_frag_t), 0); 11549 PDESC_PLD_INIT(&pdi); 11550 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11551 ASSERT(i1 > 0); 11552 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11553 if (i1 == mlen) { 11554 pld_ptr += mlen; 11555 } else { 11556 i1 = mlen - i1; 11557 mp = mp->b_cont; 11558 ASSERT(mp != NULL); 11559 ASSERT(MBLKL(mp) >= i1); 11560 /* 11561 * Attach the next payload message block to the 11562 * multidata message. 11563 */ 11564 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11565 goto pbuf_panic; 11566 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11567 pld_ptr = mp->b_rptr + i1; 11568 } 11569 11570 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11571 KM_NOSLEEP)) == NULL) { 11572 /* 11573 * Any failure other than ENOMEM indicates that we 11574 * have passed in invalid pdesc info or parameters 11575 * to mmd_addpdesc, which must not happen. 11576 * 11577 * EINVAL is a result of failure on boundary checks 11578 * against the pdesc info contents. It should not 11579 * happen, and we panic because either there's 11580 * horrible heap corruption, and/or programming 11581 * mistake. 11582 */ 11583 if (error != ENOMEM) { 11584 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11585 "pdesc logic error detected for " 11586 "mmd %p pinfo %p (%d)\n", 11587 (void *)mmd, (void *)&pdi, error); 11588 /* NOTREACHED */ 11589 } 11590 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11591 /* Free unattached payload message blocks as well */ 11592 md_mp->b_cont = mp->b_cont; 11593 goto free_mmd; 11594 } 11595 11596 /* Advance fragment offset. */ 11597 offset += mlen; 11598 11599 /* Advance to location for next header in the buffer. */ 11600 hdr_ptr += hdr_chunk_len; 11601 11602 /* Did we reach the next payload message block? 
*/ 11603 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11604 mp = mp->b_cont; 11605 /* 11606 * Attach the next message block with payload 11607 * data to the multidata message. 11608 */ 11609 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11610 goto pbuf_panic; 11611 pld_ptr = mp->b_rptr; 11612 } 11613 } 11614 11615 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11616 ASSERT(mp->b_wptr == pld_ptr); 11617 11618 /* Update IP statistics */ 11619 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11620 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11621 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11622 /* 11623 * The ipv6 header len is accounted for in unfragmentable_len so 11624 * when calculating the fragmentation overhead just add the frag 11625 * header len. 11626 */ 11627 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11628 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11629 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11630 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11631 11632 ire->ire_ob_pkt_count += pkts; 11633 if (ire->ire_ipif != NULL) 11634 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11635 11636 ire->ire_last_used_time = lbolt; 11637 /* Send it down */ 11638 putnext(stq, md_mp); 11639 return; 11640 11641 pbuf_panic: 11642 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11643 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11644 pbuf_idx); 11645 /* NOTREACHED */ 11646 } 11647 11648 /* 11649 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11650 * We have not optimized this in terms of number of mblks 11651 * allocated. For instance, for each fragment sent we always allocate a 11652 * mblk to hold the IPv6 header and fragment header. 11653 * 11654 * Assumes that all the extension headers are contained in the first mblk. 
11655 * 11656 * The fragment header is inserted after an hop-by-hop options header 11657 * and after [an optional destinations header followed by] a routing header. 11658 * 11659 * NOTE : This function does not ire_refrele the ire passed in as 11660 * the argument. 11661 */ 11662 void 11663 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11664 int caller, int max_frag) 11665 { 11666 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11667 ip6_t *fip6h; 11668 mblk_t *hmp; 11669 mblk_t *hmp0; 11670 mblk_t *dmp; 11671 ip6_frag_t *fraghdr; 11672 size_t unfragmentable_len; 11673 size_t len; 11674 size_t mlen; 11675 size_t max_chunk; 11676 uint32_t ident; 11677 uint16_t off_flags; 11678 uint16_t offset = 0; 11679 ill_t *ill; 11680 uint8_t nexthdr; 11681 uint_t prev_nexthdr_offset; 11682 uint8_t *ptr; 11683 ip_stack_t *ipst = ire->ire_ipst; 11684 11685 ASSERT(ire->ire_type == IRE_CACHE); 11686 ill = (ill_t *)ire->ire_stq->q_ptr; 11687 11688 if (max_frag <= 0) { 11689 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11690 freemsg(mp); 11691 return; 11692 } 11693 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11694 11695 /* 11696 * Determine the length of the unfragmentable portion of this 11697 * datagram. This consists of the IPv6 header, a potential 11698 * hop-by-hop options header, a potential pre-routing-header 11699 * destination options header, and a potential routing header. 
11700 */ 11701 nexthdr = ip6h->ip6_nxt; 11702 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11703 ptr = (uint8_t *)&ip6h[1]; 11704 11705 if (nexthdr == IPPROTO_HOPOPTS) { 11706 ip6_hbh_t *hbh_hdr; 11707 uint_t hdr_len; 11708 11709 hbh_hdr = (ip6_hbh_t *)ptr; 11710 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11711 nexthdr = hbh_hdr->ip6h_nxt; 11712 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11713 - (uint8_t *)ip6h; 11714 ptr += hdr_len; 11715 } 11716 if (nexthdr == IPPROTO_DSTOPTS) { 11717 ip6_dest_t *dest_hdr; 11718 uint_t hdr_len; 11719 11720 dest_hdr = (ip6_dest_t *)ptr; 11721 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11722 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11723 nexthdr = dest_hdr->ip6d_nxt; 11724 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11725 - (uint8_t *)ip6h; 11726 ptr += hdr_len; 11727 } 11728 } 11729 if (nexthdr == IPPROTO_ROUTING) { 11730 ip6_rthdr_t *rthdr; 11731 uint_t hdr_len; 11732 11733 rthdr = (ip6_rthdr_t *)ptr; 11734 nexthdr = rthdr->ip6r_nxt; 11735 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11736 - (uint8_t *)ip6h; 11737 hdr_len = 8 * (rthdr->ip6r_len + 1); 11738 ptr += hdr_len; 11739 } 11740 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11741 11742 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11743 sizeof (ip6_frag_t)) & ~7; 11744 11745 /* Check if we can use MDT to send out the frags. */ 11746 ASSERT(!IRE_IS_LOCAL(ire)); 11747 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11748 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11749 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11750 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11751 nexthdr, prev_nexthdr_offset); 11752 return; 11753 } 11754 11755 /* 11756 * Allocate an mblk with enough room for the link-layer 11757 * header, the unfragmentable part of the datagram, and the 11758 * fragment header. 
This (or a copy) will be used as the 11759 * first mblk for each fragment we send. 11760 */ 11761 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11762 ipst->ips_ip_wroff_extra, BPRI_HI); 11763 if (hmp == NULL) { 11764 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11765 freemsg(mp); 11766 return; 11767 } 11768 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11769 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11770 11771 fip6h = (ip6_t *)hmp->b_rptr; 11772 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11773 11774 bcopy(ip6h, fip6h, unfragmentable_len); 11775 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11776 11777 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11778 11779 fraghdr->ip6f_nxt = nexthdr; 11780 fraghdr->ip6f_reserved = 0; 11781 fraghdr->ip6f_offlg = 0; 11782 fraghdr->ip6f_ident = htonl(ident); 11783 11784 /* 11785 * len is the total length of the fragmentable data in this 11786 * datagram. For each fragment sent, we will decrement len 11787 * by the amount of fragmentable data sent in that fragment 11788 * until len reaches zero. 11789 */ 11790 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11791 11792 /* 11793 * Move read ptr past unfragmentable portion, we don't want this part 11794 * of the data in our fragments. 
11795 */ 11796 mp->b_rptr += unfragmentable_len; 11797 11798 while (len != 0) { 11799 mlen = MIN(len, max_chunk); 11800 len -= mlen; 11801 if (len != 0) { 11802 /* Not last */ 11803 hmp0 = copyb(hmp); 11804 if (hmp0 == NULL) { 11805 freeb(hmp); 11806 freemsg(mp); 11807 BUMP_MIB(ill->ill_ip_mib, 11808 ipIfStatsOutFragFails); 11809 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11810 return; 11811 } 11812 off_flags = IP6F_MORE_FRAG; 11813 } else { 11814 /* Last fragment */ 11815 hmp0 = hmp; 11816 hmp = NULL; 11817 off_flags = 0; 11818 } 11819 fip6h = (ip6_t *)(hmp0->b_rptr); 11820 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11821 11822 fip6h->ip6_plen = htons((uint16_t)(mlen + 11823 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11824 /* 11825 * Note: Optimization alert. 11826 * In IPv6 (and IPv4) protocol header, Fragment Offset 11827 * ("offset") is 13 bits wide and in 8-octet units. 11828 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11829 * it occupies the most significant 13 bits. 11830 * (least significant 13 bits in IPv4). 11831 * We do not do any shifts here. Not shifting is same effect 11832 * as taking offset value in octet units, dividing by 8 and 11833 * then shifting 3 bits left to line it up in place in proper 11834 * place protocol header. 
11835 */ 11836 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11837 11838 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11839 /* mp has already been freed by ip_carve_mp() */ 11840 if (hmp != NULL) 11841 freeb(hmp); 11842 freeb(hmp0); 11843 ip1dbg(("ip_carve_mp: failed\n")); 11844 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11845 return; 11846 } 11847 hmp0->b_cont = dmp; 11848 /* Get the priority marking, if any */ 11849 hmp0->b_band = dmp->b_band; 11850 UPDATE_OB_PKT_COUNT(ire); 11851 ire->ire_last_used_time = lbolt; 11852 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11853 caller, NULL); 11854 reachable = 0; /* No need to redo state machine in loop */ 11855 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11856 offset += mlen; 11857 } 11858 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11859 } 11860 11861 /* 11862 * Determine if the ill and multicast aspects of that packets 11863 * "matches" the conn. 11864 */ 11865 boolean_t 11866 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11867 zoneid_t zoneid) 11868 { 11869 ill_t *in_ill; 11870 boolean_t wantpacket = B_TRUE; 11871 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11872 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11873 11874 /* 11875 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11876 * unicast and multicast reception to conn_incoming_ill. 11877 * conn_wantpacket_v6 is called both for unicast and 11878 * multicast. 11879 * 11880 * 1) The unicast copy of the packet can come anywhere in 11881 * the ill group if it is part of the group. Thus, we 11882 * need to check to see whether the ill group matches 11883 * if in_ill is part of a group. 11884 * 11885 * 2) ip_rput does not suppress duplicate multicast packets. 
11886 * If there are two interfaces in a ill group and we have 11887 * 2 applications (conns) joined a multicast group G on 11888 * both the interfaces, ilm_lookup_ill filter in ip_rput 11889 * will give us two packets because we join G on both the 11890 * interfaces rather than nominating just one interface 11891 * for receiving multicast like broadcast above. So, 11892 * we have to call ilg_lookup_ill to filter out duplicate 11893 * copies, if ill is part of a group, to supress duplicates. 11894 */ 11895 in_ill = connp->conn_incoming_ill; 11896 if (in_ill != NULL) { 11897 mutex_enter(&connp->conn_lock); 11898 in_ill = connp->conn_incoming_ill; 11899 mutex_enter(&ill->ill_lock); 11900 /* 11901 * No IPMP, and the packet did not arrive on conn_incoming_ill 11902 * OR, IPMP in use and the packet arrived on an IPMP group 11903 * different from the conn_incoming_ill's IPMP group. 11904 * Reject the packet. 11905 */ 11906 if ((in_ill->ill_group == NULL && in_ill != ill) || 11907 (in_ill->ill_group != NULL && 11908 in_ill->ill_group != ill->ill_group)) { 11909 wantpacket = B_FALSE; 11910 } 11911 mutex_exit(&ill->ill_lock); 11912 mutex_exit(&connp->conn_lock); 11913 if (!wantpacket) 11914 return (B_FALSE); 11915 } 11916 11917 if (connp->conn_multi_router) 11918 return (B_TRUE); 11919 11920 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11921 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11922 /* 11923 * Unicast case: we match the conn only if it's in the specified 11924 * zone. 11925 */ 11926 return (IPCL_ZONE_MATCH(connp, zoneid)); 11927 } 11928 11929 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11930 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11931 /* 11932 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11933 * disabled, therefore we don't dispatch the multicast packet to 11934 * the sending zone. 
11935 */ 11936 return (B_FALSE); 11937 } 11938 11939 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11940 zoneid != ALL_ZONES) { 11941 /* 11942 * Multicast packet on the loopback interface: we only match 11943 * conns who joined the group in the specified zone. 11944 */ 11945 return (B_FALSE); 11946 } 11947 11948 mutex_enter(&connp->conn_lock); 11949 wantpacket = 11950 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11951 mutex_exit(&connp->conn_lock); 11952 11953 return (wantpacket); 11954 } 11955 11956 11957 /* 11958 * Transmit a packet and update any NUD state based on the flags 11959 * XXX need to "recover" any ip6i_t when doing putq! 11960 * 11961 * NOTE : This function does not ire_refrele the ire passed in as the 11962 * argument. 11963 */ 11964 void 11965 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11966 int caller, ipsec_out_t *io) 11967 { 11968 mblk_t *mp1; 11969 nce_t *nce = ire->ire_nce; 11970 ill_t *ill; 11971 ill_t *out_ill; 11972 uint64_t delta; 11973 ip6_t *ip6h; 11974 queue_t *stq = ire->ire_stq; 11975 ire_t *ire1 = NULL; 11976 ire_t *save_ire = ire; 11977 boolean_t multirt_send = B_FALSE; 11978 mblk_t *next_mp = NULL; 11979 ip_stack_t *ipst = ire->ire_ipst; 11980 11981 ip6h = (ip6_t *)mp->b_rptr; 11982 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11983 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11984 ASSERT(nce != NULL); 11985 ASSERT(mp->b_datap->db_type == M_DATA); 11986 ASSERT(stq != NULL); 11987 11988 ill = ire_to_ill(ire); 11989 if (!ill) { 11990 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11991 freemsg(mp); 11992 return; 11993 } 11994 11995 /* 11996 * If a packet is to be sent out an interface that is a 6to4 11997 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11998 * destination, must be checked to have a 6to4 prefix 11999 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12000 * address configured on the sending interface. 
Otherwise, 12001 * the packet was delivered to this interface in error and the 12002 * packet must be dropped. 12003 */ 12004 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12005 ipif_t *ipif = ill->ill_ipif; 12006 12007 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12008 &ip6h->ip6_dst)) { 12009 if (ip_debug > 2) { 12010 /* ip1dbg */ 12011 pr_addr_dbg("ip_xmit_v6: attempting to " 12012 "send 6to4 addressed IPv6 " 12013 "destination (%s) out the wrong " 12014 "interface.\n", AF_INET6, 12015 &ip6h->ip6_dst); 12016 } 12017 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12018 freemsg(mp); 12019 return; 12020 } 12021 } 12022 12023 /* Flow-control check has been done in ip_wput_ire_v6 */ 12024 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12025 caller == IP_WSRV || canput(stq->q_next)) { 12026 uint32_t ill_index; 12027 12028 /* 12029 * In most cases, the emission loop below is entered only 12030 * once. Only in the case where the ire holds the 12031 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12032 * flagged ires in the bucket, and send the packet 12033 * through all crossed RTF_MULTIRT routes. 12034 */ 12035 if (ire->ire_flags & RTF_MULTIRT) { 12036 /* 12037 * Multirouting case. The bucket where ire is stored 12038 * probably holds other RTF_MULTIRT flagged ires 12039 * to the destination. In this call to ip_xmit_v6, 12040 * we attempt to send the packet through all 12041 * those ires. Thus, we first ensure that ire is the 12042 * first RTF_MULTIRT ire in the bucket, 12043 * before walking the ire list. 12044 */ 12045 ire_t *first_ire; 12046 irb_t *irb = ire->ire_bucket; 12047 ASSERT(irb != NULL); 12048 multirt_send = B_TRUE; 12049 12050 /* Make sure we do not omit any multiroute ire. 
*/ 12051 IRB_REFHOLD(irb); 12052 for (first_ire = irb->irb_ire; 12053 first_ire != NULL; 12054 first_ire = first_ire->ire_next) { 12055 if ((first_ire->ire_flags & RTF_MULTIRT) && 12056 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12057 &ire->ire_addr_v6)) && 12058 !(first_ire->ire_marks & 12059 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12060 break; 12061 } 12062 12063 if ((first_ire != NULL) && (first_ire != ire)) { 12064 IRE_REFHOLD(first_ire); 12065 /* ire will be released by the caller */ 12066 ire = first_ire; 12067 nce = ire->ire_nce; 12068 stq = ire->ire_stq; 12069 ill = ire_to_ill(ire); 12070 } 12071 IRB_REFRELE(irb); 12072 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12073 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12074 ILL_MDT_USABLE(ill)) { 12075 /* 12076 * This tcp connection was marked as MDT-capable, but 12077 * it has been turned off due changes in the interface. 12078 * Now that the interface support is back, turn it on 12079 * by notifying tcp. We don't directly modify tcp_mdt, 12080 * since we leave all the details to the tcp code that 12081 * knows better. 12082 */ 12083 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12084 12085 if (mdimp == NULL) { 12086 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12087 "connp %p (ENOMEM)\n", (void *)connp)); 12088 } else { 12089 CONN_INC_REF(connp); 12090 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12091 connp, SQTAG_TCP_INPUT_MCTL); 12092 } 12093 } 12094 12095 do { 12096 mblk_t *mp_ip6h; 12097 12098 if (multirt_send) { 12099 irb_t *irb; 12100 /* 12101 * We are in a multiple send case, need to get 12102 * the next ire and make a duplicate of the 12103 * packet. ire1 holds here the next ire to 12104 * process in the bucket. If multirouting is 12105 * expected, any non-RTF_MULTIRT ire that has 12106 * the right destination address is ignored. 
12107 */ 12108 irb = ire->ire_bucket; 12109 ASSERT(irb != NULL); 12110 12111 IRB_REFHOLD(irb); 12112 for (ire1 = ire->ire_next; 12113 ire1 != NULL; 12114 ire1 = ire1->ire_next) { 12115 if (!(ire1->ire_flags & RTF_MULTIRT)) 12116 continue; 12117 if (!IN6_ARE_ADDR_EQUAL( 12118 &ire1->ire_addr_v6, 12119 &ire->ire_addr_v6)) 12120 continue; 12121 if (ire1->ire_marks & 12122 (IRE_MARK_CONDEMNED| 12123 IRE_MARK_HIDDEN)) 12124 continue; 12125 12126 /* Got one */ 12127 if (ire1 != save_ire) { 12128 IRE_REFHOLD(ire1); 12129 } 12130 break; 12131 } 12132 IRB_REFRELE(irb); 12133 12134 if (ire1 != NULL) { 12135 next_mp = copyb(mp); 12136 if ((next_mp == NULL) || 12137 ((mp->b_cont != NULL) && 12138 ((next_mp->b_cont = 12139 dupmsg(mp->b_cont)) == NULL))) { 12140 freemsg(next_mp); 12141 next_mp = NULL; 12142 ire_refrele(ire1); 12143 ire1 = NULL; 12144 } 12145 } 12146 12147 /* Last multiroute ire; don't loop anymore. */ 12148 if (ire1 == NULL) { 12149 multirt_send = B_FALSE; 12150 } 12151 } 12152 12153 ill_index = 12154 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12155 12156 /* Initiate IPPF processing */ 12157 if (IP6_OUT_IPP(flags, ipst)) { 12158 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12159 if (mp == NULL) { 12160 BUMP_MIB(ill->ill_ip_mib, 12161 ipIfStatsOutDiscards); 12162 if (next_mp != NULL) 12163 freemsg(next_mp); 12164 if (ire != save_ire) { 12165 ire_refrele(ire); 12166 } 12167 return; 12168 } 12169 ip6h = (ip6_t *)mp->b_rptr; 12170 } 12171 mp_ip6h = mp; 12172 12173 /* 12174 * Check for fastpath, we need to hold nce_lock to 12175 * prevent fastpath update from chaining nce_fp_mp. 
12176 */ 12177 12178 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12179 mutex_enter(&nce->nce_lock); 12180 if ((mp1 = nce->nce_fp_mp) != NULL) { 12181 uint32_t hlen; 12182 uchar_t *rptr; 12183 12184 hlen = MBLKL(mp1); 12185 rptr = mp->b_rptr - hlen; 12186 /* 12187 * make sure there is room for the fastpath 12188 * datalink header 12189 */ 12190 if (rptr < mp->b_datap->db_base) { 12191 mp1 = copyb(mp1); 12192 mutex_exit(&nce->nce_lock); 12193 if (mp1 == NULL) { 12194 BUMP_MIB(ill->ill_ip_mib, 12195 ipIfStatsOutDiscards); 12196 freemsg(mp); 12197 if (next_mp != NULL) 12198 freemsg(next_mp); 12199 if (ire != save_ire) { 12200 ire_refrele(ire); 12201 } 12202 return; 12203 } 12204 mp1->b_cont = mp; 12205 12206 /* Get the priority marking, if any */ 12207 mp1->b_band = mp->b_band; 12208 mp = mp1; 12209 } else { 12210 mp->b_rptr = rptr; 12211 /* 12212 * fastpath - pre-pend datalink 12213 * header 12214 */ 12215 bcopy(mp1->b_rptr, rptr, hlen); 12216 mutex_exit(&nce->nce_lock); 12217 } 12218 } else { 12219 /* 12220 * Get the DL_UNITDATA_REQ. 12221 */ 12222 mp1 = nce->nce_res_mp; 12223 if (mp1 == NULL) { 12224 mutex_exit(&nce->nce_lock); 12225 ip1dbg(("ip_xmit_v6: No resolution " 12226 "block ire = %p\n", (void *)ire)); 12227 freemsg(mp); 12228 if (next_mp != NULL) 12229 freemsg(next_mp); 12230 if (ire != save_ire) { 12231 ire_refrele(ire); 12232 } 12233 return; 12234 } 12235 /* 12236 * Prepend the DL_UNITDATA_REQ. 
12237 */ 12238 mp1 = copyb(mp1); 12239 mutex_exit(&nce->nce_lock); 12240 if (mp1 == NULL) { 12241 BUMP_MIB(ill->ill_ip_mib, 12242 ipIfStatsOutDiscards); 12243 freemsg(mp); 12244 if (next_mp != NULL) 12245 freemsg(next_mp); 12246 if (ire != save_ire) { 12247 ire_refrele(ire); 12248 } 12249 return; 12250 } 12251 mp1->b_cont = mp; 12252 12253 /* Get the priority marking, if any */ 12254 mp1->b_band = mp->b_band; 12255 mp = mp1; 12256 } 12257 12258 out_ill = (ill_t *)stq->q_ptr; 12259 12260 DTRACE_PROBE4(ip6__physical__out__start, 12261 ill_t *, NULL, ill_t *, out_ill, 12262 ip6_t *, ip6h, mblk_t *, mp); 12263 12264 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12265 ipst->ips_ipv6firewall_physical_out, 12266 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12267 12268 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12269 12270 if (mp == NULL) { 12271 if (multirt_send) { 12272 ASSERT(ire1 != NULL); 12273 if (ire != save_ire) { 12274 ire_refrele(ire); 12275 } 12276 /* 12277 * Proceed with the next RTF_MULTIRT 12278 * ire, also set up the send-to queue 12279 * accordingly. 12280 */ 12281 ire = ire1; 12282 ire1 = NULL; 12283 stq = ire->ire_stq; 12284 nce = ire->ire_nce; 12285 ill = ire_to_ill(ire); 12286 mp = next_mp; 12287 next_mp = NULL; 12288 continue; 12289 } else { 12290 ASSERT(next_mp == NULL); 12291 ASSERT(ire1 == NULL); 12292 break; 12293 } 12294 } 12295 12296 /* 12297 * Update ire and MIB counters; for save_ire, this has 12298 * been done by the caller. 12299 */ 12300 if (ire != save_ire) { 12301 UPDATE_OB_PKT_COUNT(ire); 12302 ire->ire_last_used_time = lbolt; 12303 12304 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12305 BUMP_MIB(ill->ill_ip_mib, 12306 ipIfStatsHCOutMcastPkts); 12307 UPDATE_MIB(ill->ill_ip_mib, 12308 ipIfStatsHCOutMcastOctets, 12309 ntohs(ip6h->ip6_plen) + 12310 IPV6_HDR_LEN); 12311 } 12312 } 12313 12314 /* 12315 * Send it down. XXX Do we want to flow control AH/ESP 12316 * packets that carry TCP payloads? 
We don't flow 12317 * control TCP packets, but we should also not 12318 * flow-control TCP packets that have been protected. 12319 * We don't have an easy way to find out if an AH/ESP 12320 * packet was originally TCP or not currently. 12321 */ 12322 if (io == NULL) { 12323 BUMP_MIB(ill->ill_ip_mib, 12324 ipIfStatsHCOutTransmits); 12325 UPDATE_MIB(ill->ill_ip_mib, 12326 ipIfStatsHCOutOctets, 12327 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12328 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12329 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12330 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12331 int, 0); 12332 12333 putnext(stq, mp); 12334 } else { 12335 /* 12336 * Safety Pup says: make sure this is 12337 * going to the right interface! 12338 */ 12339 if (io->ipsec_out_capab_ill_index != 12340 ill_index) { 12341 /* IPsec kstats: bump lose counter */ 12342 freemsg(mp1); 12343 } else { 12344 BUMP_MIB(ill->ill_ip_mib, 12345 ipIfStatsHCOutTransmits); 12346 UPDATE_MIB(ill->ill_ip_mib, 12347 ipIfStatsHCOutOctets, 12348 ntohs(ip6h->ip6_plen) + 12349 IPV6_HDR_LEN); 12350 DTRACE_IP7(send, mblk_t *, mp, 12351 conn_t *, NULL, void_ip_t *, ip6h, 12352 __dtrace_ipsr_ill_t *, out_ill, 12353 ipha_t *, NULL, ip6_t *, ip6h, int, 12354 0); 12355 ipsec_hw_putnext(stq, mp); 12356 } 12357 } 12358 12359 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12360 if (ire != save_ire) { 12361 ire_refrele(ire); 12362 } 12363 if (multirt_send) { 12364 ASSERT(ire1 != NULL); 12365 /* 12366 * Proceed with the next RTF_MULTIRT 12367 * ire, also set up the send-to queue 12368 * accordingly. 
12369 */ 12370 ire = ire1; 12371 ire1 = NULL; 12372 stq = ire->ire_stq; 12373 nce = ire->ire_nce; 12374 ill = ire_to_ill(ire); 12375 mp = next_mp; 12376 next_mp = NULL; 12377 continue; 12378 } 12379 ASSERT(next_mp == NULL); 12380 ASSERT(ire1 == NULL); 12381 return; 12382 } 12383 12384 ASSERT(nce->nce_state != ND_INCOMPLETE); 12385 12386 /* 12387 * Check for upper layer advice 12388 */ 12389 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12390 /* 12391 * It should be o.k. to check the state without 12392 * a lock here, at most we lose an advice. 12393 */ 12394 nce->nce_last = TICK_TO_MSEC(lbolt64); 12395 if (nce->nce_state != ND_REACHABLE) { 12396 12397 mutex_enter(&nce->nce_lock); 12398 nce->nce_state = ND_REACHABLE; 12399 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12400 mutex_exit(&nce->nce_lock); 12401 (void) untimeout(nce->nce_timeout_id); 12402 if (ip_debug > 2) { 12403 /* ip1dbg */ 12404 pr_addr_dbg("ip_xmit_v6: state" 12405 " for %s changed to" 12406 " REACHABLE\n", AF_INET6, 12407 &ire->ire_addr_v6); 12408 } 12409 } 12410 if (ire != save_ire) { 12411 ire_refrele(ire); 12412 } 12413 if (multirt_send) { 12414 ASSERT(ire1 != NULL); 12415 /* 12416 * Proceed with the next RTF_MULTIRT 12417 * ire, also set up the send-to queue 12418 * accordingly. 12419 */ 12420 ire = ire1; 12421 ire1 = NULL; 12422 stq = ire->ire_stq; 12423 nce = ire->ire_nce; 12424 ill = ire_to_ill(ire); 12425 mp = next_mp; 12426 next_mp = NULL; 12427 continue; 12428 } 12429 ASSERT(next_mp == NULL); 12430 ASSERT(ire1 == NULL); 12431 return; 12432 } 12433 12434 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12435 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12436 " ill_reachable_time = %d \n", delta, 12437 ill->ill_reachable_time)); 12438 if (delta > (uint64_t)ill->ill_reachable_time) { 12439 nce = ire->ire_nce; 12440 mutex_enter(&nce->nce_lock); 12441 switch (nce->nce_state) { 12442 case ND_REACHABLE: 12443 case ND_STALE: 12444 /* 12445 * ND_REACHABLE is identical to 12446 * ND_STALE in this specific case. 
If 12447 * reachable time has expired for this 12448 * neighbor (delta is greater than 12449 * reachable time), conceptually, the 12450 * neighbor cache is no longer in 12451 * REACHABLE state, but already in 12452 * STALE state. So the correct 12453 * transition here is to ND_DELAY. 12454 */ 12455 nce->nce_state = ND_DELAY; 12456 mutex_exit(&nce->nce_lock); 12457 NDP_RESTART_TIMER(nce, 12458 ipst->ips_delay_first_probe_time); 12459 if (ip_debug > 3) { 12460 /* ip2dbg */ 12461 pr_addr_dbg("ip_xmit_v6: state" 12462 " for %s changed to" 12463 " DELAY\n", AF_INET6, 12464 &ire->ire_addr_v6); 12465 } 12466 break; 12467 case ND_DELAY: 12468 case ND_PROBE: 12469 mutex_exit(&nce->nce_lock); 12470 /* Timers have already started */ 12471 break; 12472 case ND_UNREACHABLE: 12473 /* 12474 * ndp timer has detected that this nce 12475 * is unreachable and initiated deleting 12476 * this nce and all its associated IREs. 12477 * This is a race where we found the 12478 * ire before it was deleted and have 12479 * just sent out a packet using this 12480 * unreachable nce. 12481 */ 12482 mutex_exit(&nce->nce_lock); 12483 break; 12484 default: 12485 ASSERT(0); 12486 } 12487 } 12488 12489 if (multirt_send) { 12490 ASSERT(ire1 != NULL); 12491 /* 12492 * Proceed with the next RTF_MULTIRT ire, 12493 * Also set up the send-to queue accordingly. 12494 */ 12495 if (ire != save_ire) { 12496 ire_refrele(ire); 12497 } 12498 ire = ire1; 12499 ire1 = NULL; 12500 stq = ire->ire_stq; 12501 nce = ire->ire_nce; 12502 ill = ire_to_ill(ire); 12503 mp = next_mp; 12504 next_mp = NULL; 12505 } 12506 } while (multirt_send); 12507 /* 12508 * In the multirouting case, release the last ire used for 12509 * emission. save_ire will be released by the caller. 12510 */ 12511 if (ire != save_ire) { 12512 ire_refrele(ire); 12513 } 12514 } else { 12515 /* 12516 * Queue packet if we have an conn to give back pressure. 
12517 * We can't queue packets intended for hardware acceleration 12518 * since we've tossed that state already. If the packet is 12519 * being fed back from ire_send_v6, we don't know the 12520 * position in the queue to enqueue the packet and we discard 12521 * the packet. 12522 */ 12523 if (ipst->ips_ip_output_queue && (connp != NULL) && 12524 (io == NULL) && (caller != IRE_SEND)) { 12525 if (caller == IP_WSRV) { 12526 connp->conn_did_putbq = 1; 12527 (void) putbq(connp->conn_wq, mp); 12528 conn_drain_insert(connp); 12529 /* 12530 * caller == IP_WSRV implies we are 12531 * the service thread, and the 12532 * queue is already noenabled. 12533 * The check for canput and 12534 * the putbq is not atomic. 12535 * So we need to check again. 12536 */ 12537 if (canput(stq->q_next)) 12538 connp->conn_did_putbq = 0; 12539 } else { 12540 (void) putq(connp->conn_wq, mp); 12541 } 12542 return; 12543 } 12544 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12545 freemsg(mp); 12546 return; 12547 } 12548 } 12549 12550 /* 12551 * pr_addr_dbg function provides the needed buffer space to call 12552 * inet_ntop() function's 3rd argument. This function should be 12553 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12554 * stack buffer space in it's own stack frame. This function uses 12555 * a buffer from it's own stack and prints the information. 12556 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12557 * 12558 * Note: This function can call inet_ntop() once. 12559 */ 12560 void 12561 pr_addr_dbg(char *fmt1, int af, const void *addr) 12562 { 12563 char buf[INET6_ADDRSTRLEN]; 12564 12565 if (fmt1 == NULL) { 12566 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12567 return; 12568 } 12569 12570 /* 12571 * This does not compare debug level and just prints 12572 * out. Thus it is the responsibility of the caller 12573 * to check the appropriate debug-level before calling 12574 * this function. 
12575 */ 12576 if (ip_debug > 0) { 12577 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12578 } 12579 12580 12581 } 12582 12583 12584 /* 12585 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12586 * if needed and extension headers) that will be needed based on the 12587 * ip6_pkt_t structure passed by the caller. 12588 * 12589 * The returned length does not include the length of the upper level 12590 * protocol (ULP) header. 12591 */ 12592 int 12593 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12594 { 12595 int len; 12596 12597 len = IPV6_HDR_LEN; 12598 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12599 len += sizeof (ip6i_t); 12600 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12601 ASSERT(ipp->ipp_hopoptslen != 0); 12602 len += ipp->ipp_hopoptslen; 12603 } 12604 if (ipp->ipp_fields & IPPF_RTHDR) { 12605 ASSERT(ipp->ipp_rthdrlen != 0); 12606 len += ipp->ipp_rthdrlen; 12607 } 12608 /* 12609 * En-route destination options 12610 * Only do them if there's a routing header as well 12611 */ 12612 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12613 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12614 ASSERT(ipp->ipp_rtdstoptslen != 0); 12615 len += ipp->ipp_rtdstoptslen; 12616 } 12617 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12618 ASSERT(ipp->ipp_dstoptslen != 0); 12619 len += ipp->ipp_dstoptslen; 12620 } 12621 return (len); 12622 } 12623 12624 /* 12625 * All-purpose routine to build a header chain of an IPv6 header 12626 * followed by any required extension headers and a proto header, 12627 * preceeded (where necessary) by an ip6i_t private header. 12628 * 12629 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12630 * will be filled in appropriately. 12631 * Thus the caller must fill in the rest of the IPv6 header, such as 12632 * traffic class/flowid, source address (if not set here), hoplimit (if not 12633 * set here) and destination address. 12634 * 12635 * The extension headers and ip6i_t header will all be fully filled in. 
12636 */ 12637 void 12638 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12639 ip6_pkt_t *ipp, uint8_t protocol) 12640 { 12641 uint8_t *nxthdr_ptr; 12642 uint8_t *cp; 12643 ip6i_t *ip6i; 12644 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12645 12646 /* 12647 * If sending private ip6i_t header down (checksum info, nexthop, 12648 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12649 * then fill it in. (The checksum info will be filled in by icmp). 12650 */ 12651 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12652 ip6i = (ip6i_t *)ip6h; 12653 ip6h = (ip6_t *)&ip6i[1]; 12654 12655 ip6i->ip6i_flags = 0; 12656 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12657 if (ipp->ipp_fields & IPPF_IFINDEX || 12658 ipp->ipp_fields & IPPF_SCOPE_ID) { 12659 ASSERT(ipp->ipp_ifindex != 0); 12660 ip6i->ip6i_flags |= IP6I_IFINDEX; 12661 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12662 } 12663 if (ipp->ipp_fields & IPPF_ADDR) { 12664 /* 12665 * Enable per-packet source address verification if 12666 * IPV6_PKTINFO specified the source address. 12667 * ip6_src is set in the transport's _wput function. 12668 */ 12669 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12670 &ipp->ipp_addr)); 12671 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12672 } 12673 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12674 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12675 /* 12676 * We need to set this flag so that IP doesn't 12677 * rewrite the IPv6 header's hoplimit with the 12678 * current default value. 
12679 */ 12680 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12681 } 12682 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12683 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12684 &ipp->ipp_nexthop)); 12685 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12686 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12687 } 12688 /* 12689 * tell IP this is an ip6i_t private header 12690 */ 12691 ip6i->ip6i_nxt = IPPROTO_RAW; 12692 } 12693 /* Initialize IPv6 header */ 12694 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12695 if (ipp->ipp_fields & IPPF_TCLASS) { 12696 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12697 (ipp->ipp_tclass << 20); 12698 } 12699 if (ipp->ipp_fields & IPPF_ADDR) 12700 ip6h->ip6_src = ipp->ipp_addr; 12701 12702 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12703 cp = (uint8_t *)&ip6h[1]; 12704 /* 12705 * Here's where we have to start stringing together 12706 * any extension headers in the right order: 12707 * Hop-by-hop, destination, routing, and final destination opts. 12708 */ 12709 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12710 /* Hop-by-hop options */ 12711 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12712 12713 *nxthdr_ptr = IPPROTO_HOPOPTS; 12714 nxthdr_ptr = &hbh->ip6h_nxt; 12715 12716 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12717 cp += ipp->ipp_hopoptslen; 12718 } 12719 /* 12720 * En-route destination options 12721 * Only do them if there's a routing header as well 12722 */ 12723 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12724 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12725 ip6_dest_t *dst = (ip6_dest_t *)cp; 12726 12727 *nxthdr_ptr = IPPROTO_DSTOPTS; 12728 nxthdr_ptr = &dst->ip6d_nxt; 12729 12730 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12731 cp += ipp->ipp_rtdstoptslen; 12732 } 12733 /* 12734 * Routing header next 12735 */ 12736 if (ipp->ipp_fields & IPPF_RTHDR) { 12737 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12738 12739 *nxthdr_ptr = IPPROTO_ROUTING; 12740 nxthdr_ptr = &rt->ip6r_nxt; 12741 12742 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12743 cp += ipp->ipp_rthdrlen; 12744 } 
12745 /* 12746 * Do ultimate destination options 12747 */ 12748 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12749 ip6_dest_t *dest = (ip6_dest_t *)cp; 12750 12751 *nxthdr_ptr = IPPROTO_DSTOPTS; 12752 nxthdr_ptr = &dest->ip6d_nxt; 12753 12754 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12755 cp += ipp->ipp_dstoptslen; 12756 } 12757 /* 12758 * Now set the last header pointer to the proto passed in 12759 */ 12760 *nxthdr_ptr = protocol; 12761 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12762 } 12763 12764 /* 12765 * Return a pointer to the routing header extension header 12766 * in the IPv6 header(s) chain passed in. 12767 * If none found, return NULL 12768 * Assumes that all extension headers are in same mblk as the v6 header 12769 */ 12770 ip6_rthdr_t * 12771 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12772 { 12773 ip6_dest_t *desthdr; 12774 ip6_frag_t *fraghdr; 12775 uint_t hdrlen; 12776 uint8_t nexthdr; 12777 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12778 12779 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12780 return ((ip6_rthdr_t *)ptr); 12781 12782 /* 12783 * The routing header will precede all extension headers 12784 * other than the hop-by-hop and destination options 12785 * extension headers, so if we see anything other than those, 12786 * we're done and didn't find it. 12787 * We could see a destination options header alone but no 12788 * routing header, in which case we'll return NULL as soon as 12789 * we see anything after that. 12790 * Hop-by-hop and destination option headers are identical, 12791 * so we can use either one we want as a template. 12792 */ 12793 nexthdr = ip6h->ip6_nxt; 12794 while (ptr < endptr) { 12795 /* Is there enough left for len + nexthdr? 
*/ 12796 if (ptr + MIN_EHDR_LEN > endptr) 12797 return (NULL); 12798 12799 switch (nexthdr) { 12800 case IPPROTO_HOPOPTS: 12801 case IPPROTO_DSTOPTS: 12802 /* Assumes the headers are identical for hbh and dst */ 12803 desthdr = (ip6_dest_t *)ptr; 12804 hdrlen = 8 * (desthdr->ip6d_len + 1); 12805 nexthdr = desthdr->ip6d_nxt; 12806 break; 12807 12808 case IPPROTO_ROUTING: 12809 return ((ip6_rthdr_t *)ptr); 12810 12811 case IPPROTO_FRAGMENT: 12812 fraghdr = (ip6_frag_t *)ptr; 12813 hdrlen = sizeof (ip6_frag_t); 12814 nexthdr = fraghdr->ip6f_nxt; 12815 break; 12816 12817 default: 12818 return (NULL); 12819 } 12820 ptr += hdrlen; 12821 } 12822 return (NULL); 12823 } 12824 12825 /* 12826 * Called for source-routed packets originating on this node. 12827 * Manipulates the original routing header by moving every entry up 12828 * one slot, placing the first entry in the v6 header's v6_dst field, 12829 * and placing the ultimate destination in the routing header's last 12830 * slot. 12831 * 12832 * Returns the checksum diference between the ultimate destination 12833 * (last hop in the routing header when the packet is sent) and 12834 * the first hop (ip6_dst when the packet is sent) 12835 */ 12836 /* ARGSUSED2 */ 12837 uint32_t 12838 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12839 { 12840 uint_t numaddr; 12841 uint_t i; 12842 in6_addr_t *addrptr; 12843 in6_addr_t tmp; 12844 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12845 uint32_t cksm; 12846 uint32_t addrsum = 0; 12847 uint16_t *ptr; 12848 12849 /* 12850 * Perform any processing needed for source routing. 12851 * We know that all extension headers will be in the same mblk 12852 * as the IPv6 header. 12853 */ 12854 12855 /* 12856 * If no segments left in header, or the header length field is zero, 12857 * don't move hop addresses around; 12858 * Checksum difference is zero. 
12859 */ 12860 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12861 return (0); 12862 12863 ptr = (uint16_t *)&ip6h->ip6_dst; 12864 cksm = 0; 12865 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12866 cksm += ptr[i]; 12867 } 12868 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12869 12870 /* 12871 * Here's where the fun begins - we have to 12872 * move all addresses up one spot, take the 12873 * first hop and make it our first ip6_dst, 12874 * and place the ultimate destination in the 12875 * newly-opened last slot. 12876 */ 12877 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12878 numaddr = rthdr->ip6r0_len / 2; 12879 tmp = *addrptr; 12880 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12881 *addrptr = addrptr[1]; 12882 } 12883 *addrptr = ip6h->ip6_dst; 12884 ip6h->ip6_dst = tmp; 12885 12886 /* 12887 * From the checksummed ultimate destination subtract the checksummed 12888 * current ip6_dst (the first hop address). Return that number. 12889 * (In the v4 case, the second part of this is done in each routine 12890 * that calls ip_massage_options(). We do it all in this one place 12891 * for v6). 12892 */ 12893 ptr = (uint16_t *)&ip6h->ip6_dst; 12894 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12895 addrsum += ptr[i]; 12896 } 12897 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12898 if ((int)cksm < 0) 12899 cksm--; 12900 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12901 12902 return (cksm); 12903 } 12904 12905 /* 12906 * Propagate a multicast group membership operation (join/leave) (*fn) on 12907 * all interfaces crossed by the related multirt routes. 12908 * The call is considered successful if the operation succeeds 12909 * on at least one interface. 12910 * The function is called if the destination address in the packet to send 12911 * is multirouted. 
12912 */ 12913 int 12914 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12915 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12916 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12917 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12918 { 12919 ire_t *ire_gw; 12920 irb_t *irb; 12921 int index, error = 0; 12922 opt_restart_t *or; 12923 ip_stack_t *ipst = ire->ire_ipst; 12924 12925 irb = ire->ire_bucket; 12926 ASSERT(irb != NULL); 12927 12928 ASSERT(DB_TYPE(first_mp) == M_CTL); 12929 or = (opt_restart_t *)first_mp->b_rptr; 12930 12931 IRB_REFHOLD(irb); 12932 for (; ire != NULL; ire = ire->ire_next) { 12933 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12934 continue; 12935 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12936 continue; 12937 12938 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12939 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12940 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12941 /* No resolver exists for the gateway; skip this ire. */ 12942 if (ire_gw == NULL) 12943 continue; 12944 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12945 /* 12946 * A resolver exists: we can get the interface on which we have 12947 * to apply the operation. 12948 */ 12949 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12950 first_mp); 12951 if (error == 0) 12952 or->or_private = CGTP_MCAST_SUCCESS; 12953 12954 if (ip_debug > 0) { 12955 ulong_t off; 12956 char *ksym; 12957 12958 ksym = kobj_getsymname((uintptr_t)fn, &off); 12959 ip2dbg(("ip_multirt_apply_membership_v6: " 12960 "called %s, multirt group 0x%08x via itf 0x%08x, " 12961 "error %d [success %u]\n", 12962 ksym ? 
ksym : "?", 12963 ntohl(V4_PART_OF_V6((*v6grp))), 12964 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12965 error, or->or_private)); 12966 } 12967 12968 ire_refrele(ire_gw); 12969 if (error == EINPROGRESS) { 12970 IRB_REFRELE(irb); 12971 return (error); 12972 } 12973 } 12974 IRB_REFRELE(irb); 12975 /* 12976 * Consider the call as successful if we succeeded on at least 12977 * one interface. Otherwise, return the last encountered error. 12978 */ 12979 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12980 } 12981 12982 void 12983 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12984 { 12985 kstat_t *ksp; 12986 12987 ip6_stat_t template = { 12988 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12989 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12990 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12991 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12992 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12993 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12994 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12995 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12996 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12997 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12998 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12999 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13000 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13001 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13002 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13003 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13004 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13005 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13006 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13007 }; 13008 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13009 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13010 KSTAT_FLAG_VIRTUAL, stackid); 13011 13012 if (ksp == NULL) 13013 return (NULL); 13014 13015 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13016 ksp->ks_data = (void *)ip6_statisticsp; 13017 ksp->ks_private = (void *)(uintptr_t)stackid; 13018 13019 kstat_install(ksp); 13020 return (ksp); 13021 } 13022 13023 void 13024 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13025 { 13026 if (ksp != NULL) { 13027 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13028 kstat_delete_netstack(ksp, stackid); 13029 } 13030 } 13031 13032 /* 13033 * The following two functions set and get the value for the 13034 * IPV6_SRC_PREFERENCES socket option. 13035 */ 13036 int 13037 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13038 { 13039 /* 13040 * We only support preferences that are covered by 13041 * IPV6_PREFER_SRC_MASK. 13042 */ 13043 if (prefs & ~IPV6_PREFER_SRC_MASK) 13044 return (EINVAL); 13045 13046 /* 13047 * Look for conflicting preferences or default preferences. If 13048 * both bits of a related pair are clear, the application wants the 13049 * system's default value for that pair. Both bits in a pair can't 13050 * be set. 
13051 */ 13052 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13053 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13054 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13055 IPV6_PREFER_SRC_MIPMASK) { 13056 return (EINVAL); 13057 } 13058 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13059 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13060 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13061 IPV6_PREFER_SRC_TMPMASK) { 13062 return (EINVAL); 13063 } 13064 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13065 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13066 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13067 IPV6_PREFER_SRC_CGAMASK) { 13068 return (EINVAL); 13069 } 13070 13071 connp->conn_src_preferences = prefs; 13072 return (0); 13073 } 13074 13075 size_t 13076 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13077 { 13078 *val = connp->conn_src_preferences; 13079 return (sizeof (connp->conn_src_preferences)); 13080 } 13081 13082 int 13083 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13084 { 13085 ill_t *ill; 13086 ire_t *ire; 13087 int error; 13088 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13089 13090 /* 13091 * Verify the source address and ifindex. Privileged users can use 13092 * any source address. For ancillary data the source address is 13093 * checked in ip_wput_v6. 13094 */ 13095 if (pkti->ipi6_ifindex != 0) { 13096 ASSERT(connp != NULL); 13097 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13098 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13099 if (ill == NULL) { 13100 /* 13101 * We just want to know if the interface exists, we 13102 * don't really care about the ill pointer itself. 
13103 */ 13104 if (error != EINPROGRESS) 13105 return (error); 13106 error = 0; /* Ensure we don't use it below */ 13107 } else { 13108 ill_refrele(ill); 13109 } 13110 } 13111 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13112 secpolicy_net_rawaccess(cr) != 0) { 13113 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13114 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13115 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13116 if (ire != NULL) 13117 ire_refrele(ire); 13118 else 13119 return (ENXIO); 13120 } 13121 return (0); 13122 } 13123 13124 /* 13125 * Get the size of the IP options (including the IP headers size) 13126 * without including the AH header's size. If till_ah is B_FALSE, 13127 * and if AH header is present, dest options beyond AH header will 13128 * also be included in the returned size. 13129 */ 13130 int 13131 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13132 { 13133 ip6_t *ip6h; 13134 uint8_t nexthdr; 13135 uint8_t *whereptr; 13136 ip6_hbh_t *hbhhdr; 13137 ip6_dest_t *dsthdr; 13138 ip6_rthdr_t *rthdr; 13139 int ehdrlen; 13140 int size; 13141 ah_t *ah; 13142 13143 ip6h = (ip6_t *)mp->b_rptr; 13144 size = IPV6_HDR_LEN; 13145 nexthdr = ip6h->ip6_nxt; 13146 whereptr = (uint8_t *)&ip6h[1]; 13147 for (;;) { 13148 /* Assume IP has already stripped it */ 13149 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13150 switch (nexthdr) { 13151 case IPPROTO_HOPOPTS: 13152 hbhhdr = (ip6_hbh_t *)whereptr; 13153 nexthdr = hbhhdr->ip6h_nxt; 13154 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13155 break; 13156 case IPPROTO_DSTOPTS: 13157 dsthdr = (ip6_dest_t *)whereptr; 13158 nexthdr = dsthdr->ip6d_nxt; 13159 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13160 break; 13161 case IPPROTO_ROUTING: 13162 rthdr = (ip6_rthdr_t *)whereptr; 13163 nexthdr = rthdr->ip6r_nxt; 13164 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13165 break; 13166 default : 13167 if (till_ah) { 13168 ASSERT(nexthdr == IPPROTO_AH); 13169 return (size); 13170 } 13171 /* 13172 * If we don't have a 
AH header to traverse, 13173 * return now. This happens normally for 13174 * outbound datagrams where we have not inserted 13175 * the AH header. 13176 */ 13177 if (nexthdr != IPPROTO_AH) { 13178 return (size); 13179 } 13180 13181 /* 13182 * We don't include the AH header's size 13183 * to be symmetrical with other cases where 13184 * we either don't have a AH header (outbound) 13185 * or peek into the AH header yet (inbound and 13186 * not pulled up yet). 13187 */ 13188 ah = (ah_t *)whereptr; 13189 nexthdr = ah->ah_nexthdr; 13190 ehdrlen = (ah->ah_length << 2) + 8; 13191 13192 if (nexthdr == IPPROTO_DSTOPTS) { 13193 if (whereptr + ehdrlen >= mp->b_wptr) { 13194 /* 13195 * The destination options header 13196 * is not part of the first mblk. 13197 */ 13198 whereptr = mp->b_cont->b_rptr; 13199 } else { 13200 whereptr += ehdrlen; 13201 } 13202 13203 dsthdr = (ip6_dest_t *)whereptr; 13204 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13205 size += ehdrlen; 13206 } 13207 return (size); 13208 } 13209 whereptr += ehdrlen; 13210 size += ehdrlen; 13211 } 13212 } 13213