1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/optcom.h> 73 #include <inet/mib2.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/tcp_impl.h> 83 #include <inet/udp_impl.h> 84 #include <inet/ipp_common.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <inet/rawip_impl.h> 102 #include <inet/rts_impl.h> 103 #include <sys/squeue.h> 104 105 #include <sys/tsol/label.h> 106 
#include <sys/tsol/tnet.h>

#include <rpc/pmap_prot.h>

/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>

extern squeue_func_t ip_input_proc;

/*
 * Naming conventions:
 * These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

/*
 * Well-known IPv6 addresses, written as four words.  Where the value is
 * not symmetric, the big- and little-endian initializers are byte-swapped
 * forms of each other so that both yield the same in-memory byte sequence
 * (presumably the words are 32 bits wide -- confirm against in6_addr_t).
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 (solicited-node multicast prefix) */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/* Leave room for ip_newroute to tack on the src and target addresses */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Status codes; the code that produces and consumes them is not visible here */
#define	IP6_MBLK_OK		0
#define	IP6_MBLK_HDR_ERR	1
#define	IP6_MBLK_LEN_ERR	2

/* Forward declarations for the file-local IPv6 routines */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *, ip_stack_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* 
(re)xmit_interval in milliseconds */ 231 4 /* max # of requests to buffer */ 232 /* anything else filled in by the code */ 233 }; 234 235 /* 236 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 237 * The message has already been checksummed and if needed, 238 * a copy has been made to be sent any interested ICMP client (conn) 239 * Note that this is different than icmp_inbound() which does the fanout 240 * to conn's as well as local processing of the ICMP packets. 241 * 242 * All error messages are passed to the matching transport stream. 243 * 244 * Zones notes: 245 * The packet is only processed in the context of the specified zone: typically 246 * only this zone will reply to an echo request. This means that the caller must 247 * call icmp_inbound_v6() for each relevant zone. 248 */ 249 static void 250 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 251 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 252 { 253 icmp6_t *icmp6; 254 ip6_t *ip6h; 255 boolean_t interested; 256 ip6i_t *ip6i; 257 in6_addr_t origsrc; 258 ire_t *ire; 259 mblk_t *first_mp; 260 ipsec_in_t *ii; 261 ip_stack_t *ipst = ill->ill_ipst; 262 263 ASSERT(ill != NULL); 264 first_mp = mp; 265 if (mctl_present) { 266 mp = first_mp->b_cont; 267 ASSERT(mp != NULL); 268 269 ii = (ipsec_in_t *)first_mp->b_rptr; 270 ASSERT(ii->ipsec_in_type == IPSEC_IN); 271 } 272 273 ip6h = (ip6_t *)mp->b_rptr; 274 275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 276 277 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 278 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 279 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 280 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 281 freemsg(first_mp); 282 return; 283 } 284 ip6h = (ip6_t *)mp->b_rptr; 285 } 286 if (ipst->ips_icmp_accept_clear_messages == 0) { 287 first_mp = ipsec_check_global_policy(first_mp, NULL, 288 NULL, ip6h, mctl_present, ipst->ips_netstack); 289 if (first_mp == NULL) 290 
return; 291 } 292 293 /* 294 * On a labeled system, we have to check whether the zone itself is 295 * permitted to receive raw traffic. 296 */ 297 if (is_system_labeled()) { 298 if (zoneid == ALL_ZONES) 299 zoneid = tsol_packet_to_zoneid(mp); 300 if (!tsol_can_accept_raw(mp, B_FALSE)) { 301 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 302 zoneid)); 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 304 freemsg(first_mp); 305 return; 306 } 307 } 308 309 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 310 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 311 icmp6->icmp6_code)); 312 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 313 314 /* Initiate IPPF processing here */ 315 if (IP6_IN_IPP(flags, ipst)) { 316 317 /* 318 * If the ifindex changes due to SIOCSLIFINDEX 319 * packet may return to IP on the wrong ill. 320 */ 321 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 322 if (mp == NULL) { 323 if (mctl_present) { 324 freeb(first_mp); 325 } 326 return; 327 } 328 } 329 330 switch (icmp6->icmp6_type) { 331 case ICMP6_DST_UNREACH: 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 333 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 335 break; 336 337 case ICMP6_TIME_EXCEEDED: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 339 break; 340 341 case ICMP6_PARAM_PROB: 342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 343 break; 344 345 case ICMP6_PACKET_TOO_BIG: 346 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 347 zoneid); 348 return; 349 case ICMP6_ECHO_REQUEST: 350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 351 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 352 !ipst->ips_ipv6_resp_echo_mcast) 353 break; 354 355 /* 356 * We must have exclusive use of the mblk to convert it to 357 * a response. 358 * If not, we copy it. 
359 */ 360 if (mp->b_datap->db_ref > 1) { 361 mblk_t *mp1; 362 363 mp1 = copymsg(mp); 364 freemsg(mp); 365 if (mp1 == NULL) { 366 BUMP_MIB(ill->ill_icmp6_mib, 367 ipv6IfIcmpInErrors); 368 if (mctl_present) 369 freeb(first_mp); 370 return; 371 } 372 mp = mp1; 373 ip6h = (ip6_t *)mp->b_rptr; 374 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 375 if (mctl_present) 376 first_mp->b_cont = mp; 377 else 378 first_mp = mp; 379 } 380 381 /* 382 * Turn the echo into an echo reply. 383 * Remove any extension headers (do not reverse a source route) 384 * and clear the flow id (keep traffic class for now). 385 */ 386 if (hdr_length != IPV6_HDR_LEN) { 387 int i; 388 389 for (i = 0; i < IPV6_HDR_LEN; i++) 390 mp->b_rptr[hdr_length - i - 1] = 391 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 392 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 393 ip6h = (ip6_t *)mp->b_rptr; 394 ip6h->ip6_nxt = IPPROTO_ICMPV6; 395 hdr_length = IPV6_HDR_LEN; 396 } 397 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 398 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 399 400 ip6h->ip6_plen = 401 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 402 origsrc = ip6h->ip6_src; 403 /* 404 * Reverse the source and destination addresses. 405 * If the return address is a multicast, zero out the source 406 * (ip_wput_v6 will set an address). 407 */ 408 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 409 ip6h->ip6_src = ipv6_all_zeros; 410 ip6h->ip6_dst = origsrc; 411 } else { 412 ip6h->ip6_src = ip6h->ip6_dst; 413 ip6h->ip6_dst = origsrc; 414 } 415 416 /* set the hop limit */ 417 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 418 419 /* 420 * Prepare for checksum by putting icmp length in the icmp 421 * checksum field. The checksum is calculated in ip_wput_v6. 422 */ 423 icmp6->icmp6_cksum = ip6h->ip6_plen; 424 /* 425 * ICMP echo replies should go out on the same interface 426 * the request came on as probes used by in.mpathd for 427 * detecting NIC failures are ECHO packets. 
We turn-off load 428 * spreading by allocating a ip6i and setting ip6i_attach_if 429 * to B_TRUE which is handled both by ip_wput_v6 and 430 * ip_newroute_v6. If we don't turnoff load spreading, 431 * the packets might get dropped if there are no 432 * non-FAILED/INACTIVE interfaces for it to go out on and 433 * in.mpathd would wrongly detect a failure or mis-detect 434 * a NIC failure as a link failure. As load spreading can 435 * happen only if ill_group is not NULL, we do only for 436 * that case and this does not affect the normal case. 437 * 438 * We force this only on echo packets that came from on-link 439 * hosts. We restrict this to link-local addresses which 440 * is used by in.mpathd for probing. In the IPv6 case, 441 * default routes typically have an ire_ipif pointer and 442 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 443 * might work. As a default route out of this interface 444 * may not be present, enforcing this packet to go out in 445 * this case may not work. 446 */ 447 if (ill->ill_group != NULL && 448 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 449 /* 450 * If we are sending replies to ourselves, don't 451 * set ATTACH_IF as we may not be able to find 452 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 453 * causes ip_wput_v6 to look for an IRE_LOCAL on 454 * "ill" which it may not find and will try to 455 * create an IRE_CACHE for our local address. Once 456 * we do this, we will try to forward all packets 457 * meant to our LOCAL address. 
458 */ 459 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 460 NULL, ipst); 461 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 462 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 463 if (mp == NULL) { 464 BUMP_MIB(ill->ill_icmp6_mib, 465 ipv6IfIcmpInErrors); 466 if (ire != NULL) 467 ire_refrele(ire); 468 if (mctl_present) 469 freeb(first_mp); 470 return; 471 } else if (mctl_present) { 472 first_mp->b_cont = mp; 473 } else { 474 first_mp = mp; 475 } 476 ip6i = (ip6i_t *)mp->b_rptr; 477 ip6i->ip6i_flags = IP6I_ATTACH_IF; 478 ip6i->ip6i_ifindex = 479 ill->ill_phyint->phyint_ifindex; 480 } 481 if (ire != NULL) 482 ire_refrele(ire); 483 } 484 485 if (!mctl_present) { 486 /* 487 * This packet should go out the same way as it 488 * came in i.e in clear. To make sure that global 489 * policy will not be applied to this in ip_wput, 490 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 491 */ 492 ASSERT(first_mp == mp); 493 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 494 if (first_mp == NULL) { 495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 496 freemsg(mp); 497 return; 498 } 499 ii = (ipsec_in_t *)first_mp->b_rptr; 500 501 /* This is not a secure packet */ 502 ii->ipsec_in_secure = B_FALSE; 503 first_mp->b_cont = mp; 504 } 505 ii->ipsec_in_zoneid = zoneid; 506 ASSERT(zoneid != ALL_ZONES); 507 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 508 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 509 return; 510 } 511 put(WR(q), first_mp); 512 return; 513 514 case ICMP6_ECHO_REPLY: 515 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 516 break; 517 518 case ND_ROUTER_SOLICIT: 519 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 520 break; 521 522 case ND_ROUTER_ADVERT: 523 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 524 break; 525 526 case ND_NEIGHBOR_SOLICIT: 527 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 528 if (mctl_present) 529 freeb(first_mp); 530 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 531 ndp_input(ill, mp, dl_mp); 532 return; 533 534 case ND_NEIGHBOR_ADVERT: 535 BUMP_MIB(ill->ill_icmp6_mib, 536 ipv6IfIcmpInNeighborAdvertisements); 537 if (mctl_present) 538 freeb(first_mp); 539 /* XXX may wish to pass first_mp up to ndp_input someday. */ 540 ndp_input(ill, mp, dl_mp); 541 return; 542 543 case ND_REDIRECT: { 544 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 545 546 if (ipst->ips_ipv6_ignore_redirect) 547 break; 548 549 /* 550 * As there is no upper client to deliver, we don't 551 * need the first_mp any more. 552 */ 553 if (mctl_present) 554 freeb(first_mp); 555 if (!pullupmsg(mp, -1)) { 556 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 557 break; 558 } 559 icmp_redirect_v6(q, mp, ill); 560 return; 561 } 562 563 /* 564 * The next three icmp messages will be handled by MLD. 565 * Pass all valid MLD packets up to any process(es) 566 * listening on a raw ICMP socket. MLD messages are 567 * freed by mld_input function. 568 */ 569 case MLD_LISTENER_QUERY: 570 case MLD_LISTENER_REPORT: 571 case MLD_LISTENER_REDUCTION: 572 if (mctl_present) 573 freeb(first_mp); 574 mld_input(q, mp, ill); 575 return; 576 default: 577 break; 578 } 579 if (interested) { 580 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 581 mctl_present, zoneid); 582 } else { 583 freemsg(first_mp); 584 } 585 } 586 587 /* 588 * Process received IPv6 ICMP Packet too big. 589 * After updating any IRE it does the fanout to any matching transport streams. 590 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
591 */ 592 /* ARGSUSED */ 593 static void 594 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 595 boolean_t mctl_present, zoneid_t zoneid) 596 { 597 ip6_t *ip6h; 598 ip6_t *inner_ip6h; 599 icmp6_t *icmp6; 600 uint16_t hdr_length; 601 uint32_t mtu; 602 ire_t *ire, *first_ire; 603 mblk_t *first_mp; 604 ip_stack_t *ipst = ill->ill_ipst; 605 606 first_mp = mp; 607 if (mctl_present) 608 mp = first_mp->b_cont; 609 /* 610 * We must have exclusive use of the mblk to update the MTU 611 * in the packet. 612 * If not, we copy it. 613 * 614 * If there's an M_CTL present, we know that allocated first_mp 615 * earlier in this function, so we know first_mp has refcnt of one. 616 */ 617 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 618 if (mp->b_datap->db_ref > 1) { 619 mblk_t *mp1; 620 621 mp1 = copymsg(mp); 622 freemsg(mp); 623 if (mp1 == NULL) { 624 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 625 if (mctl_present) 626 freeb(first_mp); 627 return; 628 } 629 mp = mp1; 630 if (mctl_present) 631 first_mp->b_cont = mp; 632 else 633 first_mp = mp; 634 } 635 ip6h = (ip6_t *)mp->b_rptr; 636 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 637 hdr_length = ip_hdr_length_v6(mp, ip6h); 638 else 639 hdr_length = IPV6_HDR_LEN; 640 641 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 642 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 643 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 644 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 645 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 647 freemsg(first_mp); 648 return; 649 } 650 ip6h = (ip6_t *)mp->b_rptr; 651 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 652 inner_ip6h = (ip6_t *)&icmp6[1]; 653 } 654 655 /* 656 * For link local destinations matching simply on IRE type is not 657 * sufficient. Same link local addresses for different ILL's is 658 * possible. 
659 */ 660 661 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 662 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 663 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 664 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 665 666 if (first_ire == NULL) { 667 if (ip_debug > 2) { 668 /* ip1dbg */ 669 pr_addr_dbg("icmp_inbound_too_big_v6:" 670 "no ire for dst %s\n", AF_INET6, 671 &inner_ip6h->ip6_dst); 672 } 673 freemsg(first_mp); 674 return; 675 } 676 677 mtu = ntohl(icmp6->icmp6_mtu); 678 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 679 for (ire = first_ire; ire != NULL && 680 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 681 ire = ire->ire_next) { 682 mutex_enter(&ire->ire_lock); 683 if (mtu < IPV6_MIN_MTU) { 684 ip1dbg(("Received mtu less than IPv6 " 685 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 686 mtu = IPV6_MIN_MTU; 687 /* 688 * If an mtu less than IPv6 min mtu is received, 689 * we must include a fragment header in 690 * subsequent packets. 691 */ 692 ire->ire_frag_flag |= IPH_FRAG_HDR; 693 } 694 ip1dbg(("Received mtu from router: %d\n", mtu)); 695 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 696 /* Record the new max frag size for the ULP. */ 697 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 698 /* 699 * If we need a fragment header in every packet 700 * (above case or multirouting), make sure the 701 * ULP takes it into account when computing the 702 * payload size. 
703 */ 704 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 705 sizeof (ip6_frag_t)); 706 } else { 707 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 708 } 709 mutex_exit(&ire->ire_lock); 710 } 711 rw_exit(&first_ire->ire_bucket->irb_lock); 712 ire_refrele(first_ire); 713 } else { 714 irb_t *irb = NULL; 715 /* 716 * for non-link local destinations we match only on the IRE type 717 */ 718 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 719 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 720 ipst); 721 if (ire == NULL) { 722 if (ip_debug > 2) { 723 /* ip1dbg */ 724 pr_addr_dbg("icmp_inbound_too_big_v6:" 725 "no ire for dst %s\n", 726 AF_INET6, &inner_ip6h->ip6_dst); 727 } 728 freemsg(first_mp); 729 return; 730 } 731 irb = ire->ire_bucket; 732 ire_refrele(ire); 733 rw_enter(&irb->irb_lock, RW_READER); 734 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 735 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 736 &inner_ip6h->ip6_dst)) { 737 mtu = ntohl(icmp6->icmp6_mtu); 738 mutex_enter(&ire->ire_lock); 739 if (mtu < IPV6_MIN_MTU) { 740 ip1dbg(("Received mtu less than IPv6" 741 "min mtu %d: %d\n", 742 IPV6_MIN_MTU, mtu)); 743 mtu = IPV6_MIN_MTU; 744 /* 745 * If an mtu less than IPv6 min mtu is 746 * received, we must include a fragment 747 * header in subsequent packets. 748 */ 749 ire->ire_frag_flag |= IPH_FRAG_HDR; 750 } 751 752 ip1dbg(("Received mtu from router: %d\n", mtu)); 753 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 754 /* Record the new max frag size for the ULP. */ 755 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 756 /* 757 * If we need a fragment header in 758 * every packet (above case or 759 * multirouting), make sure the ULP 760 * takes it into account when computing 761 * the payload size. 
762 */ 763 icmp6->icmp6_mtu = 764 htonl(ire->ire_max_frag - 765 sizeof (ip6_frag_t)); 766 } else { 767 icmp6->icmp6_mtu = 768 htonl(ire->ire_max_frag); 769 } 770 mutex_exit(&ire->ire_lock); 771 } 772 } 773 rw_exit(&irb->irb_lock); 774 } 775 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 776 mctl_present, zoneid); 777 } 778 779 /* 780 * Fanout received ICMPv6 error packets to the transports. 781 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 782 */ 783 void 784 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 785 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 786 { 787 uint16_t *up; /* Pointer to ports in ULP header */ 788 uint32_t ports; /* reversed ports for fanout */ 789 ip6_t rip6h; /* With reversed addresses */ 790 uint16_t hdr_length; 791 uint8_t *nexthdrp; 792 uint8_t nexthdr; 793 mblk_t *first_mp; 794 ipsec_in_t *ii; 795 tcpha_t *tcpha; 796 conn_t *connp; 797 ip_stack_t *ipst = ill->ill_ipst; 798 799 first_mp = mp; 800 if (mctl_present) { 801 mp = first_mp->b_cont; 802 ASSERT(mp != NULL); 803 804 ii = (ipsec_in_t *)first_mp->b_rptr; 805 ASSERT(ii->ipsec_in_type == IPSEC_IN); 806 } else { 807 ii = NULL; 808 } 809 810 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 811 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 812 813 /* 814 * Need to pullup everything in order to use 815 * ip_hdr_length_nexthdr_v6() 816 */ 817 if (mp->b_cont != NULL) { 818 if (!pullupmsg(mp, -1)) { 819 ip1dbg(("icmp_inbound_error_fanout_v6: " 820 "pullupmsg failed\n")); 821 goto drop_pkt; 822 } 823 ip6h = (ip6_t *)mp->b_rptr; 824 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 825 } 826 827 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 828 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 829 goto drop_pkt; 830 831 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 832 goto drop_pkt; 833 nexthdr = *nexthdrp; 834 835 /* Set message type, must be done after pullups */ 836 
mp->b_datap->db_type = M_CTL; 837 838 /* Try to pass the ICMP message to clients who need it */ 839 switch (nexthdr) { 840 case IPPROTO_UDP: { 841 /* 842 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 843 * UDP header to get the port information. 844 */ 845 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 846 mp->b_wptr) { 847 break; 848 } 849 /* 850 * Attempt to find a client stream based on port. 851 * Note that we do a reverse lookup since the header is 852 * in the form we sent it out. 853 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 854 * and we only set the src and dst addresses and nexthdr. 855 */ 856 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 857 rip6h.ip6_src = ip6h->ip6_dst; 858 rip6h.ip6_dst = ip6h->ip6_src; 859 rip6h.ip6_nxt = nexthdr; 860 ((uint16_t *)&ports)[0] = up[1]; 861 ((uint16_t *)&ports)[1] = up[0]; 862 863 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 864 IP6_NO_IPPOLICY, mctl_present, zoneid); 865 return; 866 } 867 case IPPROTO_TCP: { 868 /* 869 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 870 * the TCP header to get the port information. 871 */ 872 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 873 mp->b_wptr) { 874 break; 875 } 876 877 /* 878 * Attempt to find a client stream based on port. 879 * Note that we do a reverse lookup since the header is 880 * in the form we sent it out. 881 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 882 * we only set the src and dst addresses and nexthdr. 883 */ 884 885 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 886 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 887 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 888 if (connp == NULL) { 889 goto drop_pkt; 890 } 891 892 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 893 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 894 return; 895 896 } 897 case IPPROTO_SCTP: 898 /* 899 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 900 * the SCTP header to get the port information. 
901 */ 902 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 903 mp->b_wptr) { 904 break; 905 } 906 907 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 908 ((uint16_t *)&ports)[0] = up[1]; 909 ((uint16_t *)&ports)[1] = up[0]; 910 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0, 911 mctl_present, IP6_NO_IPPOLICY, zoneid); 912 return; 913 case IPPROTO_ESP: 914 case IPPROTO_AH: { 915 int ipsec_rc; 916 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 917 918 /* 919 * We need a IPSEC_IN in the front to fanout to AH/ESP. 920 * We will re-use the IPSEC_IN if it is already present as 921 * AH/ESP will not affect any fields in the IPSEC_IN for 922 * ICMP errors. If there is no IPSEC_IN, allocate a new 923 * one and attach it in the front. 924 */ 925 if (ii != NULL) { 926 /* 927 * ip_fanout_proto_again converts the ICMP errors 928 * that come back from AH/ESP to M_DATA so that 929 * if it is non-AH/ESP and we do a pullupmsg in 930 * this function, it would work. Convert it back 931 * to M_CTL before we send up as this is a ICMP 932 * error. This could have been generated locally or 933 * by some router. Validate the inner IPSEC 934 * headers. 935 * 936 * NOTE : ill_index is used by ip_fanout_proto_again 937 * to locate the ill. 938 */ 939 ASSERT(ill != NULL); 940 ii->ipsec_in_ill_index = 941 ill->ill_phyint->phyint_ifindex; 942 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 943 first_mp->b_cont->b_datap->db_type = M_CTL; 944 } else { 945 /* 946 * IPSEC_IN is not present. We attach a ipsec_in 947 * message and send up to IPSEC for validating 948 * and removing the IPSEC headers. Clear 949 * ipsec_in_secure so that when we return 950 * from IPSEC, we don't mistakenly think that this 951 * is a secure packet came from the network. 952 * 953 * NOTE : ill_index is used by ip_fanout_proto_again 954 * to locate the ill. 
955 */ 956 ASSERT(first_mp == mp); 957 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 958 ASSERT(ill != NULL); 959 if (first_mp == NULL) { 960 freemsg(mp); 961 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 962 return; 963 } 964 ii = (ipsec_in_t *)first_mp->b_rptr; 965 966 /* This is not a secure packet */ 967 ii->ipsec_in_secure = B_FALSE; 968 first_mp->b_cont = mp; 969 mp->b_datap->db_type = M_CTL; 970 ii->ipsec_in_ill_index = 971 ill->ill_phyint->phyint_ifindex; 972 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 973 } 974 975 if (!ipsec_loaded(ipss)) { 976 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 977 return; 978 } 979 980 if (nexthdr == IPPROTO_ESP) 981 ipsec_rc = ipsecesp_icmp_error(first_mp); 982 else 983 ipsec_rc = ipsecah_icmp_error(first_mp); 984 if (ipsec_rc == IPSEC_STATUS_FAILED) 985 return; 986 987 ip_fanout_proto_again(first_mp, ill, ill, NULL); 988 return; 989 } 990 case IPPROTO_ENCAP: 991 case IPPROTO_IPV6: 992 if ((uint8_t *)ip6h + hdr_length + 993 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 994 sizeof (ip6_t)) > mp->b_wptr) { 995 goto drop_pkt; 996 } 997 998 if (nexthdr == IPPROTO_ENCAP || 999 !IN6_ARE_ADDR_EQUAL( 1000 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1001 &ip6h->ip6_src) || 1002 !IN6_ARE_ADDR_EQUAL( 1003 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1004 &ip6h->ip6_dst)) { 1005 /* 1006 * For tunnels that have used IPsec protection, 1007 * we need to adjust the MTU to take into account 1008 * the IPsec overhead. 1009 */ 1010 if (ii != NULL) 1011 icmp6->icmp6_mtu = htonl( 1012 ntohl(icmp6->icmp6_mtu) - 1013 ipsec_in_extra_length(first_mp)); 1014 } else { 1015 /* 1016 * Self-encapsulated case. As in the ipv4 case, 1017 * we need to strip the 2nd IP header. Since mp 1018 * is already pulled-up, we can simply bcopy 1019 * the 3rd header + data over the 2nd header. 
1020 */ 1021 uint16_t unused_len; 1022 ip6_t *inner_ip6h = (ip6_t *) 1023 ((uchar_t *)ip6h + hdr_length); 1024 1025 /* 1026 * Make sure we don't do recursion more than once. 1027 */ 1028 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1029 &unused_len, &nexthdrp) || 1030 *nexthdrp == IPPROTO_IPV6) { 1031 goto drop_pkt; 1032 } 1033 1034 /* 1035 * We are about to modify the packet. Make a copy if 1036 * someone else has a reference to it. 1037 */ 1038 if (DB_REF(mp) > 1) { 1039 mblk_t *mp1; 1040 uint16_t icmp6_offset; 1041 1042 mp1 = copymsg(mp); 1043 if (mp1 == NULL) { 1044 goto drop_pkt; 1045 } 1046 icmp6_offset = (uint16_t) 1047 ((uchar_t *)icmp6 - mp->b_rptr); 1048 freemsg(mp); 1049 mp = mp1; 1050 1051 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1052 ip6h = (ip6_t *)&icmp6[1]; 1053 inner_ip6h = (ip6_t *) 1054 ((uchar_t *)ip6h + hdr_length); 1055 1056 if (mctl_present) 1057 first_mp->b_cont = mp; 1058 else 1059 first_mp = mp; 1060 } 1061 1062 /* 1063 * Need to set db_type back to M_DATA before 1064 * refeeding mp into this function. 1065 */ 1066 DB_TYPE(mp) = M_DATA; 1067 1068 /* 1069 * Copy the 3rd header + remaining data on top 1070 * of the 2nd header. 1071 */ 1072 bcopy(inner_ip6h, ip6h, 1073 mp->b_wptr - (uchar_t *)inner_ip6h); 1074 1075 /* 1076 * Subtract length of the 2nd header. 1077 */ 1078 mp->b_wptr -= hdr_length; 1079 1080 /* 1081 * Now recurse, and see what I _really_ should be 1082 * doing here. 1083 */ 1084 icmp_inbound_error_fanout_v6(q, first_mp, 1085 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1086 zoneid); 1087 return; 1088 } 1089 /* FALLTHRU */ 1090 default: 1091 /* 1092 * The rip6h header is only used for the lookup and we 1093 * only set the src and dst addresses and nexthdr. 
1094 */ 1095 rip6h.ip6_src = ip6h->ip6_dst; 1096 rip6h.ip6_dst = ip6h->ip6_src; 1097 rip6h.ip6_nxt = nexthdr; 1098 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1099 IP6_NO_IPPOLICY, mctl_present, zoneid); 1100 return; 1101 } 1102 /* NOTREACHED */ 1103 drop_pkt: 1104 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1105 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1106 freemsg(first_mp); 1107 } 1108 1109 /* 1110 * Process received IPv6 ICMP Redirect messages. 1111 */ 1112 /* ARGSUSED */ 1113 static void 1114 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1115 { 1116 ip6_t *ip6h; 1117 uint16_t hdr_length; 1118 nd_redirect_t *rd; 1119 ire_t *ire; 1120 ire_t *prev_ire; 1121 ire_t *redir_ire; 1122 in6_addr_t *src, *dst, *gateway; 1123 nd_opt_hdr_t *opt; 1124 nce_t *nce; 1125 int nce_flags = 0; 1126 int err = 0; 1127 boolean_t redirect_to_router = B_FALSE; 1128 int len; 1129 int optlen; 1130 iulp_t ulp_info = { 0 }; 1131 ill_t *prev_ire_ill; 1132 ipif_t *ipif; 1133 ip_stack_t *ipst = ill->ill_ipst; 1134 1135 ip6h = (ip6_t *)mp->b_rptr; 1136 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1137 hdr_length = ip_hdr_length_v6(mp, ip6h); 1138 else 1139 hdr_length = IPV6_HDR_LEN; 1140 1141 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1142 len = mp->b_wptr - mp->b_rptr - hdr_length; 1143 src = &ip6h->ip6_src; 1144 dst = &rd->nd_rd_dst; 1145 gateway = &rd->nd_rd_target; 1146 1147 /* Verify if it is a valid redirect */ 1148 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1149 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1150 (rd->nd_rd_code != 0) || 1151 (len < sizeof (nd_redirect_t)) || 1152 (IN6_IS_ADDR_V4MAPPED(dst)) || 1153 (IN6_IS_ADDR_MULTICAST(dst))) { 1154 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1155 freemsg(mp); 1156 return; 1157 } 1158 1159 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1160 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1161 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1162 freemsg(mp); 1163 return; 1164 } 1165 1166 if (len > sizeof 
(nd_redirect_t)) { 1167 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1168 len - sizeof (nd_redirect_t))) { 1169 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1170 freemsg(mp); 1171 return; 1172 } 1173 } 1174 1175 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1176 redirect_to_router = B_TRUE; 1177 nce_flags |= NCE_F_ISROUTER; 1178 } 1179 1180 /* ipif will be refreleased afterwards */ 1181 ipif = ipif_get_next_ipif(NULL, ill); 1182 if (ipif == NULL) { 1183 freemsg(mp); 1184 return; 1185 } 1186 1187 /* 1188 * Verify that the IP source address of the redirect is 1189 * the same as the current first-hop router for the specified 1190 * ICMP destination address. 1191 * Also, Make sure we had a route for the dest in question and 1192 * that route was pointing to the old gateway (the source of the 1193 * redirect packet.) 1194 */ 1195 1196 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1197 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1198 MATCH_IRE_DEFAULT, ipst); 1199 1200 /* 1201 * Check that 1202 * the redirect was not from ourselves 1203 * old gateway is still directly reachable 1204 */ 1205 if (prev_ire == NULL || 1206 prev_ire->ire_type == IRE_LOCAL) { 1207 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1208 ipif_refrele(ipif); 1209 goto fail_redirect; 1210 } 1211 prev_ire_ill = ire_to_ill(prev_ire); 1212 ASSERT(prev_ire_ill != NULL); 1213 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1214 nce_flags |= NCE_F_NONUD; 1215 1216 /* 1217 * Should we use the old ULP info to create the new gateway? From 1218 * a user's perspective, we should inherit the info so that it 1219 * is a "smooth" transition. If we do not do that, then new 1220 * connections going thru the new gateway will have no route metrics, 1221 * which is counter-intuitive to user. From a network point of 1222 * view, this may or may not make sense even though the new gateway 1223 * is still directly connected to us so the route metrics should not 1224 * change much. 
1225 * 1226 * But if the old ire_uinfo is not initialized, we do another 1227 * recursive lookup on the dest using the new gateway. There may 1228 * be a route to that. If so, use it to initialize the redirect 1229 * route. 1230 */ 1231 if (prev_ire->ire_uinfo.iulp_set) { 1232 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1233 } else if (redirect_to_router) { 1234 /* 1235 * Only do the following if the redirection is really to 1236 * a router. 1237 */ 1238 ire_t *tmp_ire; 1239 ire_t *sire; 1240 1241 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1242 ALL_ZONES, 0, NULL, 1243 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1244 ipst); 1245 if (sire != NULL) { 1246 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1247 ASSERT(tmp_ire != NULL); 1248 ire_refrele(tmp_ire); 1249 ire_refrele(sire); 1250 } else if (tmp_ire != NULL) { 1251 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1252 sizeof (iulp_t)); 1253 ire_refrele(tmp_ire); 1254 } 1255 } 1256 1257 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1258 opt = (nd_opt_hdr_t *)&rd[1]; 1259 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1260 if (opt != NULL) { 1261 err = ndp_lookup_then_add_v6(ill, 1262 (uchar_t *)&opt[1], /* Link layer address */ 1263 gateway, 1264 &ipv6_all_ones, /* prefix mask */ 1265 &ipv6_all_zeros, /* Mapping mask */ 1266 0, 1267 nce_flags, 1268 ND_STALE, 1269 &nce); 1270 switch (err) { 1271 case 0: 1272 NCE_REFRELE(nce); 1273 break; 1274 case EEXIST: 1275 /* 1276 * Check to see if link layer address has changed and 1277 * process the nce_state accordingly. 
1278 */ 1279 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1280 NCE_REFRELE(nce); 1281 break; 1282 default: 1283 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1284 err)); 1285 ipif_refrele(ipif); 1286 goto fail_redirect; 1287 } 1288 } 1289 if (redirect_to_router) { 1290 /* icmp_redirect_ok_v6() must have already verified this */ 1291 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1292 1293 /* 1294 * Create a Route Association. This will allow us to remember 1295 * a router told us to use the particular gateway. 1296 */ 1297 ire = ire_create_v6( 1298 dst, 1299 &ipv6_all_ones, /* mask */ 1300 &prev_ire->ire_src_addr_v6, /* source addr */ 1301 gateway, /* gateway addr */ 1302 &prev_ire->ire_max_frag, /* max frag */ 1303 NULL, /* no src nce */ 1304 NULL, /* no rfq */ 1305 NULL, /* no stq */ 1306 IRE_HOST, 1307 prev_ire->ire_ipif, 1308 NULL, 1309 0, 1310 0, 1311 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1312 &ulp_info, 1313 NULL, 1314 NULL, 1315 ipst); 1316 } else { 1317 queue_t *stq; 1318 1319 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1320 ? ipif->ipif_rq : ipif->ipif_wq; 1321 1322 /* 1323 * Just create an on link entry, i.e. interface route. 
1324 */ 1325 ire = ire_create_v6( 1326 dst, /* gateway == dst */ 1327 &ipv6_all_ones, /* mask */ 1328 &prev_ire->ire_src_addr_v6, /* source addr */ 1329 &ipv6_all_zeros, /* gateway addr */ 1330 &prev_ire->ire_max_frag, /* max frag */ 1331 NULL, /* no src nce */ 1332 NULL, /* ire rfq */ 1333 stq, /* ire stq */ 1334 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1335 prev_ire->ire_ipif, 1336 &ipv6_all_ones, 1337 0, 1338 0, 1339 (RTF_DYNAMIC | RTF_HOST), 1340 &ulp_info, 1341 NULL, 1342 NULL, 1343 ipst); 1344 } 1345 1346 /* Release reference from earlier ipif_get_next_ipif() */ 1347 ipif_refrele(ipif); 1348 1349 if (ire == NULL) 1350 goto fail_redirect; 1351 1352 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1353 1354 /* tell routing sockets that we received a redirect */ 1355 ip_rts_change_v6(RTM_REDIRECT, 1356 &rd->nd_rd_dst, 1357 &rd->nd_rd_target, 1358 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1359 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1360 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1361 1362 /* 1363 * Delete any existing IRE_HOST type ires for this destination. 1364 * This together with the added IRE has the effect of 1365 * modifying an existing redirect. 
1366 */ 1367 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1368 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1369 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1370 ipst); 1371 1372 ire_refrele(ire); /* Held in ire_add_v6 */ 1373 1374 if (redir_ire != NULL) { 1375 if (redir_ire->ire_flags & RTF_DYNAMIC) 1376 ire_delete(redir_ire); 1377 ire_refrele(redir_ire); 1378 } 1379 } 1380 1381 if (prev_ire->ire_type == IRE_CACHE) 1382 ire_delete(prev_ire); 1383 ire_refrele(prev_ire); 1384 prev_ire = NULL; 1385 1386 fail_redirect: 1387 if (prev_ire != NULL) 1388 ire_refrele(prev_ire); 1389 freemsg(mp); 1390 } 1391 1392 static ill_t * 1393 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1394 { 1395 ill_t *ill; 1396 1397 ASSERT(WR(q) == q); 1398 1399 if (q->q_next != NULL) { 1400 ill = (ill_t *)q->q_ptr; 1401 if (ILL_CAN_LOOKUP(ill)) 1402 ill_refhold(ill); 1403 else 1404 ill = NULL; 1405 } else { 1406 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1407 NULL, NULL, NULL, NULL, NULL, ipst); 1408 } 1409 if (ill == NULL) 1410 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1411 return (ill); 1412 } 1413 1414 /* 1415 * Assigns an appropriate source address to the packet. 1416 * If origdst is one of our IP addresses that use it as the source. 1417 * If the queue is an ill queue then select a source from that ill. 1418 * Otherwise pick a source based on a route lookup back to the origsrc. 1419 * 1420 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1421 */ 1422 static in6_addr_t * 1423 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1424 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1425 { 1426 ill_t *ill; 1427 ire_t *ire; 1428 ipif_t *ipif; 1429 1430 ASSERT(!(wq->q_flag & QREADR)); 1431 if (wq->q_next != NULL) { 1432 ill = (ill_t *)wq->q_ptr; 1433 } else { 1434 ill = NULL; 1435 } 1436 1437 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1438 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1439 ipst); 1440 if (ire != NULL) { 1441 /* Destined to one of our addresses */ 1442 *src = *origdst; 1443 ire_refrele(ire); 1444 return (src); 1445 } 1446 if (ire != NULL) { 1447 ire_refrele(ire); 1448 ire = NULL; 1449 } 1450 if (ill == NULL) { 1451 /* What is the route back to the original source? */ 1452 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1453 NULL, NULL, zoneid, NULL, 1454 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1455 if (ire == NULL) { 1456 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1457 return (NULL); 1458 } 1459 /* 1460 * Does not matter whether we use ire_stq or ire_ipif here. 1461 * Just pick an ill for ICMP replies. 1462 */ 1463 ASSERT(ire->ire_ipif != NULL); 1464 ill = ire->ire_ipif->ipif_ill; 1465 ire_refrele(ire); 1466 } 1467 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1468 IPV6_PREFER_SRC_DEFAULT, zoneid); 1469 if (ipif != NULL) { 1470 *src = ipif->ipif_v6src_addr; 1471 ipif_refrele(ipif); 1472 return (src); 1473 } 1474 /* 1475 * Unusual case - can't find a usable source address to reach the 1476 * original source. Use what in the route to the source. 
1477 */ 1478 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1479 NULL, NULL, zoneid, NULL, 1480 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1481 if (ire == NULL) { 1482 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1483 return (NULL); 1484 } 1485 ASSERT(ire != NULL); 1486 *src = ire->ire_src_addr_v6; 1487 ire_refrele(ire); 1488 return (src); 1489 } 1490 1491 /* 1492 * Build and ship an IPv6 ICMP message using the packet data in mp, 1493 * and the ICMP header pointed to by "stuff". (May be called as 1494 * writer.) 1495 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1496 * verify that an icmp error packet can be sent. 1497 * 1498 * If q is an ill write side queue (which is the case when packets 1499 * arrive from ip_rput) then ip_wput code will ensure that packets to 1500 * link-local destinations are sent out that ill. 1501 * 1502 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1503 * source address (see above function). 1504 */ 1505 static void 1506 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1507 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1508 ip_stack_t *ipst) 1509 { 1510 ip6_t *ip6h; 1511 in6_addr_t v6dst; 1512 size_t len_needed; 1513 size_t msg_len; 1514 mblk_t *mp1; 1515 icmp6_t *icmp6; 1516 ill_t *ill; 1517 in6_addr_t v6src; 1518 mblk_t *ipsec_mp; 1519 ipsec_out_t *io; 1520 1521 ill = ip_queue_to_ill_v6(q, ipst); 1522 if (ill == NULL) { 1523 freemsg(mp); 1524 return; 1525 } 1526 1527 if (mctl_present) { 1528 /* 1529 * If it is : 1530 * 1531 * 1) a IPSEC_OUT, then this is caused by outbound 1532 * datagram originating on this host. IPSEC processing 1533 * may or may not have been done. Refer to comments above 1534 * icmp_inbound_error_fanout for details. 1535 * 1536 * 2) a IPSEC_IN if we are generating a icmp_message 1537 * for an incoming datagram destined for us i.e called 1538 * from ip_fanout_send_icmp. 
1539 */ 1540 ipsec_info_t *in; 1541 1542 ipsec_mp = mp; 1543 mp = ipsec_mp->b_cont; 1544 1545 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1546 ip6h = (ip6_t *)mp->b_rptr; 1547 1548 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1549 in->ipsec_info_type == IPSEC_IN); 1550 1551 if (in->ipsec_info_type == IPSEC_IN) { 1552 /* 1553 * Convert the IPSEC_IN to IPSEC_OUT. 1554 */ 1555 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1557 ill_refrele(ill); 1558 return; 1559 } 1560 } else { 1561 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1562 io = (ipsec_out_t *)in; 1563 /* 1564 * Clear out ipsec_out_proc_begin, so we do a fresh 1565 * ire lookup. 1566 */ 1567 io->ipsec_out_proc_begin = B_FALSE; 1568 } 1569 } else { 1570 /* 1571 * This is in clear. The icmp message we are building 1572 * here should go out in clear. 1573 */ 1574 ipsec_in_t *ii; 1575 ASSERT(mp->b_datap->db_type == M_DATA); 1576 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1577 if (ipsec_mp == NULL) { 1578 freemsg(mp); 1579 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1580 ill_refrele(ill); 1581 return; 1582 } 1583 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1584 1585 /* This is not a secure packet */ 1586 ii->ipsec_in_secure = B_FALSE; 1587 /* 1588 * For trusted extensions using a shared IP address we can 1589 * send using any zoneid. 1590 */ 1591 if (zoneid == ALL_ZONES) 1592 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1593 else 1594 ii->ipsec_in_zoneid = zoneid; 1595 ipsec_mp->b_cont = mp; 1596 ip6h = (ip6_t *)mp->b_rptr; 1597 /* 1598 * Convert the IPSEC_IN to IPSEC_OUT. 
1599 */ 1600 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1601 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1602 ill_refrele(ill); 1603 return; 1604 } 1605 } 1606 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1607 1608 if (v6src_ptr != NULL) { 1609 v6src = *v6src_ptr; 1610 } else { 1611 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1612 &v6src, zoneid, ipst) == NULL) { 1613 freemsg(ipsec_mp); 1614 ill_refrele(ill); 1615 return; 1616 } 1617 } 1618 v6dst = ip6h->ip6_src; 1619 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1620 msg_len = msgdsize(mp); 1621 if (msg_len > len_needed) { 1622 if (!adjmsg(mp, len_needed - msg_len)) { 1623 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1624 freemsg(ipsec_mp); 1625 ill_refrele(ill); 1626 return; 1627 } 1628 msg_len = len_needed; 1629 } 1630 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1631 if (mp1 == NULL) { 1632 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1633 freemsg(ipsec_mp); 1634 ill_refrele(ill); 1635 return; 1636 } 1637 ill_refrele(ill); 1638 mp1->b_cont = mp; 1639 mp = mp1; 1640 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1641 io->ipsec_out_type == IPSEC_OUT); 1642 ipsec_mp->b_cont = mp; 1643 1644 /* 1645 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1646 * node generates be accepted in peace by all on-host destinations. 1647 * If we do NOT assume that all on-host destinations trust 1648 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1649 * (Look for ipsec_out_icmp_loopback). 
1650 */ 1651 io->ipsec_out_icmp_loopback = B_TRUE; 1652 1653 ip6h = (ip6_t *)mp->b_rptr; 1654 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1655 1656 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1657 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1658 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1659 ip6h->ip6_dst = v6dst; 1660 ip6h->ip6_src = v6src; 1661 msg_len += IPV6_HDR_LEN + len; 1662 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1663 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1664 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1665 } 1666 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1667 icmp6 = (icmp6_t *)&ip6h[1]; 1668 bcopy(stuff, (char *)icmp6, len); 1669 /* 1670 * Prepare for checksum by putting icmp length in the icmp 1671 * checksum field. The checksum is calculated in ip_wput_v6. 1672 */ 1673 icmp6->icmp6_cksum = ip6h->ip6_plen; 1674 if (icmp6->icmp6_type == ND_REDIRECT) { 1675 ip6h->ip6_hops = IPV6_MAX_HOPS; 1676 } 1677 /* Send to V6 writeside put routine */ 1678 put(q, ipsec_mp); 1679 } 1680 1681 /* 1682 * Update the output mib when ICMPv6 packets are sent. 
1683 */ 1684 static void 1685 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1686 { 1687 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1688 1689 switch (icmp6->icmp6_type) { 1690 case ICMP6_DST_UNREACH: 1691 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1692 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1694 break; 1695 1696 case ICMP6_TIME_EXCEEDED: 1697 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1698 break; 1699 1700 case ICMP6_PARAM_PROB: 1701 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1702 break; 1703 1704 case ICMP6_PACKET_TOO_BIG: 1705 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1706 break; 1707 1708 case ICMP6_ECHO_REQUEST: 1709 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1710 break; 1711 1712 case ICMP6_ECHO_REPLY: 1713 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1714 break; 1715 1716 case ND_ROUTER_SOLICIT: 1717 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1718 break; 1719 1720 case ND_ROUTER_ADVERT: 1721 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1722 break; 1723 1724 case ND_NEIGHBOR_SOLICIT: 1725 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1726 break; 1727 1728 case ND_NEIGHBOR_ADVERT: 1729 BUMP_MIB(ill->ill_icmp6_mib, 1730 ipv6IfIcmpOutNeighborAdvertisements); 1731 break; 1732 1733 case ND_REDIRECT: 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1735 break; 1736 1737 case MLD_LISTENER_QUERY: 1738 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1739 break; 1740 1741 case MLD_LISTENER_REPORT: 1742 case MLD_V2_LISTENER_REPORT: 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1744 break; 1745 1746 case MLD_LISTENER_REDUCTION: 1747 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1748 break; 1749 } 1750 } 1751 1752 /* 1753 * Check if it is ok to send an ICMPv6 error packet in 1754 * response to the IP packet in 
mp. 1755 * Free the message and return null if no 1756 * ICMP error packet should be sent. 1757 */ 1758 static mblk_t * 1759 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1760 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1761 { 1762 ip6_t *ip6h; 1763 1764 if (!mp) 1765 return (NULL); 1766 1767 ip6h = (ip6_t *)mp->b_rptr; 1768 1769 /* Check if source address uniquely identifies the host */ 1770 1771 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1772 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1773 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1774 freemsg(mp); 1775 return (NULL); 1776 } 1777 1778 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1779 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1780 icmp6_t *icmp6; 1781 1782 if (mp->b_wptr - mp->b_rptr < len_needed) { 1783 if (!pullupmsg(mp, len_needed)) { 1784 ill_t *ill; 1785 1786 ill = ip_queue_to_ill_v6(q, ipst); 1787 if (ill == NULL) { 1788 BUMP_MIB(&ipst->ips_icmp6_mib, 1789 ipv6IfIcmpInErrors); 1790 } else { 1791 BUMP_MIB(ill->ill_icmp6_mib, 1792 ipv6IfIcmpInErrors); 1793 ill_refrele(ill); 1794 } 1795 freemsg(mp); 1796 return (NULL); 1797 } 1798 ip6h = (ip6_t *)mp->b_rptr; 1799 } 1800 icmp6 = (icmp6_t *)&ip6h[1]; 1801 /* Explicitly do not generate errors in response to redirects */ 1802 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1803 icmp6->icmp6_type == ND_REDIRECT) { 1804 freemsg(mp); 1805 return (NULL); 1806 } 1807 } 1808 /* 1809 * Check that the destination is not multicast and that the packet 1810 * was not sent on link layer broadcast or multicast. (Exception 1811 * is Packet too big message as per the draft - when mcast_ok is set.) 1812 */ 1813 if (!mcast_ok && 1814 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1815 freemsg(mp); 1816 return (NULL); 1817 } 1818 if (icmp_err_rate_limit(ipst)) { 1819 /* 1820 * Only send ICMP error packets every so often. 1821 * This should be done on a per port/source basis, 1822 * but for now this will suffice. 
1823 */ 1824 freemsg(mp); 1825 return (NULL); 1826 } 1827 return (mp); 1828 } 1829 1830 /* 1831 * Generate an ICMPv6 redirect message. 1832 * Include target link layer address option if it exits. 1833 * Always include redirect header. 1834 */ 1835 static void 1836 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1837 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1838 { 1839 nd_redirect_t *rd; 1840 nd_opt_rd_hdr_t *rdh; 1841 uchar_t *buf; 1842 nce_t *nce = NULL; 1843 nd_opt_hdr_t *opt; 1844 int len; 1845 int ll_opt_len = 0; 1846 int max_redir_hdr_data_len; 1847 int pkt_len; 1848 in6_addr_t *srcp; 1849 ip_stack_t *ipst = ill->ill_ipst; 1850 1851 /* 1852 * We are called from ip_rput where we could 1853 * not have attached an IPSEC_IN. 1854 */ 1855 ASSERT(mp->b_datap->db_type == M_DATA); 1856 1857 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1858 if (mp == NULL) 1859 return; 1860 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1861 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1862 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1863 ill->ill_phys_addr_length + 7)/8 * 8; 1864 } 1865 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1866 ASSERT(len % 4 == 0); 1867 buf = kmem_alloc(len, KM_NOSLEEP); 1868 if (buf == NULL) { 1869 if (nce != NULL) 1870 NCE_REFRELE(nce); 1871 freemsg(mp); 1872 return; 1873 } 1874 1875 rd = (nd_redirect_t *)buf; 1876 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1877 rd->nd_rd_code = 0; 1878 rd->nd_rd_reserved = 0; 1879 rd->nd_rd_target = *targetp; 1880 rd->nd_rd_dst = *dest; 1881 1882 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1883 if (nce != NULL && ll_opt_len != 0) { 1884 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1885 opt->nd_opt_len = ll_opt_len/8; 1886 bcopy((char *)nce->nce_res_mp->b_rptr + 1887 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1888 ill->ill_phys_addr_length); 1889 } 1890 if (nce != NULL) 1891 NCE_REFRELE(nce); 1892 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1893 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1894 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1895 max_redir_hdr_data_len = 1896 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1897 pkt_len = msgdsize(mp); 1898 /* Make sure mp is 8 byte aligned */ 1899 if (pkt_len > max_redir_hdr_data_len) { 1900 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1901 sizeof (nd_opt_rd_hdr_t))/8; 1902 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1903 } else { 1904 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1905 (void) adjmsg(mp, -(pkt_len % 8)); 1906 } 1907 rdh->nd_opt_rh_reserved1 = 0; 1908 rdh->nd_opt_rh_reserved2 = 0; 1909 /* ipif_v6src_addr contains the link-local source address */ 1910 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1911 if (ill->ill_group != NULL) { 1912 /* 1913 * The receiver of the redirect will verify whether it 1914 * had a route through us (srcp that we will use in 1915 * the redirect) or not. As we load spread even link-locals, 1916 * we don't know which source address the receiver of 1917 * redirect has in its route for communicating with us. 1918 * Thus we randomly choose a source here and finally we 1919 * should get to the right one and it will eventually 1920 * accept the redirect from us. We can't call 1921 * ip_lookup_scope_v6 because we don't have the right 1922 * link-local address here. Thus we randomly choose one. 
1923 */ 1924 int cnt = ill->ill_group->illgrp_ill_count; 1925 1926 ill = ill->ill_group->illgrp_ill; 1927 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1928 while (cnt--) 1929 ill = ill->ill_group_next; 1930 srcp = &ill->ill_ipif->ipif_v6src_addr; 1931 } else { 1932 srcp = &ill->ill_ipif->ipif_v6src_addr; 1933 } 1934 rw_exit(&ipst->ips_ill_g_lock); 1935 /* Redirects sent by router, and router is global zone */ 1936 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1937 kmem_free(buf, len); 1938 } 1939 1940 1941 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1942 void 1943 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1944 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1945 ip_stack_t *ipst) 1946 { 1947 icmp6_t icmp6; 1948 boolean_t mctl_present; 1949 mblk_t *first_mp; 1950 1951 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1952 1953 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1954 if (mp == NULL) { 1955 if (mctl_present) 1956 freeb(first_mp); 1957 return; 1958 } 1959 bzero(&icmp6, sizeof (icmp6_t)); 1960 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1961 icmp6.icmp6_code = code; 1962 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1963 zoneid, ipst); 1964 } 1965 1966 /* 1967 * Generate an ICMP unreachable message. 
1968 */ 1969 void 1970 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1971 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1972 ip_stack_t *ipst) 1973 { 1974 icmp6_t icmp6; 1975 boolean_t mctl_present; 1976 mblk_t *first_mp; 1977 1978 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1979 1980 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1981 if (mp == NULL) { 1982 if (mctl_present) 1983 freeb(first_mp); 1984 return; 1985 } 1986 bzero(&icmp6, sizeof (icmp6_t)); 1987 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1988 icmp6.icmp6_code = code; 1989 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1990 zoneid, ipst); 1991 } 1992 1993 /* 1994 * Generate an ICMP pkt too big message. 1995 */ 1996 static void 1997 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1998 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1999 { 2000 icmp6_t icmp6; 2001 mblk_t *first_mp; 2002 boolean_t mctl_present; 2003 2004 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2005 2006 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2007 if (mp == NULL) { 2008 if (mctl_present) 2009 freeb(first_mp); 2010 return; 2011 } 2012 bzero(&icmp6, sizeof (icmp6_t)); 2013 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2014 icmp6.icmp6_code = 0; 2015 icmp6.icmp6_mtu = htonl(mtu); 2016 2017 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2018 zoneid, ipst); 2019 } 2020 2021 /* 2022 * Generate an ICMP parameter problem message. (May be called as writer.) 2023 * 'offset' is the offset from the beginning of the packet in error. 
2024 */ 2025 static void 2026 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2027 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2028 ip_stack_t *ipst) 2029 { 2030 icmp6_t icmp6; 2031 boolean_t mctl_present; 2032 mblk_t *first_mp; 2033 2034 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2035 2036 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2037 if (mp == NULL) { 2038 if (mctl_present) 2039 freeb(first_mp); 2040 return; 2041 } 2042 bzero((char *)&icmp6, sizeof (icmp6_t)); 2043 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2044 icmp6.icmp6_code = code; 2045 icmp6.icmp6_pptr = htonl(offset); 2046 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2047 zoneid, ipst); 2048 } 2049 2050 /* 2051 * This code will need to take into account the possibility of binding 2052 * to a link local address on a multi-homed host, in which case the 2053 * outgoing interface (from the conn) will need to be used when getting 2054 * an ire for the dst. Going through proper outgoing interface and 2055 * choosing the source address corresponding to the outgoing interface 2056 * is necessary when the destination address is a link-local address and 2057 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2058 * This can happen when active connection is setup; thus ipp pointer 2059 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2060 * pointer is passed as ipp pointer. 
2061 */ 2062 mblk_t * 2063 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2064 { 2065 ssize_t len; 2066 int protocol; 2067 struct T_bind_req *tbr; 2068 sin6_t *sin6; 2069 ipa6_conn_t *ac6; 2070 in6_addr_t *v6srcp; 2071 in6_addr_t *v6dstp; 2072 uint16_t lport; 2073 uint16_t fport; 2074 uchar_t *ucp; 2075 mblk_t *mp1; 2076 boolean_t ire_requested; 2077 boolean_t ipsec_policy_set; 2078 int error = 0; 2079 boolean_t local_bind; 2080 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2081 ipa6_conn_x_t *acx6; 2082 boolean_t verify_dst; 2083 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2084 2085 ASSERT(connp->conn_af_isv6); 2086 len = mp->b_wptr - mp->b_rptr; 2087 if (len < (sizeof (*tbr) + 1)) { 2088 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2089 "ip_bind_v6: bogus msg, len %ld", len); 2090 goto bad_addr; 2091 } 2092 /* Back up and extract the protocol identifier. */ 2093 mp->b_wptr--; 2094 tbr = (struct T_bind_req *)mp->b_rptr; 2095 /* Reset the message type in preparation for shipping it back. */ 2096 mp->b_datap->db_type = M_PCPROTO; 2097 2098 protocol = *mp->b_wptr & 0xFF; 2099 connp->conn_ulp = (uint8_t)protocol; 2100 2101 /* 2102 * Check for a zero length address. This is from a protocol that 2103 * wants to register to receive all packets of its type. 2104 */ 2105 if (tbr->ADDR_length == 0) { 2106 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2107 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2108 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2109 NULL) { 2110 /* 2111 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2112 * Do not allow others to bind to these. 2113 */ 2114 goto bad_addr; 2115 } 2116 2117 /* 2118 * 2119 * The udp module never sends down a zero-length address, 2120 * and allowing this on a labeled system will break MLP 2121 * functionality. 
2122 */ 2123 if (is_system_labeled() && protocol == IPPROTO_UDP) 2124 goto bad_addr; 2125 2126 /* Allow ipsec plumbing */ 2127 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2128 protocol != IPPROTO_ESP) 2129 goto bad_addr; 2130 2131 connp->conn_srcv6 = ipv6_all_zeros; 2132 ipcl_proto_insert_v6(connp, protocol); 2133 2134 tbr->PRIM_type = T_BIND_ACK; 2135 return (mp); 2136 } 2137 2138 /* Extract the address pointer from the message. */ 2139 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2140 tbr->ADDR_length); 2141 if (ucp == NULL) { 2142 ip1dbg(("ip_bind_v6: no address\n")); 2143 goto bad_addr; 2144 } 2145 if (!OK_32PTR(ucp)) { 2146 ip1dbg(("ip_bind_v6: unaligned address\n")); 2147 goto bad_addr; 2148 } 2149 mp1 = mp->b_cont; /* trailing mp if any */ 2150 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2151 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2152 2153 switch (tbr->ADDR_length) { 2154 default: 2155 ip1dbg(("ip_bind_v6: bad address length %d\n", 2156 (int)tbr->ADDR_length)); 2157 goto bad_addr; 2158 2159 case IPV6_ADDR_LEN: 2160 /* Verification of local address only */ 2161 v6srcp = (in6_addr_t *)ucp; 2162 lport = 0; 2163 local_bind = B_TRUE; 2164 break; 2165 2166 case sizeof (sin6_t): 2167 sin6 = (sin6_t *)ucp; 2168 v6srcp = &sin6->sin6_addr; 2169 lport = sin6->sin6_port; 2170 local_bind = B_TRUE; 2171 break; 2172 2173 case sizeof (ipa6_conn_t): 2174 /* 2175 * Verify that both the source and destination addresses 2176 * are valid. 2177 * Note that we allow connect to broadcast and multicast 2178 * addresses when ire_requested is set. Thus the ULP 2179 * has to check for IRE_BROADCAST and multicast. 2180 */ 2181 ac6 = (ipa6_conn_t *)ucp; 2182 v6srcp = &ac6->ac6_laddr; 2183 v6dstp = &ac6->ac6_faddr; 2184 fport = ac6->ac6_fport; 2185 /* For raw socket, the local port is not set. */ 2186 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2187 connp->conn_lport; 2188 local_bind = B_FALSE; 2189 /* Always verify destination reachability. */ 2190 verify_dst = B_TRUE; 2191 break; 2192 2193 case sizeof (ipa6_conn_x_t): 2194 /* 2195 * Verify that the source address is valid. 2196 * Note that we allow connect to broadcast and multicast 2197 * addresses when ire_requested is set. Thus the ULP 2198 * has to check for IRE_BROADCAST and multicast. 2199 */ 2200 acx6 = (ipa6_conn_x_t *)ucp; 2201 ac6 = &acx6->ac6x_conn; 2202 v6srcp = &ac6->ac6_laddr; 2203 v6dstp = &ac6->ac6_faddr; 2204 fport = ac6->ac6_fport; 2205 lport = ac6->ac6_lport; 2206 local_bind = B_FALSE; 2207 /* 2208 * Client that passed ipa6_conn_x_t to us specifies whether to 2209 * verify destination reachability. 2210 */ 2211 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2212 break; 2213 } 2214 if (local_bind) { 2215 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2216 /* Bind to IPv4 address */ 2217 ipaddr_t v4src; 2218 2219 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2220 2221 error = ip_bind_laddr(connp, mp, v4src, lport, 2222 ire_requested, ipsec_policy_set, 2223 tbr->ADDR_length != IPV6_ADDR_LEN); 2224 if (error != 0) 2225 goto bad_addr; 2226 connp->conn_pkt_isv6 = B_FALSE; 2227 } else { 2228 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2229 error = 0; 2230 goto bad_addr; 2231 } 2232 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2233 ire_requested, ipsec_policy_set, 2234 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2235 if (error != 0) 2236 goto bad_addr; 2237 connp->conn_pkt_isv6 = B_TRUE; 2238 } 2239 } else { 2240 /* 2241 * Bind to local and remote address. Local might be 2242 * unspecified in which case it will be extracted from 2243 * ire_src_addr_v6 2244 */ 2245 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2246 /* Connect to IPv4 address */ 2247 ipaddr_t v4src; 2248 ipaddr_t v4dst; 2249 2250 /* Is the source unspecified or mapped? 
*/ 2251 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2252 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2253 ip1dbg(("ip_bind_v6: " 2254 "dst is mapped, but not the src\n")); 2255 goto bad_addr; 2256 } 2257 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2258 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2259 2260 /* 2261 * XXX Fix needed. Need to pass ipsec_policy_set 2262 * instead of B_FALSE. 2263 */ 2264 2265 /* Always verify destination reachability. */ 2266 error = ip_bind_connected(connp, mp, &v4src, lport, 2267 v4dst, fport, ire_requested, ipsec_policy_set, 2268 B_TRUE, B_TRUE); 2269 if (error != 0) 2270 goto bad_addr; 2271 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2272 connp->conn_pkt_isv6 = B_FALSE; 2273 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2274 ip1dbg(("ip_bind_v6: " 2275 "src is mapped, but not the dst\n")); 2276 goto bad_addr; 2277 } else { 2278 error = ip_bind_connected_v6(connp, mp, v6srcp, 2279 lport, v6dstp, ipp, fport, ire_requested, 2280 ipsec_policy_set, B_TRUE, verify_dst); 2281 if (error != 0) 2282 goto bad_addr; 2283 connp->conn_pkt_isv6 = B_TRUE; 2284 } 2285 } 2286 2287 /* Update conn_send and pktversion if v4/v6 changed */ 2288 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2289 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2290 } 2291 /* 2292 * Pass the IPSEC headers size in ire_ipsec_overhead. 2293 * We can't do this in ip_bind_insert_ire because the policy 2294 * may not have been inherited at that point in time and hence 2295 * conn_out_enforce_policy may not be set. 2296 */ 2297 mp1 = mp->b_cont; 2298 if (ire_requested && connp->conn_out_enforce_policy && 2299 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2300 ire_t *ire = (ire_t *)mp1->b_rptr; 2301 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2302 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2303 } 2304 2305 /* Send it home. 
*/ 2306 mp->b_datap->db_type = M_PCPROTO; 2307 tbr->PRIM_type = T_BIND_ACK; 2308 return (mp); 2309 2310 bad_addr: 2311 if (error == EINPROGRESS) 2312 return (NULL); 2313 if (error > 0) 2314 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2315 else 2316 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2317 return (mp); 2318 } 2319 2320 /* 2321 * Here address is verified to be a valid local address. 2322 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2323 * address is also considered a valid local address. 2324 * In the case of a multicast address, however, the 2325 * upper protocol is expected to reset the src address 2326 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2327 * no packets are emitted with multicast address as 2328 * source address. 2329 * The addresses valid for bind are: 2330 * (1) - in6addr_any 2331 * (2) - IP address of an UP interface 2332 * (3) - IP address of a DOWN interface 2333 * (4) - a multicast address. In this case 2334 * the conn will only receive packets destined to 2335 * the specified multicast address. Note: the 2336 * application still has to issue an 2337 * IPV6_JOIN_GROUP socket option. 2338 * 2339 * In all the above cases, the bound address must be valid in the current zone. 2340 * When the address is loopback or multicast, there might be many matching IREs 2341 * so bind has to look up based on the zone. 2342 */ 2343 static int 2344 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2345 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2346 boolean_t fanout_insert) 2347 { 2348 int error = 0; 2349 ire_t *src_ire = NULL; 2350 ipif_t *ipif = NULL; 2351 mblk_t *policy_mp; 2352 zoneid_t zoneid; 2353 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2354 2355 if (ipsec_policy_set) 2356 policy_mp = mp->b_cont; 2357 2358 /* 2359 * If it was previously connected, conn_fully_bound would have 2360 * been set. 
2361 */ 2362 connp->conn_fully_bound = B_FALSE; 2363 2364 zoneid = connp->conn_zoneid; 2365 2366 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2367 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2368 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2369 /* 2370 * If an address other than in6addr_any is requested, 2371 * we verify that it is a valid address for bind 2372 * Note: Following code is in if-else-if form for 2373 * readability compared to a condition check. 2374 */ 2375 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2376 if (IRE_IS_LOCAL(src_ire)) { 2377 /* 2378 * (2) Bind to address of local UP interface 2379 */ 2380 ipif = src_ire->ire_ipif; 2381 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2382 ipif_t *multi_ipif = NULL; 2383 ire_t *save_ire; 2384 /* 2385 * (4) bind to multicast address. 2386 * Fake out the IRE returned to upper 2387 * layer to be a broadcast IRE in 2388 * ip_bind_insert_ire_v6(). 2389 * Pass other information that matches 2390 * the ipif (e.g. the source address). 
2391 * conn_multicast_ill is only used for 2392 * IPv6 packets 2393 */ 2394 mutex_enter(&connp->conn_lock); 2395 if (connp->conn_multicast_ill != NULL) { 2396 (void) ipif_lookup_zoneid( 2397 connp->conn_multicast_ill, zoneid, 0, 2398 &multi_ipif); 2399 } else { 2400 /* 2401 * Look for default like 2402 * ip_wput_v6 2403 */ 2404 multi_ipif = ipif_lookup_group_v6( 2405 &ipv6_unspecified_group, zoneid, ipst); 2406 } 2407 mutex_exit(&connp->conn_lock); 2408 save_ire = src_ire; 2409 src_ire = NULL; 2410 if (multi_ipif == NULL || !ire_requested || 2411 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2412 src_ire = save_ire; 2413 error = EADDRNOTAVAIL; 2414 } else { 2415 ASSERT(src_ire != NULL); 2416 if (save_ire != NULL) 2417 ire_refrele(save_ire); 2418 } 2419 if (multi_ipif != NULL) 2420 ipif_refrele(multi_ipif); 2421 } else { 2422 *mp->b_wptr++ = (char)connp->conn_ulp; 2423 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2424 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2425 ipst); 2426 if (ipif == NULL) { 2427 if (error == EINPROGRESS) { 2428 if (src_ire != NULL) 2429 ire_refrele(src_ire); 2430 return (error); 2431 } 2432 /* 2433 * Not a valid address for bind 2434 */ 2435 error = EADDRNOTAVAIL; 2436 } else { 2437 ipif_refrele(ipif); 2438 } 2439 /* 2440 * Just to keep it consistent with the processing in 2441 * ip_bind_v6(). 2442 */ 2443 mp->b_wptr--; 2444 } 2445 2446 if (error != 0) { 2447 /* Red Alert! Attempting to be a bogon! */ 2448 if (ip_debug > 2) { 2449 /* ip1dbg */ 2450 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2451 " address %s\n", AF_INET6, v6src); 2452 } 2453 goto bad_addr; 2454 } 2455 } 2456 2457 /* 2458 * Allow setting new policies. For example, disconnects come 2459 * down as ipa_t bind. As we would have set conn_policy_cached 2460 * to B_TRUE before, we should set it to B_FALSE, so that policy 2461 * can change after the disconnect. 
2462 */ 2463 connp->conn_policy_cached = B_FALSE; 2464 2465 /* If not fanout_insert this was just an address verification */ 2466 if (fanout_insert) { 2467 /* 2468 * The addresses have been verified. Time to insert in 2469 * the correct fanout list. 2470 */ 2471 connp->conn_srcv6 = *v6src; 2472 connp->conn_remv6 = ipv6_all_zeros; 2473 connp->conn_lport = lport; 2474 connp->conn_fport = 0; 2475 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2476 } 2477 if (error == 0) { 2478 if (ire_requested) { 2479 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2480 ipst)) { 2481 error = -1; 2482 goto bad_addr; 2483 } 2484 } else if (ipsec_policy_set) { 2485 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2486 error = -1; 2487 goto bad_addr; 2488 } 2489 } 2490 } 2491 bad_addr: 2492 if (error != 0) { 2493 if (connp->conn_anon_port) { 2494 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2495 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2496 B_FALSE); 2497 } 2498 connp->conn_mlp_type = mlptSingle; 2499 } 2500 2501 if (src_ire != NULL) 2502 ire_refrele(src_ire); 2503 2504 if (ipsec_policy_set) { 2505 ASSERT(policy_mp != NULL); 2506 freeb(policy_mp); 2507 /* 2508 * As of now assume that nothing else accompanies 2509 * IPSEC_POLICY_SET. 
2510 */ 2511 mp->b_cont = NULL; 2512 } 2513 return (error); 2514 } 2515 2516 /* ARGSUSED */ 2517 static void 2518 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2519 void *dummy_arg) 2520 { 2521 conn_t *connp = NULL; 2522 t_scalar_t prim; 2523 2524 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2525 2526 if (CONN_Q(q)) 2527 connp = Q_TO_CONN(q); 2528 ASSERT(connp != NULL); 2529 2530 prim = ((union T_primitives *)mp->b_rptr)->type; 2531 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2532 2533 if (IPCL_IS_TCP(connp)) { 2534 /* Pass sticky_ipp for scope_id and pktinfo */ 2535 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2536 } else { 2537 /* For UDP and ICMP */ 2538 mp = ip_bind_v6(q, mp, connp, NULL); 2539 } 2540 if (mp != NULL) { 2541 if (IPCL_IS_TCP(connp)) { 2542 CONN_INC_REF(connp); 2543 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2544 connp, SQTAG_TCP_RPUTOTHER); 2545 } else if (IPCL_IS_UDP(connp)) { 2546 udp_resume_bind(connp, mp); 2547 } else { 2548 ASSERT(IPCL_IS_RAWIP(connp)); 2549 rawip_resume_bind(connp, mp); 2550 } 2551 } 2552 } 2553 2554 /* 2555 * Verify that both the source and destination addresses 2556 * are valid. If verify_dst, then destination address must also be reachable, 2557 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2558 * It takes ip6_pkt_t * as one of the arguments to determine correct 2559 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2560 * destination address. Note that parameter ipp is only useful for TCP connect 2561 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2562 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2563 * 2564 */ 2565 static int 2566 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2567 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2568 boolean_t ire_requested, boolean_t ipsec_policy_set, 2569 boolean_t fanout_insert, boolean_t verify_dst) 2570 { 2571 ire_t *src_ire; 2572 ire_t *dst_ire; 2573 int error = 0; 2574 int protocol; 2575 mblk_t *policy_mp; 2576 ire_t *sire = NULL; 2577 ire_t *md_dst_ire = NULL; 2578 ill_t *md_ill = NULL; 2579 ill_t *dst_ill = NULL; 2580 ipif_t *src_ipif = NULL; 2581 zoneid_t zoneid; 2582 boolean_t ill_held = B_FALSE; 2583 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2584 2585 src_ire = dst_ire = NULL; 2586 /* 2587 * NOTE: The protocol is beyond the wptr because that's how 2588 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2589 */ 2590 protocol = *mp->b_wptr & 0xFF; 2591 2592 /* 2593 * If we never got a disconnect before, clear it now. 2594 */ 2595 connp->conn_fully_bound = B_FALSE; 2596 2597 if (ipsec_policy_set) { 2598 policy_mp = mp->b_cont; 2599 } 2600 2601 zoneid = connp->conn_zoneid; 2602 2603 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2604 ipif_t *ipif; 2605 2606 /* 2607 * Use an "emulated" IRE_BROADCAST to tell the transport it 2608 * is a multicast. 2609 * Pass other information that matches 2610 * the ipif (e.g. the source address). 
2611 * 2612 * conn_multicast_ill is only used for IPv6 packets 2613 */ 2614 mutex_enter(&connp->conn_lock); 2615 if (connp->conn_multicast_ill != NULL) { 2616 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2617 zoneid, 0, &ipif); 2618 } else { 2619 /* Look for default like ip_wput_v6 */ 2620 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2621 } 2622 mutex_exit(&connp->conn_lock); 2623 if (ipif == NULL || !ire_requested || 2624 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2625 if (ipif != NULL) 2626 ipif_refrele(ipif); 2627 if (ip_debug > 2) { 2628 /* ip1dbg */ 2629 pr_addr_dbg("ip_bind_connected_v6: bad " 2630 "connected multicast %s\n", AF_INET6, 2631 v6dst); 2632 } 2633 error = ENETUNREACH; 2634 goto bad_addr; 2635 } 2636 if (ipif != NULL) 2637 ipif_refrele(ipif); 2638 } else { 2639 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2640 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2641 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2642 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2643 ipst); 2644 /* 2645 * We also prevent ire's with src address INADDR_ANY to 2646 * be used, which are created temporarily for 2647 * sending out packets from endpoints that have 2648 * conn_unspec_src set. 2649 */ 2650 if (dst_ire == NULL || 2651 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2652 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2653 /* 2654 * When verifying destination reachability, we always 2655 * complain. 2656 * 2657 * When not verifying destination reachability but we 2658 * found an IRE, i.e. the destination is reachable, 2659 * then the other tests still apply and we complain. 
2660 */ 2661 if (verify_dst || (dst_ire != NULL)) { 2662 if (ip_debug > 2) { 2663 /* ip1dbg */ 2664 pr_addr_dbg("ip_bind_connected_v6: bad" 2665 " connected dst %s\n", AF_INET6, 2666 v6dst); 2667 } 2668 if (dst_ire == NULL || 2669 !(dst_ire->ire_type & IRE_HOST)) { 2670 error = ENETUNREACH; 2671 } else { 2672 error = EHOSTUNREACH; 2673 } 2674 goto bad_addr; 2675 } 2676 } 2677 } 2678 2679 /* 2680 * We now know that routing will allow us to reach the destination. 2681 * Check whether Trusted Solaris policy allows communication with this 2682 * host, and pretend that the destination is unreachable if not. 2683 * 2684 * This is never a problem for TCP, since that transport is known to 2685 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2686 * handling. If the remote is unreachable, it will be detected at that 2687 * point, so there's no reason to check it here. 2688 * 2689 * Note that for sendto (and other datagram-oriented friends), this 2690 * check is done as part of the data path label computation instead. 2691 * The check here is just to make non-TCP connect() report the right 2692 * error. 2693 */ 2694 if (dst_ire != NULL && is_system_labeled() && 2695 !IPCL_IS_TCP(connp) && 2696 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2697 connp->conn_mac_exempt, ipst) != 0) { 2698 error = EHOSTUNREACH; 2699 if (ip_debug > 2) { 2700 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2701 AF_INET6, v6dst); 2702 } 2703 goto bad_addr; 2704 } 2705 2706 /* 2707 * If the app does a connect(), it means that it will most likely 2708 * send more than 1 packet to the destination. It makes sense 2709 * to clear the temporary flag. 
2710 */ 2711 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2712 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2713 irb_t *irb = dst_ire->ire_bucket; 2714 2715 rw_enter(&irb->irb_lock, RW_WRITER); 2716 /* 2717 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2718 * the lock in order to guarantee irb_tmp_ire_cnt. 2719 */ 2720 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2721 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2722 irb->irb_tmp_ire_cnt--; 2723 } 2724 rw_exit(&irb->irb_lock); 2725 } 2726 2727 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2728 2729 /* 2730 * See if we should notify ULP about MDT; we do this whether or not 2731 * ire_requested is TRUE, in order to handle active connects; MDT 2732 * eligibility tests for passive connects are handled separately 2733 * through tcp_adapt_ire(). We do this before the source address 2734 * selection, because dst_ire may change after a call to 2735 * ipif_select_source_v6(). This is a best-effort check, as the 2736 * packet for this connection may not actually go through 2737 * dst_ire->ire_stq, and the exact IRE can only be known after 2738 * calling ip_newroute_v6(). This is why we further check on the 2739 * IRE during Multidata packet transmission in tcp_multisend(). 
2740 */ 2741 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2742 dst_ire != NULL && 2743 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2744 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2745 ILL_MDT_CAPABLE(md_ill)) { 2746 md_dst_ire = dst_ire; 2747 IRE_REFHOLD(md_dst_ire); 2748 } 2749 2750 if (dst_ire != NULL && 2751 dst_ire->ire_type == IRE_LOCAL && 2752 dst_ire->ire_zoneid != zoneid && 2753 dst_ire->ire_zoneid != ALL_ZONES) { 2754 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2755 zoneid, 0, NULL, 2756 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2757 MATCH_IRE_RJ_BHOLE, ipst); 2758 if (src_ire == NULL) { 2759 error = EHOSTUNREACH; 2760 goto bad_addr; 2761 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2762 if (!(src_ire->ire_type & IRE_HOST)) 2763 error = ENETUNREACH; 2764 else 2765 error = EHOSTUNREACH; 2766 goto bad_addr; 2767 } 2768 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2769 src_ipif = src_ire->ire_ipif; 2770 ipif_refhold(src_ipif); 2771 *v6src = src_ipif->ipif_v6lcl_addr; 2772 } 2773 ire_refrele(src_ire); 2774 src_ire = NULL; 2775 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2776 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2777 *v6src = sire->ire_src_addr_v6; 2778 ire_refrele(dst_ire); 2779 dst_ire = sire; 2780 sire = NULL; 2781 } else if (dst_ire->ire_type == IRE_CACHE && 2782 (dst_ire->ire_flags & RTF_SETSRC)) { 2783 ASSERT(dst_ire->ire_zoneid == zoneid || 2784 dst_ire->ire_zoneid == ALL_ZONES); 2785 *v6src = dst_ire->ire_src_addr_v6; 2786 } else { 2787 /* 2788 * Pick a source address so that a proper inbound load 2789 * spreading would happen. Use dst_ill specified by the 2790 * app. when socket option or scopeid is set. 
2791 */ 2792 int err; 2793 2794 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2795 uint_t if_index; 2796 2797 /* 2798 * Scope id or IPV6_PKTINFO 2799 */ 2800 2801 if_index = ipp->ipp_ifindex; 2802 dst_ill = ill_lookup_on_ifindex( 2803 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2804 ipst); 2805 if (dst_ill == NULL) { 2806 ip1dbg(("ip_bind_connected_v6:" 2807 " bad ifindex %d\n", if_index)); 2808 error = EADDRNOTAVAIL; 2809 goto bad_addr; 2810 } 2811 ill_held = B_TRUE; 2812 } else if (connp->conn_outgoing_ill != NULL) { 2813 /* 2814 * For IPV6_BOUND_IF socket option, 2815 * conn_outgoing_ill should be set 2816 * already in TCP or UDP/ICMP. 2817 */ 2818 dst_ill = conn_get_held_ill(connp, 2819 &connp->conn_outgoing_ill, &err); 2820 if (err == ILL_LOOKUP_FAILED) { 2821 ip1dbg(("ip_bind_connected_v6:" 2822 "no ill for bound_if\n")); 2823 error = EADDRNOTAVAIL; 2824 goto bad_addr; 2825 } 2826 ill_held = B_TRUE; 2827 } else if (dst_ire->ire_stq != NULL) { 2828 /* No need to hold ill here */ 2829 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2830 } else { 2831 /* No need to hold ill here */ 2832 dst_ill = dst_ire->ire_ipif->ipif_ill; 2833 } 2834 if (!ip6_asp_can_lookup(ipst)) { 2835 *mp->b_wptr++ = (char)protocol; 2836 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2837 ip_bind_connected_resume_v6); 2838 error = EINPROGRESS; 2839 goto refrele_and_quit; 2840 } 2841 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2842 RESTRICT_TO_NONE, connp->conn_src_preferences, 2843 zoneid); 2844 ip6_asp_table_refrele(ipst); 2845 if (src_ipif == NULL) { 2846 pr_addr_dbg("ip_bind_connected_v6: " 2847 "no usable source address for " 2848 "connection to %s\n", AF_INET6, v6dst); 2849 error = EADDRNOTAVAIL; 2850 goto bad_addr; 2851 } 2852 *v6src = src_ipif->ipif_v6lcl_addr; 2853 } 2854 } 2855 2856 /* 2857 * We do ire_route_lookup_v6() here (and not an interface lookup) 2858 * as we assert that v6src should only come from an 2859 * UP interface for hard binding. 
2860 */ 2861 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2862 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2863 2864 /* src_ire must be a local|loopback */ 2865 if (!IRE_IS_LOCAL(src_ire)) { 2866 if (ip_debug > 2) { 2867 /* ip1dbg */ 2868 pr_addr_dbg("ip_bind_connected_v6: bad " 2869 "connected src %s\n", AF_INET6, v6src); 2870 } 2871 error = EADDRNOTAVAIL; 2872 goto bad_addr; 2873 } 2874 2875 /* 2876 * If the source address is a loopback address, the 2877 * destination had best be local or multicast. 2878 * The transports that can't handle multicast will reject 2879 * those addresses. 2880 */ 2881 if (src_ire->ire_type == IRE_LOOPBACK && 2882 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2883 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2884 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2885 error = -1; 2886 goto bad_addr; 2887 } 2888 /* 2889 * Allow setting new policies. For example, disconnects come 2890 * down as ipa_t bind. As we would have set conn_policy_cached 2891 * to B_TRUE before, we should set it to B_FALSE, so that policy 2892 * can change after the disconnect. 2893 */ 2894 connp->conn_policy_cached = B_FALSE; 2895 2896 /* 2897 * The addresses have been verified. Initialize the conn 2898 * before calling the policy as they expect the conns 2899 * initialized. 2900 */ 2901 connp->conn_srcv6 = *v6src; 2902 connp->conn_remv6 = *v6dst; 2903 connp->conn_lport = lport; 2904 connp->conn_fport = fport; 2905 2906 ASSERT(!(ipsec_policy_set && ire_requested)); 2907 if (ire_requested) { 2908 iulp_t *ulp_info = NULL; 2909 2910 /* 2911 * Note that sire will not be NULL if this is an off-link 2912 * connection and there is not cache for that dest yet. 2913 * 2914 * XXX Because of an existing bug, if there are multiple 2915 * default routes, the IRE returned now may not be the actual 2916 * default route used (default routes are chosen in a 2917 * round robin fashion). 
So if the metrics for different 2918 * default routes are different, we may return the wrong 2919 * metrics. This will not be a problem if the existing 2920 * bug is fixed. 2921 */ 2922 if (sire != NULL) 2923 ulp_info = &(sire->ire_uinfo); 2924 2925 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2926 ipst)) { 2927 error = -1; 2928 goto bad_addr; 2929 } 2930 } else if (ipsec_policy_set) { 2931 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2932 error = -1; 2933 goto bad_addr; 2934 } 2935 } 2936 2937 /* 2938 * Cache IPsec policy in this conn. If we have per-socket policy, 2939 * we'll cache that. If we don't, we'll inherit global policy. 2940 * 2941 * We can't insert until the conn reflects the policy. Note that 2942 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2943 * connections where we don't have a policy. This is to prevent 2944 * global policy lookups in the inbound path. 2945 * 2946 * If we insert before we set conn_policy_cached, 2947 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2948 * because global policy cound be non-empty. We normally call 2949 * ipsec_check_policy() for conn_policy_cached connections only if 2950 * conn_in_enforce_policy is set. But in this case, 2951 * conn_policy_cached can get set anytime since we made the 2952 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2953 * is called, which will make the above assumption false. Thus, we 2954 * need to insert after we set conn_policy_cached. 2955 */ 2956 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2957 goto bad_addr; 2958 2959 /* If not fanout_insert this was just an address verification */ 2960 if (fanout_insert) { 2961 /* 2962 * The addresses have been verified. Time to insert in 2963 * the correct fanout list. 2964 */ 2965 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2966 connp->conn_ports, 2967 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2968 } 2969 if (error == 0) { 2970 connp->conn_fully_bound = B_TRUE; 2971 /* 2972 * Our initial checks for MDT have passed; the IRE is not 2973 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2974 * be supporting MDT. Pass the IRE, IPC and ILL into 2975 * ip_mdinfo_return(), which performs further checks 2976 * against them and upon success, returns the MDT info 2977 * mblk which we will attach to the bind acknowledgment. 2978 */ 2979 if (md_dst_ire != NULL) { 2980 mblk_t *mdinfo_mp; 2981 2982 ASSERT(md_ill != NULL); 2983 ASSERT(md_ill->ill_mdt_capab != NULL); 2984 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2985 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2986 linkb(mp, mdinfo_mp); 2987 } 2988 } 2989 bad_addr: 2990 if (ipsec_policy_set) { 2991 ASSERT(policy_mp != NULL); 2992 freeb(policy_mp); 2993 /* 2994 * As of now assume that nothing else accompanies 2995 * IPSEC_POLICY_SET. 2996 */ 2997 mp->b_cont = NULL; 2998 } 2999 refrele_and_quit: 3000 if (src_ire != NULL) 3001 IRE_REFRELE(src_ire); 3002 if (dst_ire != NULL) 3003 IRE_REFRELE(dst_ire); 3004 if (sire != NULL) 3005 IRE_REFRELE(sire); 3006 if (src_ipif != NULL) 3007 ipif_refrele(src_ipif); 3008 if (md_dst_ire != NULL) 3009 IRE_REFRELE(md_dst_ire); 3010 if (ill_held && dst_ill != NULL) 3011 ill_refrele(dst_ill); 3012 return (error); 3013 } 3014 3015 /* 3016 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3017 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3018 */ 3019 /* ARGSUSED4 */ 3020 static boolean_t 3021 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3022 iulp_t *ulp_info, ip_stack_t *ipst) 3023 { 3024 mblk_t *mp1; 3025 ire_t *ret_ire; 3026 3027 mp1 = mp->b_cont; 3028 ASSERT(mp1 != NULL); 3029 3030 if (ire != NULL) { 3031 /* 3032 * mp1 initialized above to IRE_DB_REQ_TYPE 3033 * appended mblk. Its <upper protocol>'s 3034 * job to make sure there is room. 
3035 */ 3036 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3037 return (B_FALSE); 3038 3039 mp1->b_datap->db_type = IRE_DB_TYPE; 3040 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3041 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3042 ret_ire = (ire_t *)mp1->b_rptr; 3043 if (IN6_IS_ADDR_MULTICAST(dst) || 3044 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3045 ret_ire->ire_type = IRE_BROADCAST; 3046 ret_ire->ire_addr_v6 = *dst; 3047 } 3048 if (ulp_info != NULL) { 3049 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3050 sizeof (iulp_t)); 3051 } 3052 ret_ire->ire_mp = mp1; 3053 } else { 3054 /* 3055 * No IRE was found. Remove IRE mblk. 3056 */ 3057 mp->b_cont = mp1->b_cont; 3058 freeb(mp1); 3059 } 3060 return (B_TRUE); 3061 } 3062 3063 /* 3064 * Add an ip6i_t header to the front of the mblk. 3065 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3066 * Returns NULL if allocation fails (and frees original message). 3067 * Used in outgoing path when going through ip_newroute_*v6(). 3068 * Used in incoming path to pass ifindex to transports. 
3069 */ 3070 mblk_t * 3071 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3072 { 3073 mblk_t *mp1; 3074 ip6i_t *ip6i; 3075 ip6_t *ip6h; 3076 3077 ip6h = (ip6_t *)mp->b_rptr; 3078 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3079 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3080 mp->b_datap->db_ref > 1) { 3081 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3082 if (mp1 == NULL) { 3083 freemsg(mp); 3084 return (NULL); 3085 } 3086 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3087 mp1->b_cont = mp; 3088 mp = mp1; 3089 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3090 } 3091 mp->b_rptr = (uchar_t *)ip6i; 3092 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3093 ip6i->ip6i_nxt = IPPROTO_RAW; 3094 if (ill != NULL) { 3095 ip6i->ip6i_flags = IP6I_IFINDEX; 3096 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3097 } else { 3098 ip6i->ip6i_flags = 0; 3099 } 3100 ip6i->ip6i_nexthop = *dst; 3101 return (mp); 3102 } 3103 3104 /* 3105 * Handle protocols with which IP is less intimate. There 3106 * can be more than one stream bound to a particular 3107 * protocol. When this is the case, normally each one gets a copy 3108 * of any incoming packets. 3109 * However, if the packet was tunneled and not multicast we only send to it 3110 * the first match. 3111 * 3112 * Zones notes: 3113 * Packets will be distributed to streams in all zones. This is really only 3114 * useful for ICMPv6 as only applications in the global zone can create raw 3115 * sockets for other protocols. 
3116 */ 3117 static void 3118 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3119 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3120 boolean_t mctl_present, zoneid_t zoneid) 3121 { 3122 queue_t *rq; 3123 mblk_t *mp1, *first_mp1; 3124 in6_addr_t dst = ip6h->ip6_dst; 3125 in6_addr_t src = ip6h->ip6_src; 3126 boolean_t one_only; 3127 mblk_t *first_mp = mp; 3128 boolean_t secure, shared_addr; 3129 conn_t *connp, *first_connp, *next_connp; 3130 connf_t *connfp; 3131 ip_stack_t *ipst = inill->ill_ipst; 3132 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3133 3134 if (mctl_present) { 3135 mp = first_mp->b_cont; 3136 secure = ipsec_in_is_secure(first_mp); 3137 ASSERT(mp != NULL); 3138 } else { 3139 secure = B_FALSE; 3140 } 3141 3142 /* 3143 * If the packet was tunneled and not multicast we only send to it 3144 * the first match. 3145 */ 3146 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3147 !IN6_IS_ADDR_MULTICAST(&dst)); 3148 3149 shared_addr = (zoneid == ALL_ZONES); 3150 if (shared_addr) { 3151 /* 3152 * We don't allow multilevel ports for raw IP, so no need to 3153 * check for that here. 3154 */ 3155 zoneid = tsol_packet_to_zoneid(mp); 3156 } 3157 3158 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3159 mutex_enter(&connfp->connf_lock); 3160 connp = connfp->connf_head; 3161 for (connp = connfp->connf_head; connp != NULL; 3162 connp = connp->conn_next) { 3163 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3164 zoneid) && 3165 (!is_system_labeled() || 3166 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3167 connp))) 3168 break; 3169 } 3170 3171 if (connp == NULL || connp->conn_upq == NULL) { 3172 /* 3173 * No one bound to this port. Is 3174 * there a client that wants all 3175 * unclaimed datagrams? 
3176 */ 3177 mutex_exit(&connfp->connf_lock); 3178 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3179 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3180 nexthdr_offset, mctl_present, zoneid, ipst)) { 3181 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3182 } 3183 3184 return; 3185 } 3186 3187 CONN_INC_REF(connp); 3188 first_connp = connp; 3189 3190 /* 3191 * XXX: Fix the multiple protocol listeners case. We should not 3192 * be walking the conn->next list here. 3193 */ 3194 if (one_only) { 3195 /* 3196 * Only send message to one tunnel driver by immediately 3197 * terminating the loop. 3198 */ 3199 connp = NULL; 3200 } else { 3201 connp = connp->conn_next; 3202 3203 } 3204 for (;;) { 3205 while (connp != NULL) { 3206 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3207 flags, zoneid) && 3208 (!is_system_labeled() || 3209 tsol_receive_local(mp, &dst, IPV6_VERSION, 3210 shared_addr, connp))) 3211 break; 3212 connp = connp->conn_next; 3213 } 3214 3215 /* 3216 * Just copy the data part alone. The mctl part is 3217 * needed just for verifying policy and it is never 3218 * sent up. 3219 */ 3220 if (connp == NULL || connp->conn_upq == NULL || 3221 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3222 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3223 /* 3224 * No more intested clients or memory 3225 * allocation failed 3226 */ 3227 connp = first_connp; 3228 break; 3229 } 3230 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3231 CONN_INC_REF(connp); 3232 mutex_exit(&connfp->connf_lock); 3233 rq = connp->conn_rq; 3234 /* 3235 * For link-local always add ifindex so that transport can set 3236 * sin6_scope_id. Avoid it for ICMP error fanout. 
3237 */ 3238 if ((connp->conn_ip_recvpktinfo || 3239 IN6_IS_ADDR_LINKLOCAL(&src)) && 3240 (flags & IP_FF_IPINFO)) { 3241 /* Add header */ 3242 mp1 = ip_add_info_v6(mp1, inill, &dst); 3243 } 3244 if (mp1 == NULL) { 3245 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3246 } else if (!canputnext(rq)) { 3247 if (flags & IP_FF_RAWIP) { 3248 BUMP_MIB(ill->ill_ip_mib, 3249 rawipIfStatsInOverflows); 3250 } else { 3251 BUMP_MIB(ill->ill_icmp6_mib, 3252 ipv6IfIcmpInOverflows); 3253 } 3254 3255 freemsg(mp1); 3256 } else { 3257 /* 3258 * Don't enforce here if we're a tunnel - let "tun" do 3259 * it instead. 3260 */ 3261 if (!IPCL_IS_IPTUN(connp) && 3262 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3263 secure)) { 3264 first_mp1 = ipsec_check_inbound_policy 3265 (first_mp1, connp, NULL, ip6h, 3266 mctl_present); 3267 } 3268 if (first_mp1 != NULL) { 3269 if (mctl_present) 3270 freeb(first_mp1); 3271 BUMP_MIB(ill->ill_ip_mib, 3272 ipIfStatsHCInDelivers); 3273 (connp->conn_recv)(connp, mp1, NULL); 3274 } 3275 } 3276 mutex_enter(&connfp->connf_lock); 3277 /* Follow the next pointer before releasing the conn. */ 3278 next_connp = connp->conn_next; 3279 CONN_DEC_REF(connp); 3280 connp = next_connp; 3281 } 3282 3283 /* Last one. Send it upstream. */ 3284 mutex_exit(&connfp->connf_lock); 3285 3286 /* Initiate IPPF processing */ 3287 if (IP6_IN_IPP(flags, ipst)) { 3288 uint_t ifindex; 3289 3290 mutex_enter(&ill->ill_lock); 3291 ifindex = ill->ill_phyint->phyint_ifindex; 3292 mutex_exit(&ill->ill_lock); 3293 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3294 if (mp == NULL) { 3295 CONN_DEC_REF(connp); 3296 if (mctl_present) 3297 freeb(first_mp); 3298 return; 3299 } 3300 } 3301 3302 /* 3303 * For link-local always add ifindex so that transport can set 3304 * sin6_scope_id. Avoid it for ICMP error fanout. 
3305 */ 3306 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3307 (flags & IP_FF_IPINFO)) { 3308 /* Add header */ 3309 mp = ip_add_info_v6(mp, inill, &dst); 3310 if (mp == NULL) { 3311 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3312 CONN_DEC_REF(connp); 3313 if (mctl_present) 3314 freeb(first_mp); 3315 return; 3316 } else if (mctl_present) { 3317 first_mp->b_cont = mp; 3318 } else { 3319 first_mp = mp; 3320 } 3321 } 3322 3323 rq = connp->conn_rq; 3324 if (!canputnext(rq)) { 3325 if (flags & IP_FF_RAWIP) { 3326 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3327 } else { 3328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3329 } 3330 3331 freemsg(first_mp); 3332 } else { 3333 if (IPCL_IS_IPTUN(connp)) { 3334 /* 3335 * Tunneled packet. We enforce policy in the tunnel 3336 * module itself. 3337 * 3338 * Send the WHOLE packet up (incl. IPSEC_IN) without 3339 * a policy check. 3340 */ 3341 putnext(rq, first_mp); 3342 CONN_DEC_REF(connp); 3343 return; 3344 } 3345 /* 3346 * Don't enforce here if we're a tunnel - let "tun" do 3347 * it instead. 3348 */ 3349 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3350 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3351 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3352 NULL, ip6h, mctl_present); 3353 if (first_mp == NULL) { 3354 CONN_DEC_REF(connp); 3355 return; 3356 } 3357 } 3358 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3359 (connp->conn_recv)(connp, mp, NULL); 3360 if (mctl_present) 3361 freeb(first_mp); 3362 } 3363 CONN_DEC_REF(connp); 3364 } 3365 3366 /* 3367 * Send an ICMP error after patching up the packet appropriately. Returns 3368 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3369 */ 3370 int 3371 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3372 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3373 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3374 { 3375 ip6_t *ip6h; 3376 mblk_t *first_mp; 3377 boolean_t secure; 3378 unsigned char db_type; 3379 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3380 3381 first_mp = mp; 3382 if (mctl_present) { 3383 mp = mp->b_cont; 3384 secure = ipsec_in_is_secure(first_mp); 3385 ASSERT(mp != NULL); 3386 } else { 3387 /* 3388 * If this is an ICMP error being reported - which goes 3389 * up as M_CTLs, we need to convert them to M_DATA till 3390 * we finish checking with global policy because 3391 * ipsec_check_global_policy() assumes M_DATA as clear 3392 * and M_CTL as secure. 3393 */ 3394 db_type = mp->b_datap->db_type; 3395 mp->b_datap->db_type = M_DATA; 3396 secure = B_FALSE; 3397 } 3398 /* 3399 * We are generating an icmp error for some inbound packet. 3400 * Called from all ip_fanout_(udp, tcp, proto) functions. 3401 * Before we generate an error, check with global policy 3402 * to see whether this is allowed to enter the system. As 3403 * there is no "conn", we are checking with global policy. 
3404 */ 3405 ip6h = (ip6_t *)mp->b_rptr; 3406 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3407 first_mp = ipsec_check_global_policy(first_mp, NULL, 3408 NULL, ip6h, mctl_present, ipst->ips_netstack); 3409 if (first_mp == NULL) 3410 return (0); 3411 } 3412 3413 if (!mctl_present) 3414 mp->b_datap->db_type = db_type; 3415 3416 if (flags & IP_FF_SEND_ICMP) { 3417 if (flags & IP_FF_HDR_COMPLETE) { 3418 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3419 freemsg(first_mp); 3420 return (1); 3421 } 3422 } 3423 switch (icmp_type) { 3424 case ICMP6_DST_UNREACH: 3425 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3426 B_FALSE, B_FALSE, zoneid, ipst); 3427 break; 3428 case ICMP6_PARAM_PROB: 3429 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3430 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3431 break; 3432 default: 3433 #ifdef DEBUG 3434 panic("ip_fanout_send_icmp_v6: wrong type"); 3435 /*NOTREACHED*/ 3436 #else 3437 freemsg(first_mp); 3438 break; 3439 #endif 3440 } 3441 } else { 3442 freemsg(first_mp); 3443 return (0); 3444 } 3445 3446 return (1); 3447 } 3448 3449 3450 /* 3451 * Fanout for TCP packets 3452 * The caller puts <fport, lport> in the ports parameter. 
3453 */ 3454 static void 3455 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3456 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3457 { 3458 mblk_t *first_mp; 3459 boolean_t secure; 3460 conn_t *connp; 3461 tcph_t *tcph; 3462 boolean_t syn_present = B_FALSE; 3463 ip_stack_t *ipst = inill->ill_ipst; 3464 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3465 3466 first_mp = mp; 3467 if (mctl_present) { 3468 mp = first_mp->b_cont; 3469 secure = ipsec_in_is_secure(first_mp); 3470 ASSERT(mp != NULL); 3471 } else { 3472 secure = B_FALSE; 3473 } 3474 3475 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3476 3477 if (connp == NULL || 3478 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3479 /* 3480 * No hard-bound match. Send Reset. 3481 */ 3482 dblk_t *dp = mp->b_datap; 3483 uint32_t ill_index; 3484 3485 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3486 3487 /* Initiate IPPf processing, if needed. */ 3488 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3489 (flags & IP6_NO_IPPOLICY)) { 3490 ill_index = ill->ill_phyint->phyint_ifindex; 3491 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3492 if (first_mp == NULL) { 3493 if (connp != NULL) 3494 CONN_DEC_REF(connp); 3495 return; 3496 } 3497 } 3498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3499 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3500 ipst->ips_netstack->netstack_tcp, connp); 3501 if (connp != NULL) 3502 CONN_DEC_REF(connp); 3503 return; 3504 } 3505 3506 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3507 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3508 if (connp->conn_flags & IPCL_TCP) { 3509 squeue_t *sqp; 3510 3511 /* 3512 * For fused tcp loopback, assign the eager's 3513 * squeue to be that of the active connect's. 
3514 */ 3515 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3516 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3517 !secure && 3518 !IP6_IN_IPP(flags, ipst)) { 3519 ASSERT(Q_TO_CONN(q) != NULL); 3520 sqp = Q_TO_CONN(q)->conn_sqp; 3521 } else { 3522 sqp = IP_SQUEUE_GET(lbolt); 3523 } 3524 3525 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3526 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3527 3528 /* 3529 * db_cksumstuff is unused in the incoming 3530 * path; Thus store the ifindex here. It will 3531 * be cleared in tcp_conn_create_v6(). 3532 */ 3533 DB_CKSUMSTUFF(mp) = 3534 (intptr_t)ill->ill_phyint->phyint_ifindex; 3535 syn_present = B_TRUE; 3536 } 3537 } 3538 3539 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3540 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3541 if ((flags & TH_RST) || (flags & TH_URG)) { 3542 CONN_DEC_REF(connp); 3543 freemsg(first_mp); 3544 return; 3545 } 3546 if (flags & TH_ACK) { 3547 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3548 ipst->ips_netstack->netstack_tcp, connp); 3549 CONN_DEC_REF(connp); 3550 return; 3551 } 3552 3553 CONN_DEC_REF(connp); 3554 freemsg(first_mp); 3555 return; 3556 } 3557 3558 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3559 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3560 NULL, ip6h, mctl_present); 3561 if (first_mp == NULL) { 3562 CONN_DEC_REF(connp); 3563 return; 3564 } 3565 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3566 ASSERT(syn_present); 3567 if (mctl_present) { 3568 ASSERT(first_mp != mp); 3569 first_mp->b_datap->db_struioflag |= 3570 STRUIO_POLICY; 3571 } else { 3572 ASSERT(first_mp == mp); 3573 mp->b_datap->db_struioflag &= 3574 ~STRUIO_EAGER; 3575 mp->b_datap->db_struioflag |= 3576 STRUIO_POLICY; 3577 } 3578 } else { 3579 /* 3580 * Discard first_mp early since we're dealing with a 3581 * fully-connected conn_t and tcp doesn't do policy in 3582 * this case. 
Also, if someone is bound to IPPROTO_TCP 3583 * over raw IP, they don't expect to see a M_CTL. 3584 */ 3585 if (mctl_present) { 3586 freeb(first_mp); 3587 mctl_present = B_FALSE; 3588 } 3589 first_mp = mp; 3590 } 3591 } 3592 3593 /* Initiate IPPF processing */ 3594 if (IP6_IN_IPP(flags, ipst)) { 3595 uint_t ifindex; 3596 3597 mutex_enter(&ill->ill_lock); 3598 ifindex = ill->ill_phyint->phyint_ifindex; 3599 mutex_exit(&ill->ill_lock); 3600 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3601 if (mp == NULL) { 3602 CONN_DEC_REF(connp); 3603 if (mctl_present) { 3604 freeb(first_mp); 3605 } 3606 return; 3607 } else if (mctl_present) { 3608 /* 3609 * ip_add_info_v6 might return a new mp. 3610 */ 3611 ASSERT(first_mp != mp); 3612 first_mp->b_cont = mp; 3613 } else { 3614 first_mp = mp; 3615 } 3616 } 3617 3618 /* 3619 * For link-local always add ifindex so that TCP can bind to that 3620 * interface. Avoid it for ICMP error fanout. 3621 */ 3622 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3623 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3624 (flags & IP_FF_IPINFO))) { 3625 /* Add header */ 3626 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3627 if (mp == NULL) { 3628 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3629 CONN_DEC_REF(connp); 3630 if (mctl_present) 3631 freeb(first_mp); 3632 return; 3633 } else if (mctl_present) { 3634 ASSERT(first_mp != mp); 3635 first_mp->b_cont = mp; 3636 } else { 3637 first_mp = mp; 3638 } 3639 } 3640 3641 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3642 if (IPCL_IS_TCP(connp)) { 3643 (*ip_input_proc)(connp->conn_sqp, first_mp, 3644 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3645 } else { 3646 /* SOCK_RAW, IPPROTO_TCP case */ 3647 (connp->conn_recv)(connp, first_mp, NULL); 3648 CONN_DEC_REF(connp); 3649 } 3650 } 3651 3652 /* 3653 * Fanout for UDP packets. 3654 * The caller puts <fport, lport> in the ports parameter. 3655 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3656 * 3657 * If SO_REUSEADDR is set all multicast and broadcast packets 3658 * will be delivered to all streams bound to the same port. 3659 * 3660 * Zones notes: 3661 * Multicast packets will be distributed to streams in all zones. 3662 */ 3663 static void 3664 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3665 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3666 zoneid_t zoneid) 3667 { 3668 uint32_t dstport, srcport; 3669 in6_addr_t dst; 3670 mblk_t *first_mp; 3671 boolean_t secure; 3672 conn_t *connp; 3673 connf_t *connfp; 3674 conn_t *first_conn; 3675 conn_t *next_conn; 3676 mblk_t *mp1, *first_mp1; 3677 in6_addr_t src; 3678 boolean_t shared_addr; 3679 ip_stack_t *ipst = inill->ill_ipst; 3680 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3681 3682 first_mp = mp; 3683 if (mctl_present) { 3684 mp = first_mp->b_cont; 3685 secure = ipsec_in_is_secure(first_mp); 3686 ASSERT(mp != NULL); 3687 } else { 3688 secure = B_FALSE; 3689 } 3690 3691 /* Extract ports in net byte order */ 3692 dstport = htons(ntohl(ports) & 0xFFFF); 3693 srcport = htons(ntohl(ports) >> 16); 3694 dst = ip6h->ip6_dst; 3695 src = ip6h->ip6_src; 3696 3697 shared_addr = (zoneid == ALL_ZONES); 3698 if (shared_addr) { 3699 /* 3700 * No need to handle exclusive-stack zones since ALL_ZONES 3701 * only applies to the shared stack. 3702 */ 3703 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3704 /* 3705 * If no shared MLP is found, tsol_mlp_findzone returns 3706 * ALL_ZONES. In that case, we assume it's SLP, and 3707 * search for the zone based on the packet label. 3708 * That will also return ALL_ZONES on failure, but 3709 * we never allow conn_zoneid to be set to ALL_ZONES. 3710 */ 3711 if (zoneid == ALL_ZONES) 3712 zoneid = tsol_packet_to_zoneid(mp); 3713 } 3714 3715 /* Attempt to find a client stream based on destination port. 
*/ 3716 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3717 mutex_enter(&connfp->connf_lock); 3718 connp = connfp->connf_head; 3719 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3720 /* 3721 * Not multicast. Send to the one (first) client we find. 3722 */ 3723 while (connp != NULL) { 3724 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3725 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3726 conn_wantpacket_v6(connp, ill, ip6h, 3727 flags, zoneid)) { 3728 break; 3729 } 3730 connp = connp->conn_next; 3731 } 3732 if (connp == NULL || connp->conn_upq == NULL) 3733 goto notfound; 3734 3735 if (is_system_labeled() && 3736 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3737 connp)) 3738 goto notfound; 3739 3740 /* Found a client */ 3741 CONN_INC_REF(connp); 3742 mutex_exit(&connfp->connf_lock); 3743 3744 if (CONN_UDP_FLOWCTLD(connp)) { 3745 freemsg(first_mp); 3746 CONN_DEC_REF(connp); 3747 return; 3748 } 3749 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3750 first_mp = ipsec_check_inbound_policy(first_mp, 3751 connp, NULL, ip6h, mctl_present); 3752 if (first_mp == NULL) { 3753 CONN_DEC_REF(connp); 3754 return; 3755 } 3756 } 3757 /* Initiate IPPF processing */ 3758 if (IP6_IN_IPP(flags, ipst)) { 3759 uint_t ifindex; 3760 3761 mutex_enter(&ill->ill_lock); 3762 ifindex = ill->ill_phyint->phyint_ifindex; 3763 mutex_exit(&ill->ill_lock); 3764 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3765 if (mp == NULL) { 3766 CONN_DEC_REF(connp); 3767 if (mctl_present) 3768 freeb(first_mp); 3769 return; 3770 } 3771 } 3772 /* 3773 * For link-local always add ifindex so that 3774 * transport can set sin6_scope_id. Avoid it for 3775 * ICMP error fanout. 
3776 */ 3777 if ((connp->conn_ip_recvpktinfo || 3778 IN6_IS_ADDR_LINKLOCAL(&src)) && 3779 (flags & IP_FF_IPINFO)) { 3780 /* Add header */ 3781 mp = ip_add_info_v6(mp, inill, &dst); 3782 if (mp == NULL) { 3783 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3784 CONN_DEC_REF(connp); 3785 if (mctl_present) 3786 freeb(first_mp); 3787 return; 3788 } else if (mctl_present) { 3789 first_mp->b_cont = mp; 3790 } else { 3791 first_mp = mp; 3792 } 3793 } 3794 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3795 3796 /* Send it upstream */ 3797 (connp->conn_recv)(connp, mp, NULL); 3798 3799 IP6_STAT(ipst, ip6_udp_fannorm); 3800 CONN_DEC_REF(connp); 3801 if (mctl_present) 3802 freeb(first_mp); 3803 return; 3804 } 3805 3806 while (connp != NULL) { 3807 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3808 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3809 (!is_system_labeled() || 3810 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3811 connp))) 3812 break; 3813 connp = connp->conn_next; 3814 } 3815 3816 if (connp == NULL || connp->conn_upq == NULL) 3817 goto notfound; 3818 3819 first_conn = connp; 3820 3821 CONN_INC_REF(connp); 3822 connp = connp->conn_next; 3823 for (;;) { 3824 while (connp != NULL) { 3825 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3826 src) && conn_wantpacket_v6(connp, ill, ip6h, 3827 flags, zoneid) && 3828 (!is_system_labeled() || 3829 tsol_receive_local(mp, &dst, IPV6_VERSION, 3830 shared_addr, connp))) 3831 break; 3832 connp = connp->conn_next; 3833 } 3834 /* 3835 * Just copy the data part alone. The mctl part is 3836 * needed just for verifying policy and it is never 3837 * sent up. 3838 */ 3839 if (connp == NULL || 3840 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3841 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3842 /* 3843 * No more interested clients or memory 3844 * allocation failed 3845 */ 3846 connp = first_conn; 3847 break; 3848 } 3849 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3850 CONN_INC_REF(connp); 3851 mutex_exit(&connfp->connf_lock); 3852 /* 3853 * For link-local always add ifindex so that transport 3854 * can set sin6_scope_id. Avoid it for ICMP error 3855 * fanout. 3856 */ 3857 if ((connp->conn_ip_recvpktinfo || 3858 IN6_IS_ADDR_LINKLOCAL(&src)) && 3859 (flags & IP_FF_IPINFO)) { 3860 /* Add header */ 3861 mp1 = ip_add_info_v6(mp1, inill, &dst); 3862 } 3863 /* mp1 could have changed */ 3864 if (mctl_present) 3865 first_mp1->b_cont = mp1; 3866 else 3867 first_mp1 = mp1; 3868 if (mp1 == NULL) { 3869 if (mctl_present) 3870 freeb(first_mp1); 3871 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3872 goto next_one; 3873 } 3874 if (CONN_UDP_FLOWCTLD(connp)) { 3875 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3876 freemsg(first_mp1); 3877 goto next_one; 3878 } 3879 3880 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3881 first_mp1 = ipsec_check_inbound_policy 3882 (first_mp1, connp, NULL, ip6h, 3883 mctl_present); 3884 } 3885 if (first_mp1 != NULL) { 3886 if (mctl_present) 3887 freeb(first_mp1); 3888 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3889 3890 /* Send it upstream */ 3891 (connp->conn_recv)(connp, mp1, NULL); 3892 } 3893 next_one: 3894 mutex_enter(&connfp->connf_lock); 3895 /* Follow the next pointer before releasing the conn. */ 3896 next_conn = connp->conn_next; 3897 IP6_STAT(ipst, ip6_udp_fanmb); 3898 CONN_DEC_REF(connp); 3899 connp = next_conn; 3900 } 3901 3902 /* Last one. Send it upstream. 
*/ 3903 mutex_exit(&connfp->connf_lock); 3904 3905 /* Initiate IPPF processing */ 3906 if (IP6_IN_IPP(flags, ipst)) { 3907 uint_t ifindex; 3908 3909 mutex_enter(&ill->ill_lock); 3910 ifindex = ill->ill_phyint->phyint_ifindex; 3911 mutex_exit(&ill->ill_lock); 3912 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3913 if (mp == NULL) { 3914 CONN_DEC_REF(connp); 3915 if (mctl_present) { 3916 freeb(first_mp); 3917 } 3918 return; 3919 } 3920 } 3921 3922 /* 3923 * For link-local always add ifindex so that transport can set 3924 * sin6_scope_id. Avoid it for ICMP error fanout. 3925 */ 3926 if ((connp->conn_ip_recvpktinfo || 3927 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3928 /* Add header */ 3929 mp = ip_add_info_v6(mp, inill, &dst); 3930 if (mp == NULL) { 3931 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3932 CONN_DEC_REF(connp); 3933 if (mctl_present) 3934 freeb(first_mp); 3935 return; 3936 } else if (mctl_present) { 3937 first_mp->b_cont = mp; 3938 } else { 3939 first_mp = mp; 3940 } 3941 } 3942 if (CONN_UDP_FLOWCTLD(connp)) { 3943 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3944 freemsg(mp); 3945 } else { 3946 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3947 first_mp = ipsec_check_inbound_policy(first_mp, 3948 connp, NULL, ip6h, mctl_present); 3949 if (first_mp == NULL) { 3950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3951 CONN_DEC_REF(connp); 3952 return; 3953 } 3954 } 3955 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3956 3957 /* Send it upstream */ 3958 (connp->conn_recv)(connp, mp, NULL); 3959 } 3960 IP6_STAT(ipst, ip6_udp_fanmb); 3961 CONN_DEC_REF(connp); 3962 if (mctl_present) 3963 freeb(first_mp); 3964 return; 3965 3966 notfound: 3967 mutex_exit(&connfp->connf_lock); 3968 /* 3969 * No one bound to this port. Is 3970 * there a client that wants all 3971 * unclaimed datagrams? 
3972 */ 3973 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3974 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3975 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3976 zoneid); 3977 } else { 3978 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3979 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3980 mctl_present, zoneid, ipst)) { 3981 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3982 } 3983 } 3984 } 3985 3986 /* 3987 * int ip_find_hdr_v6() 3988 * 3989 * This routine is used by the upper layer protocols and the IP tunnel 3990 * module to: 3991 * - Set extension header pointers to appropriate locations 3992 * - Determine IPv6 header length and return it 3993 * - Return a pointer to the last nexthdr value 3994 * 3995 * The caller must initialize ipp_fields. 3996 * 3997 * NOTE: If multiple extension headers of the same type are present, 3998 * ip_find_hdr_v6() will set the respective extension header pointers 3999 * to the first one that it encounters in the IPv6 header. It also 4000 * skips fragment headers. This routine deals with malformed packets 4001 * of various sorts in which case the returned length is up to the 4002 * malformed part. 4003 */ 4004 int 4005 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4006 { 4007 uint_t length, ehdrlen; 4008 uint8_t nexthdr; 4009 uint8_t *whereptr, *endptr; 4010 ip6_dest_t *tmpdstopts; 4011 ip6_rthdr_t *tmprthdr; 4012 ip6_hbh_t *tmphopopts; 4013 ip6_frag_t *tmpfraghdr; 4014 4015 length = IPV6_HDR_LEN; 4016 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4017 endptr = mp->b_wptr; 4018 4019 nexthdr = ip6h->ip6_nxt; 4020 while (whereptr < endptr) { 4021 /* Is there enough left for len + nexthdr? 
*/ 4022 if (whereptr + MIN_EHDR_LEN > endptr) 4023 goto done; 4024 4025 switch (nexthdr) { 4026 case IPPROTO_HOPOPTS: 4027 tmphopopts = (ip6_hbh_t *)whereptr; 4028 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4029 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4030 goto done; 4031 nexthdr = tmphopopts->ip6h_nxt; 4032 /* return only 1st hbh */ 4033 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4034 ipp->ipp_fields |= IPPF_HOPOPTS; 4035 ipp->ipp_hopopts = tmphopopts; 4036 ipp->ipp_hopoptslen = ehdrlen; 4037 } 4038 break; 4039 case IPPROTO_DSTOPTS: 4040 tmpdstopts = (ip6_dest_t *)whereptr; 4041 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4042 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4043 goto done; 4044 nexthdr = tmpdstopts->ip6d_nxt; 4045 /* 4046 * ipp_dstopts is set to the destination header after a 4047 * routing header. 4048 * Assume it is a post-rthdr destination header 4049 * and adjust when we find an rthdr. 4050 */ 4051 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4052 ipp->ipp_fields |= IPPF_DSTOPTS; 4053 ipp->ipp_dstopts = tmpdstopts; 4054 ipp->ipp_dstoptslen = ehdrlen; 4055 } 4056 break; 4057 case IPPROTO_ROUTING: 4058 tmprthdr = (ip6_rthdr_t *)whereptr; 4059 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4060 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4061 goto done; 4062 nexthdr = tmprthdr->ip6r_nxt; 4063 /* return only 1st rthdr */ 4064 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4065 ipp->ipp_fields |= IPPF_RTHDR; 4066 ipp->ipp_rthdr = tmprthdr; 4067 ipp->ipp_rthdrlen = ehdrlen; 4068 } 4069 /* 4070 * Make any destination header we've seen be a 4071 * pre-rthdr destination header. 
4072 */ 4073 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4074 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4075 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4076 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4077 ipp->ipp_dstopts = NULL; 4078 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4079 ipp->ipp_dstoptslen = 0; 4080 } 4081 break; 4082 case IPPROTO_FRAGMENT: 4083 tmpfraghdr = (ip6_frag_t *)whereptr; 4084 ehdrlen = sizeof (ip6_frag_t); 4085 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4086 goto done; 4087 nexthdr = tmpfraghdr->ip6f_nxt; 4088 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4089 ipp->ipp_fields |= IPPF_FRAGHDR; 4090 ipp->ipp_fraghdr = tmpfraghdr; 4091 ipp->ipp_fraghdrlen = ehdrlen; 4092 } 4093 break; 4094 case IPPROTO_NONE: 4095 default: 4096 goto done; 4097 } 4098 length += ehdrlen; 4099 whereptr += ehdrlen; 4100 } 4101 done: 4102 if (nexthdrp != NULL) 4103 *nexthdrp = nexthdr; 4104 return (length); 4105 } 4106 4107 int 4108 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4109 { 4110 ire_t *ire; 4111 4112 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4113 ire = ire_lookup_local_v6(zoneid, ipst); 4114 if (ire == NULL) { 4115 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4116 return (1); 4117 } 4118 ip6h->ip6_src = ire->ire_addr_v6; 4119 ire_refrele(ire); 4120 } 4121 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4122 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4123 return (0); 4124 } 4125 4126 /* 4127 * Try to determine where and what are the IPv6 header length and 4128 * pointer to nexthdr value for the upper layer protocol (or an 4129 * unknown next hdr). 4130 * 4131 * Parameters returns a pointer to the nexthdr value; 4132 * Must handle malformed packets of various sorts. 4133 * Function returns failure for malformed cases. 
4134 */ 4135 boolean_t 4136 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4137 uint8_t **nexthdrpp) 4138 { 4139 uint16_t length; 4140 uint_t ehdrlen; 4141 uint8_t *nexthdrp; 4142 uint8_t *whereptr; 4143 uint8_t *endptr; 4144 ip6_dest_t *desthdr; 4145 ip6_rthdr_t *rthdr; 4146 ip6_frag_t *fraghdr; 4147 4148 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4149 length = IPV6_HDR_LEN; 4150 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4151 endptr = mp->b_wptr; 4152 4153 nexthdrp = &ip6h->ip6_nxt; 4154 while (whereptr < endptr) { 4155 /* Is there enough left for len + nexthdr? */ 4156 if (whereptr + MIN_EHDR_LEN > endptr) 4157 break; 4158 4159 switch (*nexthdrp) { 4160 case IPPROTO_HOPOPTS: 4161 case IPPROTO_DSTOPTS: 4162 /* Assumes the headers are identical for hbh and dst */ 4163 desthdr = (ip6_dest_t *)whereptr; 4164 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4165 if ((uchar_t *)desthdr + ehdrlen > endptr) 4166 return (B_FALSE); 4167 nexthdrp = &desthdr->ip6d_nxt; 4168 break; 4169 case IPPROTO_ROUTING: 4170 rthdr = (ip6_rthdr_t *)whereptr; 4171 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4172 if ((uchar_t *)rthdr + ehdrlen > endptr) 4173 return (B_FALSE); 4174 nexthdrp = &rthdr->ip6r_nxt; 4175 break; 4176 case IPPROTO_FRAGMENT: 4177 fraghdr = (ip6_frag_t *)whereptr; 4178 ehdrlen = sizeof (ip6_frag_t); 4179 if ((uchar_t *)&fraghdr[1] > endptr) 4180 return (B_FALSE); 4181 nexthdrp = &fraghdr->ip6f_nxt; 4182 break; 4183 case IPPROTO_NONE: 4184 /* No next header means we're finished */ 4185 default: 4186 *hdr_length_ptr = length; 4187 *nexthdrpp = nexthdrp; 4188 return (B_TRUE); 4189 } 4190 length += ehdrlen; 4191 whereptr += ehdrlen; 4192 *hdr_length_ptr = length; 4193 *nexthdrpp = nexthdrp; 4194 } 4195 switch (*nexthdrp) { 4196 case IPPROTO_HOPOPTS: 4197 case IPPROTO_DSTOPTS: 4198 case IPPROTO_ROUTING: 4199 case IPPROTO_FRAGMENT: 4200 /* 4201 * If any know extension headers are still to be processed, 4202 * the 
packet's malformed (or at least all the IP header(s) are 4203 * not in the same mblk - and that should never happen. 4204 */ 4205 return (B_FALSE); 4206 4207 default: 4208 /* 4209 * If we get here, we know that all of the IP headers were in 4210 * the same mblk, even if the ULP header is in the next mblk. 4211 */ 4212 *hdr_length_ptr = length; 4213 *nexthdrpp = nexthdrp; 4214 return (B_TRUE); 4215 } 4216 } 4217 4218 /* 4219 * Return the length of the IPv6 related headers (including extension headers) 4220 * Returns a length even if the packet is malformed. 4221 */ 4222 int 4223 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4224 { 4225 uint16_t hdr_len; 4226 uint8_t *nexthdrp; 4227 4228 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4229 return (hdr_len); 4230 } 4231 4232 /* 4233 * Select an ill for the packet by considering load spreading across 4234 * a different ill in the group if dst_ill is part of some group. 4235 */ 4236 static ill_t * 4237 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4238 { 4239 ill_t *ill; 4240 4241 /* 4242 * We schedule irrespective of whether the source address is 4243 * INADDR_UNSPECIED or not. 4244 */ 4245 ill = illgrp_scheduler(dst_ill); 4246 if (ill == NULL) 4247 return (NULL); 4248 4249 /* 4250 * For groups with names ip_sioctl_groupname ensures that all 4251 * ills are of same type. For groups without names, ifgrp_insert 4252 * ensures this. 4253 */ 4254 ASSERT(dst_ill->ill_type == ill->ill_type); 4255 4256 return (ill); 4257 } 4258 4259 /* 4260 * IPv6 - 4261 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4262 * to send out a packet to a destination address for which we do not have 4263 * specific routing information. 4264 * 4265 * Handle non-multicast packets. If ill is non-NULL the match is done 4266 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4305 */ 4306 /* ARGSUSED */ 4307 void 4308 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4309 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4310 { 4311 in6_addr_t v6gw; 4312 in6_addr_t dst; 4313 ire_t *ire = NULL; 4314 ipif_t *src_ipif = NULL; 4315 ill_t *dst_ill = NULL; 4316 ire_t *sire = NULL; 4317 ire_t *save_ire; 4318 ip6_t *ip6h; 4319 int err = 0; 4320 mblk_t *first_mp; 4321 ipsec_out_t *io; 4322 ill_t *attach_ill = NULL; 4323 ushort_t ire_marks = 0; 4324 int match_flags; 4325 boolean_t ip6i_present; 4326 ire_t *first_sire = NULL; 4327 mblk_t *copy_mp = NULL; 4328 mblk_t *xmit_mp = NULL; 4329 in6_addr_t save_dst; 4330 uint32_t multirt_flags = 4331 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4332 boolean_t multirt_is_resolvable; 4333 boolean_t multirt_resolve_next; 4334 boolean_t need_rele = B_FALSE; 4335 boolean_t do_attach_ill = B_FALSE; 4336 boolean_t ip6_asp_table_held = B_FALSE; 4337 tsol_ire_gw_secattr_t *attrp = NULL; 4338 tsol_gcgrp_t *gcgrp = NULL; 4339 tsol_gcgrp_addr_t ga; 4340 4341 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4342 4343 first_mp = mp; 4344 if (mp->b_datap->db_type == M_CTL) { 4345 mp = mp->b_cont; 4346 io = (ipsec_out_t *)first_mp->b_rptr; 4347 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4348 } else { 4349 io = NULL; 4350 } 4351 4352 /* 4353 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4354 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4355 * could be NULL. 4356 * 4357 * This information can appear either in an ip6i_t or an IPSEC_OUT 4358 * message. 4359 */ 4360 ip6h = (ip6_t *)mp->b_rptr; 4361 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4362 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4363 if (!ip6i_present || 4364 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4365 attach_ill = ip_grab_attach_ill(ill, first_mp, 4366 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4367 io->ipsec_out_ill_index), B_TRUE, ipst); 4368 /* Failure case frees things for us. */ 4369 if (attach_ill == NULL) 4370 return; 4371 4372 /* 4373 * Check if we need an ire that will not be 4374 * looked up by anybody else i.e. HIDDEN. 4375 */ 4376 if (ill_is_probeonly(attach_ill)) 4377 ire_marks = IRE_MARK_HIDDEN; 4378 } 4379 } 4380 4381 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4382 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4383 goto icmp_err_ret; 4384 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4385 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4386 goto icmp_err_ret; 4387 } 4388 4389 /* 4390 * If this IRE is created for forwarding or it is not for 4391 * TCP traffic, mark it as temporary. 4392 * 4393 * Is it sufficient just to check the next header?? 4394 */ 4395 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4396 ire_marks |= IRE_MARK_TEMPORARY; 4397 4398 /* 4399 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4400 * chain until it gets the most specific information available. 4401 * For example, we know that there is no IRE_CACHE for this dest, 4402 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4403 * ire_ftable_lookup_v6 will look up the gateway, etc. 4404 */ 4405 4406 if (ill == NULL) { 4407 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4408 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4409 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4410 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4411 match_flags, ipst); 4412 /* 4413 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4414 * in a NULL ill, but the packet could be a neighbor 4415 * solicitation/advertisment and could have a valid attach_ill. 4416 */ 4417 if (attach_ill != NULL) 4418 ill_refrele(attach_ill); 4419 } else { 4420 if (attach_ill != NULL) { 4421 /* 4422 * attach_ill is set only for communicating with 4423 * on-link hosts. 
So, don't look for DEFAULT. 4424 * ip_wput_v6 passes the right ill in this case and 4425 * hence we can assert. 4426 */ 4427 ASSERT(ill == attach_ill); 4428 ill_refrele(attach_ill); 4429 do_attach_ill = B_TRUE; 4430 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4431 } else { 4432 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4433 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4434 } 4435 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4436 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4437 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4438 } 4439 4440 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4441 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4442 4443 /* 4444 * We enter a loop that will be run only once in most cases. 4445 * The loop is re-entered in the case where the destination 4446 * can be reached through multiple RTF_MULTIRT-flagged routes. 4447 * The intention is to compute multiple routes to a single 4448 * destination in a single ip_newroute_v6 call. 4449 * The information is contained in sire->ire_flags. 4450 */ 4451 do { 4452 multirt_resolve_next = B_FALSE; 4453 4454 if (dst_ill != NULL) { 4455 ill_refrele(dst_ill); 4456 dst_ill = NULL; 4457 } 4458 if (src_ipif != NULL) { 4459 ipif_refrele(src_ipif); 4460 src_ipif = NULL; 4461 } 4462 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4463 ip3dbg(("ip_newroute_v6: starting new resolution " 4464 "with first_mp %p, tag %d\n", 4465 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4466 4467 /* 4468 * We check if there are trailing unresolved routes for 4469 * the destination contained in sire. 
4470 */ 4471 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4472 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4473 4474 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4475 "ire %p, sire %p\n", 4476 multirt_is_resolvable, (void *)ire, (void *)sire)); 4477 4478 if (!multirt_is_resolvable) { 4479 /* 4480 * No more multirt routes to resolve; give up 4481 * (all routes resolved or no more resolvable 4482 * routes). 4483 */ 4484 if (ire != NULL) { 4485 ire_refrele(ire); 4486 ire = NULL; 4487 } 4488 } else { 4489 ASSERT(sire != NULL); 4490 ASSERT(ire != NULL); 4491 /* 4492 * We simply use first_sire as a flag that 4493 * indicates if a resolvable multirt route has 4494 * already been found during the preceding 4495 * loops. If it is not the case, we may have 4496 * to send an ICMP error to report that the 4497 * destination is unreachable. We do not 4498 * IRE_REFHOLD first_sire. 4499 */ 4500 if (first_sire == NULL) { 4501 first_sire = sire; 4502 } 4503 } 4504 } 4505 if ((ire == NULL) || (ire == sire)) { 4506 /* 4507 * either ire == NULL (the destination cannot be 4508 * resolved) or ire == sire (the gateway cannot be 4509 * resolved). At this point, there are no more routes 4510 * to resolve for the destination, thus we exit. 4511 */ 4512 if (ip_debug > 3) { 4513 /* ip2dbg */ 4514 pr_addr_dbg("ip_newroute_v6: " 4515 "can't resolve %s\n", AF_INET6, v6dstp); 4516 } 4517 ip3dbg(("ip_newroute_v6: " 4518 "ire %p, sire %p, first_sire %p\n", 4519 (void *)ire, (void *)sire, (void *)first_sire)); 4520 4521 if (sire != NULL) { 4522 ire_refrele(sire); 4523 sire = NULL; 4524 } 4525 4526 if (first_sire != NULL) { 4527 /* 4528 * At least one multirt route has been found 4529 * in the same ip_newroute() call; there is no 4530 * need to report an ICMP error. 4531 * first_sire was not IRE_REFHOLDed. 
4532 */ 4533 MULTIRT_DEBUG_UNTAG(first_mp); 4534 freemsg(first_mp); 4535 return; 4536 } 4537 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4538 RTA_DST, ipst); 4539 goto icmp_err_ret; 4540 } 4541 4542 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4543 4544 /* 4545 * Verify that the returned IRE does not have either the 4546 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4547 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4548 */ 4549 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4550 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4551 goto icmp_err_ret; 4552 4553 /* 4554 * Increment the ire_ob_pkt_count field for ire if it is an 4555 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4556 * increment the same for the parent IRE, sire, if it is some 4557 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4558 */ 4559 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4560 UPDATE_OB_PKT_COUNT(ire); 4561 ire->ire_last_used_time = lbolt; 4562 } 4563 4564 if (sire != NULL) { 4565 mutex_enter(&sire->ire_lock); 4566 v6gw = sire->ire_gateway_addr_v6; 4567 mutex_exit(&sire->ire_lock); 4568 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4569 IRE_INTERFACE)) == 0); 4570 UPDATE_OB_PKT_COUNT(sire); 4571 sire->ire_last_used_time = lbolt; 4572 } else { 4573 v6gw = ipv6_all_zeros; 4574 } 4575 4576 /* 4577 * We have a route to reach the destination. 4578 * 4579 * 1) If the interface is part of ill group, try to get a new 4580 * ill taking load spreading into account. 4581 * 4582 * 2) After selecting the ill, get a source address that might 4583 * create good inbound load spreading and that matches the 4584 * right scope. ipif_select_source_v6 does this for us. 4585 * 4586 * If the application specified the ill (ifindex), we still 4587 * load spread. Only if the packets needs to go out specifically 4588 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4589 * IPV6_BOUND_PIF we don't try to use a different ill for load 4590 * spreading. 4591 */ 4592 if (!do_attach_ill) { 4593 /* 4594 * If the interface belongs to an interface group, 4595 * make sure the next possible interface in the group 4596 * is used. This encourages load spreading among 4597 * peers in an interface group. However, in the case 4598 * of multirouting, load spreading is not used, as we 4599 * actually want to replicate outgoing packets through 4600 * particular interfaces. 4601 * 4602 * Note: While we pick a dst_ill we are really only 4603 * interested in the ill for load spreading. 4604 * The source ipif is determined by source address 4605 * selection below. 4606 */ 4607 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4608 dst_ill = ire->ire_ipif->ipif_ill; 4609 /* For uniformity do a refhold */ 4610 ill_refhold(dst_ill); 4611 } else { 4612 /* 4613 * If we are here trying to create an IRE_CACHE 4614 * for an offlink destination and have the 4615 * IRE_CACHE for the next hop and the latter is 4616 * using virtual IP source address selection i.e 4617 * it's ire->ire_ipif is pointing to a virtual 4618 * network interface (vni) then 4619 * ip_newroute_get_dst_ll() will return the vni 4620 * interface as the dst_ill. Since the vni is 4621 * virtual i.e not associated with any physical 4622 * interface, it cannot be the dst_ill, hence 4623 * in such a case call ip_newroute_get_dst_ll() 4624 * with the stq_ill instead of the ire_ipif ILL. 4625 * The function returns a refheld ill. 
4626 */ 4627 if ((ire->ire_type == IRE_CACHE) && 4628 IS_VNI(ire->ire_ipif->ipif_ill)) 4629 dst_ill = ip_newroute_get_dst_ill_v6( 4630 ire->ire_stq->q_ptr); 4631 else 4632 dst_ill = ip_newroute_get_dst_ill_v6( 4633 ire->ire_ipif->ipif_ill); 4634 } 4635 if (dst_ill == NULL) { 4636 if (ip_debug > 2) { 4637 pr_addr_dbg("ip_newroute_v6 : no dst " 4638 "ill for dst %s\n", 4639 AF_INET6, v6dstp); 4640 } 4641 goto icmp_err_ret; 4642 } else if (dst_ill->ill_group == NULL && ill != NULL && 4643 dst_ill != ill) { 4644 /* 4645 * If "ill" is not part of any group, we should 4646 * have found a route matching "ill" as we 4647 * called ire_ftable_lookup_v6 with 4648 * MATCH_IRE_ILL_GROUP. 4649 * Rather than asserting when there is a 4650 * mismatch, we just drop the packet. 4651 */ 4652 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4653 "dst_ill %s ill %s\n", 4654 dst_ill->ill_name, 4655 ill->ill_name)); 4656 goto icmp_err_ret; 4657 } 4658 } else { 4659 dst_ill = ire->ire_ipif->ipif_ill; 4660 /* For uniformity do refhold */ 4661 ill_refhold(dst_ill); 4662 /* 4663 * We should have found a route matching ill as we 4664 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4665 * Rather than asserting, while there is a mismatch, 4666 * we just drop the packet. 4667 */ 4668 if (dst_ill != ill) { 4669 ip0dbg(("ip_newroute_v6: Packet dropped as " 4670 "IP6I_ATTACH_IF ill is %s, " 4671 "ire->ire_ipif->ipif_ill is %s\n", 4672 ill->ill_name, 4673 dst_ill->ill_name)); 4674 goto icmp_err_ret; 4675 } 4676 } 4677 /* 4678 * Pick a source address which matches the scope of the 4679 * destination address. 4680 * For RTF_SETSRC routes, the source address is imposed by the 4681 * parent ire (sire). 4682 */ 4683 ASSERT(src_ipif == NULL); 4684 if (ire->ire_type == IRE_IF_RESOLVER && 4685 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4686 ip6_asp_can_lookup(ipst)) { 4687 /* 4688 * The ire cache entry we're adding is for the 4689 * gateway itself. 
The source address in this case 4690 * is relative to the gateway's address. 4691 */ 4692 ip6_asp_table_held = B_TRUE; 4693 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4694 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4695 if (src_ipif != NULL) 4696 ire_marks |= IRE_MARK_USESRC_CHECK; 4697 } else { 4698 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4699 /* 4700 * Check that the ipif matching the requested 4701 * source address still exists. 4702 */ 4703 src_ipif = ipif_lookup_addr_v6( 4704 &sire->ire_src_addr_v6, NULL, zoneid, 4705 NULL, NULL, NULL, NULL, ipst); 4706 } 4707 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4708 uint_t restrict_ill = RESTRICT_TO_NONE; 4709 4710 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4711 & IP6I_ATTACH_IF) 4712 restrict_ill = RESTRICT_TO_ILL; 4713 ip6_asp_table_held = B_TRUE; 4714 src_ipif = ipif_select_source_v6(dst_ill, 4715 v6dstp, restrict_ill, 4716 IPV6_PREFER_SRC_DEFAULT, zoneid); 4717 if (src_ipif != NULL) 4718 ire_marks |= IRE_MARK_USESRC_CHECK; 4719 } 4720 } 4721 4722 if (src_ipif == NULL) { 4723 if (ip_debug > 2) { 4724 /* ip1dbg */ 4725 pr_addr_dbg("ip_newroute_v6: no src for " 4726 "dst %s\n, ", AF_INET6, v6dstp); 4727 printf("ip_newroute_v6: interface name %s\n", 4728 dst_ill->ill_name); 4729 } 4730 goto icmp_err_ret; 4731 } 4732 4733 if (ip_debug > 3) { 4734 /* ip2dbg */ 4735 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4736 AF_INET6, &v6gw); 4737 } 4738 ip2dbg(("\tire type %s (%d)\n", 4739 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4740 4741 /* 4742 * At this point in ip_newroute_v6(), ire is either the 4743 * IRE_CACHE of the next-hop gateway for an off-subnet 4744 * destination or an IRE_INTERFACE type that should be used 4745 * to resolve an on-subnet destination or an on-subnet 4746 * next-hop gateway. 4747 * 4748 * In the IRE_CACHE case, we have the following : 4749 * 4750 * 1) src_ipif - used for getting a source address. 
4751 * 4752 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4753 * means packets using this IRE_CACHE will go out on dst_ill. 4754 * 4755 * 3) The IRE sire will point to the prefix that is the longest 4756 * matching route for the destination. These prefix types 4757 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4758 * 4759 * The newly created IRE_CACHE entry for the off-subnet 4760 * destination is tied to both the prefix route and the 4761 * interface route used to resolve the next-hop gateway 4762 * via the ire_phandle and ire_ihandle fields, respectively. 4763 * 4764 * In the IRE_INTERFACE case, we have the following : 4765 * 4766 * 1) src_ipif - used for getting a source address. 4767 * 4768 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4769 * means packets using the IRE_CACHE that we will build 4770 * here will go out on dst_ill. 4771 * 4772 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4773 * to be created will only be tied to the IRE_INTERFACE that 4774 * was derived from the ire_ihandle field. 4775 * 4776 * If sire is non-NULL, it means the destination is off-link 4777 * and we will first create the IRE_CACHE for the gateway. 4778 * Next time through ip_newroute_v6, we will create the 4779 * IRE_CACHE for the final destination as described above. 4780 */ 4781 save_ire = ire; 4782 switch (ire->ire_type) { 4783 case IRE_CACHE: { 4784 ire_t *ipif_ire; 4785 4786 ASSERT(sire != NULL); 4787 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4788 mutex_enter(&ire->ire_lock); 4789 v6gw = ire->ire_gateway_addr_v6; 4790 mutex_exit(&ire->ire_lock); 4791 } 4792 /* 4793 * We need 3 ire's to create a new cache ire for an 4794 * off-link destination from the cache ire of the 4795 * gateway. 4796 * 4797 * 1. The prefix ire 'sire' 4798 * 2. The cache ire of the gateway 'ire' 4799 * 3. The interface ire 'ipif_ire' 4800 * 4801 * We have (1) and (2). We lookup (3) below. 
4802 * 4803 * If there is no interface route to the gateway, 4804 * it is a race condition, where we found the cache 4805 * but the inteface route has been deleted. 4806 */ 4807 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4808 if (ipif_ire == NULL) { 4809 ip1dbg(("ip_newroute_v6:" 4810 "ire_ihandle_lookup_offlink_v6 failed\n")); 4811 goto icmp_err_ret; 4812 } 4813 /* 4814 * Assume DL_UNITDATA_REQ is same for all physical 4815 * interfaces in the ifgrp. If it isn't, this code will 4816 * have to be seriously rewhacked to allow the 4817 * fastpath probing (such that I cache the link 4818 * header in the IRE_CACHE) to work over ifgrps. 4819 * We have what we need to build an IRE_CACHE. 4820 */ 4821 /* 4822 * Note: the new ire inherits RTF_SETSRC 4823 * and RTF_MULTIRT to propagate these flags from prefix 4824 * to cache. 4825 */ 4826 4827 /* 4828 * Check cached gateway IRE for any security 4829 * attributes; if found, associate the gateway 4830 * credentials group to the destination IRE. 
4831 */ 4832 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4833 mutex_enter(&attrp->igsa_lock); 4834 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4835 GCGRP_REFHOLD(gcgrp); 4836 mutex_exit(&attrp->igsa_lock); 4837 } 4838 4839 ire = ire_create_v6( 4840 v6dstp, /* dest address */ 4841 &ipv6_all_ones, /* mask */ 4842 &src_ipif->ipif_v6src_addr, /* source address */ 4843 &v6gw, /* gateway address */ 4844 &save_ire->ire_max_frag, 4845 NULL, /* src nce */ 4846 dst_ill->ill_rq, /* recv-from queue */ 4847 dst_ill->ill_wq, /* send-to queue */ 4848 IRE_CACHE, 4849 src_ipif, 4850 &sire->ire_mask_v6, /* Parent mask */ 4851 sire->ire_phandle, /* Parent handle */ 4852 ipif_ire->ire_ihandle, /* Interface handle */ 4853 sire->ire_flags & /* flags if any */ 4854 (RTF_SETSRC | RTF_MULTIRT), 4855 &(sire->ire_uinfo), 4856 NULL, 4857 gcgrp, 4858 ipst); 4859 4860 if (ire == NULL) { 4861 if (gcgrp != NULL) { 4862 GCGRP_REFRELE(gcgrp); 4863 gcgrp = NULL; 4864 } 4865 ire_refrele(save_ire); 4866 ire_refrele(ipif_ire); 4867 break; 4868 } 4869 4870 /* reference now held by IRE */ 4871 gcgrp = NULL; 4872 4873 ire->ire_marks |= ire_marks; 4874 4875 /* 4876 * Prevent sire and ipif_ire from getting deleted. The 4877 * newly created ire is tied to both of them via the 4878 * phandle and ihandle respectively. 4879 */ 4880 IRB_REFHOLD(sire->ire_bucket); 4881 /* Has it been removed already ? */ 4882 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4883 IRB_REFRELE(sire->ire_bucket); 4884 ire_refrele(ipif_ire); 4885 ire_refrele(save_ire); 4886 break; 4887 } 4888 4889 IRB_REFHOLD(ipif_ire->ire_bucket); 4890 /* Has it been removed already ? 
*/ 4891 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4892 IRB_REFRELE(ipif_ire->ire_bucket); 4893 IRB_REFRELE(sire->ire_bucket); 4894 ire_refrele(ipif_ire); 4895 ire_refrele(save_ire); 4896 break; 4897 } 4898 4899 xmit_mp = first_mp; 4900 if (ire->ire_flags & RTF_MULTIRT) { 4901 copy_mp = copymsg(first_mp); 4902 if (copy_mp != NULL) { 4903 xmit_mp = copy_mp; 4904 MULTIRT_DEBUG_TAG(first_mp); 4905 } 4906 } 4907 ire_add_then_send(q, ire, xmit_mp); 4908 if (ip6_asp_table_held) { 4909 ip6_asp_table_refrele(ipst); 4910 ip6_asp_table_held = B_FALSE; 4911 } 4912 ire_refrele(save_ire); 4913 4914 /* Assert that sire is not deleted yet. */ 4915 ASSERT(sire->ire_ptpn != NULL); 4916 IRB_REFRELE(sire->ire_bucket); 4917 4918 /* Assert that ipif_ire is not deleted yet. */ 4919 ASSERT(ipif_ire->ire_ptpn != NULL); 4920 IRB_REFRELE(ipif_ire->ire_bucket); 4921 ire_refrele(ipif_ire); 4922 4923 if (copy_mp != NULL) { 4924 /* 4925 * Search for the next unresolved 4926 * multirt route. 4927 */ 4928 copy_mp = NULL; 4929 ipif_ire = NULL; 4930 ire = NULL; 4931 /* re-enter the loop */ 4932 multirt_resolve_next = B_TRUE; 4933 continue; 4934 } 4935 ire_refrele(sire); 4936 ill_refrele(dst_ill); 4937 ipif_refrele(src_ipif); 4938 return; 4939 } 4940 case IRE_IF_NORESOLVER: 4941 /* 4942 * We have what we need to build an IRE_CACHE. 4943 * 4944 * handle the Gated case, where we create 4945 * a NORESOLVER route for loopback. 4946 */ 4947 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4948 break; 4949 /* 4950 * TSol note: We are creating the ire cache for the 4951 * destination 'dst'. If 'dst' is offlink, going 4952 * through the first hop 'gw', the security attributes 4953 * of 'dst' must be set to point to the gateway 4954 * credentials of gateway 'gw'. If 'dst' is onlink, it 4955 * is possible that 'dst' is a potential gateway that is 4956 * referenced by some route that has some security 4957 * attributes. 
Thus in the former case, we need to do a 4958 * gcgrp_lookup of 'gw' while in the latter case we 4959 * need to do gcgrp_lookup of 'dst' itself. 4960 */ 4961 ga.ga_af = AF_INET6; 4962 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4963 ga.ga_addr = v6gw; 4964 else 4965 ga.ga_addr = *v6dstp; 4966 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4967 4968 /* 4969 * Note: the new ire inherits sire flags RTF_SETSRC 4970 * and RTF_MULTIRT to propagate those rules from prefix 4971 * to cache. 4972 */ 4973 ire = ire_create_v6( 4974 v6dstp, /* dest address */ 4975 &ipv6_all_ones, /* mask */ 4976 &src_ipif->ipif_v6src_addr, /* source address */ 4977 &v6gw, /* gateway address */ 4978 &save_ire->ire_max_frag, 4979 NULL, /* no src nce */ 4980 dst_ill->ill_rq, /* recv-from queue */ 4981 dst_ill->ill_wq, /* send-to queue */ 4982 IRE_CACHE, 4983 src_ipif, 4984 &save_ire->ire_mask_v6, /* Parent mask */ 4985 (sire != NULL) ? /* Parent handle */ 4986 sire->ire_phandle : 0, 4987 save_ire->ire_ihandle, /* Interface handle */ 4988 (sire != NULL) ? /* flags if any */ 4989 sire->ire_flags & 4990 (RTF_SETSRC | RTF_MULTIRT) : 0, 4991 &(save_ire->ire_uinfo), 4992 NULL, 4993 gcgrp, 4994 ipst); 4995 4996 if (ire == NULL) { 4997 if (gcgrp != NULL) { 4998 GCGRP_REFRELE(gcgrp); 4999 gcgrp = NULL; 5000 } 5001 ire_refrele(save_ire); 5002 break; 5003 } 5004 5005 /* reference now held by IRE */ 5006 gcgrp = NULL; 5007 5008 ire->ire_marks |= ire_marks; 5009 5010 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5011 dst = v6gw; 5012 else 5013 dst = *v6dstp; 5014 err = ndp_noresolver(dst_ill, &dst); 5015 if (err != 0) { 5016 ire_refrele(save_ire); 5017 break; 5018 } 5019 5020 /* Prevent save_ire from getting deleted */ 5021 IRB_REFHOLD(save_ire->ire_bucket); 5022 /* Has it been removed already ? 
*/ 5023 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5024 IRB_REFRELE(save_ire->ire_bucket); 5025 ire_refrele(save_ire); 5026 break; 5027 } 5028 5029 xmit_mp = first_mp; 5030 /* 5031 * In case of MULTIRT, a copy of the current packet 5032 * to send is made to further re-enter the 5033 * loop and attempt another route resolution 5034 */ 5035 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5036 copy_mp = copymsg(first_mp); 5037 if (copy_mp != NULL) { 5038 xmit_mp = copy_mp; 5039 MULTIRT_DEBUG_TAG(first_mp); 5040 } 5041 } 5042 ire_add_then_send(q, ire, xmit_mp); 5043 if (ip6_asp_table_held) { 5044 ip6_asp_table_refrele(ipst); 5045 ip6_asp_table_held = B_FALSE; 5046 } 5047 5048 /* Assert that it is not deleted yet. */ 5049 ASSERT(save_ire->ire_ptpn != NULL); 5050 IRB_REFRELE(save_ire->ire_bucket); 5051 ire_refrele(save_ire); 5052 5053 if (copy_mp != NULL) { 5054 /* 5055 * If we found a (no)resolver, we ignore any 5056 * trailing top priority IRE_CACHE in 5057 * further loops. This ensures that we do not 5058 * omit any (no)resolver despite the priority 5059 * in this call. 5060 * IRE_CACHE, if any, will be processed 5061 * by another thread entering ip_newroute(), 5062 * (on resolver response, for example). 5063 * We use this to force multiple parallel 5064 * resolution as soon as a packet needs to be 5065 * sent. The result is, after one packet 5066 * emission all reachable routes are generally 5067 * resolved. 5068 * Otherwise, complete resolution of MULTIRT 5069 * routes would require several emissions as 5070 * side effect. 5071 */ 5072 multirt_flags &= ~MULTIRT_CACHEGW; 5073 5074 /* 5075 * Search for the next unresolved multirt 5076 * route. 
5077 */ 5078 copy_mp = NULL; 5079 save_ire = NULL; 5080 ire = NULL; 5081 /* re-enter the loop */ 5082 multirt_resolve_next = B_TRUE; 5083 continue; 5084 } 5085 5086 /* Don't need sire anymore */ 5087 if (sire != NULL) 5088 ire_refrele(sire); 5089 ill_refrele(dst_ill); 5090 ipif_refrele(src_ipif); 5091 return; 5092 5093 case IRE_IF_RESOLVER: 5094 /* 5095 * We can't build an IRE_CACHE yet, but at least we 5096 * found a resolver that can help. 5097 */ 5098 dst = *v6dstp; 5099 5100 /* 5101 * To be at this point in the code with a non-zero gw 5102 * means that dst is reachable through a gateway that 5103 * we have never resolved. By changing dst to the gw 5104 * addr we resolve the gateway first. When 5105 * ire_add_then_send() tries to put the IP dg to dst, 5106 * it will reenter ip_newroute() at which time we will 5107 * find the IRE_CACHE for the gw and create another 5108 * IRE_CACHE above (for dst itself). 5109 */ 5110 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5111 save_dst = dst; 5112 dst = v6gw; 5113 v6gw = ipv6_all_zeros; 5114 } 5115 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5116 /* 5117 * Ask the external resolver to do its thing. 
5118 * Make an mblk chain in the following form: 5119 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5120 */ 5121 mblk_t *ire_mp; 5122 mblk_t *areq_mp; 5123 areq_t *areq; 5124 in6_addr_t *addrp; 5125 5126 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5127 if (ip6_asp_table_held) { 5128 ip6_asp_table_refrele(ipst); 5129 ip6_asp_table_held = B_FALSE; 5130 } 5131 ire = ire_create_mp_v6( 5132 &dst, /* dest address */ 5133 &ipv6_all_ones, /* mask */ 5134 &src_ipif->ipif_v6src_addr, 5135 /* source address */ 5136 &v6gw, /* gateway address */ 5137 NULL, /* no src nce */ 5138 dst_ill->ill_rq, /* recv-from queue */ 5139 dst_ill->ill_wq, /* send-to queue */ 5140 IRE_CACHE, 5141 src_ipif, 5142 &save_ire->ire_mask_v6, /* Parent mask */ 5143 0, 5144 save_ire->ire_ihandle, 5145 /* Interface handle */ 5146 0, /* flags if any */ 5147 &(save_ire->ire_uinfo), 5148 NULL, 5149 NULL, 5150 ipst); 5151 5152 ire_refrele(save_ire); 5153 if (ire == NULL) { 5154 ip1dbg(("ip_newroute_v6:" 5155 "ire is NULL\n")); 5156 break; 5157 } 5158 5159 if ((sire != NULL) && 5160 (sire->ire_flags & RTF_MULTIRT)) { 5161 /* 5162 * processing a copy of the packet to 5163 * send for further resolution loops 5164 */ 5165 copy_mp = copymsg(first_mp); 5166 if (copy_mp != NULL) 5167 MULTIRT_DEBUG_TAG(copy_mp); 5168 } 5169 ire->ire_marks |= ire_marks; 5170 ire_mp = ire->ire_mp; 5171 /* 5172 * Now create or find an nce for this interface. 5173 * The hw addr will need to to be set from 5174 * the reply to the AR_ENTRY_QUERY that 5175 * we're about to send. This will be done in 5176 * ire_add_v6(). 5177 */ 5178 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5179 switch (err) { 5180 case 0: 5181 /* 5182 * New cache entry created. 5183 * Break, then ask the external 5184 * resolver. 5185 */ 5186 break; 5187 case EINPROGRESS: 5188 /* 5189 * Resolution in progress; 5190 * packet has been queued by 5191 * ndp_resolver(). 5192 */ 5193 ire_delete(ire); 5194 ire = NULL; 5195 /* 5196 * Check if another multirt 5197 * route must be resolved. 
5198 */ 5199 if (copy_mp != NULL) { 5200 /* 5201 * If we found a resolver, we 5202 * ignore any trailing top 5203 * priority IRE_CACHE in 5204 * further loops. The reason is 5205 * the same as for noresolver. 5206 */ 5207 multirt_flags &= 5208 ~MULTIRT_CACHEGW; 5209 /* 5210 * Search for the next 5211 * unresolved multirt route. 5212 */ 5213 first_mp = copy_mp; 5214 copy_mp = NULL; 5215 mp = first_mp; 5216 if (mp->b_datap->db_type == 5217 M_CTL) { 5218 mp = mp->b_cont; 5219 } 5220 ASSERT(sire != NULL); 5221 dst = save_dst; 5222 /* 5223 * re-enter the loop 5224 */ 5225 multirt_resolve_next = 5226 B_TRUE; 5227 continue; 5228 } 5229 5230 if (sire != NULL) 5231 ire_refrele(sire); 5232 ill_refrele(dst_ill); 5233 ipif_refrele(src_ipif); 5234 return; 5235 default: 5236 /* 5237 * Transient error; packet will be 5238 * freed. 5239 */ 5240 ire_delete(ire); 5241 ire = NULL; 5242 break; 5243 } 5244 if (err != 0) 5245 break; 5246 /* 5247 * Now set up the AR_ENTRY_QUERY and send it. 5248 */ 5249 areq_mp = ill_arp_alloc(dst_ill, 5250 (uchar_t *)&ipv6_areq_template, 5251 (caddr_t)&dst); 5252 if (areq_mp == NULL) { 5253 ip1dbg(("ip_newroute_v6:" 5254 "areq_mp is NULL\n")); 5255 freemsg(ire_mp); 5256 break; 5257 } 5258 areq = (areq_t *)areq_mp->b_rptr; 5259 addrp = (in6_addr_t *)((char *)areq + 5260 areq->areq_target_addr_offset); 5261 *addrp = dst; 5262 addrp = (in6_addr_t *)((char *)areq + 5263 areq->areq_sender_addr_offset); 5264 *addrp = src_ipif->ipif_v6src_addr; 5265 /* 5266 * link the chain, then send up to the resolver. 5267 */ 5268 linkb(areq_mp, ire_mp); 5269 linkb(areq_mp, mp); 5270 ip1dbg(("ip_newroute_v6:" 5271 "putnext to resolver\n")); 5272 putnext(dst_ill->ill_rq, areq_mp); 5273 /* 5274 * Check if another multirt route 5275 * must be resolved. 5276 */ 5277 ire = NULL; 5278 if (copy_mp != NULL) { 5279 /* 5280 * If we find a resolver, we ignore any 5281 * trailing top priority IRE_CACHE in 5282 * further loops. The reason is the 5283 * same as for noresolver. 
5284 */ 5285 multirt_flags &= ~MULTIRT_CACHEGW; 5286 /* 5287 * Search for the next unresolved 5288 * multirt route. 5289 */ 5290 first_mp = copy_mp; 5291 copy_mp = NULL; 5292 mp = first_mp; 5293 if (mp->b_datap->db_type == M_CTL) { 5294 mp = mp->b_cont; 5295 } 5296 ASSERT(sire != NULL); 5297 dst = save_dst; 5298 /* 5299 * re-enter the loop 5300 */ 5301 multirt_resolve_next = B_TRUE; 5302 continue; 5303 } 5304 5305 if (sire != NULL) 5306 ire_refrele(sire); 5307 ill_refrele(dst_ill); 5308 ipif_refrele(src_ipif); 5309 return; 5310 } 5311 /* 5312 * Non-external resolver case. 5313 * 5314 * TSol note: Please see the note above the 5315 * IRE_IF_NORESOLVER case. 5316 */ 5317 ga.ga_af = AF_INET6; 5318 ga.ga_addr = dst; 5319 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5320 5321 ire = ire_create_v6( 5322 &dst, /* dest address */ 5323 &ipv6_all_ones, /* mask */ 5324 &src_ipif->ipif_v6src_addr, /* source address */ 5325 &v6gw, /* gateway address */ 5326 &save_ire->ire_max_frag, 5327 NULL, /* no src nce */ 5328 dst_ill->ill_rq, /* recv-from queue */ 5329 dst_ill->ill_wq, /* send-to queue */ 5330 IRE_CACHE, 5331 src_ipif, 5332 &save_ire->ire_mask_v6, /* Parent mask */ 5333 0, 5334 save_ire->ire_ihandle, /* Interface handle */ 5335 0, /* flags if any */ 5336 &(save_ire->ire_uinfo), 5337 NULL, 5338 gcgrp, 5339 ipst); 5340 5341 if (ire == NULL) { 5342 if (gcgrp != NULL) { 5343 GCGRP_REFRELE(gcgrp); 5344 gcgrp = NULL; 5345 } 5346 ire_refrele(save_ire); 5347 break; 5348 } 5349 5350 /* reference now held by IRE */ 5351 gcgrp = NULL; 5352 5353 if ((sire != NULL) && 5354 (sire->ire_flags & RTF_MULTIRT)) { 5355 copy_mp = copymsg(first_mp); 5356 if (copy_mp != NULL) 5357 MULTIRT_DEBUG_TAG(copy_mp); 5358 } 5359 5360 ire->ire_marks |= ire_marks; 5361 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5362 switch (err) { 5363 case 0: 5364 /* Prevent save_ire from getting deleted */ 5365 IRB_REFHOLD(save_ire->ire_bucket); 5366 /* Has it been removed already ? 
*/ 5367 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5368 IRB_REFRELE(save_ire->ire_bucket); 5369 ire_refrele(save_ire); 5370 break; 5371 } 5372 5373 /* 5374 * We have a resolved cache entry, 5375 * add in the IRE. 5376 */ 5377 ire_add_then_send(q, ire, first_mp); 5378 if (ip6_asp_table_held) { 5379 ip6_asp_table_refrele(ipst); 5380 ip6_asp_table_held = B_FALSE; 5381 } 5382 5383 /* Assert that it is not deleted yet. */ 5384 ASSERT(save_ire->ire_ptpn != NULL); 5385 IRB_REFRELE(save_ire->ire_bucket); 5386 ire_refrele(save_ire); 5387 /* 5388 * Check if another multirt route 5389 * must be resolved. 5390 */ 5391 ire = NULL; 5392 if (copy_mp != NULL) { 5393 /* 5394 * If we find a resolver, we ignore any 5395 * trailing top priority IRE_CACHE in 5396 * further loops. The reason is the 5397 * same as for noresolver. 5398 */ 5399 multirt_flags &= ~MULTIRT_CACHEGW; 5400 /* 5401 * Search for the next unresolved 5402 * multirt route. 5403 */ 5404 first_mp = copy_mp; 5405 copy_mp = NULL; 5406 mp = first_mp; 5407 if (mp->b_datap->db_type == M_CTL) { 5408 mp = mp->b_cont; 5409 } 5410 ASSERT(sire != NULL); 5411 dst = save_dst; 5412 /* 5413 * re-enter the loop 5414 */ 5415 multirt_resolve_next = B_TRUE; 5416 continue; 5417 } 5418 5419 if (sire != NULL) 5420 ire_refrele(sire); 5421 ill_refrele(dst_ill); 5422 ipif_refrele(src_ipif); 5423 return; 5424 5425 case EINPROGRESS: 5426 /* 5427 * mp was consumed - presumably queued. 5428 * No need for ire, presumably resolution is 5429 * in progress, and ire will be added when the 5430 * address is resolved. 5431 */ 5432 if (ip6_asp_table_held) { 5433 ip6_asp_table_refrele(ipst); 5434 ip6_asp_table_held = B_FALSE; 5435 } 5436 ASSERT(ire->ire_nce == NULL); 5437 ire_delete(ire); 5438 ire_refrele(save_ire); 5439 /* 5440 * Check if another multirt route 5441 * must be resolved. 
5442 */ 5443 ire = NULL; 5444 if (copy_mp != NULL) { 5445 /* 5446 * If we find a resolver, we ignore any 5447 * trailing top priority IRE_CACHE in 5448 * further loops. The reason is the 5449 * same as for noresolver. 5450 */ 5451 multirt_flags &= ~MULTIRT_CACHEGW; 5452 /* 5453 * Search for the next unresolved 5454 * multirt route. 5455 */ 5456 first_mp = copy_mp; 5457 copy_mp = NULL; 5458 mp = first_mp; 5459 if (mp->b_datap->db_type == M_CTL) { 5460 mp = mp->b_cont; 5461 } 5462 ASSERT(sire != NULL); 5463 dst = save_dst; 5464 /* 5465 * re-enter the loop 5466 */ 5467 multirt_resolve_next = B_TRUE; 5468 continue; 5469 } 5470 if (sire != NULL) 5471 ire_refrele(sire); 5472 ill_refrele(dst_ill); 5473 ipif_refrele(src_ipif); 5474 return; 5475 default: 5476 /* Some transient error */ 5477 ASSERT(ire->ire_nce == NULL); 5478 ire_refrele(save_ire); 5479 break; 5480 } 5481 break; 5482 default: 5483 break; 5484 } 5485 if (ip6_asp_table_held) { 5486 ip6_asp_table_refrele(ipst); 5487 ip6_asp_table_held = B_FALSE; 5488 } 5489 } while (multirt_resolve_next); 5490 5491 err_ret: 5492 ip1dbg(("ip_newroute_v6: dropped\n")); 5493 if (src_ipif != NULL) 5494 ipif_refrele(src_ipif); 5495 if (dst_ill != NULL) { 5496 need_rele = B_TRUE; 5497 ill = dst_ill; 5498 } 5499 if (ill != NULL) { 5500 if (mp->b_prev != NULL) { 5501 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5502 } else { 5503 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5504 } 5505 5506 if (need_rele) 5507 ill_refrele(ill); 5508 } else { 5509 if (mp->b_prev != NULL) { 5510 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5511 } else { 5512 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5513 } 5514 } 5515 /* Did this packet originate externally? 
*/ 5516 if (mp->b_prev) { 5517 mp->b_next = NULL; 5518 mp->b_prev = NULL; 5519 } 5520 if (copy_mp != NULL) { 5521 MULTIRT_DEBUG_UNTAG(copy_mp); 5522 freemsg(copy_mp); 5523 } 5524 MULTIRT_DEBUG_UNTAG(first_mp); 5525 freemsg(first_mp); 5526 if (ire != NULL) 5527 ire_refrele(ire); 5528 if (sire != NULL) 5529 ire_refrele(sire); 5530 return; 5531 5532 icmp_err_ret: 5533 if (ip6_asp_table_held) 5534 ip6_asp_table_refrele(ipst); 5535 if (src_ipif != NULL) 5536 ipif_refrele(src_ipif); 5537 if (dst_ill != NULL) { 5538 need_rele = B_TRUE; 5539 ill = dst_ill; 5540 } 5541 ip1dbg(("ip_newroute_v6: no route\n")); 5542 if (sire != NULL) 5543 ire_refrele(sire); 5544 /* 5545 * We need to set sire to NULL to avoid double freeing if we 5546 * ever goto err_ret from below. 5547 */ 5548 sire = NULL; 5549 ip6h = (ip6_t *)mp->b_rptr; 5550 /* Skip ip6i_t header if present */ 5551 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5552 /* Make sure the IPv6 header is present */ 5553 if ((mp->b_wptr - (uchar_t *)ip6h) < 5554 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5555 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5556 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5557 goto err_ret; 5558 } 5559 } 5560 mp->b_rptr += sizeof (ip6i_t); 5561 ip6h = (ip6_t *)mp->b_rptr; 5562 } 5563 /* Did this packet originate externally? 
*/ 5564 if (mp->b_prev) { 5565 if (ill != NULL) { 5566 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5567 } else { 5568 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5569 } 5570 mp->b_next = NULL; 5571 mp->b_prev = NULL; 5572 q = WR(q); 5573 } else { 5574 if (ill != NULL) { 5575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5576 } else { 5577 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5578 } 5579 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5580 /* Failed */ 5581 if (copy_mp != NULL) { 5582 MULTIRT_DEBUG_UNTAG(copy_mp); 5583 freemsg(copy_mp); 5584 } 5585 MULTIRT_DEBUG_UNTAG(first_mp); 5586 freemsg(first_mp); 5587 if (ire != NULL) 5588 ire_refrele(ire); 5589 if (need_rele) 5590 ill_refrele(ill); 5591 return; 5592 } 5593 } 5594 5595 if (need_rele) 5596 ill_refrele(ill); 5597 5598 /* 5599 * At this point we will have ire only if RTF_BLACKHOLE 5600 * or RTF_REJECT flags are set on the IRE. It will not 5601 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5602 */ 5603 if (ire != NULL) { 5604 if (ire->ire_flags & RTF_BLACKHOLE) { 5605 ire_refrele(ire); 5606 if (copy_mp != NULL) { 5607 MULTIRT_DEBUG_UNTAG(copy_mp); 5608 freemsg(copy_mp); 5609 } 5610 MULTIRT_DEBUG_UNTAG(first_mp); 5611 freemsg(first_mp); 5612 return; 5613 } 5614 ire_refrele(ire); 5615 } 5616 if (ip_debug > 3) { 5617 /* ip2dbg */ 5618 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5619 AF_INET6, v6dstp); 5620 } 5621 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5622 B_FALSE, B_FALSE, zoneid, ipst); 5623 } 5624 5625 /* 5626 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5627 * we need to send out a packet to a destination address for which we do not 5628 * have specific routing information. It is only used for multicast packets. 5629 * 5630 * If unspec_src we allow creating an IRE with source address zero. 5631 * ire_send_v6() will delete it after the packet is sent. 
5632 */ 5633 void 5634 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5635 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5636 { 5637 ire_t *ire = NULL; 5638 ipif_t *src_ipif = NULL; 5639 int err = 0; 5640 ill_t *dst_ill = NULL; 5641 ire_t *save_ire; 5642 ushort_t ire_marks = 0; 5643 ipsec_out_t *io; 5644 ill_t *attach_ill = NULL; 5645 ill_t *ill; 5646 ip6_t *ip6h; 5647 mblk_t *first_mp; 5648 boolean_t ip6i_present; 5649 ire_t *fire = NULL; 5650 mblk_t *copy_mp = NULL; 5651 boolean_t multirt_resolve_next; 5652 in6_addr_t *v6dstp = &v6dst; 5653 boolean_t ipif_held = B_FALSE; 5654 boolean_t ill_held = B_FALSE; 5655 boolean_t ip6_asp_table_held = B_FALSE; 5656 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5657 5658 /* 5659 * This loop is run only once in most cases. 5660 * We loop to resolve further routes only when the destination 5661 * can be reached through multiple RTF_MULTIRT-flagged ires. 5662 */ 5663 do { 5664 multirt_resolve_next = B_FALSE; 5665 if (dst_ill != NULL) { 5666 ill_refrele(dst_ill); 5667 dst_ill = NULL; 5668 } 5669 5670 if (src_ipif != NULL) { 5671 ipif_refrele(src_ipif); 5672 src_ipif = NULL; 5673 } 5674 ASSERT(ipif != NULL); 5675 ill = ipif->ipif_ill; 5676 5677 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5678 if (ip_debug > 2) { 5679 /* ip1dbg */ 5680 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5681 AF_INET6, v6dstp); 5682 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5683 ill->ill_name, ipif->ipif_isv6); 5684 } 5685 5686 first_mp = mp; 5687 if (mp->b_datap->db_type == M_CTL) { 5688 mp = mp->b_cont; 5689 io = (ipsec_out_t *)first_mp->b_rptr; 5690 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5691 } else { 5692 io = NULL; 5693 } 5694 5695 /* 5696 * If the interface is a pt-pt interface we look for an 5697 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5698 * local_address and the pt-pt destination address. 5699 * Otherwise we just match the local address. 
5700 */ 5701 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5702 goto err_ret; 5703 } 5704 /* 5705 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5706 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5707 * as it could be NULL. 5708 * 5709 * This information can appear either in an ip6i_t or an 5710 * IPSEC_OUT message. 5711 */ 5712 ip6h = (ip6_t *)mp->b_rptr; 5713 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5714 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5715 if (!ip6i_present || 5716 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5717 attach_ill = ip_grab_attach_ill(ill, first_mp, 5718 (ip6i_present ? 5719 ((ip6i_t *)ip6h)->ip6i_ifindex : 5720 io->ipsec_out_ill_index), B_TRUE, ipst); 5721 /* Failure case frees things for us. */ 5722 if (attach_ill == NULL) 5723 return; 5724 5725 /* 5726 * Check if we need an ire that will not be 5727 * looked up by anybody else i.e. HIDDEN. 5728 */ 5729 if (ill_is_probeonly(attach_ill)) 5730 ire_marks = IRE_MARK_HIDDEN; 5731 } 5732 } 5733 5734 /* 5735 * We check if an IRE_OFFSUBNET for the addr that goes through 5736 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5737 * RTF_MULTIRT flags must be honored. 5738 */ 5739 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5740 ip2dbg(("ip_newroute_ipif_v6: " 5741 "ipif_lookup_multi_ire_v6(" 5742 "ipif %p, dst %08x) = fire %p\n", 5743 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5744 (void *)fire)); 5745 5746 /* 5747 * If the application specified the ill (ifindex), we still 5748 * load spread. Only if the packets needs to go out specifically 5749 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5750 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5751 * multirouting, then we don't try to use a different ill for 5752 * load spreading. 
5753 */ 5754 if (attach_ill == NULL) { 5755 /* 5756 * If the interface belongs to an interface group, 5757 * make sure the next possible interface in the group 5758 * is used. This encourages load spreading among peers 5759 * in an interface group. 5760 * 5761 * Note: While we pick a dst_ill we are really only 5762 * interested in the ill for load spreading. The source 5763 * ipif is determined by source address selection below. 5764 */ 5765 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5766 dst_ill = ipif->ipif_ill; 5767 /* For uniformity do a refhold */ 5768 ill_refhold(dst_ill); 5769 } else { 5770 /* refheld by ip_newroute_get_dst_ill_v6 */ 5771 dst_ill = 5772 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5773 } 5774 if (dst_ill == NULL) { 5775 if (ip_debug > 2) { 5776 pr_addr_dbg("ip_newroute_ipif_v6: " 5777 "no dst ill for dst %s\n", 5778 AF_INET6, v6dstp); 5779 } 5780 goto err_ret; 5781 } 5782 } else { 5783 dst_ill = ipif->ipif_ill; 5784 /* 5785 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5786 * and IPV6_BOUND_PIF case. 5787 */ 5788 ASSERT(dst_ill == attach_ill); 5789 /* attach_ill is already refheld */ 5790 } 5791 /* 5792 * Pick a source address which matches the scope of the 5793 * destination address. 5794 * For RTF_SETSRC routes, the source address is imposed by the 5795 * parent ire (fire). 5796 */ 5797 ASSERT(src_ipif == NULL); 5798 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5799 /* 5800 * Check that the ipif matching the requested source 5801 * address still exists. 
5802 */ 5803 src_ipif = 5804 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5805 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5806 } 5807 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5808 ip6_asp_table_held = B_TRUE; 5809 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5810 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5811 } 5812 5813 if (src_ipif == NULL) { 5814 if (!unspec_src) { 5815 if (ip_debug > 2) { 5816 /* ip1dbg */ 5817 pr_addr_dbg("ip_newroute_ipif_v6: " 5818 "no src for dst %s\n,", 5819 AF_INET6, v6dstp); 5820 printf(" through interface %s\n", 5821 dst_ill->ill_name); 5822 } 5823 goto err_ret; 5824 } 5825 src_ipif = ipif; 5826 ipif_refhold(src_ipif); 5827 } 5828 ire = ipif_to_ire_v6(ipif); 5829 if (ire == NULL) { 5830 if (ip_debug > 2) { 5831 /* ip1dbg */ 5832 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5833 AF_INET6, &ipif->ipif_v6lcl_addr); 5834 printf("ip_newroute_ipif_v6: " 5835 "if %s\n", dst_ill->ill_name); 5836 } 5837 goto err_ret; 5838 } 5839 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5840 goto err_ret; 5841 5842 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5843 5844 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5845 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5846 if (ip_debug > 2) { 5847 /* ip1dbg */ 5848 pr_addr_dbg(" address %s\n", 5849 AF_INET6, &ire->ire_src_addr_v6); 5850 } 5851 save_ire = ire; 5852 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5853 (void *)ire, (void *)ipif)); 5854 5855 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5856 /* 5857 * an IRE_OFFSUBET was looked up 5858 * on that interface. 5859 * this ire has RTF_MULTIRT flag, 5860 * so the resolution loop 5861 * will be re-entered to resolve 5862 * additional routes on other 5863 * interfaces. For that purpose, 5864 * a copy of the packet is 5865 * made at this point. 
5866 */ 5867 fire->ire_last_used_time = lbolt; 5868 copy_mp = copymsg(first_mp); 5869 if (copy_mp) { 5870 MULTIRT_DEBUG_TAG(copy_mp); 5871 } 5872 } 5873 5874 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5875 switch (ire->ire_type) { 5876 case IRE_IF_NORESOLVER: { 5877 /* 5878 * We have what we need to build an IRE_CACHE. 5879 * 5880 * handle the Gated case, where we create 5881 * a NORESOLVER route for loopback. 5882 */ 5883 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5884 break; 5885 /* 5886 * The newly created ire will inherit the flags of the 5887 * parent ire, if any. 5888 */ 5889 ire = ire_create_v6( 5890 v6dstp, /* dest address */ 5891 &ipv6_all_ones, /* mask */ 5892 &src_ipif->ipif_v6src_addr, /* source address */ 5893 NULL, /* gateway address */ 5894 &save_ire->ire_max_frag, 5895 NULL, /* no src nce */ 5896 dst_ill->ill_rq, /* recv-from queue */ 5897 dst_ill->ill_wq, /* send-to queue */ 5898 IRE_CACHE, 5899 src_ipif, 5900 NULL, 5901 (fire != NULL) ? /* Parent handle */ 5902 fire->ire_phandle : 0, 5903 save_ire->ire_ihandle, /* Interface handle */ 5904 (fire != NULL) ? 5905 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5906 0, 5907 &ire_uinfo_null, 5908 NULL, 5909 NULL, 5910 ipst); 5911 5912 if (ire == NULL) { 5913 ire_refrele(save_ire); 5914 break; 5915 } 5916 5917 ire->ire_marks |= ire_marks; 5918 5919 err = ndp_noresolver(dst_ill, v6dstp); 5920 if (err != 0) { 5921 ire_refrele(save_ire); 5922 break; 5923 } 5924 5925 /* Prevent save_ire from getting deleted */ 5926 IRB_REFHOLD(save_ire->ire_bucket); 5927 /* Has it been removed already ? */ 5928 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5929 IRB_REFRELE(save_ire->ire_bucket); 5930 ire_refrele(save_ire); 5931 break; 5932 } 5933 5934 ire_add_then_send(q, ire, first_mp); 5935 if (ip6_asp_table_held) { 5936 ip6_asp_table_refrele(ipst); 5937 ip6_asp_table_held = B_FALSE; 5938 } 5939 5940 /* Assert that it is not deleted yet. 
*/ 5941 ASSERT(save_ire->ire_ptpn != NULL); 5942 IRB_REFRELE(save_ire->ire_bucket); 5943 ire_refrele(save_ire); 5944 if (fire != NULL) { 5945 ire_refrele(fire); 5946 fire = NULL; 5947 } 5948 5949 /* 5950 * The resolution loop is re-entered if we 5951 * actually are in a multirouting case. 5952 */ 5953 if (copy_mp != NULL) { 5954 boolean_t need_resolve = 5955 ire_multirt_need_resolve_v6(v6dstp, 5956 MBLK_GETLABEL(copy_mp), ipst); 5957 if (!need_resolve) { 5958 MULTIRT_DEBUG_UNTAG(copy_mp); 5959 freemsg(copy_mp); 5960 copy_mp = NULL; 5961 } else { 5962 /* 5963 * ipif_lookup_group_v6() calls 5964 * ire_lookup_multi_v6() that uses 5965 * ire_ftable_lookup_v6() to find 5966 * an IRE_INTERFACE for the group. 5967 * In the multirt case, 5968 * ire_lookup_multi_v6() then invokes 5969 * ire_multirt_lookup_v6() to find 5970 * the next resolvable ire. 5971 * As a result, we obtain a new 5972 * interface, derived from the 5973 * next ire. 5974 */ 5975 if (ipif_held) { 5976 ipif_refrele(ipif); 5977 ipif_held = B_FALSE; 5978 } 5979 ipif = ipif_lookup_group_v6(v6dstp, 5980 zoneid, ipst); 5981 ip2dbg(("ip_newroute_ipif: " 5982 "multirt dst %08x, ipif %p\n", 5983 ntohl(V4_PART_OF_V6((*v6dstp))), 5984 (void *)ipif)); 5985 if (ipif != NULL) { 5986 ipif_held = B_TRUE; 5987 mp = copy_mp; 5988 copy_mp = NULL; 5989 multirt_resolve_next = 5990 B_TRUE; 5991 continue; 5992 } else { 5993 freemsg(copy_mp); 5994 } 5995 } 5996 } 5997 ill_refrele(dst_ill); 5998 if (ipif_held) { 5999 ipif_refrele(ipif); 6000 ipif_held = B_FALSE; 6001 } 6002 if (src_ipif != NULL) 6003 ipif_refrele(src_ipif); 6004 return; 6005 } 6006 case IRE_IF_RESOLVER: { 6007 6008 ASSERT(dst_ill->ill_isv6); 6009 6010 /* 6011 * We obtain a partial IRE_CACHE which we will pass 6012 * along with the resolver query. When the response 6013 * comes back it will be there ready for us to add. 6014 */ 6015 /* 6016 * the newly created ire will inherit the flags of the 6017 * parent ire, if any. 
6018 */ 6019 ire = ire_create_v6( 6020 v6dstp, /* dest address */ 6021 &ipv6_all_ones, /* mask */ 6022 &src_ipif->ipif_v6src_addr, /* source address */ 6023 NULL, /* gateway address */ 6024 &save_ire->ire_max_frag, 6025 NULL, /* src nce */ 6026 dst_ill->ill_rq, /* recv-from queue */ 6027 dst_ill->ill_wq, /* send-to queue */ 6028 IRE_CACHE, 6029 src_ipif, 6030 NULL, 6031 (fire != NULL) ? /* Parent handle */ 6032 fire->ire_phandle : 0, 6033 save_ire->ire_ihandle, /* Interface handle */ 6034 (fire != NULL) ? 6035 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6036 0, 6037 &ire_uinfo_null, 6038 NULL, 6039 NULL, 6040 ipst); 6041 6042 if (ire == NULL) { 6043 ire_refrele(save_ire); 6044 break; 6045 } 6046 6047 ire->ire_marks |= ire_marks; 6048 6049 /* Resolve and add ire to the ctable */ 6050 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6051 switch (err) { 6052 case 0: 6053 /* Prevent save_ire from getting deleted */ 6054 IRB_REFHOLD(save_ire->ire_bucket); 6055 /* Has it been removed already ? */ 6056 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6057 IRB_REFRELE(save_ire->ire_bucket); 6058 ire_refrele(save_ire); 6059 break; 6060 } 6061 /* 6062 * We have a resolved cache entry, 6063 * add in the IRE. 6064 */ 6065 ire_add_then_send(q, ire, first_mp); 6066 if (ip6_asp_table_held) { 6067 ip6_asp_table_refrele(ipst); 6068 ip6_asp_table_held = B_FALSE; 6069 } 6070 6071 /* Assert that it is not deleted yet. */ 6072 ASSERT(save_ire->ire_ptpn != NULL); 6073 IRB_REFRELE(save_ire->ire_bucket); 6074 ire_refrele(save_ire); 6075 if (fire != NULL) { 6076 ire_refrele(fire); 6077 fire = NULL; 6078 } 6079 6080 /* 6081 * The resolution loop is re-entered if we 6082 * actually are in a multirouting case. 
6083 */ 6084 if (copy_mp != NULL) { 6085 boolean_t need_resolve = 6086 ire_multirt_need_resolve_v6(v6dstp, 6087 MBLK_GETLABEL(copy_mp), ipst); 6088 if (!need_resolve) { 6089 MULTIRT_DEBUG_UNTAG(copy_mp); 6090 freemsg(copy_mp); 6091 copy_mp = NULL; 6092 } else { 6093 /* 6094 * ipif_lookup_group_v6() calls 6095 * ire_lookup_multi_v6() that 6096 * uses ire_ftable_lookup_v6() 6097 * to find an IRE_INTERFACE for 6098 * the group. In the multirt 6099 * case, ire_lookup_multi_v6() 6100 * then invokes 6101 * ire_multirt_lookup_v6() to 6102 * find the next resolvable ire. 6103 * As a result, we obtain a new 6104 * interface, derived from the 6105 * next ire. 6106 */ 6107 if (ipif_held) { 6108 ipif_refrele(ipif); 6109 ipif_held = B_FALSE; 6110 } 6111 ipif = ipif_lookup_group_v6( 6112 v6dstp, zoneid, ipst); 6113 ip2dbg(("ip_newroute_ipif: " 6114 "multirt dst %08x, " 6115 "ipif %p\n", 6116 ntohl(V4_PART_OF_V6( 6117 (*v6dstp))), 6118 (void *)ipif)); 6119 if (ipif != NULL) { 6120 ipif_held = B_TRUE; 6121 mp = copy_mp; 6122 copy_mp = NULL; 6123 multirt_resolve_next = 6124 B_TRUE; 6125 continue; 6126 } else { 6127 freemsg(copy_mp); 6128 } 6129 } 6130 } 6131 ill_refrele(dst_ill); 6132 if (ipif_held) { 6133 ipif_refrele(ipif); 6134 ipif_held = B_FALSE; 6135 } 6136 if (src_ipif != NULL) 6137 ipif_refrele(src_ipif); 6138 return; 6139 6140 case EINPROGRESS: 6141 /* 6142 * mp was consumed - presumably queued. 6143 * No need for ire, presumably resolution is 6144 * in progress, and ire will be added when the 6145 * address is resolved. 6146 */ 6147 if (ip6_asp_table_held) { 6148 ip6_asp_table_refrele(ipst); 6149 ip6_asp_table_held = B_FALSE; 6150 } 6151 ire_delete(ire); 6152 ire_refrele(save_ire); 6153 if (fire != NULL) { 6154 ire_refrele(fire); 6155 fire = NULL; 6156 } 6157 6158 /* 6159 * The resolution loop is re-entered if we 6160 * actually are in a multirouting case. 
6161 */ 6162 if (copy_mp != NULL) { 6163 boolean_t need_resolve = 6164 ire_multirt_need_resolve_v6(v6dstp, 6165 MBLK_GETLABEL(copy_mp), ipst); 6166 if (!need_resolve) { 6167 MULTIRT_DEBUG_UNTAG(copy_mp); 6168 freemsg(copy_mp); 6169 copy_mp = NULL; 6170 } else { 6171 /* 6172 * ipif_lookup_group_v6() calls 6173 * ire_lookup_multi_v6() that 6174 * uses ire_ftable_lookup_v6() 6175 * to find an IRE_INTERFACE for 6176 * the group. In the multirt 6177 * case, ire_lookup_multi_v6() 6178 * then invokes 6179 * ire_multirt_lookup_v6() to 6180 * find the next resolvable ire. 6181 * As a result, we obtain a new 6182 * interface, derived from the 6183 * next ire. 6184 */ 6185 if (ipif_held) { 6186 ipif_refrele(ipif); 6187 ipif_held = B_FALSE; 6188 } 6189 ipif = ipif_lookup_group_v6( 6190 v6dstp, zoneid, ipst); 6191 ip2dbg(("ip_newroute_ipif: " 6192 "multirt dst %08x, " 6193 "ipif %p\n", 6194 ntohl(V4_PART_OF_V6( 6195 (*v6dstp))), 6196 (void *)ipif)); 6197 if (ipif != NULL) { 6198 ipif_held = B_TRUE; 6199 mp = copy_mp; 6200 copy_mp = NULL; 6201 multirt_resolve_next = 6202 B_TRUE; 6203 continue; 6204 } else { 6205 freemsg(copy_mp); 6206 } 6207 } 6208 } 6209 ill_refrele(dst_ill); 6210 if (ipif_held) { 6211 ipif_refrele(ipif); 6212 ipif_held = B_FALSE; 6213 } 6214 if (src_ipif != NULL) 6215 ipif_refrele(src_ipif); 6216 return; 6217 default: 6218 /* Some transient error */ 6219 ire_refrele(save_ire); 6220 break; 6221 } 6222 break; 6223 } 6224 default: 6225 break; 6226 } 6227 if (ip6_asp_table_held) { 6228 ip6_asp_table_refrele(ipst); 6229 ip6_asp_table_held = B_FALSE; 6230 } 6231 } while (multirt_resolve_next); 6232 6233 err_ret: 6234 if (ip6_asp_table_held) 6235 ip6_asp_table_refrele(ipst); 6236 if (ire != NULL) 6237 ire_refrele(ire); 6238 if (fire != NULL) 6239 ire_refrele(fire); 6240 if (ipif != NULL && ipif_held) 6241 ipif_refrele(ipif); 6242 if (src_ipif != NULL) 6243 ipif_refrele(src_ipif); 6244 /* Multicast - no point in trying to generate ICMP error */ 6245 ASSERT((attach_ill 
== NULL) || (dst_ill == attach_ill)); 6246 if (dst_ill != NULL) { 6247 ill = dst_ill; 6248 ill_held = B_TRUE; 6249 } 6250 if (mp->b_prev || mp->b_next) { 6251 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6252 } else { 6253 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6254 } 6255 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6256 mp->b_next = NULL; 6257 mp->b_prev = NULL; 6258 freemsg(first_mp); 6259 if (ill_held) 6260 ill_refrele(ill); 6261 } 6262 6263 /* 6264 * Parse and process any hop-by-hop or destination options. 6265 * 6266 * Assumes that q is an ill read queue so that ICMP errors for link-local 6267 * destinations are sent out the correct interface. 6268 * 6269 * Returns -1 if there was an error and mp has been consumed. 6270 * Returns 0 if no special action is needed. 6271 * Returns 1 if the packet contained a router alert option for this node 6272 * which is verified to be "interesting/known" for our implementation. 6273 * 6274 * XXX Note: In future as more hbh or dest options are defined, 6275 * it may be better to have different routines for hbh and dest 6276 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6277 * may have same value in different namespaces. Or is it same namespace ?? 
 * Current code checks for each opt_type (other than pads) if it is in
 * the expected nexthdr (hbh or dest)
 *
 * optptr/optlen describe the option area to walk; hdr_type is the kind of
 * extension header it came from (IPPROTO_HOPOPTS or IPPROTO_DSTOPTS).
 * mp may be preceded by an M_CTL, in which case the whole chain is freed
 * or handed to the ICMP error routine on failure.
 */
static int
ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
    uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst)
{
	uint8_t opt_type;
	uint_t optused;		/* bytes consumed by the current option */
	int ret = 0;
	mblk_t *first_mp;
	const char *errtype;	/* used only in debug messages */
	zoneid_t zoneid;
	ill_t *ill = q->q_ptr;
	ipif_t *ipif;

	/* Remember the head of the chain; step mp past a leading M_CTL. */
	first_mp = mp;
	if (mp->b_datap->db_type == M_CTL) {
		mp = mp->b_cont;
	}

	while (optlen != 0) {
		opt_type = *optptr;
		if (opt_type == IP6OPT_PAD1) {
			/* Pad1 is a single byte with no length field. */
			optused = 1;
		} else {
			/* Every other option has a type and a length byte. */
			if (optlen < 2)
				goto bad_opt;
			errtype = "malformed";
			/*
			 * NOTE(review): ip6opt_ls is defined outside this
			 * chunk; presumably the labeled-security option
			 * type - confirm.  It is only length-checked here.
			 */
			if (opt_type == ip6opt_ls) {
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
			} else switch (opt_type) {
			case IP6OPT_PADN:
				/*
				 * Note:We don't verify that (N-2) pad octets
				 * are zero as required by spec. Adhere to
				 * "be liberal in what you accept..." part of
				 * implementation philosophy (RFC791,RFC1122)
				 */
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				break;

			case IP6OPT_JUMBO:
				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				goto opt_error; /* XXX Not implemented! */

			case IP6OPT_ROUTER_ALERT: {
				struct ip6_opt_router *or;

				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				or = (struct ip6_opt_router *)optptr;
				/* Check total length and alignment */
				if (optused != sizeof (*or) ||
				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
					goto opt_error;
				/* Check value */
				switch (*((uint16_t *)or->ip6or_value)) {
				case IP6_ALERT_MLD:
				case IP6_ALERT_RSVP:
					/* Known alert: tell the caller. */
					ret = 1;
				}
				break;
			}
			case IP6OPT_HOME_ADDRESS: {
				/*
				 * Minimal support for the home address option
				 * (which is required by all IPv6 nodes).
				 * Implement by just swapping the home address
				 * and source address.
				 * XXX Note: this has IPsec implications since
				 * AH needs to take this into account.
				 * Also, when IPsec is used we need to ensure
				 * that this is only processed once
				 * in the received packet (to avoid swapping
				 * back and forth).
				 * NOTE:This option processing is considered
				 * to be unsafe and prone to a denial of
				 * service attack.
				 * The current processing is not safe even with
				 * IPsec secured IP packets. Since the home
				 * address option processing requirement still
				 * is in the IETF draft and in the process of
				 * being redefined for its usage, it has been
				 * decided to turn off the option by default.
				 * If this section of code needs to be executed,
				 * ndd variable ip6_ignore_home_address_opt
				 * should be set to 0 at the user's own risk.
				 */
				struct ip6_opt_home_address *oh;
				in6_addr_t tmp;

				if (ipst->ips_ipv6_ignore_home_address_opt)
					goto opt_error;

				if (hdr_type != IPPROTO_DSTOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;

				/*
				 * We did this dest. opt the first time
				 * around (i.e. before AH processing).
				 * If we've done AH... stop now.
				 */
				if (first_mp != mp) {
					ipsec_in_t *ii;

					ii = (ipsec_in_t *)first_mp->b_rptr;
					if (ii->ipsec_in_ah_sa != NULL)
						break;
				}

				oh = (struct ip6_opt_home_address *)optptr;
				/* Check total length and alignment */
				if (optused < sizeof (*oh) ||
				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
					goto opt_error;
				/* Swap ip6_src and the home address */
				tmp = ip6h->ip6_src;
				/* XXX Note: only 8 byte alignment option */
				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
				*(in6_addr_t *)oh->ip6oh_addr = tmp;
				break;
			}

			case IP6OPT_TUNNEL_LIMIT:
				if (hdr_type != IPPROTO_DSTOPTS) {
					goto opt_error;
				}
				optused = 2 + optptr[1];
				if (optused > optlen) {
					goto bad_opt;
				}
				/* Type, length and one limit byte. */
				if (optused != 3) {
					goto opt_error;
				}
				break;

			default:
				errtype = "unknown";
				/* FALLTHROUGH */
			opt_error:
				/*
				 * Reached for unknown options and, via goto,
				 * for known options that are malformed or in
				 * the wrong header.  The action is selected
				 * by IP6OPT_TYPE(opt_type).
				 */
				/* Determine which zone should send error */
				zoneid = ipif_lookup_addr_zoneid_v6(
				    &ip6h->ip6_dst, ill, ipst);
				switch (IP6OPT_TYPE(opt_type)) {
				case IP6OPT_TYPE_SKIP:
					optused = 2 + optptr[1];
					if (optused > optlen)
						goto bad_opt;
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x skipped\n",
					    errtype, opt_type));
					break;
				case IP6OPT_TYPE_DISCARD:
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x; packet dropped\n",
					    errtype, opt_type));
					freemsg(first_mp);
					return (-1);
				case IP6OPT_TYPE_ICMP:
					/* No zone to send from: just drop. */
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_FALSE, zoneid, ipst);
					return (-1);
				case IP6OPT_TYPE_FORCEICMP:
					/*
					 * If we don't have a zone and the dst
					 * addr is multicast, then pick a zone
					 * based on the inbound interface.
					 */
					if (zoneid == ALL_ZONES &&
					    IN6_IS_ADDR_MULTICAST(
					    &ip6h->ip6_dst)) {
						ipif = ipif_select_source_v6(
						    ill, &ip6h->ip6_src,
						    RESTRICT_TO_GROUP,
						    IPV6_PREFER_SRC_DEFAULT,
						    ALL_ZONES);
						if (ipif != NULL) {
							zoneid =
							    ipif->ipif_zoneid;
							ipif_refrele(ipif);
						}
					}
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_TRUE, zoneid, ipst);
					return (-1);
				default:
					ASSERT(0);
				}
			}
		}
		/* Advance to the next option. */
		optlen -= optused;
		optptr += optused;
	}
	return (ret);

bad_opt:
	/*
	 * The option's length field runs past the end of the option area
	 * (or there is no room for a length byte at all).
	 */
	/* Determine which zone should send error */
	zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
	if (zoneid == ALL_ZONES) {
		freemsg(first_mp);
	} else {
		icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION,
		    (uint32_t)(optptr - (uint8_t *)ip6h),
		    B_FALSE, B_FALSE, zoneid, ipst);
	}
	return (-1);
}

/*
 * Process a routing header that is not yet empty.
 * Only handles type 0 routing headers.
 */
static void
ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
    ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp)
{
	ip6_rthdr0_t *rthdr;
	uint_t ehdrlen;
	uint_t numaddr;
	in6_addr_t *addrptr;
	in6_addr_t tmp;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(rth->ip6r_segleft != 0);

	if (!ipst->ips_ipv6_forward_src_routed) {
		/* XXX Check for source routed out same interface?
*/ 6531 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6532 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6533 freemsg(hada_mp); 6534 freemsg(mp); 6535 return; 6536 } 6537 6538 if (rth->ip6r_type != 0) { 6539 if (hada_mp != NULL) 6540 goto hada_drop; 6541 /* Sent by forwarding path, and router is global zone */ 6542 icmp_param_problem_v6(WR(q), mp, 6543 ICMP6_PARAMPROB_HEADER, 6544 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6545 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6546 return; 6547 } 6548 rthdr = (ip6_rthdr0_t *)rth; 6549 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6550 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6551 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6552 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6553 if (rthdr->ip6r0_len & 0x1) { 6554 /* An odd length is impossible */ 6555 if (hada_mp != NULL) 6556 goto hada_drop; 6557 /* Sent by forwarding path, and router is global zone */ 6558 icmp_param_problem_v6(WR(q), mp, 6559 ICMP6_PARAMPROB_HEADER, 6560 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6561 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6562 return; 6563 } 6564 numaddr = rthdr->ip6r0_len / 2; 6565 if (rthdr->ip6r0_segleft > numaddr) { 6566 /* segleft exceeds number of addresses in routing header */ 6567 if (hada_mp != NULL) 6568 goto hada_drop; 6569 /* Sent by forwarding path, and router is global zone */ 6570 icmp_param_problem_v6(WR(q), mp, 6571 ICMP6_PARAMPROB_HEADER, 6572 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6573 (uchar_t *)ip6h), 6574 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6575 return; 6576 } 6577 addrptr += (numaddr - rthdr->ip6r0_segleft); 6578 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6579 IN6_IS_ADDR_MULTICAST(addrptr)) { 6580 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6581 freemsg(hada_mp); 6582 freemsg(mp); 6583 return; 6584 } 6585 /* Swap */ 6586 tmp = *addrptr; 6587 *addrptr = ip6h->ip6_dst; 6588 ip6h->ip6_dst = tmp; 6589 rthdr->ip6r0_segleft--; 
6590 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6591 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6592 if (hada_mp != NULL) 6593 goto hada_drop; 6594 /* Sent by forwarding path, and router is global zone */ 6595 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6596 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6597 return; 6598 } 6599 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6600 ip6h = (ip6_t *)mp->b_rptr; 6601 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6602 } else { 6603 freemsg(mp); 6604 } 6605 return; 6606 hada_drop: 6607 /* IPsec kstats: bean counter? */ 6608 freemsg(hada_mp); 6609 freemsg(mp); 6610 } 6611 6612 /* 6613 * Read side put procedure for IPv6 module. 6614 */ 6615 void 6616 ip_rput_v6(queue_t *q, mblk_t *mp) 6617 { 6618 mblk_t *first_mp; 6619 mblk_t *hada_mp = NULL; 6620 ip6_t *ip6h; 6621 boolean_t ll_multicast = B_FALSE; 6622 boolean_t mctl_present = B_FALSE; 6623 ill_t *ill; 6624 struct iocblk *iocp; 6625 uint_t flags = 0; 6626 mblk_t *dl_mp; 6627 ip_stack_t *ipst; 6628 int check; 6629 6630 ill = (ill_t *)q->q_ptr; 6631 ipst = ill->ill_ipst; 6632 if (ill->ill_state_flags & ILL_CONDEMNED) { 6633 union DL_primitives *dl; 6634 6635 dl = (union DL_primitives *)mp->b_rptr; 6636 /* 6637 * Things are opening or closing - only accept DLPI 6638 * ack messages. If the stream is closing and ip_wsrv 6639 * has completed, ip_close is out of the qwait, but has 6640 * not yet completed qprocsoff. Don't proceed any further 6641 * because the ill has been cleaned up and things hanging 6642 * off the ill have been freed. 6643 */ 6644 if ((mp->b_datap->db_type != M_PCPROTO) || 6645 (dl->dl_primitive == DL_UNITDATA_IND)) { 6646 inet_freemsg(mp); 6647 return; 6648 } 6649 } 6650 6651 dl_mp = NULL; 6652 switch (mp->b_datap->db_type) { 6653 case M_DATA: { 6654 int hlen; 6655 uchar_t *ucp; 6656 struct ether_header *eh; 6657 dl_unitdata_ind_t *dui; 6658 6659 /* 6660 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6661 * should be removed when that problem is fixed. 6662 */ 6663 if (ill->ill_mactype == DL_ETHER && 6664 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6665 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6666 ucp[-2] == (IP6_DL_SAP >> 8)) { 6667 if (hlen >= sizeof (struct ether_vlan_header) && 6668 ucp[-5] == 0 && ucp[-6] == 0x81) 6669 ucp -= sizeof (struct ether_vlan_header); 6670 else 6671 ucp -= sizeof (struct ether_header); 6672 /* 6673 * If it's a group address, then fabricate a 6674 * DL_UNITDATA_IND message. 6675 */ 6676 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6677 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6678 BPRI_HI)) != NULL) { 6679 eh = (struct ether_header *)ucp; 6680 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6681 DB_TYPE(dl_mp) = M_PROTO; 6682 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6683 dui->dl_primitive = DL_UNITDATA_IND; 6684 dui->dl_dest_addr_length = 8; 6685 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6686 dui->dl_src_addr_length = 8; 6687 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6688 8; 6689 dui->dl_group_address = 1; 6690 ucp = (uchar_t *)(dui + 1); 6691 if (ill->ill_sap_length > 0) 6692 ucp += ill->ill_sap_length; 6693 bcopy(&eh->ether_dhost, ucp, 6); 6694 bcopy(&eh->ether_shost, ucp + 8, 6); 6695 ucp = (uchar_t *)(dui + 1); 6696 if (ill->ill_sap_length < 0) 6697 ucp += 8 + ill->ill_sap_length; 6698 bcopy(&eh->ether_type, ucp, 2); 6699 bcopy(&eh->ether_type, ucp + 8, 2); 6700 } 6701 } 6702 break; 6703 } 6704 6705 case M_PROTO: 6706 case M_PCPROTO: 6707 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6708 DL_UNITDATA_IND) { 6709 /* Go handle anything other than data elsewhere. */ 6710 ip_rput_dlpi(q, mp); 6711 return; 6712 } 6713 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6714 6715 /* Save the DLPI header. 
*/ 6716 dl_mp = mp; 6717 mp = mp->b_cont; 6718 dl_mp->b_cont = NULL; 6719 break; 6720 case M_BREAK: 6721 panic("ip_rput_v6: got an M_BREAK"); 6722 /*NOTREACHED*/ 6723 case M_IOCACK: 6724 iocp = (struct iocblk *)mp->b_rptr; 6725 switch (iocp->ioc_cmd) { 6726 case DL_IOC_HDR_INFO: 6727 ill = (ill_t *)q->q_ptr; 6728 ill_fastpath_ack(ill, mp); 6729 return; 6730 6731 case SIOCGTUNPARAM: 6732 case OSIOCGTUNPARAM: 6733 ip_rput_other(NULL, q, mp, NULL); 6734 return; 6735 6736 case SIOCSTUNPARAM: 6737 case OSIOCSTUNPARAM: 6738 /* Go through qwriter */ 6739 break; 6740 default: 6741 putnext(q, mp); 6742 return; 6743 } 6744 /* FALLTHRU */ 6745 case M_ERROR: 6746 case M_HANGUP: 6747 mutex_enter(&ill->ill_lock); 6748 if (ill->ill_state_flags & ILL_CONDEMNED) { 6749 mutex_exit(&ill->ill_lock); 6750 freemsg(mp); 6751 return; 6752 } 6753 ill_refhold_locked(ill); 6754 mutex_exit(&ill->ill_lock); 6755 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6756 return; 6757 case M_CTL: 6758 if ((MBLKL(mp) > sizeof (int)) && 6759 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6760 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6761 mctl_present = B_TRUE; 6762 break; 6763 } 6764 putnext(q, mp); 6765 return; 6766 case M_IOCNAK: 6767 iocp = (struct iocblk *)mp->b_rptr; 6768 switch (iocp->ioc_cmd) { 6769 case DL_IOC_HDR_INFO: 6770 case SIOCGTUNPARAM: 6771 case OSIOCGTUNPARAM: 6772 ip_rput_other(NULL, q, mp, NULL); 6773 return; 6774 6775 case SIOCSTUNPARAM: 6776 case OSIOCSTUNPARAM: 6777 mutex_enter(&ill->ill_lock); 6778 if (ill->ill_state_flags & ILL_CONDEMNED) { 6779 mutex_exit(&ill->ill_lock); 6780 freemsg(mp); 6781 return; 6782 } 6783 ill_refhold_locked(ill); 6784 mutex_exit(&ill->ill_lock); 6785 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6786 return; 6787 default: 6788 break; 6789 } 6790 /* FALLTHRU */ 6791 default: 6792 putnext(q, mp); 6793 return; 6794 } 6795 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6796 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6797 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6798 /* 6799 * if db_ref > 1 then copymsg and free original. Packet may be 6800 * changed and do not want other entity who has a reference to this 6801 * message to trip over the changes. This is a blind change because 6802 * trying to catch all places that might change packet is too 6803 * difficult (since it may be a module above this one). 6804 */ 6805 if (mp->b_datap->db_ref > 1) { 6806 mblk_t *mp1; 6807 6808 mp1 = copymsg(mp); 6809 freemsg(mp); 6810 if (mp1 == NULL) { 6811 first_mp = NULL; 6812 goto discard; 6813 } 6814 mp = mp1; 6815 } 6816 first_mp = mp; 6817 if (mctl_present) { 6818 hada_mp = first_mp; 6819 mp = first_mp->b_cont; 6820 } 6821 6822 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6823 freemsg(mp); 6824 return; 6825 } 6826 6827 ip6h = (ip6_t *)mp->b_rptr; 6828 6829 /* 6830 * ip:::receive must see ipv6 packets with a full header, 6831 * and so is placed after the IP6_MBLK_HDR_ERR check. 6832 */ 6833 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6834 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6835 int, 0); 6836 6837 if (check != IP6_MBLK_OK) { 6838 freemsg(mp); 6839 return; 6840 } 6841 6842 DTRACE_PROBE4(ip6__physical__in__start, 6843 ill_t *, ill, ill_t *, NULL, 6844 ip6_t *, ip6h, mblk_t *, first_mp); 6845 6846 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6847 ipst->ips_ipv6firewall_physical_in, 6848 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6849 6850 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6851 6852 if (first_mp == NULL) 6853 return; 6854 6855 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6856 IPV6_DEFAULT_VERS_AND_FLOW) { 6857 /* 6858 * It may be a bit too expensive to do this mapped address 6859 * check here, but in the interest of robustness, it seems 6860 * like the correct place. 6861 * TODO: Avoid this check for e.g. 
connected TCP sockets 6862 */ 6863 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6864 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6865 goto discard; 6866 } 6867 6868 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6869 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6870 goto discard; 6871 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6872 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6873 goto discard; 6874 } 6875 6876 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6877 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6878 } else { 6879 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6880 goto discard; 6881 } 6882 freemsg(dl_mp); 6883 return; 6884 6885 discard: 6886 if (dl_mp != NULL) 6887 freeb(dl_mp); 6888 freemsg(first_mp); 6889 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6890 } 6891 6892 /* 6893 * Walk through the IPv6 packet in mp and see if there's an AH header 6894 * in it. See if the AH header needs to get done before other headers in 6895 * the packet. (Worker function for ipsec_early_ah_v6().) 6896 */ 6897 #define IPSEC_HDR_DONT_PROCESS 0 6898 #define IPSEC_HDR_PROCESS 1 6899 #define IPSEC_MEMORY_ERROR 2 6900 static int 6901 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6902 { 6903 uint_t length; 6904 uint_t ehdrlen; 6905 uint8_t *whereptr; 6906 uint8_t *endptr; 6907 uint8_t *nexthdrp; 6908 ip6_dest_t *desthdr; 6909 ip6_rthdr_t *rthdr; 6910 ip6_t *ip6h; 6911 6912 /* 6913 * For now just pullup everything. In general, the less pullups, 6914 * the better, but there's so much squirrelling through anyway, 6915 * it's just easier this way. 
6916 */ 6917 if (!pullupmsg(mp, -1)) { 6918 return (IPSEC_MEMORY_ERROR); 6919 } 6920 6921 ip6h = (ip6_t *)mp->b_rptr; 6922 length = IPV6_HDR_LEN; 6923 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6924 endptr = mp->b_wptr; 6925 6926 /* 6927 * We can't just use the argument nexthdr in the place 6928 * of nexthdrp becaue we don't dereference nexthdrp 6929 * till we confirm whether it is a valid address. 6930 */ 6931 nexthdrp = &ip6h->ip6_nxt; 6932 while (whereptr < endptr) { 6933 /* Is there enough left for len + nexthdr? */ 6934 if (whereptr + MIN_EHDR_LEN > endptr) 6935 return (IPSEC_MEMORY_ERROR); 6936 6937 switch (*nexthdrp) { 6938 case IPPROTO_HOPOPTS: 6939 case IPPROTO_DSTOPTS: 6940 /* Assumes the headers are identical for hbh and dst */ 6941 desthdr = (ip6_dest_t *)whereptr; 6942 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6943 if ((uchar_t *)desthdr + ehdrlen > endptr) 6944 return (IPSEC_MEMORY_ERROR); 6945 /* 6946 * Return DONT_PROCESS because the destination 6947 * options header may be for each hop in a 6948 * routing-header, and we only want AH if we're 6949 * finished with routing headers. 6950 */ 6951 if (*nexthdrp == IPPROTO_DSTOPTS) 6952 return (IPSEC_HDR_DONT_PROCESS); 6953 nexthdrp = &desthdr->ip6d_nxt; 6954 break; 6955 case IPPROTO_ROUTING: 6956 rthdr = (ip6_rthdr_t *)whereptr; 6957 6958 /* 6959 * If there's more hops left on the routing header, 6960 * return now with DON'T PROCESS. 
6961 */ 6962 if (rthdr->ip6r_segleft > 0) 6963 return (IPSEC_HDR_DONT_PROCESS); 6964 6965 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6966 if ((uchar_t *)rthdr + ehdrlen > endptr) 6967 return (IPSEC_MEMORY_ERROR); 6968 nexthdrp = &rthdr->ip6r_nxt; 6969 break; 6970 case IPPROTO_FRAGMENT: 6971 /* Wait for reassembly */ 6972 return (IPSEC_HDR_DONT_PROCESS); 6973 case IPPROTO_AH: 6974 *nexthdr = IPPROTO_AH; 6975 return (IPSEC_HDR_PROCESS); 6976 case IPPROTO_NONE: 6977 /* No next header means we're finished */ 6978 default: 6979 return (IPSEC_HDR_DONT_PROCESS); 6980 } 6981 length += ehdrlen; 6982 whereptr += ehdrlen; 6983 } 6984 panic("ipsec_needs_processing_v6"); 6985 /*NOTREACHED*/ 6986 } 6987 6988 /* 6989 * Path for AH if options are present. If this is the first time we are 6990 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6991 * Otherwise, just fanout. Return value answers the boolean question: 6992 * "Did I consume the mblk you sent me?" 6993 * 6994 * Sometimes AH needs to be done before other IPv6 headers for security 6995 * reasons. This function (and its ipsec_needs_processing_v6() above) 6996 * indicates if that is so, and fans out to the appropriate IPsec protocol 6997 * for the datagram passed in. 6998 */ 6999 static boolean_t 7000 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7001 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7002 { 7003 mblk_t *mp; 7004 uint8_t nexthdr; 7005 ipsec_in_t *ii = NULL; 7006 ah_t *ah; 7007 ipsec_status_t ipsec_rc; 7008 ip_stack_t *ipst = ill->ill_ipst; 7009 netstack_t *ns = ipst->ips_netstack; 7010 ipsec_stack_t *ipss = ns->netstack_ipsec; 7011 7012 ASSERT((hada_mp == NULL) || (!mctl_present)); 7013 7014 switch (ipsec_needs_processing_v6( 7015 (mctl_present ? 
first_mp->b_cont : first_mp), &nexthdr)) { 7016 case IPSEC_MEMORY_ERROR: 7017 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7018 freemsg(hada_mp); 7019 freemsg(first_mp); 7020 return (B_TRUE); 7021 case IPSEC_HDR_DONT_PROCESS: 7022 return (B_FALSE); 7023 } 7024 7025 /* Default means send it to AH! */ 7026 ASSERT(nexthdr == IPPROTO_AH); 7027 if (!mctl_present) { 7028 mp = first_mp; 7029 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7030 if (first_mp == NULL) { 7031 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7032 "allocation failure.\n")); 7033 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7034 freemsg(hada_mp); 7035 freemsg(mp); 7036 return (B_TRUE); 7037 } 7038 /* 7039 * Store the ill_index so that when we come back 7040 * from IPSEC we ride on the same queue. 7041 */ 7042 ii = (ipsec_in_t *)first_mp->b_rptr; 7043 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7044 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7045 first_mp->b_cont = mp; 7046 } 7047 /* 7048 * Cache hardware acceleration info. 
7049 */ 7050 if (hada_mp != NULL) { 7051 ASSERT(ii != NULL); 7052 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7053 "caching data attr.\n")); 7054 ii->ipsec_in_accelerated = B_TRUE; 7055 ii->ipsec_in_da = hada_mp; 7056 } 7057 7058 if (!ipsec_loaded(ipss)) { 7059 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7060 return (B_TRUE); 7061 } 7062 7063 ah = ipsec_inbound_ah_sa(first_mp, ns); 7064 if (ah == NULL) 7065 return (B_TRUE); 7066 ASSERT(ii->ipsec_in_ah_sa != NULL); 7067 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7068 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7069 7070 switch (ipsec_rc) { 7071 case IPSEC_STATUS_SUCCESS: 7072 /* we're done with IPsec processing, send it up */ 7073 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7074 break; 7075 case IPSEC_STATUS_FAILED: 7076 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7077 break; 7078 case IPSEC_STATUS_PENDING: 7079 /* no action needed */ 7080 break; 7081 } 7082 return (B_TRUE); 7083 } 7084 7085 /* 7086 * Validate the IPv6 mblk for alignment. 7087 */ 7088 int 7089 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7090 { 7091 int pkt_len, ip6_len; 7092 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7093 7094 /* check for alignment and full IPv6 header */ 7095 if (!OK_32PTR((uchar_t *)ip6h) || 7096 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7097 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7098 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7099 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7100 return (IP6_MBLK_HDR_ERR); 7101 } 7102 ip6h = (ip6_t *)mp->b_rptr; 7103 } 7104 7105 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7106 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7107 7108 if (mp->b_cont == NULL) 7109 pkt_len = mp->b_wptr - mp->b_rptr; 7110 else 7111 pkt_len = msgdsize(mp); 7112 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7113 7114 /* 7115 * Check for bogus (too short packet) and packet which 7116 * was padded by the link layer. 
7117 */ 7118 if (ip6_len != pkt_len) { 7119 ssize_t diff; 7120 7121 if (ip6_len > pkt_len) { 7122 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7123 ip6_len, pkt_len)); 7124 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7125 return (IP6_MBLK_LEN_ERR); 7126 } 7127 diff = (ssize_t)(pkt_len - ip6_len); 7128 7129 if (!adjmsg(mp, -diff)) { 7130 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7132 return (IP6_MBLK_LEN_ERR); 7133 } 7134 } 7135 return (IP6_MBLK_OK); 7136 } 7137 7138 /* 7139 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7140 * ip_rput_v6 has already verified alignment, the min length, the version, 7141 * and db_ref = 1. 7142 * 7143 * The ill passed in (the arg named inill) is the ill that the packet 7144 * actually arrived on. We need to remember this when saving the 7145 * input interface index into potential IPV6_PKTINFO data in 7146 * ip_add_info_v6(). 7147 * 7148 * This routine doesn't free dl_mp; that's the caller's responsibility on 7149 * return. (Note that the callers are complex enough that there's no tail 7150 * recursion here anyway.) 
7151 */ 7152 void 7153 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7154 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7155 { 7156 ire_t *ire = NULL; 7157 ill_t *ill = inill; 7158 ill_t *outill; 7159 ipif_t *ipif; 7160 uint8_t *whereptr; 7161 uint8_t nexthdr; 7162 uint16_t remlen; 7163 uint_t prev_nexthdr_offset; 7164 uint_t used; 7165 size_t old_pkt_len; 7166 size_t pkt_len; 7167 uint16_t ip6_len; 7168 uint_t hdr_len; 7169 boolean_t mctl_present; 7170 mblk_t *first_mp; 7171 mblk_t *first_mp1; 7172 boolean_t no_forward; 7173 ip6_hbh_t *hbhhdr; 7174 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7175 conn_t *connp; 7176 ilm_t *ilm; 7177 uint32_t ports; 7178 zoneid_t zoneid = GLOBAL_ZONEID; 7179 uint16_t hck_flags, reass_hck_flags; 7180 uint32_t reass_sum; 7181 boolean_t cksum_err; 7182 mblk_t *mp1; 7183 ip_stack_t *ipst = inill->ill_ipst; 7184 7185 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7186 7187 if (hada_mp != NULL) { 7188 /* 7189 * It's an IPsec accelerated packet. 7190 * Keep a pointer to the data attributes around until 7191 * we allocate the ipsecinfo structure. 7192 */ 7193 IPSECHW_DEBUG(IPSECHW_PKT, 7194 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7195 hada_mp->b_cont = NULL; 7196 /* 7197 * Since it is accelerated, it came directly from 7198 * the ill. 
7199 */ 7200 ASSERT(mctl_present == B_FALSE); 7201 ASSERT(mp->b_datap->db_type != M_CTL); 7202 } 7203 7204 ip6h = (ip6_t *)mp->b_rptr; 7205 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7206 old_pkt_len = pkt_len = ip6_len; 7207 7208 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7209 hck_flags = DB_CKSUMFLAGS(mp); 7210 else 7211 hck_flags = 0; 7212 7213 /* Clear checksum flags in case we need to forward */ 7214 DB_CKSUMFLAGS(mp) = 0; 7215 reass_sum = reass_hck_flags = 0; 7216 7217 nexthdr = ip6h->ip6_nxt; 7218 7219 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7220 (uchar_t *)ip6h); 7221 whereptr = (uint8_t *)&ip6h[1]; 7222 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7223 7224 /* Process hop by hop header options */ 7225 if (nexthdr == IPPROTO_HOPOPTS) { 7226 uint_t ehdrlen; 7227 uint8_t *optptr; 7228 7229 if (remlen < MIN_EHDR_LEN) 7230 goto pkt_too_short; 7231 if (mp->b_cont != NULL && 7232 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7233 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7234 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7235 freemsg(hada_mp); 7236 freemsg(first_mp); 7237 return; 7238 } 7239 ip6h = (ip6_t *)mp->b_rptr; 7240 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7241 } 7242 hbhhdr = (ip6_hbh_t *)whereptr; 7243 nexthdr = hbhhdr->ip6h_nxt; 7244 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7245 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7246 7247 if (remlen < ehdrlen) 7248 goto pkt_too_short; 7249 if (mp->b_cont != NULL && 7250 whereptr + ehdrlen > mp->b_wptr) { 7251 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7253 freemsg(hada_mp); 7254 freemsg(first_mp); 7255 return; 7256 } 7257 ip6h = (ip6_t *)mp->b_rptr; 7258 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7259 hbhhdr = (ip6_hbh_t *)whereptr; 7260 } 7261 7262 optptr = whereptr + 2; 7263 whereptr += ehdrlen; 7264 remlen -= ehdrlen; 7265 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7266 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7267 case -1: 7268 /* 7269 * Packet has been consumed and any 7270 * needed ICMP messages sent. 7271 */ 7272 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7273 freemsg(hada_mp); 7274 return; 7275 case 0: 7276 /* no action needed */ 7277 break; 7278 case 1: 7279 /* Known router alert */ 7280 goto ipv6forus; 7281 } 7282 } 7283 7284 /* 7285 * Attach any necessary label information to this packet. 7286 */ 7287 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7288 if (ip6opt_ls != 0) 7289 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7290 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7291 freemsg(hada_mp); 7292 freemsg(first_mp); 7293 return; 7294 } 7295 7296 /* 7297 * On incoming v6 multicast packets we will bypass the ire table, 7298 * and assume that the read queue corresponds to the targetted 7299 * interface. 7300 * 7301 * The effect of this is the same as the IPv4 original code, but is 7302 * much cleaner I think. See ip_rput for how that was done. 7303 */ 7304 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7306 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7307 /* 7308 * XXX TODO Give to mrouted to for multicast forwarding. 
7309 */ 7310 ILM_WALKER_HOLD(ill); 7311 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7312 ILM_WALKER_RELE(ill); 7313 if (ilm == NULL) { 7314 if (ip_debug > 3) { 7315 /* ip2dbg */ 7316 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7317 " which is not for us: %s\n", AF_INET6, 7318 &ip6h->ip6_dst); 7319 } 7320 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7321 freemsg(hada_mp); 7322 freemsg(first_mp); 7323 return; 7324 } 7325 if (ip_debug > 3) { 7326 /* ip2dbg */ 7327 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7328 AF_INET6, &ip6h->ip6_dst); 7329 } 7330 zoneid = GLOBAL_ZONEID; 7331 goto ipv6forus; 7332 } 7333 7334 ipif = ill->ill_ipif; 7335 7336 /* 7337 * If a packet was received on an interface that is a 6to4 tunnel, 7338 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7339 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7340 * the 6to4 prefix of the address configured on the receiving interface. 7341 * Otherwise, the packet was delivered to this interface in error and 7342 * the packet must be dropped. 7343 */ 7344 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7345 7346 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7347 &ip6h->ip6_dst)) { 7348 if (ip_debug > 2) { 7349 /* ip1dbg */ 7350 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7351 "addressed packet which is not for us: " 7352 "%s\n", AF_INET6, &ip6h->ip6_dst); 7353 } 7354 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7355 freemsg(first_mp); 7356 return; 7357 } 7358 } 7359 7360 /* 7361 * Find an ire that matches destination. For link-local addresses 7362 * we have to match the ill. 7363 * TBD for site local addresses. 
7364 */ 7365 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7366 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7367 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7368 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7369 } else { 7370 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7371 MBLK_GETLABEL(mp), ipst); 7372 7373 if (ire != NULL && ire->ire_stq != NULL && 7374 ire->ire_zoneid != GLOBAL_ZONEID && 7375 ire->ire_zoneid != ALL_ZONES) { 7376 /* 7377 * Should only use IREs that are visible from the 7378 * global zone for forwarding. 7379 */ 7380 ire_refrele(ire); 7381 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7382 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7383 } 7384 } 7385 7386 if (ire == NULL) { 7387 /* 7388 * No matching IRE found. Mark this packet as having 7389 * originated externally. 7390 */ 7391 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7392 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7393 if (!(ill->ill_flags & ILLF_ROUTER)) { 7394 BUMP_MIB(ill->ill_ip_mib, 7395 ipIfStatsInAddrErrors); 7396 } 7397 freemsg(hada_mp); 7398 freemsg(first_mp); 7399 return; 7400 } 7401 if (ip6h->ip6_hops <= 1) { 7402 if (hada_mp != NULL) 7403 goto hada_drop; 7404 /* Sent by forwarding path, and router is global zone */ 7405 icmp_time_exceeded_v6(WR(q), first_mp, 7406 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7407 GLOBAL_ZONEID, ipst); 7408 return; 7409 } 7410 /* 7411 * Per RFC 3513 section 2.5.2, we must not forward packets with 7412 * an unspecified source address. 7413 */ 7414 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7416 freemsg(hada_mp); 7417 freemsg(first_mp); 7418 return; 7419 } 7420 mp->b_prev = (mblk_t *)(uintptr_t) 7421 ill->ill_phyint->phyint_ifindex; 7422 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7423 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7424 GLOBAL_ZONEID, ipst); 7425 return; 7426 } 7427 /* we have a matching IRE */ 7428 if (ire->ire_stq != NULL) { 7429 ill_group_t *ill_group; 7430 ill_group_t *ire_group; 7431 7432 /* 7433 * To be quicker, we may wish not to chase pointers 7434 * (ire->ire_ipif->ipif_ill...) and instead store the 7435 * forwarding policy in the ire. An unfortunate side- 7436 * effect of this would be requiring an ire flush whenever 7437 * the ILLF_ROUTER flag changes. For now, chase pointers 7438 * once and store in the boolean no_forward. 7439 * 7440 * This appears twice to keep it out of the non-forwarding, 7441 * yes-it's-for-us-on-the-right-interface case. 7442 */ 7443 no_forward = ((ill->ill_flags & 7444 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7445 7446 7447 ASSERT(first_mp == mp); 7448 /* 7449 * This ire has a send-to queue - forward the packet. 7450 */ 7451 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7452 freemsg(hada_mp); 7453 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7454 if (no_forward) { 7455 BUMP_MIB(ill->ill_ip_mib, 7456 ipIfStatsInAddrErrors); 7457 } 7458 freemsg(mp); 7459 ire_refrele(ire); 7460 return; 7461 } 7462 /* 7463 * ipIfStatsHCInForwDatagrams should only be increment if there 7464 * will be an attempt to forward the packet, which is why we 7465 * increment after the above condition has been checked. 7466 */ 7467 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7468 if (ip6h->ip6_hops <= 1) { 7469 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7470 /* Sent by forwarding path, and router is global zone */ 7471 icmp_time_exceeded_v6(WR(q), mp, 7472 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7473 GLOBAL_ZONEID, ipst); 7474 ire_refrele(ire); 7475 return; 7476 } 7477 /* 7478 * Per RFC 3513 section 2.5.2, we must not forward packets with 7479 * an unspecified source address. 
7480 */ 7481 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7482 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7483 freemsg(mp); 7484 ire_refrele(ire); 7485 return; 7486 } 7487 7488 if (is_system_labeled()) { 7489 mblk_t *mp1; 7490 7491 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7492 BUMP_MIB(ill->ill_ip_mib, 7493 ipIfStatsForwProhibits); 7494 freemsg(mp); 7495 ire_refrele(ire); 7496 return; 7497 } 7498 /* Size may have changed */ 7499 mp = mp1; 7500 ip6h = (ip6_t *)mp->b_rptr; 7501 pkt_len = msgdsize(mp); 7502 } 7503 7504 if (pkt_len > ire->ire_max_frag) { 7505 int max_frag = ire->ire_max_frag; 7506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7507 /* 7508 * Handle labeled packet resizing. 7509 */ 7510 if (is_system_labeled()) { 7511 max_frag = tsol_pmtu_adjust(mp, max_frag, 7512 pkt_len - old_pkt_len, AF_INET6); 7513 } 7514 7515 /* Sent by forwarding path, and router is global zone */ 7516 icmp_pkt2big_v6(WR(q), mp, max_frag, 7517 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7518 ire_refrele(ire); 7519 return; 7520 } 7521 7522 /* 7523 * Check to see if we're forwarding the packet to a 7524 * different link from which it came. If so, check the 7525 * source and destination addresses since routers must not 7526 * forward any packets with link-local source or 7527 * destination addresses to other links. Otherwise (if 7528 * we're forwarding onto the same link), conditionally send 7529 * a redirect message. 7530 */ 7531 ill_group = ill->ill_group; 7532 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7533 if (ire->ire_rfq != q && (ill_group == NULL || 7534 ill_group != ire_group)) { 7535 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7536 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7537 BUMP_MIB(ill->ill_ip_mib, 7538 ipIfStatsInAddrErrors); 7539 freemsg(mp); 7540 ire_refrele(ire); 7541 return; 7542 } 7543 /* TBD add site-local check at site boundary? 
*/ 7544 } else if (ipst->ips_ipv6_send_redirects) { 7545 in6_addr_t *v6targ; 7546 in6_addr_t gw_addr_v6; 7547 ire_t *src_ire_v6 = NULL; 7548 7549 /* 7550 * Don't send a redirect when forwarding a source 7551 * routed packet. 7552 */ 7553 if (ip_source_routed_v6(ip6h, mp, ipst)) 7554 goto forward; 7555 7556 mutex_enter(&ire->ire_lock); 7557 gw_addr_v6 = ire->ire_gateway_addr_v6; 7558 mutex_exit(&ire->ire_lock); 7559 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7560 v6targ = &gw_addr_v6; 7561 /* 7562 * We won't send redirects to a router 7563 * that doesn't have a link local 7564 * address, but will forward. 7565 */ 7566 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7567 BUMP_MIB(ill->ill_ip_mib, 7568 ipIfStatsInAddrErrors); 7569 goto forward; 7570 } 7571 } else { 7572 v6targ = &ip6h->ip6_dst; 7573 } 7574 7575 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7576 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7577 GLOBAL_ZONEID, 0, NULL, 7578 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7579 ipst); 7580 7581 if (src_ire_v6 != NULL) { 7582 /* 7583 * The source is directly connected. 
7584 */ 7585 mp1 = copymsg(mp); 7586 if (mp1 != NULL) { 7587 icmp_send_redirect_v6(WR(q), 7588 mp1, v6targ, &ip6h->ip6_dst, 7589 ill, B_FALSE); 7590 } 7591 ire_refrele(src_ire_v6); 7592 } 7593 } 7594 7595 forward: 7596 /* Hoplimit verified above */ 7597 ip6h->ip6_hops--; 7598 7599 outill = ire->ire_ipif->ipif_ill; 7600 7601 DTRACE_PROBE4(ip6__forwarding__start, 7602 ill_t *, inill, ill_t *, outill, 7603 ip6_t *, ip6h, mblk_t *, mp); 7604 7605 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7606 ipst->ips_ipv6firewall_forwarding, 7607 inill, outill, ip6h, mp, mp, 0, ipst); 7608 7609 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7610 7611 if (mp != NULL) { 7612 UPDATE_IB_PKT_COUNT(ire); 7613 ire->ire_last_used_time = lbolt; 7614 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7615 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7616 } 7617 IRE_REFRELE(ire); 7618 return; 7619 } 7620 7621 /* 7622 * Need to put on correct queue for reassembly to find it. 7623 * No need to use put() since reassembly has its own locks. 7624 * Note: multicast packets and packets destined to addresses 7625 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7626 * the arriving ill. Unlike the IPv4 case, enabling strict 7627 * destination multihoming will prevent accepting packets 7628 * addressed to an IRE_LOCAL on lo0. 7629 */ 7630 if (ire->ire_rfq != q) { 7631 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7632 == NULL) { 7633 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7634 freemsg(hada_mp); 7635 freemsg(first_mp); 7636 return; 7637 } 7638 if (ire->ire_rfq != NULL) { 7639 q = ire->ire_rfq; 7640 ill = (ill_t *)q->q_ptr; 7641 ASSERT(ill != NULL); 7642 } 7643 } 7644 7645 zoneid = ire->ire_zoneid; 7646 UPDATE_IB_PKT_COUNT(ire); 7647 ire->ire_last_used_time = lbolt; 7648 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7649 ire_refrele(ire); 7650 ire = NULL; 7651 ipv6forus: 7652 /* 7653 * Looks like this packet is for us one way or another. 7654 * This is where we'll process destination headers etc. 7655 */ 7656 for (; ; ) { 7657 switch (nexthdr) { 7658 case IPPROTO_TCP: { 7659 uint16_t *up; 7660 uint32_t sum; 7661 int offset; 7662 7663 hdr_len = pkt_len - remlen; 7664 7665 if (hada_mp != NULL) { 7666 ip0dbg(("tcp hada drop\n")); 7667 goto hada_drop; 7668 } 7669 7670 7671 /* TCP needs all of the TCP header */ 7672 if (remlen < TCP_MIN_HEADER_LENGTH) 7673 goto pkt_too_short; 7674 if (mp->b_cont != NULL && 7675 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7676 if (!pullupmsg(mp, 7677 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7678 BUMP_MIB(ill->ill_ip_mib, 7679 ipIfStatsInDiscards); 7680 freemsg(first_mp); 7681 return; 7682 } 7683 hck_flags = 0; 7684 ip6h = (ip6_t *)mp->b_rptr; 7685 whereptr = (uint8_t *)ip6h + hdr_len; 7686 } 7687 /* 7688 * Extract the offset field from the TCP header. 7689 */ 7690 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7691 if (offset != 5) { 7692 if (offset < 5) { 7693 ip1dbg(("ip_rput_data_v6: short " 7694 "TCP data offset")); 7695 BUMP_MIB(ill->ill_ip_mib, 7696 ipIfStatsInDiscards); 7697 freemsg(first_mp); 7698 return; 7699 } 7700 /* 7701 * There must be TCP options. 7702 * Make sure we can grab them. 7703 */ 7704 offset <<= 2; 7705 if (remlen < offset) 7706 goto pkt_too_short; 7707 if (mp->b_cont != NULL && 7708 whereptr + offset > mp->b_wptr) { 7709 if (!pullupmsg(mp, 7710 hdr_len + offset)) { 7711 BUMP_MIB(ill->ill_ip_mib, 7712 ipIfStatsInDiscards); 7713 freemsg(first_mp); 7714 return; 7715 } 7716 hck_flags = 0; 7717 ip6h = (ip6_t *)mp->b_rptr; 7718 whereptr = (uint8_t *)ip6h + hdr_len; 7719 } 7720 } 7721 7722 up = (uint16_t *)&ip6h->ip6_src; 7723 /* 7724 * TCP checksum calculation. 
First sum up the 7725 * pseudo-header fields: 7726 * - Source IPv6 address 7727 * - Destination IPv6 address 7728 * - TCP payload length 7729 * - TCP protocol ID 7730 */ 7731 sum = htons(IPPROTO_TCP + remlen) + 7732 up[0] + up[1] + up[2] + up[3] + 7733 up[4] + up[5] + up[6] + up[7] + 7734 up[8] + up[9] + up[10] + up[11] + 7735 up[12] + up[13] + up[14] + up[15]; 7736 7737 /* Fold initial sum */ 7738 sum = (sum & 0xffff) + (sum >> 16); 7739 7740 mp1 = mp->b_cont; 7741 7742 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7743 IP6_STAT(ipst, ip6_in_sw_cksum); 7744 7745 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7746 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7747 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7748 mp, mp1, cksum_err); 7749 7750 if (cksum_err) { 7751 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7752 7753 if (hck_flags & HCK_FULLCKSUM) { 7754 IP6_STAT(ipst, 7755 ip6_tcp_in_full_hw_cksum_err); 7756 } else if (hck_flags & HCK_PARTIALCKSUM) { 7757 IP6_STAT(ipst, 7758 ip6_tcp_in_part_hw_cksum_err); 7759 } else { 7760 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7761 } 7762 freemsg(first_mp); 7763 return; 7764 } 7765 tcp_fanout: 7766 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7767 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7768 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7769 return; 7770 } 7771 case IPPROTO_SCTP: 7772 { 7773 sctp_hdr_t *sctph; 7774 uint32_t calcsum, pktsum; 7775 uint_t hdr_len = pkt_len - remlen; 7776 sctp_stack_t *sctps; 7777 7778 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7779 7780 /* SCTP needs all of the SCTP header */ 7781 if (remlen < sizeof (*sctph)) { 7782 goto pkt_too_short; 7783 } 7784 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7785 ASSERT(mp->b_cont != NULL); 7786 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7787 BUMP_MIB(ill->ill_ip_mib, 7788 ipIfStatsInDiscards); 7789 freemsg(mp); 7790 return; 7791 } 7792 ip6h = (ip6_t *)mp->b_rptr; 7793 whereptr = (uint8_t *)ip6h + hdr_len; 7794 } 7795 7796 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7797 /* checksum */ 7798 pktsum = sctph->sh_chksum; 7799 sctph->sh_chksum = 0; 7800 calcsum = sctp_cksum(mp, hdr_len); 7801 if (calcsum != pktsum) { 7802 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7803 freemsg(mp); 7804 return; 7805 } 7806 sctph->sh_chksum = pktsum; 7807 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7808 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7809 ports, zoneid, mp, sctps)) == NULL) { 7810 ip_fanout_sctp_raw(first_mp, ill, 7811 (ipha_t *)ip6h, B_FALSE, ports, 7812 mctl_present, 7813 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7814 B_TRUE, zoneid); 7815 return; 7816 } 7817 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7818 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7819 B_FALSE, mctl_present); 7820 return; 7821 } 7822 case IPPROTO_UDP: { 7823 uint16_t *up; 7824 uint32_t sum; 7825 7826 hdr_len = pkt_len - remlen; 7827 7828 if (hada_mp != NULL) { 7829 ip0dbg(("udp hada drop\n")); 7830 goto hada_drop; 7831 } 7832 7833 /* Verify that at least the ports are present */ 7834 if (remlen < UDPH_SIZE) 7835 goto pkt_too_short; 7836 if (mp->b_cont != NULL && 7837 whereptr + UDPH_SIZE > mp->b_wptr) { 7838 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7839 BUMP_MIB(ill->ill_ip_mib, 7840 ipIfStatsInDiscards); 7841 freemsg(first_mp); 7842 return; 7843 } 7844 hck_flags = 0; 7845 ip6h = (ip6_t *)mp->b_rptr; 7846 whereptr = (uint8_t *)ip6h + hdr_len; 7847 } 7848 7849 /* 7850 * Before going through the regular checksum 7851 * calculation, make sure the received checksum 7852 * is non-zero. RFC 2460 says, a 0x0000 checksum 7853 * in a UDP packet (within IPv6 packet) is invalid 7854 * and should be replaced by 0xffff. This makes 7855 * sense as regular checksum calculation will 7856 * pass for both the cases i.e. 0x0000 and 0xffff. 7857 * Removing one of the case makes error detection 7858 * stronger. 
7859 */ 7860 7861 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7862 /* 0x0000 checksum is invalid */ 7863 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7864 "checksum value 0x0000\n")); 7865 BUMP_MIB(ill->ill_ip_mib, 7866 udpIfStatsInCksumErrs); 7867 freemsg(first_mp); 7868 return; 7869 } 7870 7871 up = (uint16_t *)&ip6h->ip6_src; 7872 7873 /* 7874 * UDP checksum calculation. First sum up the 7875 * pseudo-header fields: 7876 * - Source IPv6 address 7877 * - Destination IPv6 address 7878 * - UDP payload length 7879 * - UDP protocol ID 7880 */ 7881 7882 sum = htons(IPPROTO_UDP + remlen) + 7883 up[0] + up[1] + up[2] + up[3] + 7884 up[4] + up[5] + up[6] + up[7] + 7885 up[8] + up[9] + up[10] + up[11] + 7886 up[12] + up[13] + up[14] + up[15]; 7887 7888 /* Fold initial sum */ 7889 sum = (sum & 0xffff) + (sum >> 16); 7890 7891 if (reass_hck_flags != 0) { 7892 hck_flags = reass_hck_flags; 7893 7894 IP_CKSUM_RECV_REASS(hck_flags, 7895 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7896 sum, reass_sum, cksum_err); 7897 } else { 7898 mp1 = mp->b_cont; 7899 7900 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7901 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7902 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7903 mp, mp1, cksum_err); 7904 } 7905 7906 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7907 IP6_STAT(ipst, ip6_in_sw_cksum); 7908 7909 if (cksum_err) { 7910 BUMP_MIB(ill->ill_ip_mib, 7911 udpIfStatsInCksumErrs); 7912 7913 if (hck_flags & HCK_FULLCKSUM) 7914 IP6_STAT(ipst, 7915 ip6_udp_in_full_hw_cksum_err); 7916 else if (hck_flags & HCK_PARTIALCKSUM) 7917 IP6_STAT(ipst, 7918 ip6_udp_in_part_hw_cksum_err); 7919 else 7920 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7921 7922 freemsg(first_mp); 7923 return; 7924 } 7925 goto udp_fanout; 7926 } 7927 case IPPROTO_ICMPV6: { 7928 uint16_t *up; 7929 uint32_t sum; 7930 uint_t hdr_len = pkt_len - remlen; 7931 7932 if (hada_mp != NULL) { 7933 ip0dbg(("icmp hada drop\n")); 7934 goto hada_drop; 7935 } 7936 7937 up = (uint16_t 
*)&ip6h->ip6_src; 7938 sum = htons(IPPROTO_ICMPV6 + remlen) + 7939 up[0] + up[1] + up[2] + up[3] + 7940 up[4] + up[5] + up[6] + up[7] + 7941 up[8] + up[9] + up[10] + up[11] + 7942 up[12] + up[13] + up[14] + up[15]; 7943 sum = (sum & 0xffff) + (sum >> 16); 7944 sum = IP_CSUM(mp, hdr_len, sum); 7945 if (sum != 0) { 7946 /* IPv6 ICMP checksum failed */ 7947 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7948 "failed %x\n", 7949 sum)); 7950 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7951 BUMP_MIB(ill->ill_icmp6_mib, 7952 ipv6IfIcmpInErrors); 7953 freemsg(first_mp); 7954 return; 7955 } 7956 7957 icmp_fanout: 7958 /* Check variable for testing applications */ 7959 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7960 freemsg(first_mp); 7961 return; 7962 } 7963 /* 7964 * Assume that there is always at least one conn for 7965 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7966 * where there is no conn. 7967 */ 7968 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7969 ASSERT(!IS_LOOPBACK((ill))); 7970 /* 7971 * In the multicast case, applications may have 7972 * joined the group from different zones, so we 7973 * need to deliver the packet to each of them. 7974 * Loop through the multicast memberships 7975 * structures (ilm) on the receive ill and send 7976 * a copy of the packet up each matching one. 
7977 */ 7978 ILM_WALKER_HOLD(ill); 7979 for (ilm = ill->ill_ilm; ilm != NULL; 7980 ilm = ilm->ilm_next) { 7981 if (ilm->ilm_flags & ILM_DELETED) 7982 continue; 7983 if (!IN6_ARE_ADDR_EQUAL( 7984 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7985 continue; 7986 if (!ipif_lookup_zoneid(ill, 7987 ilm->ilm_zoneid, IPIF_UP, NULL)) 7988 continue; 7989 7990 first_mp1 = ip_copymsg(first_mp); 7991 if (first_mp1 == NULL) 7992 continue; 7993 icmp_inbound_v6(q, first_mp1, ill, 7994 hdr_len, mctl_present, 0, 7995 ilm->ilm_zoneid, dl_mp); 7996 } 7997 ILM_WALKER_RELE(ill); 7998 } else { 7999 first_mp1 = ip_copymsg(first_mp); 8000 if (first_mp1 != NULL) 8001 icmp_inbound_v6(q, first_mp1, ill, 8002 hdr_len, mctl_present, 0, zoneid, 8003 dl_mp); 8004 } 8005 } 8006 /* FALLTHRU */ 8007 default: { 8008 /* 8009 * Handle protocols with which IPv6 is less intimate. 8010 */ 8011 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8012 8013 if (hada_mp != NULL) { 8014 ip0dbg(("default hada drop\n")); 8015 goto hada_drop; 8016 } 8017 8018 /* 8019 * Enable sending ICMP for "Unknown" nexthdr 8020 * case. i.e. where we did not FALLTHRU from 8021 * IPPROTO_ICMPV6 processing case above. 8022 * If we did FALLTHRU, then the packet has already been 8023 * processed for IPPF, don't process it again in 8024 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8025 * flags 8026 */ 8027 if (nexthdr != IPPROTO_ICMPV6) 8028 proto_flags |= IP_FF_SEND_ICMP; 8029 else 8030 proto_flags |= IP6_NO_IPPOLICY; 8031 8032 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8033 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8034 mctl_present, zoneid); 8035 return; 8036 } 8037 8038 case IPPROTO_DSTOPTS: { 8039 uint_t ehdrlen; 8040 uint8_t *optptr; 8041 ip6_dest_t *desthdr; 8042 8043 /* Check if AH is present. 
*/ 8044 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8045 hada_mp, zoneid)) { 8046 ip0dbg(("dst early hada drop\n")); 8047 return; 8048 } 8049 8050 /* 8051 * Reinitialize pointers, as ipsec_early_ah_v6() does 8052 * complete pullups. We don't have to do more pullups 8053 * as a result. 8054 */ 8055 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8056 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8057 ip6h = (ip6_t *)mp->b_rptr; 8058 8059 if (remlen < MIN_EHDR_LEN) 8060 goto pkt_too_short; 8061 8062 desthdr = (ip6_dest_t *)whereptr; 8063 nexthdr = desthdr->ip6d_nxt; 8064 prev_nexthdr_offset = (uint_t)(whereptr - 8065 (uint8_t *)ip6h); 8066 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8067 if (remlen < ehdrlen) 8068 goto pkt_too_short; 8069 optptr = whereptr + 2; 8070 /* 8071 * Note: XXX This code does not seem to make 8072 * distinction between Destination Options Header 8073 * being before/after Routing Header which can 8074 * happen if we are at the end of source route. 8075 * This may become significant in future. 8076 * (No real significant Destination Options are 8077 * defined/implemented yet ). 8078 */ 8079 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8080 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8081 case -1: 8082 /* 8083 * Packet has been consumed and any needed 8084 * ICMP errors sent. 
8085 */ 8086 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8087 freemsg(hada_mp); 8088 return; 8089 case 0: 8090 /* No action needed continue */ 8091 break; 8092 case 1: 8093 /* 8094 * Unnexpected return value 8095 * (Router alert is a Hop-by-Hop option) 8096 */ 8097 #ifdef DEBUG 8098 panic("ip_rput_data_v6: router " 8099 "alert hbh opt indication in dest opt"); 8100 /*NOTREACHED*/ 8101 #else 8102 freemsg(hada_mp); 8103 freemsg(first_mp); 8104 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8105 return; 8106 #endif 8107 } 8108 used = ehdrlen; 8109 break; 8110 } 8111 case IPPROTO_FRAGMENT: { 8112 ip6_frag_t *fraghdr; 8113 size_t no_frag_hdr_len; 8114 8115 if (hada_mp != NULL) { 8116 ip0dbg(("frag hada drop\n")); 8117 goto hada_drop; 8118 } 8119 8120 ASSERT(first_mp == mp); 8121 if (remlen < sizeof (ip6_frag_t)) 8122 goto pkt_too_short; 8123 8124 if (mp->b_cont != NULL && 8125 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8126 if (!pullupmsg(mp, 8127 pkt_len - remlen + sizeof (ip6_frag_t))) { 8128 BUMP_MIB(ill->ill_ip_mib, 8129 ipIfStatsInDiscards); 8130 freemsg(mp); 8131 return; 8132 } 8133 hck_flags = 0; 8134 ip6h = (ip6_t *)mp->b_rptr; 8135 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8136 } 8137 8138 fraghdr = (ip6_frag_t *)whereptr; 8139 used = (uint_t)sizeof (ip6_frag_t); 8140 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8141 8142 /* 8143 * Invoke the CGTP (multirouting) filtering module to 8144 * process the incoming packet. Packets identified as 8145 * duplicates must be discarded. Filtering is active 8146 * only if the the ip_cgtp_filter ndd variable is 8147 * non-zero. 
8148 */ 8149 if (ipst->ips_ip_cgtp_filter && 8150 ipst->ips_ip_cgtp_filter_ops != NULL) { 8151 int cgtp_flt_pkt; 8152 netstackid_t stackid; 8153 8154 stackid = ipst->ips_netstack->netstack_stackid; 8155 8156 cgtp_flt_pkt = 8157 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8158 stackid, inill->ill_phyint->phyint_ifindex, 8159 ip6h, fraghdr); 8160 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8161 freemsg(mp); 8162 return; 8163 } 8164 } 8165 8166 /* Restore the flags */ 8167 DB_CKSUMFLAGS(mp) = hck_flags; 8168 8169 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8170 remlen - used, &prev_nexthdr_offset, 8171 &reass_sum, &reass_hck_flags); 8172 if (mp == NULL) { 8173 /* Reassembly is still pending */ 8174 return; 8175 } 8176 /* The first mblk are the headers before the frag hdr */ 8177 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8178 8179 first_mp = mp; /* mp has most likely changed! */ 8180 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8181 ip6h = (ip6_t *)mp->b_rptr; 8182 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8183 whereptr = mp->b_rptr + no_frag_hdr_len; 8184 remlen = ntohs(ip6h->ip6_plen) + 8185 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8186 pkt_len = msgdsize(mp); 8187 used = 0; 8188 break; 8189 } 8190 case IPPROTO_HOPOPTS: { 8191 if (hada_mp != NULL) { 8192 ip0dbg(("hop hada drop\n")); 8193 goto hada_drop; 8194 } 8195 /* 8196 * Illegal header sequence. 8197 * (Hop-by-hop headers are processed above 8198 * and required to immediately follow IPv6 header) 8199 */ 8200 icmp_param_problem_v6(WR(q), first_mp, 8201 ICMP6_PARAMPROB_NEXTHEADER, 8202 prev_nexthdr_offset, 8203 B_FALSE, B_FALSE, zoneid, ipst); 8204 return; 8205 } 8206 case IPPROTO_ROUTING: { 8207 uint_t ehdrlen; 8208 ip6_rthdr_t *rthdr; 8209 8210 /* Check if AH is present. 
*/ 8211 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8212 hada_mp, zoneid)) { 8213 ip0dbg(("routing hada drop\n")); 8214 return; 8215 } 8216 8217 /* 8218 * Reinitialize pointers, as ipsec_early_ah_v6() does 8219 * complete pullups. We don't have to do more pullups 8220 * as a result. 8221 */ 8222 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8223 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8224 ip6h = (ip6_t *)mp->b_rptr; 8225 8226 if (remlen < MIN_EHDR_LEN) 8227 goto pkt_too_short; 8228 rthdr = (ip6_rthdr_t *)whereptr; 8229 nexthdr = rthdr->ip6r_nxt; 8230 prev_nexthdr_offset = (uint_t)(whereptr - 8231 (uint8_t *)ip6h); 8232 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8233 if (remlen < ehdrlen) 8234 goto pkt_too_short; 8235 if (rthdr->ip6r_segleft != 0) { 8236 /* Not end of source route */ 8237 if (ll_multicast) { 8238 BUMP_MIB(ill->ill_ip_mib, 8239 ipIfStatsForwProhibits); 8240 freemsg(hada_mp); 8241 freemsg(mp); 8242 return; 8243 } 8244 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8245 flags, hada_mp, dl_mp); 8246 return; 8247 } 8248 used = ehdrlen; 8249 break; 8250 } 8251 case IPPROTO_AH: 8252 case IPPROTO_ESP: { 8253 /* 8254 * Fast path for AH/ESP. If this is the first time 8255 * we are sending a datagram to AH/ESP, allocate 8256 * a IPSEC_IN message and prepend it. Otherwise, 8257 * just fanout. 8258 */ 8259 8260 ipsec_in_t *ii; 8261 int ipsec_rc; 8262 ipsec_stack_t *ipss; 8263 8264 ipss = ipst->ips_netstack->netstack_ipsec; 8265 if (!mctl_present) { 8266 ASSERT(first_mp == mp); 8267 first_mp = ipsec_in_alloc(B_FALSE, 8268 ipst->ips_netstack); 8269 if (first_mp == NULL) { 8270 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8271 "allocation failure.\n")); 8272 BUMP_MIB(ill->ill_ip_mib, 8273 ipIfStatsInDiscards); 8274 freemsg(mp); 8275 return; 8276 } 8277 /* 8278 * Store the ill_index so that when we come back 8279 * from IPSEC we ride on the same queue. 
8280 */ 8281 ii = (ipsec_in_t *)first_mp->b_rptr; 8282 ii->ipsec_in_ill_index = 8283 ill->ill_phyint->phyint_ifindex; 8284 ii->ipsec_in_rill_index = 8285 ii->ipsec_in_ill_index; 8286 first_mp->b_cont = mp; 8287 /* 8288 * Cache hardware acceleration info. 8289 */ 8290 if (hada_mp != NULL) { 8291 IPSECHW_DEBUG(IPSECHW_PKT, 8292 ("ip_rput_data_v6: " 8293 "caching data attr.\n")); 8294 ii->ipsec_in_accelerated = B_TRUE; 8295 ii->ipsec_in_da = hada_mp; 8296 hada_mp = NULL; 8297 } 8298 } else { 8299 ii = (ipsec_in_t *)first_mp->b_rptr; 8300 } 8301 8302 if (!ipsec_loaded(ipss)) { 8303 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8304 zoneid, ipst); 8305 return; 8306 } 8307 8308 /* select inbound SA and have IPsec process the pkt */ 8309 if (nexthdr == IPPROTO_ESP) { 8310 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8311 ipst->ips_netstack); 8312 if (esph == NULL) 8313 return; 8314 ASSERT(ii->ipsec_in_esp_sa != NULL); 8315 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8316 NULL); 8317 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8318 first_mp, esph); 8319 } else { 8320 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8321 ipst->ips_netstack); 8322 if (ah == NULL) 8323 return; 8324 ASSERT(ii->ipsec_in_ah_sa != NULL); 8325 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8326 NULL); 8327 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8328 first_mp, ah); 8329 } 8330 8331 switch (ipsec_rc) { 8332 case IPSEC_STATUS_SUCCESS: 8333 break; 8334 case IPSEC_STATUS_FAILED: 8335 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8336 /* FALLTHRU */ 8337 case IPSEC_STATUS_PENDING: 8338 return; 8339 } 8340 /* we're done with IPsec processing, send it up */ 8341 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8342 return; 8343 } 8344 case IPPROTO_NONE: 8345 /* All processing is done. Count as "delivered". 
*/ 8346 freemsg(hada_mp); 8347 freemsg(first_mp); 8348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8349 return; 8350 } 8351 whereptr += used; 8352 ASSERT(remlen >= used); 8353 remlen -= used; 8354 } 8355 /* NOTREACHED */ 8356 8357 pkt_too_short: 8358 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8359 ip6_len, pkt_len, remlen)); 8360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8361 freemsg(hada_mp); 8362 freemsg(first_mp); 8363 return; 8364 udp_fanout: 8365 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8366 connp = NULL; 8367 } else { 8368 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8369 ipst); 8370 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8371 CONN_DEC_REF(connp); 8372 connp = NULL; 8373 } 8374 } 8375 8376 if (connp == NULL) { 8377 uint32_t ports; 8378 8379 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8380 UDP_PORTS_OFFSET); 8381 IP6_STAT(ipst, ip6_udp_slow_path); 8382 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8383 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8384 zoneid); 8385 return; 8386 } 8387 8388 if (CONN_UDP_FLOWCTLD(connp)) { 8389 freemsg(first_mp); 8390 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8391 CONN_DEC_REF(connp); 8392 return; 8393 } 8394 8395 /* Initiate IPPF processing */ 8396 if (IP6_IN_IPP(flags, ipst)) { 8397 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8398 if (mp == NULL) { 8399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8400 CONN_DEC_REF(connp); 8401 return; 8402 } 8403 } 8404 8405 if (connp->conn_ip_recvpktinfo || 8406 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8407 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8408 if (mp == NULL) { 8409 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8410 CONN_DEC_REF(connp); 8411 return; 8412 } 8413 } 8414 8415 IP6_STAT(ipst, ip6_udp_fast_path); 8416 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8417 8418 /* Send it upstream */ 8419 (connp->conn_recv)(connp, mp, NULL); 8420 8421 
CONN_DEC_REF(connp); 8422 freemsg(hada_mp); 8423 return; 8424 8425 hada_drop: 8426 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8427 /* IPsec kstats: bump counter here */ 8428 freemsg(hada_mp); 8429 freemsg(first_mp); 8430 } 8431 8432 /* 8433 * Reassemble fragment. 8434 * When it returns a completed message the first mblk will only contain 8435 * the headers prior to the fragment header. 8436 * 8437 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8438 * of the preceding header. This is needed to patch the previous header's 8439 * nexthdr field when reassembly completes. 8440 */ 8441 static mblk_t * 8442 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8443 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8444 uint32_t *cksum_val, uint16_t *cksum_flags) 8445 { 8446 ill_t *ill = (ill_t *)q->q_ptr; 8447 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8448 uint16_t offset; 8449 boolean_t more_frags; 8450 uint8_t nexthdr = fraghdr->ip6f_nxt; 8451 in6_addr_t *v6dst_ptr; 8452 in6_addr_t *v6src_ptr; 8453 uint_t end; 8454 uint_t hdr_length; 8455 size_t count; 8456 ipf_t *ipf; 8457 ipf_t **ipfp; 8458 ipfb_t *ipfb; 8459 mblk_t *mp1; 8460 uint8_t ecn_info = 0; 8461 size_t msg_len; 8462 mblk_t *tail_mp; 8463 mblk_t *t_mp; 8464 boolean_t pruned = B_FALSE; 8465 uint32_t sum_val; 8466 uint16_t sum_flags; 8467 ip_stack_t *ipst = ill->ill_ipst; 8468 8469 if (cksum_val != NULL) 8470 *cksum_val = 0; 8471 if (cksum_flags != NULL) 8472 *cksum_flags = 0; 8473 8474 /* 8475 * We utilize hardware computed checksum info only for UDP since 8476 * IP fragmentation is a normal occurence for the protocol. In 8477 * addition, checksum offload support for IP fragments carrying 8478 * UDP payload is commonly implemented across network adapters. 
8479 */ 8480 ASSERT(ill != NULL); 8481 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8482 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8483 mblk_t *mp1 = mp->b_cont; 8484 int32_t len; 8485 8486 /* Record checksum information from the packet */ 8487 sum_val = (uint32_t)DB_CKSUM16(mp); 8488 sum_flags = DB_CKSUMFLAGS(mp); 8489 8490 /* fragmented payload offset from beginning of mblk */ 8491 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8492 8493 if ((sum_flags & HCK_PARTIALCKSUM) && 8494 (mp1 == NULL || mp1->b_cont == NULL) && 8495 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8496 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8497 uint32_t adj; 8498 /* 8499 * Partial checksum has been calculated by hardware 8500 * and attached to the packet; in addition, any 8501 * prepended extraneous data is even byte aligned. 8502 * If any such data exists, we adjust the checksum; 8503 * this would also handle any postpended data. 8504 */ 8505 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8506 mp, mp1, len, adj); 8507 8508 /* One's complement subtract extraneous checksum */ 8509 if (adj >= sum_val) 8510 sum_val = ~(adj - sum_val) & 0xFFFF; 8511 else 8512 sum_val -= adj; 8513 } 8514 } else { 8515 sum_val = 0; 8516 sum_flags = 0; 8517 } 8518 8519 /* Clear hardware checksumming flag */ 8520 DB_CKSUMFLAGS(mp) = 0; 8521 8522 /* 8523 * Note: Fragment offset in header is in 8-octet units. 8524 * Clearing least significant 3 bits not only extracts 8525 * it but also gets it in units of octets. 8526 */ 8527 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8528 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8529 8530 /* 8531 * Is the more frags flag on and the payload length not a multiple 8532 * of eight? 
8533 */ 8534 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8535 zoneid_t zoneid; 8536 8537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8538 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8539 if (zoneid == ALL_ZONES) { 8540 freemsg(mp); 8541 return (NULL); 8542 } 8543 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8544 (uint32_t)((char *)&ip6h->ip6_plen - 8545 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8546 return (NULL); 8547 } 8548 8549 v6src_ptr = &ip6h->ip6_src; 8550 v6dst_ptr = &ip6h->ip6_dst; 8551 end = remlen; 8552 8553 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8554 end += offset; 8555 8556 /* 8557 * Would fragment cause reassembled packet to have a payload length 8558 * greater than IP_MAXPACKET - the max payload size? 8559 */ 8560 if (end > IP_MAXPACKET) { 8561 zoneid_t zoneid; 8562 8563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8564 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8565 if (zoneid == ALL_ZONES) { 8566 freemsg(mp); 8567 return (NULL); 8568 } 8569 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8570 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8571 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8572 return (NULL); 8573 } 8574 8575 /* 8576 * This packet just has one fragment. Reassembly not 8577 * needed. 8578 */ 8579 if (!more_frags && offset == 0) { 8580 goto reass_done; 8581 } 8582 8583 /* 8584 * Drop the fragmented as early as possible, if 8585 * we don't have resource(s) to re-assemble. 8586 */ 8587 if (ipst->ips_ip_reass_queue_bytes == 0) { 8588 freemsg(mp); 8589 return (NULL); 8590 } 8591 8592 /* Record the ECN field info. */ 8593 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8594 /* 8595 * If this is not the first fragment, dump the unfragmentable 8596 * portion of the packet. 8597 */ 8598 if (offset) 8599 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8600 8601 /* 8602 * Fragmentation reassembly. 
Each ILL has a hash table for 8603 * queueing packets undergoing reassembly for all IPIFs 8604 * associated with the ILL. The hash is based on the packet 8605 * IP ident field. The ILL frag hash table was allocated 8606 * as a timer block at the time the ILL was created. Whenever 8607 * there is anything on the reassembly queue, the timer will 8608 * be running. 8609 */ 8610 msg_len = MBLKSIZE(mp); 8611 tail_mp = mp; 8612 while (tail_mp->b_cont != NULL) { 8613 tail_mp = tail_mp->b_cont; 8614 msg_len += MBLKSIZE(tail_mp); 8615 } 8616 /* 8617 * If the reassembly list for this ILL will get too big 8618 * prune it. 8619 */ 8620 8621 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8622 ipst->ips_ip_reass_queue_bytes) { 8623 ill_frag_prune(ill, 8624 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8625 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8626 pruned = B_TRUE; 8627 } 8628 8629 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8630 mutex_enter(&ipfb->ipfb_lock); 8631 8632 ipfp = &ipfb->ipfb_ipf; 8633 /* Try to find an existing fragment queue for this packet. */ 8634 for (;;) { 8635 ipf = ipfp[0]; 8636 if (ipf) { 8637 /* 8638 * It has to match on ident, source address, and 8639 * dest address. 8640 */ 8641 if (ipf->ipf_ident == ident && 8642 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8643 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8644 8645 /* 8646 * If we have received too many 8647 * duplicate fragments for this packet 8648 * free it. 8649 */ 8650 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8651 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8652 freemsg(mp); 8653 mutex_exit(&ipfb->ipfb_lock); 8654 return (NULL); 8655 } 8656 8657 break; 8658 } 8659 ipfp = &ipf->ipf_hash_next; 8660 continue; 8661 } 8662 8663 8664 /* 8665 * If we pruned the list, do we want to store this new 8666 * fragment?. We apply an optimization here based on the 8667 * fact that most fragments will be received in order. 
8668 * So if the offset of this incoming fragment is zero, 8669 * it is the first fragment of a new packet. We will 8670 * keep it. Otherwise drop the fragment, as we have 8671 * probably pruned the packet already (since the 8672 * packet cannot be found). 8673 */ 8674 8675 if (pruned && offset != 0) { 8676 mutex_exit(&ipfb->ipfb_lock); 8677 freemsg(mp); 8678 return (NULL); 8679 } 8680 8681 /* New guy. Allocate a frag message. */ 8682 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8683 if (!mp1) { 8684 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8685 freemsg(mp); 8686 partial_reass_done: 8687 mutex_exit(&ipfb->ipfb_lock); 8688 return (NULL); 8689 } 8690 8691 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8692 /* 8693 * Too many fragmented packets in this hash bucket. 8694 * Free the oldest. 8695 */ 8696 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8697 } 8698 8699 mp1->b_cont = mp; 8700 8701 /* Initialize the fragment header. */ 8702 ipf = (ipf_t *)mp1->b_rptr; 8703 ipf->ipf_mp = mp1; 8704 ipf->ipf_ptphn = ipfp; 8705 ipfp[0] = ipf; 8706 ipf->ipf_hash_next = NULL; 8707 ipf->ipf_ident = ident; 8708 ipf->ipf_v6src = *v6src_ptr; 8709 ipf->ipf_v6dst = *v6dst_ptr; 8710 /* Record reassembly start time. 
*/ 8711 ipf->ipf_timestamp = gethrestime_sec(); 8712 /* Record ipf generation and account for frag header */ 8713 ipf->ipf_gen = ill->ill_ipf_gen++; 8714 ipf->ipf_count = MBLKSIZE(mp1); 8715 ipf->ipf_protocol = nexthdr; 8716 ipf->ipf_nf_hdr_len = 0; 8717 ipf->ipf_prev_nexthdr_offset = 0; 8718 ipf->ipf_last_frag_seen = B_FALSE; 8719 ipf->ipf_ecn = ecn_info; 8720 ipf->ipf_num_dups = 0; 8721 ipfb->ipfb_frag_pkts++; 8722 ipf->ipf_checksum = 0; 8723 ipf->ipf_checksum_flags = 0; 8724 8725 /* Store checksum value in fragment header */ 8726 if (sum_flags != 0) { 8727 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8728 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8729 ipf->ipf_checksum = sum_val; 8730 ipf->ipf_checksum_flags = sum_flags; 8731 } 8732 8733 /* 8734 * We handle reassembly two ways. In the easy case, 8735 * where all the fragments show up in order, we do 8736 * minimal bookkeeping, and just clip new pieces on 8737 * the end. If we ever see a hole, then we go off 8738 * to ip_reassemble which has to mark the pieces and 8739 * keep track of the number of holes, etc. Obviously, 8740 * the point of having both mechanisms is so we can 8741 * handle the easy case as efficiently as possible. 8742 */ 8743 if (offset == 0) { 8744 /* Easy case, in-order reassembly so far. */ 8745 /* Update the byte count */ 8746 ipf->ipf_count += msg_len; 8747 ipf->ipf_tail_mp = tail_mp; 8748 /* 8749 * Keep track of next expected offset in 8750 * ipf_end. 8751 */ 8752 ipf->ipf_end = end; 8753 ipf->ipf_nf_hdr_len = hdr_length; 8754 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8755 } else { 8756 /* Hard case, hole at the beginning. */ 8757 ipf->ipf_tail_mp = NULL; 8758 /* 8759 * ipf_end == 0 means that we have given up 8760 * on easy reassembly. 8761 */ 8762 ipf->ipf_end = 0; 8763 8764 /* Forget checksum offload from now on */ 8765 ipf->ipf_checksum_flags = 0; 8766 8767 /* 8768 * ipf_hole_cnt is set by ip_reassemble. 8769 * ipf_count is updated by ip_reassemble. 
8770 * No need to check for return value here 8771 * as we don't expect reassembly to complete or 8772 * fail for the first fragment itself. 8773 */ 8774 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8775 msg_len); 8776 } 8777 /* Update per ipfb and ill byte counts */ 8778 ipfb->ipfb_count += ipf->ipf_count; 8779 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8780 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8781 /* If the frag timer wasn't already going, start it. */ 8782 mutex_enter(&ill->ill_lock); 8783 ill_frag_timer_start(ill); 8784 mutex_exit(&ill->ill_lock); 8785 goto partial_reass_done; 8786 } 8787 8788 /* 8789 * If the packet's flag has changed (it could be coming up 8790 * from an interface different than the previous, therefore 8791 * possibly different checksum capability), then forget about 8792 * any stored checksum states. Otherwise add the value to 8793 * the existing one stored in the fragment header. 8794 */ 8795 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8796 sum_val += ipf->ipf_checksum; 8797 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8798 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8799 ipf->ipf_checksum = sum_val; 8800 } else if (ipf->ipf_checksum_flags != 0) { 8801 /* Forget checksum offload from now on */ 8802 ipf->ipf_checksum_flags = 0; 8803 } 8804 8805 /* 8806 * We have a new piece of a datagram which is already being 8807 * reassembled. Update the ECN info if all IP fragments 8808 * are ECN capable. If there is one which is not, clear 8809 * all the info. If there is at least one which has CE 8810 * code point, IP needs to report that up to transport. 
8811 */ 8812 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8813 if (ecn_info == IPH_ECN_CE) 8814 ipf->ipf_ecn = IPH_ECN_CE; 8815 } else { 8816 ipf->ipf_ecn = IPH_ECN_NECT; 8817 } 8818 8819 if (offset && ipf->ipf_end == offset) { 8820 /* The new fragment fits at the end */ 8821 ipf->ipf_tail_mp->b_cont = mp; 8822 /* Update the byte count */ 8823 ipf->ipf_count += msg_len; 8824 /* Update per ipfb and ill byte counts */ 8825 ipfb->ipfb_count += msg_len; 8826 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8827 atomic_add_32(&ill->ill_frag_count, msg_len); 8828 if (more_frags) { 8829 /* More to come. */ 8830 ipf->ipf_end = end; 8831 ipf->ipf_tail_mp = tail_mp; 8832 goto partial_reass_done; 8833 } 8834 } else { 8835 /* 8836 * Go do the hard cases. 8837 * Call ip_reassemble(). 8838 */ 8839 int ret; 8840 8841 if (offset == 0) { 8842 if (ipf->ipf_prev_nexthdr_offset == 0) { 8843 ipf->ipf_nf_hdr_len = hdr_length; 8844 ipf->ipf_prev_nexthdr_offset = 8845 *prev_nexthdr_offset; 8846 } 8847 } 8848 /* Save current byte count */ 8849 count = ipf->ipf_count; 8850 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8851 8852 /* Count of bytes added and subtracted (freeb()ed) */ 8853 count = ipf->ipf_count - count; 8854 if (count) { 8855 /* Update per ipfb and ill byte counts */ 8856 ipfb->ipfb_count += count; 8857 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8858 atomic_add_32(&ill->ill_frag_count, count); 8859 } 8860 if (ret == IP_REASS_PARTIAL) { 8861 goto partial_reass_done; 8862 } else if (ret == IP_REASS_FAILED) { 8863 /* Reassembly failed. Free up all resources */ 8864 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8865 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8866 IP_REASS_SET_START(t_mp, 0); 8867 IP_REASS_SET_END(t_mp, 0); 8868 } 8869 freemsg(mp); 8870 goto partial_reass_done; 8871 } 8872 8873 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8874 } 8875 /* 8876 * We have completed reassembly. 
Unhook the frag header from 8877 * the reassembly list. 8878 * 8879 * Grab the unfragmentable header length next header value out 8880 * of the first fragment 8881 */ 8882 ASSERT(ipf->ipf_nf_hdr_len != 0); 8883 hdr_length = ipf->ipf_nf_hdr_len; 8884 8885 /* 8886 * Before we free the frag header, record the ECN info 8887 * to report back to the transport. 8888 */ 8889 ecn_info = ipf->ipf_ecn; 8890 8891 /* 8892 * Store the nextheader field in the header preceding the fragment 8893 * header 8894 */ 8895 nexthdr = ipf->ipf_protocol; 8896 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8897 ipfp = ipf->ipf_ptphn; 8898 8899 /* We need to supply these to caller */ 8900 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8901 sum_val = ipf->ipf_checksum; 8902 else 8903 sum_val = 0; 8904 8905 mp1 = ipf->ipf_mp; 8906 count = ipf->ipf_count; 8907 ipf = ipf->ipf_hash_next; 8908 if (ipf) 8909 ipf->ipf_ptphn = ipfp; 8910 ipfp[0] = ipf; 8911 atomic_add_32(&ill->ill_frag_count, -count); 8912 ASSERT(ipfb->ipfb_count >= count); 8913 ipfb->ipfb_count -= count; 8914 ipfb->ipfb_frag_pkts--; 8915 mutex_exit(&ipfb->ipfb_lock); 8916 /* Ditch the frag header. */ 8917 mp = mp1->b_cont; 8918 freeb(mp1); 8919 8920 /* 8921 * Make sure the packet is good by doing some sanity 8922 * check. If bad we can silentely drop the packet. 8923 */ 8924 reass_done: 8925 if (hdr_length < sizeof (ip6_frag_t)) { 8926 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8927 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8928 freemsg(mp); 8929 return (NULL); 8930 } 8931 8932 /* 8933 * Remove the fragment header from the initial header by 8934 * splitting the mblk into the non-fragmentable header and 8935 * everthing after the fragment extension header. This has the 8936 * side effect of putting all the headers that need destination 8937 * processing into the b_cont block-- on return this fact is 8938 * used in order to avoid having to look at the extensions 8939 * already processed. 
8940 * 8941 * Note that this code assumes that the unfragmentable portion 8942 * of the header is in the first mblk and increments 8943 * the read pointer past it. If this assumption is broken 8944 * this code fails badly. 8945 */ 8946 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8947 mblk_t *nmp; 8948 8949 if (!(nmp = dupb(mp))) { 8950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8951 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8952 freemsg(mp); 8953 return (NULL); 8954 } 8955 nmp->b_cont = mp->b_cont; 8956 mp->b_cont = nmp; 8957 nmp->b_rptr += hdr_length; 8958 } 8959 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8960 8961 ip6h = (ip6_t *)mp->b_rptr; 8962 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8963 8964 /* Restore original IP length in header. */ 8965 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8966 /* Record the ECN info. */ 8967 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8968 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8969 8970 /* Reassembly is successful; return checksum information if needed */ 8971 if (cksum_val != NULL) 8972 *cksum_val = sum_val; 8973 if (cksum_flags != NULL) 8974 *cksum_flags = sum_flags; 8975 8976 return (mp); 8977 } 8978 8979 /* 8980 * Walk through the options to see if there is a routing header. 8981 * If present get the destination which is the last address of 8982 * the option. 
8983 */ 8984 in6_addr_t 8985 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8986 { 8987 uint8_t nexthdr; 8988 uint8_t *whereptr; 8989 ip6_hbh_t *hbhhdr; 8990 ip6_dest_t *dsthdr; 8991 ip6_rthdr0_t *rthdr; 8992 ip6_frag_t *fraghdr; 8993 int ehdrlen; 8994 int left; 8995 in6_addr_t *ap, rv; 8996 8997 if (is_fragment != NULL) 8998 *is_fragment = B_FALSE; 8999 9000 rv = ip6h->ip6_dst; 9001 9002 nexthdr = ip6h->ip6_nxt; 9003 whereptr = (uint8_t *)&ip6h[1]; 9004 for (;;) { 9005 9006 ASSERT(nexthdr != IPPROTO_RAW); 9007 switch (nexthdr) { 9008 case IPPROTO_HOPOPTS: 9009 hbhhdr = (ip6_hbh_t *)whereptr; 9010 nexthdr = hbhhdr->ip6h_nxt; 9011 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9012 break; 9013 case IPPROTO_DSTOPTS: 9014 dsthdr = (ip6_dest_t *)whereptr; 9015 nexthdr = dsthdr->ip6d_nxt; 9016 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9017 break; 9018 case IPPROTO_ROUTING: 9019 rthdr = (ip6_rthdr0_t *)whereptr; 9020 nexthdr = rthdr->ip6r0_nxt; 9021 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9022 9023 left = rthdr->ip6r0_segleft; 9024 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9025 rv = *(ap + left - 1); 9026 /* 9027 * If the caller doesn't care whether the packet 9028 * is a fragment or not, we can stop here since 9029 * we have our destination. 9030 */ 9031 if (is_fragment == NULL) 9032 goto done; 9033 break; 9034 case IPPROTO_FRAGMENT: 9035 fraghdr = (ip6_frag_t *)whereptr; 9036 nexthdr = fraghdr->ip6f_nxt; 9037 ehdrlen = sizeof (ip6_frag_t); 9038 if (is_fragment != NULL) 9039 *is_fragment = B_TRUE; 9040 goto done; 9041 default : 9042 goto done; 9043 } 9044 whereptr += ehdrlen; 9045 } 9046 9047 done: 9048 return (rv); 9049 } 9050 9051 /* 9052 * ip_source_routed_v6: 9053 * This function is called by redirect code in ip_rput_data_v6 to 9054 * know whether this packet is source routed through this node i.e 9055 * whether this node (router) is part of the journey. 
This 9056 * function is called under two cases : 9057 * 9058 * case 1 : Routing header was processed by this node and 9059 * ip_process_rthdr replaced ip6_dst with the next hop 9060 * and we are forwarding the packet to the next hop. 9061 * 9062 * case 2 : Routing header was not processed by this node and we 9063 * are just forwarding the packet. 9064 * 9065 * For case (1) we don't want to send redirects. For case(2) we 9066 * want to send redirects. 9067 */ 9068 static boolean_t 9069 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9070 { 9071 uint8_t nexthdr; 9072 in6_addr_t *addrptr; 9073 ip6_rthdr0_t *rthdr; 9074 uint8_t numaddr; 9075 ip6_hbh_t *hbhhdr; 9076 uint_t ehdrlen; 9077 uint8_t *byteptr; 9078 9079 ip2dbg(("ip_source_routed_v6\n")); 9080 nexthdr = ip6h->ip6_nxt; 9081 ehdrlen = IPV6_HDR_LEN; 9082 9083 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9084 while (nexthdr == IPPROTO_HOPOPTS || 9085 nexthdr == IPPROTO_DSTOPTS) { 9086 byteptr = (uint8_t *)ip6h + ehdrlen; 9087 /* 9088 * Check if we have already processed 9089 * packets or we are just a forwarding 9090 * router which only pulled up msgs up 9091 * to IPV6HDR and one HBH ext header 9092 */ 9093 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9094 ip2dbg(("ip_source_routed_v6: Extension" 9095 " headers not processed\n")); 9096 return (B_FALSE); 9097 } 9098 hbhhdr = (ip6_hbh_t *)byteptr; 9099 nexthdr = hbhhdr->ip6h_nxt; 9100 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9101 } 9102 switch (nexthdr) { 9103 case IPPROTO_ROUTING: 9104 byteptr = (uint8_t *)ip6h + ehdrlen; 9105 /* 9106 * If for some reason, we haven't pulled up 9107 * the routing hdr data mblk, then we must 9108 * not have processed it at all. So for sure 9109 * we are not part of the source routed journey. 
9110 */ 9111 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9112 ip2dbg(("ip_source_routed_v6: Routing" 9113 " header not processed\n")); 9114 return (B_FALSE); 9115 } 9116 rthdr = (ip6_rthdr0_t *)byteptr; 9117 /* 9118 * Either we are an intermediate router or the 9119 * last hop before destination and we have 9120 * already processed the routing header. 9121 * If segment_left is greater than or equal to zero, 9122 * then we must be the (numaddr - segleft) entry 9123 * of the routing header. Although ip6r0_segleft 9124 * is a unit8_t variable, we still check for zero 9125 * or greater value, if in case the data type 9126 * is changed someday in future. 9127 */ 9128 if (rthdr->ip6r0_segleft > 0 || 9129 rthdr->ip6r0_segleft == 0) { 9130 ire_t *ire = NULL; 9131 9132 numaddr = rthdr->ip6r0_len / 2; 9133 addrptr = (in6_addr_t *)((char *)rthdr + 9134 sizeof (*rthdr)); 9135 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9136 if (addrptr != NULL) { 9137 ire = ire_ctable_lookup_v6(addrptr, NULL, 9138 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9139 MATCH_IRE_TYPE, 9140 ipst); 9141 if (ire != NULL) { 9142 ire_refrele(ire); 9143 return (B_TRUE); 9144 } 9145 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9146 } 9147 } 9148 /* FALLTHRU */ 9149 default: 9150 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9151 return (B_FALSE); 9152 } 9153 } 9154 9155 /* 9156 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9157 * Assumes that the following set of headers appear in the first 9158 * mblk: 9159 * ip6i_t (if present) CAN also appear as a separate mblk. 9160 * ip6_t 9161 * Any extension headers 9162 * TCP/UDP/SCTP header (if present) 9163 * The routine can handle an ICMPv6 header that is not in the first mblk. 9164 * 9165 * The order to determine the outgoing interface is as follows: 9166 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9167 * 2. If conn_nofailover_ill is set then use that ill. 9168 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9169 * 4. If q is an ill queue and (link local or multicast destination) then 9170 * use that ill. 9171 * 5. If IPV6_BOUND_IF has been set use that ill. 9172 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9173 * look for the best IRE match for the unspecified group to determine 9174 * the ill. 9175 * 7. For unicast: Just do an IRE lookup for the best match. 9176 * 9177 * arg2 is always a queue_t *. 9178 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9179 * the zoneid. 9180 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9181 */ 9182 void 9183 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9184 { 9185 conn_t *connp = NULL; 9186 queue_t *q = (queue_t *)arg2; 9187 ire_t *ire = NULL; 9188 ire_t *sctp_ire = NULL; 9189 ip6_t *ip6h; 9190 in6_addr_t *v6dstp; 9191 ill_t *ill = NULL; 9192 ipif_t *ipif; 9193 ip6i_t *ip6i; 9194 int cksum_request; /* -1 => normal. */ 9195 /* 1 => Skip TCP/UDP/SCTP checksum */ 9196 /* Otherwise contains insert offset for checksum */ 9197 int unspec_src; 9198 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9199 mib2_ipIfStatsEntry_t *mibptr; 9200 int match_flags = MATCH_IRE_ILL_GROUP; 9201 boolean_t attach_if = B_FALSE; 9202 mblk_t *first_mp; 9203 boolean_t mctl_present; 9204 ipsec_out_t *io; 9205 boolean_t drop_if_delayed = B_FALSE; 9206 boolean_t multirt_need_resolve = B_FALSE; 9207 mblk_t *copy_mp = NULL; 9208 int err = 0; 9209 int ip6i_flags = 0; 9210 zoneid_t zoneid; 9211 ill_t *saved_ill = NULL; 9212 boolean_t conn_lock_held; 9213 boolean_t need_decref = B_FALSE; 9214 ip_stack_t *ipst; 9215 9216 if (q->q_next != NULL) { 9217 ill = (ill_t *)q->q_ptr; 9218 ipst = ill->ill_ipst; 9219 } else { 9220 connp = (conn_t *)arg; 9221 ASSERT(connp != NULL); 9222 ipst = connp->conn_netstack->netstack_ip; 9223 } 9224 9225 /* 9226 * Highest bit in version field is Reachability Confirmation bit 9227 * used by NUD in ip_xmit_v6(). 9228 */ 9229 #ifdef _BIG_ENDIAN 9230 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9231 #else 9232 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9233 #endif 9234 9235 /* 9236 * M_CTL comes from 6 places 9237 * 9238 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9239 * both V4 and V6 datagrams. 9240 * 9241 * 2) AH/ESP sends down M_CTL after doing their job with both 9242 * V4 and V6 datagrams. 9243 * 9244 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9245 * attached. 9246 * 9247 * 4) Notifications from an external resolver (for XRESOLV ifs) 9248 * 9249 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9250 * IPsec hardware acceleration support. 9251 * 9252 * 6) TUN_HELLO. 9253 * 9254 * We need to handle (1)'s IPv6 case and (3) here. For the 9255 * IPv4 case in (1), and (2), IPSEC processing has already 9256 * started. The code in ip_wput() already knows how to handle 9257 * continuing IPSEC processing (for IPv4 and IPv6). All other 9258 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9259 * for handling. 
9260 */ 9261 first_mp = mp; 9262 mctl_present = B_FALSE; 9263 io = NULL; 9264 9265 /* Multidata transmit? */ 9266 if (DB_TYPE(mp) == M_MULTIDATA) { 9267 /* 9268 * We should never get here, since all Multidata messages 9269 * originating from tcp should have been directed over to 9270 * tcp_multisend() in the first place. 9271 */ 9272 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9273 freemsg(mp); 9274 return; 9275 } else if (DB_TYPE(mp) == M_CTL) { 9276 uint32_t mctltype = 0; 9277 uint32_t mlen = MBLKL(first_mp); 9278 9279 mp = mp->b_cont; 9280 mctl_present = B_TRUE; 9281 io = (ipsec_out_t *)first_mp->b_rptr; 9282 9283 /* 9284 * Validate this M_CTL message. The only three types of 9285 * M_CTL messages we expect to see in this code path are 9286 * ipsec_out_t or ipsec_in_t structures (allocated as 9287 * ipsec_info_t unions), or ipsec_ctl_t structures. 9288 * The ipsec_out_type and ipsec_in_type overlap in the two 9289 * data structures, and they are either set to IPSEC_OUT 9290 * or IPSEC_IN depending on which data structure it is. 9291 * ipsec_ctl_t is an IPSEC_CTL. 9292 * 9293 * All other M_CTL messages are sent to ip_wput_nondata() 9294 * for handling. 9295 */ 9296 if (mlen >= sizeof (io->ipsec_out_type)) 9297 mctltype = io->ipsec_out_type; 9298 9299 if ((mlen == sizeof (ipsec_ctl_t)) && 9300 (mctltype == IPSEC_CTL)) { 9301 ip_output(arg, first_mp, arg2, caller); 9302 return; 9303 } 9304 9305 if ((mlen < sizeof (ipsec_info_t)) || 9306 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9307 mp == NULL) { 9308 ip_wput_nondata(NULL, q, first_mp, NULL); 9309 return; 9310 } 9311 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9312 if (q->q_next == NULL) { 9313 ip6h = (ip6_t *)mp->b_rptr; 9314 /* 9315 * For a freshly-generated TCP dgram that needs IPV6 9316 * processing, don't call ip_wput immediately. We can 9317 * tell this by the ipsec_out_proc_begin. 
In-progress 9318 * IPSEC_OUT messages have proc_begin set to TRUE, 9319 * and we want to send all IPSEC_IN messages to 9320 * ip_wput() for IPsec processing or finishing. 9321 */ 9322 if (mctltype == IPSEC_IN || 9323 IPVER(ip6h) != IPV6_VERSION || 9324 io->ipsec_out_proc_begin) { 9325 mibptr = &ipst->ips_ip6_mib; 9326 goto notv6; 9327 } 9328 } 9329 } else if (DB_TYPE(mp) != M_DATA) { 9330 ip_wput_nondata(NULL, q, mp, NULL); 9331 return; 9332 } 9333 9334 ip6h = (ip6_t *)mp->b_rptr; 9335 9336 if (IPVER(ip6h) != IPV6_VERSION) { 9337 mibptr = &ipst->ips_ip6_mib; 9338 goto notv6; 9339 } 9340 9341 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9342 (connp == NULL || !connp->conn_ulp_labeled)) { 9343 if (connp != NULL) { 9344 ASSERT(CONN_CRED(connp) != NULL); 9345 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9346 &mp, connp->conn_mac_exempt, ipst); 9347 } else if (DB_CRED(mp) != NULL) { 9348 err = tsol_check_label_v6(DB_CRED(mp), 9349 &mp, B_FALSE, ipst); 9350 } 9351 if (mctl_present) 9352 first_mp->b_cont = mp; 9353 else 9354 first_mp = mp; 9355 if (err != 0) { 9356 DTRACE_PROBE3( 9357 tsol_ip_log_drop_checklabel_ip6, char *, 9358 "conn(1), failed to check/update mp(2)", 9359 conn_t, connp, mblk_t, mp); 9360 freemsg(first_mp); 9361 return; 9362 } 9363 ip6h = (ip6_t *)mp->b_rptr; 9364 } 9365 if (q->q_next != NULL) { 9366 /* 9367 * We don't know if this ill will be used for IPv6 9368 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9369 * ipif_set_values() sets the ill_isv6 flag to true if 9370 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9371 * just drop the packet. 
9372 */ 9373 if (!ill->ill_isv6) { 9374 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9375 "ILLF_IPV6 was set\n")); 9376 freemsg(first_mp); 9377 return; 9378 } 9379 /* For uniformity do a refhold */ 9380 mutex_enter(&ill->ill_lock); 9381 if (!ILL_CAN_LOOKUP(ill)) { 9382 mutex_exit(&ill->ill_lock); 9383 freemsg(first_mp); 9384 return; 9385 } 9386 ill_refhold_locked(ill); 9387 mutex_exit(&ill->ill_lock); 9388 mibptr = ill->ill_ip_mib; 9389 9390 ASSERT(mibptr != NULL); 9391 unspec_src = 0; 9392 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9393 do_outrequests = B_FALSE; 9394 zoneid = (zoneid_t)(uintptr_t)arg; 9395 } else { 9396 ASSERT(connp != NULL); 9397 zoneid = connp->conn_zoneid; 9398 9399 /* is queue flow controlled? */ 9400 if ((q->q_first || connp->conn_draining) && 9401 (caller == IP_WPUT)) { 9402 /* 9403 * 1) TCP sends down M_CTL for detached connections. 9404 * 2) AH/ESP sends down M_CTL. 9405 * 9406 * We don't flow control either of the above. Only 9407 * UDP and others are flow controlled for which we 9408 * can't have a M_CTL. 9409 */ 9410 ASSERT(first_mp == mp); 9411 (void) putq(q, mp); 9412 return; 9413 } 9414 mibptr = &ipst->ips_ip6_mib; 9415 unspec_src = connp->conn_unspec_src; 9416 do_outrequests = B_TRUE; 9417 if (mp->b_flag & MSGHASREF) { 9418 mp->b_flag &= ~MSGHASREF; 9419 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9420 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9421 need_decref = B_TRUE; 9422 } 9423 9424 /* 9425 * If there is a policy, try to attach an ipsec_out in 9426 * the front. At the end, first_mp either points to a 9427 * M_DATA message or IPSEC_OUT message linked to a 9428 * M_DATA message. We have to do it now as we might 9429 * lose the "conn" if we go through ip_newroute. 9430 */ 9431 if (!mctl_present && 9432 (connp->conn_out_enforce_policy || 9433 connp->conn_latch != NULL)) { 9434 ASSERT(first_mp == mp); 9435 /* XXX Any better way to get the protocol fast ? 
*/ 9436 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9437 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9438 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9439 if (need_decref) 9440 CONN_DEC_REF(connp); 9441 return; 9442 } else { 9443 ASSERT(mp->b_datap->db_type == M_CTL); 9444 first_mp = mp; 9445 mp = mp->b_cont; 9446 mctl_present = B_TRUE; 9447 io = (ipsec_out_t *)first_mp->b_rptr; 9448 } 9449 } 9450 } 9451 9452 /* check for alignment and full IPv6 header */ 9453 if (!OK_32PTR((uchar_t *)ip6h) || 9454 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9455 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9456 if (do_outrequests) 9457 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9458 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9459 freemsg(first_mp); 9460 if (ill != NULL) 9461 ill_refrele(ill); 9462 if (need_decref) 9463 CONN_DEC_REF(connp); 9464 return; 9465 } 9466 v6dstp = &ip6h->ip6_dst; 9467 cksum_request = -1; 9468 ip6i = NULL; 9469 9470 /* 9471 * Once neighbor discovery has completed, ndp_process() will provide 9472 * locally generated packets for which processing can be reattempted. 9473 * In these cases, connp is NULL and the original zone is part of a 9474 * prepended ipsec_out_t. 9475 */ 9476 if (io != NULL) { 9477 /* 9478 * When coming from icmp_input_v6, the zoneid might not match 9479 * for the loopback case, because inside icmp_input_v6 the 9480 * queue_t is a conn queue from the sending side. 9481 */ 9482 zoneid = io->ipsec_out_zoneid; 9483 ASSERT(zoneid != ALL_ZONES); 9484 } 9485 9486 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9487 /* 9488 * This is an ip6i_t header followed by an ip6_hdr. 9489 * Check which fields are set. 9490 * 9491 * When the packet comes from a transport we should have 9492 * all needed headers in the first mblk. However, when 9493 * going through ip_newroute*_v6 the ip6i might be in 9494 * a separate mblk when we return here. 
In that case 9495 * we pullup everything to ensure that extension and transport 9496 * headers "stay" in the first mblk. 9497 */ 9498 ip6i = (ip6i_t *)ip6h; 9499 ip6i_flags = ip6i->ip6i_flags; 9500 9501 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9502 ((mp->b_wptr - (uchar_t *)ip6i) >= 9503 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9504 9505 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9506 if (!pullupmsg(mp, -1)) { 9507 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9508 if (do_outrequests) { 9509 BUMP_MIB(mibptr, 9510 ipIfStatsHCOutRequests); 9511 } 9512 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9513 freemsg(first_mp); 9514 if (ill != NULL) 9515 ill_refrele(ill); 9516 if (need_decref) 9517 CONN_DEC_REF(connp); 9518 return; 9519 } 9520 ip6h = (ip6_t *)mp->b_rptr; 9521 v6dstp = &ip6h->ip6_dst; 9522 ip6i = (ip6i_t *)ip6h; 9523 } 9524 ip6h = (ip6_t *)&ip6i[1]; 9525 9526 /* 9527 * Advance rptr past the ip6i_t to get ready for 9528 * transmitting the packet. However, if the packet gets 9529 * passed to ip_newroute*_v6 then rptr is moved back so 9530 * that the ip6i_t header can be inspected when the 9531 * packet comes back here after passing through 9532 * ire_add_then_send. 9533 */ 9534 mp->b_rptr = (uchar_t *)ip6h; 9535 9536 /* 9537 * IP6I_ATTACH_IF is set in this function when we had a 9538 * conn and it was either bound to the IPFF_NOFAILOVER address 9539 * or IPV6_BOUND_PIF was set. These options override other 9540 * options that set the ifindex. We come here with 9541 * IP6I_ATTACH_IF set when we can't find the ire and 9542 * ip_newroute_v6 is feeding the packet for second time. 
9543 */ 9544 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9545 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9546 ASSERT(ip6i->ip6i_ifindex != 0); 9547 if (ill != NULL) 9548 ill_refrele(ill); 9549 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9550 NULL, NULL, NULL, NULL, ipst); 9551 if (ill == NULL) { 9552 if (do_outrequests) { 9553 BUMP_MIB(mibptr, 9554 ipIfStatsHCOutRequests); 9555 } 9556 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9557 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9558 ip6i->ip6i_ifindex)); 9559 if (need_decref) 9560 CONN_DEC_REF(connp); 9561 freemsg(first_mp); 9562 return; 9563 } 9564 mibptr = ill->ill_ip_mib; 9565 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9566 /* 9567 * Preserve the index so that when we return 9568 * from IPSEC processing, we know where to 9569 * send the packet. 9570 */ 9571 if (mctl_present) { 9572 ASSERT(io != NULL); 9573 io->ipsec_out_ill_index = 9574 ip6i->ip6i_ifindex; 9575 } 9576 } 9577 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9578 /* 9579 * This is a multipathing probe packet that has 9580 * been delayed in ND resolution. Drop the 9581 * packet for the reasons mentioned in 9582 * nce_queue_mp() 9583 */ 9584 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9585 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9586 freemsg(first_mp); 9587 ill_refrele(ill); 9588 if (need_decref) 9589 CONN_DEC_REF(connp); 9590 return; 9591 } 9592 } 9593 } 9594 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9595 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9596 9597 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9598 if (secpolicy_net_rawaccess(cr) != 0) { 9599 /* 9600 * Use IPCL_ZONEID to honor SO_ALLZONES. 9601 */ 9602 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9603 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9604 NULL, connp != NULL ? 
9605 IPCL_ZONEID(connp) : zoneid, NULL, 9606 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9607 if (ire == NULL) { 9608 if (do_outrequests) 9609 BUMP_MIB(mibptr, 9610 ipIfStatsHCOutRequests); 9611 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9612 ip1dbg(("ip_wput_v6: bad source " 9613 "addr\n")); 9614 freemsg(first_mp); 9615 if (ill != NULL) 9616 ill_refrele(ill); 9617 if (need_decref) 9618 CONN_DEC_REF(connp); 9619 return; 9620 } 9621 ire_refrele(ire); 9622 } 9623 /* No need to verify again when using ip_newroute */ 9624 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9625 } 9626 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9627 /* 9628 * Make sure they match since ip_newroute*_v6 etc might 9629 * (unknown to them) inspect ip6i_nexthop when 9630 * they think they access ip6_dst. 9631 */ 9632 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9633 } 9634 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9635 cksum_request = 1; 9636 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9637 cksum_request = ip6i->ip6i_checksum_off; 9638 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9639 unspec_src = 1; 9640 9641 if (do_outrequests && ill != NULL) { 9642 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9643 do_outrequests = B_FALSE; 9644 } 9645 /* 9646 * Store ip6i_t info that we need after we come back 9647 * from IPSEC processing. 9648 */ 9649 if (mctl_present) { 9650 ASSERT(io != NULL); 9651 io->ipsec_out_unspec_src = unspec_src; 9652 } 9653 } 9654 if (connp != NULL && connp->conn_dontroute) 9655 ip6h->ip6_hops = 1; 9656 9657 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9658 goto ipv6multicast; 9659 9660 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9661 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9662 ill_t *conn_outgoing_pill; 9663 9664 conn_outgoing_pill = conn_get_held_ill(connp, 9665 &connp->conn_outgoing_pill, &err); 9666 if (err == ILL_LOOKUP_FAILED) { 9667 if (ill != NULL) 9668 ill_refrele(ill); 9669 if (need_decref) 9670 CONN_DEC_REF(connp); 9671 freemsg(first_mp); 9672 return; 9673 } 9674 if (conn_outgoing_pill != NULL) { 9675 if (ill != NULL) 9676 ill_refrele(ill); 9677 ill = conn_outgoing_pill; 9678 attach_if = B_TRUE; 9679 match_flags = MATCH_IRE_ILL; 9680 mibptr = ill->ill_ip_mib; 9681 9682 /* 9683 * Check if we need an ire that will not be 9684 * looked up by anybody else i.e. HIDDEN. 9685 */ 9686 if (ill_is_probeonly(ill)) 9687 match_flags |= MATCH_IRE_MARK_HIDDEN; 9688 goto send_from_ill; 9689 } 9690 } 9691 9692 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9693 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9694 ill_t *conn_nofailover_ill; 9695 9696 conn_nofailover_ill = conn_get_held_ill(connp, 9697 &connp->conn_nofailover_ill, &err); 9698 if (err == ILL_LOOKUP_FAILED) { 9699 if (ill != NULL) 9700 ill_refrele(ill); 9701 if (need_decref) 9702 CONN_DEC_REF(connp); 9703 freemsg(first_mp); 9704 return; 9705 } 9706 if (conn_nofailover_ill != NULL) { 9707 if (ill != NULL) 9708 ill_refrele(ill); 9709 ill = conn_nofailover_ill; 9710 attach_if = B_TRUE; 9711 /* 9712 * Assumes that ipc_nofailover_ill is used only for 9713 * multipathing probe packets. These packets are better 9714 * dropped, if they are delayed in ND resolution, for 9715 * the reasons described in nce_queue_mp(). 9716 * IP6I_DROP_IFDELAYED will be set later on in this 9717 * function for this packet. 9718 */ 9719 drop_if_delayed = B_TRUE; 9720 match_flags = MATCH_IRE_ILL; 9721 mibptr = ill->ill_ip_mib; 9722 9723 /* 9724 * Check if we need an ire that will not be 9725 * looked up by anybody else i.e. HIDDEN. 
9726 */ 9727 if (ill_is_probeonly(ill)) 9728 match_flags |= MATCH_IRE_MARK_HIDDEN; 9729 goto send_from_ill; 9730 } 9731 } 9732 9733 /* 9734 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9735 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9736 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9737 */ 9738 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9739 ASSERT(ip6i->ip6i_ifindex != 0); 9740 attach_if = B_TRUE; 9741 ASSERT(ill != NULL); 9742 match_flags = MATCH_IRE_ILL; 9743 9744 /* 9745 * Check if we need an ire that will not be 9746 * looked up by anybody else i.e. HIDDEN. 9747 */ 9748 if (ill_is_probeonly(ill)) 9749 match_flags |= MATCH_IRE_MARK_HIDDEN; 9750 goto send_from_ill; 9751 } 9752 9753 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9754 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9755 ASSERT(ill != NULL); 9756 goto send_from_ill; 9757 } 9758 9759 /* 9760 * 4. If q is an ill queue and (link local or multicast destination) 9761 * then use that ill. 9762 */ 9763 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9764 goto send_from_ill; 9765 } 9766 9767 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9768 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9769 ill_t *conn_outgoing_ill; 9770 9771 conn_outgoing_ill = conn_get_held_ill(connp, 9772 &connp->conn_outgoing_ill, &err); 9773 if (err == ILL_LOOKUP_FAILED) { 9774 if (ill != NULL) 9775 ill_refrele(ill); 9776 if (need_decref) 9777 CONN_DEC_REF(connp); 9778 freemsg(first_mp); 9779 return; 9780 } 9781 if (ill != NULL) 9782 ill_refrele(ill); 9783 ill = conn_outgoing_ill; 9784 mibptr = ill->ill_ip_mib; 9785 goto send_from_ill; 9786 } 9787 9788 /* 9789 * 6. For unicast: Just do an IRE lookup for the best match. 9790 * If we get here for a link-local address it is rather random 9791 * what interface we pick on a multihomed host. 
9792 * *If* there is an IRE_CACHE (and the link-local address 9793 * isn't duplicated on multi links) this will find the IRE_CACHE. 9794 * Otherwise it will use one of the matching IRE_INTERFACE routes 9795 * for the link-local prefix. Hence, applications 9796 * *should* be encouraged to specify an outgoing interface when sending 9797 * to a link local address. 9798 */ 9799 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9800 !connp->conn_fully_bound)) { 9801 /* 9802 * We cache IRE_CACHEs to avoid lookups. We don't do 9803 * this for the tcp global queue and listen end point 9804 * as it does not really have a real destination to 9805 * talk to. 9806 */ 9807 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9808 ipst); 9809 } else { 9810 /* 9811 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9812 * grab a lock here to check for CONDEMNED as it is okay 9813 * to send a packet or two with the IRE_CACHE that is going 9814 * away. 9815 */ 9816 mutex_enter(&connp->conn_lock); 9817 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9818 if (ire != NULL && 9819 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9820 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9821 9822 IRE_REFHOLD(ire); 9823 mutex_exit(&connp->conn_lock); 9824 9825 } else { 9826 boolean_t cached = B_FALSE; 9827 9828 connp->conn_ire_cache = NULL; 9829 mutex_exit(&connp->conn_lock); 9830 /* Release the old ire */ 9831 if (ire != NULL && sctp_ire == NULL) 9832 IRE_REFRELE_NOTR(ire); 9833 9834 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9835 MBLK_GETLABEL(mp), ipst); 9836 if (ire != NULL) { 9837 IRE_REFHOLD_NOTR(ire); 9838 9839 mutex_enter(&connp->conn_lock); 9840 if (CONN_CACHE_IRE(connp) && 9841 (connp->conn_ire_cache == NULL)) { 9842 rw_enter(&ire->ire_bucket->irb_lock, 9843 RW_READER); 9844 if (!(ire->ire_marks & 9845 IRE_MARK_CONDEMNED)) { 9846 connp->conn_ire_cache = ire; 9847 cached = B_TRUE; 9848 } 9849 rw_exit(&ire->ire_bucket->irb_lock); 9850 } 9851 mutex_exit(&connp->conn_lock); 9852 9853 /* 9854 * We can continue to use the ire but since it 9855 * was not cached, we should drop the extra 9856 * reference. 9857 */ 9858 if (!cached) 9859 IRE_REFRELE_NOTR(ire); 9860 } 9861 } 9862 } 9863 9864 if (ire != NULL) { 9865 if (do_outrequests) { 9866 /* Handle IRE_LOCAL's that might appear here */ 9867 if (ire->ire_type == IRE_CACHE) { 9868 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9869 ill_ip_mib; 9870 } else { 9871 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9872 } 9873 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9874 } 9875 ASSERT(!attach_if); 9876 9877 /* 9878 * Check if the ire has the RTF_MULTIRT flag, inherited 9879 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9880 */ 9881 if (ire->ire_flags & RTF_MULTIRT) { 9882 /* 9883 * Force hop limit of multirouted packets if required. 9884 * The hop limit of such packets is bounded by the 9885 * ip_multirt_ttl ndd variable. 9886 * NDP packets must have a hop limit of 255; don't 9887 * change the hop limit in that case. 
9888 */ 9889 if ((ipst->ips_ip_multirt_ttl > 0) && 9890 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9891 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9892 if (ip_debug > 3) { 9893 ip2dbg(("ip_wput_v6: forcing multirt " 9894 "hop limit to %d (was %d) ", 9895 ipst->ips_ip_multirt_ttl, 9896 ip6h->ip6_hops)); 9897 pr_addr_dbg("v6dst %s\n", AF_INET6, 9898 &ire->ire_addr_v6); 9899 } 9900 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9901 } 9902 9903 /* 9904 * We look at this point if there are pending 9905 * unresolved routes. ire_multirt_need_resolve_v6() 9906 * checks in O(n) that all IRE_OFFSUBNET ire 9907 * entries for the packet's destination and 9908 * flagged RTF_MULTIRT are currently resolved. 9909 * If some remain unresolved, we do a copy 9910 * of the current message. It will be used 9911 * to initiate additional route resolutions. 9912 */ 9913 multirt_need_resolve = 9914 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9915 MBLK_GETLABEL(first_mp), ipst); 9916 ip2dbg(("ip_wput_v6: ire %p, " 9917 "multirt_need_resolve %d, first_mp %p\n", 9918 (void *)ire, multirt_need_resolve, 9919 (void *)first_mp)); 9920 if (multirt_need_resolve) { 9921 copy_mp = copymsg(first_mp); 9922 if (copy_mp != NULL) { 9923 MULTIRT_DEBUG_TAG(copy_mp); 9924 } 9925 } 9926 } 9927 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9928 connp, caller, 0, ip6i_flags, zoneid); 9929 if (need_decref) { 9930 CONN_DEC_REF(connp); 9931 connp = NULL; 9932 } 9933 IRE_REFRELE(ire); 9934 9935 /* 9936 * Try to resolve another multiroute if 9937 * ire_multirt_need_resolve_v6() deemed it necessary. 9938 * copy_mp will be consumed (sent or freed) by 9939 * ip_newroute_v6(). 
9940 */ 9941 if (copy_mp != NULL) { 9942 if (mctl_present) { 9943 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9944 } else { 9945 ip6h = (ip6_t *)copy_mp->b_rptr; 9946 } 9947 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9948 &ip6h->ip6_src, NULL, zoneid, ipst); 9949 } 9950 if (ill != NULL) 9951 ill_refrele(ill); 9952 return; 9953 } 9954 9955 /* 9956 * No full IRE for this destination. Send it to 9957 * ip_newroute_v6 to see if anything else matches. 9958 * Mark this packet as having originated on this 9959 * machine. 9960 * Update rptr if there was an ip6i_t header. 9961 */ 9962 mp->b_prev = NULL; 9963 mp->b_next = NULL; 9964 if (ip6i != NULL) 9965 mp->b_rptr -= sizeof (ip6i_t); 9966 9967 if (unspec_src) { 9968 if (ip6i == NULL) { 9969 /* 9970 * Add ip6i_t header to carry unspec_src 9971 * until the packet comes back in ip_wput_v6. 9972 */ 9973 mp = ip_add_info_v6(mp, NULL, v6dstp); 9974 if (mp == NULL) { 9975 if (do_outrequests) 9976 BUMP_MIB(mibptr, 9977 ipIfStatsHCOutRequests); 9978 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9979 if (mctl_present) 9980 freeb(first_mp); 9981 if (ill != NULL) 9982 ill_refrele(ill); 9983 if (need_decref) 9984 CONN_DEC_REF(connp); 9985 return; 9986 } 9987 ip6i = (ip6i_t *)mp->b_rptr; 9988 9989 if (mctl_present) { 9990 ASSERT(first_mp != mp); 9991 first_mp->b_cont = mp; 9992 } else { 9993 first_mp = mp; 9994 } 9995 9996 if ((mp->b_wptr - (uchar_t *)ip6i) == 9997 sizeof (ip6i_t)) { 9998 /* 9999 * ndp_resolver called from ip_newroute_v6 10000 * expects pulled up message. 
10001 */ 10002 if (!pullupmsg(mp, -1)) { 10003 ip1dbg(("ip_wput_v6: pullupmsg" 10004 " failed\n")); 10005 if (do_outrequests) { 10006 BUMP_MIB(mibptr, 10007 ipIfStatsHCOutRequests); 10008 } 10009 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10010 freemsg(first_mp); 10011 if (ill != NULL) 10012 ill_refrele(ill); 10013 if (need_decref) 10014 CONN_DEC_REF(connp); 10015 return; 10016 } 10017 ip6i = (ip6i_t *)mp->b_rptr; 10018 } 10019 ip6h = (ip6_t *)&ip6i[1]; 10020 v6dstp = &ip6h->ip6_dst; 10021 } 10022 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10023 if (mctl_present) { 10024 ASSERT(io != NULL); 10025 io->ipsec_out_unspec_src = unspec_src; 10026 } 10027 } 10028 if (do_outrequests) 10029 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10030 if (need_decref) 10031 CONN_DEC_REF(connp); 10032 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10033 if (ill != NULL) 10034 ill_refrele(ill); 10035 return; 10036 10037 10038 /* 10039 * Handle multicast packets with or without an conn. 10040 * Assumes that the transports set ip6_hops taking 10041 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10042 * into account. 10043 */ 10044 ipv6multicast: 10045 ip2dbg(("ip_wput_v6: multicast\n")); 10046 10047 /* 10048 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10049 * 2. If conn_nofailover_ill is set then use that ill. 10050 * 10051 * Hold the conn_lock till we refhold the ill of interest that is 10052 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10053 * while holding any locks, postpone the refrele until after the 10054 * conn_lock is dropped. 
10055 */ 10056 if (connp != NULL) { 10057 mutex_enter(&connp->conn_lock); 10058 conn_lock_held = B_TRUE; 10059 } else { 10060 conn_lock_held = B_FALSE; 10061 } 10062 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10063 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10064 if (err == ILL_LOOKUP_FAILED) { 10065 ip1dbg(("ip_output_v6: multicast" 10066 " conn_outgoing_pill no ipif\n")); 10067 multicast_discard: 10068 ASSERT(saved_ill == NULL); 10069 if (conn_lock_held) 10070 mutex_exit(&connp->conn_lock); 10071 if (ill != NULL) 10072 ill_refrele(ill); 10073 freemsg(first_mp); 10074 if (do_outrequests) 10075 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10076 if (need_decref) 10077 CONN_DEC_REF(connp); 10078 return; 10079 } 10080 saved_ill = ill; 10081 ill = connp->conn_outgoing_pill; 10082 attach_if = B_TRUE; 10083 match_flags = MATCH_IRE_ILL; 10084 mibptr = ill->ill_ip_mib; 10085 10086 /* 10087 * Check if we need an ire that will not be 10088 * looked up by anybody else i.e. HIDDEN. 10089 */ 10090 if (ill_is_probeonly(ill)) 10091 match_flags |= MATCH_IRE_MARK_HIDDEN; 10092 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10093 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10094 if (err == ILL_LOOKUP_FAILED) { 10095 ip1dbg(("ip_output_v6: multicast" 10096 " conn_nofailover_ill no ipif\n")); 10097 goto multicast_discard; 10098 } 10099 saved_ill = ill; 10100 ill = connp->conn_nofailover_ill; 10101 attach_if = B_TRUE; 10102 match_flags = MATCH_IRE_ILL; 10103 10104 /* 10105 * Check if we need an ire that will not be 10106 * looked up by anybody else i.e. HIDDEN. 10107 */ 10108 if (ill_is_probeonly(ill)) 10109 match_flags |= MATCH_IRE_MARK_HIDDEN; 10110 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10111 /* 10112 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10113 * we should have an ip6i_t with IP6I_ATTACH_IF if 10114 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10115 * used on this endpoint. 10116 */ 10117 ASSERT(ip6i->ip6i_ifindex != 0); 10118 attach_if = B_TRUE; 10119 ASSERT(ill != NULL); 10120 match_flags = MATCH_IRE_ILL; 10121 10122 /* 10123 * Check if we need an ire that will not be 10124 * looked up by anybody else i.e. HIDDEN. 10125 */ 10126 if (ill_is_probeonly(ill)) 10127 match_flags |= MATCH_IRE_MARK_HIDDEN; 10128 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10129 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10130 10131 ASSERT(ill != NULL); 10132 } else if (ill != NULL) { 10133 /* 10134 * 4. If q is an ill queue and (link local or multicast 10135 * destination) then use that ill. 10136 * We don't need the ipif initialization here. 10137 * This useless assert below is just to prevent lint from 10138 * reporting a null body if statement. 10139 */ 10140 ASSERT(ill != NULL); 10141 } else if (connp != NULL) { 10142 /* 10143 * 5. If IPV6_BOUND_IF has been set use that ill. 10144 * 10145 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10146 * Otherwise look for the best IRE match for the unspecified 10147 * group to determine the ill. 10148 * 10149 * conn_multicast_ill is used for only IPv6 packets. 10150 * conn_multicast_ipif is used for only IPv4 packets. 10151 * Thus a PF_INET6 socket send both IPv4 and IPv6 10152 * multicast packets using different IP*_MULTICAST_IF 10153 * interfaces. 
10154 */ 10155 if (connp->conn_outgoing_ill != NULL) { 10156 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10157 if (err == ILL_LOOKUP_FAILED) { 10158 ip1dbg(("ip_output_v6: multicast" 10159 " conn_outgoing_ill no ipif\n")); 10160 goto multicast_discard; 10161 } 10162 ill = connp->conn_outgoing_ill; 10163 } else if (connp->conn_multicast_ill != NULL) { 10164 err = ill_check_and_refhold(connp->conn_multicast_ill); 10165 if (err == ILL_LOOKUP_FAILED) { 10166 ip1dbg(("ip_output_v6: multicast" 10167 " conn_multicast_ill no ipif\n")); 10168 goto multicast_discard; 10169 } 10170 ill = connp->conn_multicast_ill; 10171 } else { 10172 mutex_exit(&connp->conn_lock); 10173 conn_lock_held = B_FALSE; 10174 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10175 if (ipif == NULL) { 10176 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10177 goto multicast_discard; 10178 } 10179 /* 10180 * We have a ref to this ipif, so we can safely 10181 * access ipif_ill. 10182 */ 10183 ill = ipif->ipif_ill; 10184 mutex_enter(&ill->ill_lock); 10185 if (!ILL_CAN_LOOKUP(ill)) { 10186 mutex_exit(&ill->ill_lock); 10187 ipif_refrele(ipif); 10188 ill = NULL; 10189 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10190 goto multicast_discard; 10191 } 10192 ill_refhold_locked(ill); 10193 mutex_exit(&ill->ill_lock); 10194 ipif_refrele(ipif); 10195 /* 10196 * Save binding until IPV6_MULTICAST_IF 10197 * changes it 10198 */ 10199 mutex_enter(&connp->conn_lock); 10200 connp->conn_multicast_ill = ill; 10201 connp->conn_orig_multicast_ifindex = 10202 ill->ill_phyint->phyint_ifindex; 10203 mutex_exit(&connp->conn_lock); 10204 } 10205 } 10206 if (conn_lock_held) 10207 mutex_exit(&connp->conn_lock); 10208 10209 if (saved_ill != NULL) 10210 ill_refrele(saved_ill); 10211 10212 ASSERT(ill != NULL); 10213 /* 10214 * For multicast loopback interfaces replace the multicast address 10215 * with a unicast address for the ire lookup. 
10216 */ 10217 if (IS_LOOPBACK(ill)) 10218 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10219 10220 mibptr = ill->ill_ip_mib; 10221 if (do_outrequests) { 10222 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10223 do_outrequests = B_FALSE; 10224 } 10225 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10226 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10227 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10228 10229 /* 10230 * As we may lose the conn by the time we reach ip_wput_ire_v6 10231 * we copy conn_multicast_loop and conn_dontroute on to an 10232 * ipsec_out. In case if this datagram goes out secure, 10233 * we need the ill_index also. Copy that also into the 10234 * ipsec_out. 10235 */ 10236 if (mctl_present) { 10237 io = (ipsec_out_t *)first_mp->b_rptr; 10238 ASSERT(first_mp->b_datap->db_type == M_CTL); 10239 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10240 } else { 10241 ASSERT(mp == first_mp); 10242 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10243 NULL) { 10244 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10245 freemsg(mp); 10246 if (ill != NULL) 10247 ill_refrele(ill); 10248 if (need_decref) 10249 CONN_DEC_REF(connp); 10250 return; 10251 } 10252 io = (ipsec_out_t *)first_mp->b_rptr; 10253 /* This is not a secure packet */ 10254 io->ipsec_out_secure = B_FALSE; 10255 io->ipsec_out_use_global_policy = B_TRUE; 10256 io->ipsec_out_zoneid = 10257 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10258 first_mp->b_cont = mp; 10259 mctl_present = B_TRUE; 10260 } 10261 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10262 io->ipsec_out_unspec_src = unspec_src; 10263 if (connp != NULL) 10264 io->ipsec_out_dontroute = connp->conn_dontroute; 10265 10266 send_from_ill: 10267 ASSERT(ill != NULL); 10268 ASSERT(mibptr == ill->ill_ip_mib); 10269 if (do_outrequests) { 10270 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10271 do_outrequests = B_FALSE; 10272 } 10273 10274 if (io != NULL) 10275 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10276 10277 /* 10278 * When a specific ill is specified (using IPV6_PKTINFO, 10279 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10280 * on routing entries (ftable and ctable) that have a matching 10281 * ire->ire_ipif->ipif_ill. Thus this can only be used 10282 * for destinations that are on-link for the specific ill 10283 * and that can appear on multiple links. Thus it is useful 10284 * for multicast destinations, link-local destinations, and 10285 * at some point perhaps for site-local destinations (if the 10286 * node sits at a site boundary). 10287 * We create the cache entries in the regular ctable since 10288 * it can not "confuse" things for other destinations. 10289 * table. 10290 * 10291 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10292 * It is used only when ire_cache_lookup is used above. 10293 */ 10294 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10295 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10296 if (ire != NULL) { 10297 /* 10298 * Check if the ire has the RTF_MULTIRT flag, inherited 10299 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10300 */ 10301 if (ire->ire_flags & RTF_MULTIRT) { 10302 /* 10303 * Force hop limit of multirouted packets if required. 10304 * The hop limit of such packets is bounded by the 10305 * ip_multirt_ttl ndd variable. 
10306 * NDP packets must have a hop limit of 255; don't 10307 * change the hop limit in that case. 10308 */ 10309 if ((ipst->ips_ip_multirt_ttl > 0) && 10310 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10311 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10312 if (ip_debug > 3) { 10313 ip2dbg(("ip_wput_v6: forcing multirt " 10314 "hop limit to %d (was %d) ", 10315 ipst->ips_ip_multirt_ttl, 10316 ip6h->ip6_hops)); 10317 pr_addr_dbg("v6dst %s\n", AF_INET6, 10318 &ire->ire_addr_v6); 10319 } 10320 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10321 } 10322 10323 /* 10324 * We look at this point if there are pending 10325 * unresolved routes. ire_multirt_need_resolve_v6() 10326 * checks in O(n) that all IRE_OFFSUBNET ire 10327 * entries for the packet's destination and 10328 * flagged RTF_MULTIRT are currently resolved. 10329 * If some remain unresolved, we make a copy 10330 * of the current message. It will be used 10331 * to initiate additional route resolutions. 10332 */ 10333 multirt_need_resolve = 10334 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10335 MBLK_GETLABEL(first_mp), ipst); 10336 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10337 "multirt_need_resolve %d, first_mp %p\n", 10338 (void *)ire, multirt_need_resolve, 10339 (void *)first_mp)); 10340 if (multirt_need_resolve) { 10341 copy_mp = copymsg(first_mp); 10342 if (copy_mp != NULL) { 10343 MULTIRT_DEBUG_TAG(copy_mp); 10344 } 10345 } 10346 } 10347 10348 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10349 ill->ill_name, (void *)ire, 10350 ill->ill_phyint->phyint_ifindex)); 10351 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10352 connp, caller, 10353 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10354 ip6i_flags, zoneid); 10355 ire_refrele(ire); 10356 if (need_decref) { 10357 CONN_DEC_REF(connp); 10358 connp = NULL; 10359 } 10360 10361 /* 10362 * Try to resolve another multiroute if 10363 * ire_multirt_need_resolve_v6() deemed it necessary. 
10364 * copy_mp will be consumed (sent or freed) by 10365 * ip_newroute_[ipif_]v6(). 10366 */ 10367 if (copy_mp != NULL) { 10368 if (mctl_present) { 10369 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10370 } else { 10371 ip6h = (ip6_t *)copy_mp->b_rptr; 10372 } 10373 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10374 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10375 zoneid, ipst); 10376 if (ipif == NULL) { 10377 ip1dbg(("ip_wput_v6: No ipif for " 10378 "multicast\n")); 10379 MULTIRT_DEBUG_UNTAG(copy_mp); 10380 freemsg(copy_mp); 10381 return; 10382 } 10383 ip_newroute_ipif_v6(q, copy_mp, ipif, 10384 ip6h->ip6_dst, unspec_src, zoneid); 10385 ipif_refrele(ipif); 10386 } else { 10387 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10388 &ip6h->ip6_src, ill, zoneid, ipst); 10389 } 10390 } 10391 ill_refrele(ill); 10392 return; 10393 } 10394 if (need_decref) { 10395 CONN_DEC_REF(connp); 10396 connp = NULL; 10397 } 10398 10399 /* Update rptr if there was an ip6i_t header. */ 10400 if (ip6i != NULL) 10401 mp->b_rptr -= sizeof (ip6i_t); 10402 if (unspec_src || attach_if) { 10403 if (ip6i == NULL) { 10404 /* 10405 * Add ip6i_t header to carry unspec_src 10406 * or attach_if until the packet comes back in 10407 * ip_wput_v6. 10408 */ 10409 if (mctl_present) { 10410 first_mp->b_cont = 10411 ip_add_info_v6(mp, NULL, v6dstp); 10412 mp = first_mp->b_cont; 10413 if (mp == NULL) 10414 freeb(first_mp); 10415 } else { 10416 first_mp = mp = ip_add_info_v6(mp, NULL, 10417 v6dstp); 10418 } 10419 if (mp == NULL) { 10420 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10421 ill_refrele(ill); 10422 return; 10423 } 10424 ip6i = (ip6i_t *)mp->b_rptr; 10425 if ((mp->b_wptr - (uchar_t *)ip6i) == 10426 sizeof (ip6i_t)) { 10427 /* 10428 * ndp_resolver called from ip_newroute_v6 10429 * expects a pulled up message. 
10430 */ 10431 if (!pullupmsg(mp, -1)) { 10432 ip1dbg(("ip_wput_v6: pullupmsg" 10433 " failed\n")); 10434 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10435 freemsg(first_mp); 10436 return; 10437 } 10438 ip6i = (ip6i_t *)mp->b_rptr; 10439 } 10440 ip6h = (ip6_t *)&ip6i[1]; 10441 v6dstp = &ip6h->ip6_dst; 10442 } 10443 if (unspec_src) 10444 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10445 if (attach_if) { 10446 /* 10447 * Bind to nofailover/BOUND_PIF overrides ifindex. 10448 */ 10449 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10450 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10451 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10452 if (drop_if_delayed) { 10453 /* This is a multipathing probe packet */ 10454 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10455 } 10456 } 10457 if (mctl_present) { 10458 ASSERT(io != NULL); 10459 io->ipsec_out_unspec_src = unspec_src; 10460 } 10461 } 10462 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10463 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10464 unspec_src, zoneid); 10465 } else { 10466 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10467 zoneid, ipst); 10468 } 10469 ill_refrele(ill); 10470 return; 10471 10472 notv6: 10473 /* FIXME?: assume the caller calls the right version of ip_output? */ 10474 if (q->q_next == NULL) { 10475 connp = Q_TO_CONN(q); 10476 10477 /* 10478 * We can change conn_send for all types of conn, even 10479 * though only TCP uses it right now. 10480 * FIXME: sctp could use conn_send but doesn't currently. 10481 */ 10482 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10483 } 10484 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10485 (void) ip_output(arg, first_mp, arg2, caller); 10486 if (ill != NULL) 10487 ill_refrele(ill); 10488 } 10489 10490 /* 10491 * If this is a conn_t queue, then we pass in the conn. This includes the 10492 * zoneid. 10493 * Otherwise, this is a message for an ill_t queue, 10494 * in which case we use the global zoneid since those are all part of 10495 * the global zone. 
10496 */ 10497 void 10498 ip_wput_v6(queue_t *q, mblk_t *mp) 10499 { 10500 if (CONN_Q(q)) 10501 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10502 else 10503 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10504 } 10505 10506 static void 10507 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10508 { 10509 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10510 io->ipsec_out_attach_if = B_TRUE; 10511 io->ipsec_out_ill_index = attach_index; 10512 } 10513 10514 /* 10515 * NULL send-to queue - packet is to be delivered locally. 10516 */ 10517 void 10518 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10519 ire_t *ire, int fanout_flags) 10520 { 10521 uint32_t ports; 10522 mblk_t *mp = first_mp, *first_mp1; 10523 boolean_t mctl_present; 10524 uint8_t nexthdr; 10525 uint16_t hdr_length; 10526 ipsec_out_t *io; 10527 mib2_ipIfStatsEntry_t *mibptr; 10528 ilm_t *ilm; 10529 uint_t nexthdr_offset; 10530 ip_stack_t *ipst = ill->ill_ipst; 10531 10532 if (DB_TYPE(mp) == M_CTL) { 10533 io = (ipsec_out_t *)mp->b_rptr; 10534 if (!io->ipsec_out_secure) { 10535 mp = mp->b_cont; 10536 freeb(first_mp); 10537 first_mp = mp; 10538 mctl_present = B_FALSE; 10539 } else { 10540 mctl_present = B_TRUE; 10541 mp = first_mp->b_cont; 10542 ipsec_out_to_in(first_mp); 10543 } 10544 } else { 10545 mctl_present = B_FALSE; 10546 } 10547 10548 /* 10549 * Remove reachability confirmation bit from version field 10550 * before passing the packet on to any firewall hooks or 10551 * looping back the packet. 
10552 */ 10553 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10554 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10555 10556 DTRACE_PROBE4(ip6__loopback__in__start, 10557 ill_t *, ill, ill_t *, NULL, 10558 ip6_t *, ip6h, mblk_t *, first_mp); 10559 10560 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10561 ipst->ips_ipv6firewall_loopback_in, 10562 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10563 10564 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10565 10566 if (first_mp == NULL) 10567 return; 10568 10569 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10570 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10571 int, 1); 10572 10573 nexthdr = ip6h->ip6_nxt; 10574 mibptr = ill->ill_ip_mib; 10575 10576 /* Fastpath */ 10577 switch (nexthdr) { 10578 case IPPROTO_TCP: 10579 case IPPROTO_UDP: 10580 case IPPROTO_ICMPV6: 10581 case IPPROTO_SCTP: 10582 hdr_length = IPV6_HDR_LEN; 10583 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10584 (uchar_t *)ip6h); 10585 break; 10586 default: { 10587 uint8_t *nexthdrp; 10588 10589 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10590 &hdr_length, &nexthdrp)) { 10591 /* Malformed packet */ 10592 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10593 freemsg(first_mp); 10594 return; 10595 } 10596 nexthdr = *nexthdrp; 10597 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10598 break; 10599 } 10600 } 10601 10602 UPDATE_OB_PKT_COUNT(ire); 10603 ire->ire_last_used_time = lbolt; 10604 10605 switch (nexthdr) { 10606 case IPPROTO_TCP: 10607 if (DB_TYPE(mp) == M_DATA) { 10608 /* 10609 * M_DATA mblk, so init mblk (chain) for 10610 * no struio(). 
10611 */ 10612 mblk_t *mp1 = mp; 10613 10614 do { 10615 mp1->b_datap->db_struioflag = 0; 10616 } while ((mp1 = mp1->b_cont) != NULL); 10617 } 10618 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10619 TCP_PORTS_OFFSET); 10620 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10621 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10622 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10623 hdr_length, mctl_present, ire->ire_zoneid); 10624 return; 10625 10626 case IPPROTO_UDP: 10627 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10628 UDP_PORTS_OFFSET); 10629 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10630 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10631 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10632 return; 10633 10634 case IPPROTO_SCTP: 10635 { 10636 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10637 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10638 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10639 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10640 return; 10641 } 10642 case IPPROTO_ICMPV6: { 10643 icmp6_t *icmp6; 10644 10645 /* check for full IPv6+ICMPv6 header */ 10646 if ((mp->b_wptr - mp->b_rptr) < 10647 (hdr_length + ICMP6_MINLEN)) { 10648 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10649 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10650 " failed\n")); 10651 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10652 freemsg(first_mp); 10653 return; 10654 } 10655 ip6h = (ip6_t *)mp->b_rptr; 10656 } 10657 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10658 10659 /* Update output mib stats */ 10660 icmp_update_out_mib_v6(ill, icmp6); 10661 10662 /* Check variable for testing applications */ 10663 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10664 freemsg(first_mp); 10665 return; 10666 } 10667 /* 10668 * Assume that there is always at least one conn for 10669 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10670 * where there is no conn. 
10671 */ 10672 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10673 !IS_LOOPBACK(ill)) { 10674 /* 10675 * In the multicast case, applications may have 10676 * joined the group from different zones, so we 10677 * need to deliver the packet to each of them. 10678 * Loop through the multicast memberships 10679 * structures (ilm) on the receive ill and send 10680 * a copy of the packet up each matching one. 10681 * However, we don't do this for multicasts sent 10682 * on the loopback interface (PHYI_LOOPBACK flag 10683 * set) as they must stay in the sender's zone. 10684 */ 10685 ILM_WALKER_HOLD(ill); 10686 for (ilm = ill->ill_ilm; ilm != NULL; 10687 ilm = ilm->ilm_next) { 10688 if (ilm->ilm_flags & ILM_DELETED) 10689 continue; 10690 if (!IN6_ARE_ADDR_EQUAL( 10691 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10692 continue; 10693 if ((fanout_flags & 10694 IP_FF_NO_MCAST_LOOP) && 10695 ilm->ilm_zoneid == ire->ire_zoneid) 10696 continue; 10697 if (!ipif_lookup_zoneid(ill, 10698 ilm->ilm_zoneid, IPIF_UP, NULL)) 10699 continue; 10700 10701 first_mp1 = ip_copymsg(first_mp); 10702 if (first_mp1 == NULL) 10703 continue; 10704 icmp_inbound_v6(q, first_mp1, ill, 10705 hdr_length, mctl_present, 10706 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10707 NULL); 10708 } 10709 ILM_WALKER_RELE(ill); 10710 } else { 10711 first_mp1 = ip_copymsg(first_mp); 10712 if (first_mp1 != NULL) 10713 icmp_inbound_v6(q, first_mp1, ill, 10714 hdr_length, mctl_present, 10715 IP6_NO_IPPOLICY, ire->ire_zoneid, 10716 NULL); 10717 } 10718 } 10719 /* FALLTHRU */ 10720 default: { 10721 /* 10722 * Handle protocols with which IPv6 is less intimate. 10723 */ 10724 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10725 10726 /* 10727 * Enable sending ICMP for "Unknown" nexthdr 10728 * case. i.e. where we did not FALLTHRU from 10729 * IPPROTO_ICMPV6 processing case above. 
10730 */ 10731 if (nexthdr != IPPROTO_ICMPV6) 10732 fanout_flags |= IP_FF_SEND_ICMP; 10733 /* 10734 * Note: There can be more than one stream bound 10735 * to a particular protocol. When this is the case, 10736 * each one gets a copy of any incoming packets. 10737 */ 10738 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10739 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10740 mctl_present, ire->ire_zoneid); 10741 return; 10742 } 10743 } 10744 } 10745 10746 /* 10747 * Send packet using IRE. 10748 * Checksumming is controlled by cksum_request: 10749 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10750 * 1 => Skip TCP/UDP/SCTP checksum 10751 * Otherwise => checksum_request contains insert offset for checksum 10752 * 10753 * Assumes that the following set of headers appear in the first 10754 * mblk: 10755 * ip6_t 10756 * Any extension headers 10757 * TCP/UDP/SCTP header (if present) 10758 * The routine can handle an ICMPv6 header that is not in the first mblk. 10759 * 10760 * NOTE : This function does not ire_refrele the ire passed in as the 10761 * argument unlike ip_wput_ire where the REFRELE is done. 10762 * Refer to ip_wput_ire for more on this. 10763 */ 10764 static void 10765 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10766 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10767 zoneid_t zoneid) 10768 { 10769 ip6_t *ip6h; 10770 uint8_t nexthdr; 10771 uint16_t hdr_length; 10772 uint_t reachable = 0x0; 10773 ill_t *ill; 10774 mib2_ipIfStatsEntry_t *mibptr; 10775 mblk_t *first_mp; 10776 boolean_t mctl_present; 10777 ipsec_out_t *io; 10778 boolean_t conn_dontroute; /* conn value for multicast */ 10779 boolean_t conn_multicast_loop; /* conn value for multicast */ 10780 boolean_t multicast_forward; /* Should we forward ? 
*/ 10781 int max_frag; 10782 ip_stack_t *ipst = ire->ire_ipst; 10783 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10784 10785 ill = ire_to_ill(ire); 10786 first_mp = mp; 10787 multicast_forward = B_FALSE; 10788 10789 if (mp->b_datap->db_type != M_CTL) { 10790 ip6h = (ip6_t *)first_mp->b_rptr; 10791 } else { 10792 io = (ipsec_out_t *)first_mp->b_rptr; 10793 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10794 /* 10795 * Grab the zone id now because the M_CTL can be discarded by 10796 * ip_wput_ire_parse_ipsec_out() below. 10797 */ 10798 ASSERT(zoneid == io->ipsec_out_zoneid); 10799 ASSERT(zoneid != ALL_ZONES); 10800 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10801 /* 10802 * For the multicast case, ipsec_out carries conn_dontroute and 10803 * conn_multicast_loop as conn may not be available here. We 10804 * need this for multicast loopback and forwarding which is done 10805 * later in the code. 10806 */ 10807 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10808 conn_dontroute = io->ipsec_out_dontroute; 10809 conn_multicast_loop = io->ipsec_out_multicast_loop; 10810 /* 10811 * If conn_dontroute is not set or conn_multicast_loop 10812 * is set, we need to do forwarding/loopback. For 10813 * datagrams from ip_wput_multicast, conn_dontroute is 10814 * set to B_TRUE and conn_multicast_loop is set to 10815 * B_FALSE so that we neither do forwarding nor 10816 * loopback. 10817 */ 10818 if (!conn_dontroute || conn_multicast_loop) 10819 multicast_forward = B_TRUE; 10820 } 10821 } 10822 10823 /* 10824 * If the sender didn't supply the hop limit and there is a default 10825 * unicast hop limit associated with the output interface, we use 10826 * that if the packet is unicast. Interface specific unicast hop 10827 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10828 */ 10829 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10830 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10831 ip6h->ip6_hops = ill->ill_max_hops; 10832 } 10833 10834 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10835 ire->ire_zoneid != ALL_ZONES) { 10836 /* 10837 * When a zone sends a packet to another zone, we try to deliver 10838 * the packet under the same conditions as if the destination 10839 * was a real node on the network. To do so, we look for a 10840 * matching route in the forwarding table. 10841 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10842 * ip_newroute_v6() does. 10843 * Note that IRE_LOCAL are special, since they are used 10844 * when the zoneid doesn't match in some cases. This means that 10845 * we need to handle ipha_src differently since ire_src_addr 10846 * belongs to the receiving zone instead of the sending zone. 10847 * When ip_restrict_interzone_loopback is set, then 10848 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10849 * for loopback between zones when the logical "Ethernet" would 10850 * have looped them back. 
10851 */ 10852 ire_t *src_ire; 10853 10854 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10855 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10856 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10857 if (src_ire != NULL && 10858 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10859 (!ipst->ips_ip_restrict_interzone_loopback || 10860 ire_local_same_ill_group(ire, src_ire))) { 10861 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10862 !unspec_src) { 10863 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10864 } 10865 ire_refrele(src_ire); 10866 } else { 10867 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10868 if (src_ire != NULL) { 10869 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10870 ire_refrele(src_ire); 10871 freemsg(first_mp); 10872 return; 10873 } 10874 ire_refrele(src_ire); 10875 } 10876 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10877 /* Failed */ 10878 freemsg(first_mp); 10879 return; 10880 } 10881 icmp_unreachable_v6(q, first_mp, 10882 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10883 zoneid, ipst); 10884 return; 10885 } 10886 } 10887 10888 if (mp->b_datap->db_type == M_CTL || 10889 ipss->ipsec_outbound_v6_policy_present) { 10890 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10891 connp, unspec_src, zoneid); 10892 if (mp == NULL) { 10893 return; 10894 } 10895 } 10896 10897 first_mp = mp; 10898 if (mp->b_datap->db_type == M_CTL) { 10899 io = (ipsec_out_t *)mp->b_rptr; 10900 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10901 mp = mp->b_cont; 10902 mctl_present = B_TRUE; 10903 } else { 10904 mctl_present = B_FALSE; 10905 } 10906 10907 ip6h = (ip6_t *)mp->b_rptr; 10908 nexthdr = ip6h->ip6_nxt; 10909 mibptr = ill->ill_ip_mib; 10910 10911 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10912 ipif_t *ipif; 10913 10914 /* 10915 * Select the source address using ipif_select_source_v6. 
10916 */ 10917 if (attach_index != 0) { 10918 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10919 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10920 } else { 10921 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10922 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10923 } 10924 if (ipif == NULL) { 10925 if (ip_debug > 2) { 10926 /* ip1dbg */ 10927 pr_addr_dbg("ip_wput_ire_v6: no src for " 10928 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10929 printf("ip_wput_ire_v6: interface name %s\n", 10930 ill->ill_name); 10931 } 10932 freemsg(first_mp); 10933 return; 10934 } 10935 ip6h->ip6_src = ipif->ipif_v6src_addr; 10936 ipif_refrele(ipif); 10937 } 10938 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10939 if ((connp != NULL && connp->conn_multicast_loop) || 10940 !IS_LOOPBACK(ill)) { 10941 ilm_t *ilm; 10942 10943 ILM_WALKER_HOLD(ill); 10944 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10945 ILM_WALKER_RELE(ill); 10946 if (ilm != NULL) { 10947 mblk_t *nmp; 10948 int fanout_flags = 0; 10949 10950 if (connp != NULL && 10951 !connp->conn_multicast_loop) { 10952 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10953 } 10954 ip1dbg(("ip_wput_ire_v6: " 10955 "Loopback multicast\n")); 10956 nmp = ip_copymsg(first_mp); 10957 if (nmp != NULL) { 10958 ip6_t *nip6h; 10959 mblk_t *mp_ip6h; 10960 10961 if (mctl_present) { 10962 nip6h = (ip6_t *) 10963 nmp->b_cont->b_rptr; 10964 mp_ip6h = nmp->b_cont; 10965 } else { 10966 nip6h = (ip6_t *)nmp->b_rptr; 10967 mp_ip6h = nmp; 10968 } 10969 10970 DTRACE_PROBE4( 10971 ip6__loopback__out__start, 10972 ill_t *, NULL, 10973 ill_t *, ill, 10974 ip6_t *, nip6h, 10975 mblk_t *, nmp); 10976 10977 FW_HOOKS6( 10978 ipst->ips_ip6_loopback_out_event, 10979 ipst->ips_ipv6firewall_loopback_out, 10980 NULL, ill, nip6h, nmp, mp_ip6h, 10981 0, ipst); 10982 10983 DTRACE_PROBE1( 10984 ip6__loopback__out__end, 10985 mblk_t *, nmp); 10986 10987 /* 10988 * DTrace this as ip:::send. 
A blocked 10989 * packet will fire the send probe, but 10990 * not the receive probe. 10991 */ 10992 DTRACE_IP7(send, mblk_t *, nmp, 10993 conn_t *, NULL, void_ip_t *, nip6h, 10994 __dtrace_ipsr_ill_t *, ill, 10995 ipha_t *, NULL, ip6_t *, nip6h, 10996 int, 1); 10997 10998 if (nmp != NULL) { 10999 /* 11000 * Deliver locally and to 11001 * every local zone, except 11002 * the sending zone when 11003 * IPV6_MULTICAST_LOOP is 11004 * disabled. 11005 */ 11006 ip_wput_local_v6(RD(q), ill, 11007 nip6h, nmp, 11008 ire, fanout_flags); 11009 } 11010 } else { 11011 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11012 ip1dbg(("ip_wput_ire_v6: " 11013 "copymsg failed\n")); 11014 } 11015 } 11016 } 11017 if (ip6h->ip6_hops == 0 || 11018 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11019 IS_LOOPBACK(ill)) { 11020 /* 11021 * Local multicast or just loopback on loopback 11022 * interface. 11023 */ 11024 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11025 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11026 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11027 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11028 freemsg(first_mp); 11029 return; 11030 } 11031 } 11032 11033 if (ire->ire_stq != NULL) { 11034 uint32_t sum; 11035 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11036 ill_phyint->phyint_ifindex; 11037 queue_t *dev_q = ire->ire_stq->q_next; 11038 11039 /* 11040 * non-NULL send-to queue - packet is to be sent 11041 * out an interface. 11042 */ 11043 11044 /* Driver is flow-controlling? */ 11045 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11046 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11047 /* 11048 * Queue packet if we have an conn to give back 11049 * pressure. We can't queue packets intended for 11050 * hardware acceleration since we've tossed that 11051 * state already. If the packet is being fed back 11052 * from ire_send_v6, we don't know the position in 11053 * the queue to enqueue the packet and we discard 11054 * the packet. 
11055 */ 11056 if (ipst->ips_ip_output_queue && connp != NULL && 11057 !mctl_present && caller != IRE_SEND) { 11058 if (caller == IP_WSRV) { 11059 connp->conn_did_putbq = 1; 11060 (void) putbq(connp->conn_wq, mp); 11061 conn_drain_insert(connp); 11062 /* 11063 * caller == IP_WSRV implies we are 11064 * the service thread, and the 11065 * queue is already noenabled. 11066 * The check for canput and 11067 * the putbq is not atomic. 11068 * So we need to check again. 11069 */ 11070 if (canput(dev_q)) 11071 connp->conn_did_putbq = 0; 11072 } else { 11073 (void) putq(connp->conn_wq, mp); 11074 } 11075 return; 11076 } 11077 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11078 freemsg(first_mp); 11079 return; 11080 } 11081 11082 /* 11083 * Look for reachability confirmations from the transport. 11084 */ 11085 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11086 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11087 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11088 if (mctl_present) 11089 io->ipsec_out_reachable = B_TRUE; 11090 } 11091 /* Fastpath */ 11092 switch (nexthdr) { 11093 case IPPROTO_TCP: 11094 case IPPROTO_UDP: 11095 case IPPROTO_ICMPV6: 11096 case IPPROTO_SCTP: 11097 hdr_length = IPV6_HDR_LEN; 11098 break; 11099 default: { 11100 uint8_t *nexthdrp; 11101 11102 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11103 &hdr_length, &nexthdrp)) { 11104 /* Malformed packet */ 11105 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11106 freemsg(first_mp); 11107 return; 11108 } 11109 nexthdr = *nexthdrp; 11110 break; 11111 } 11112 } 11113 11114 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11115 uint16_t *up; 11116 uint16_t *insp; 11117 11118 /* 11119 * The packet header is processed once for all, even 11120 * in the multirouting case. We disable hardware 11121 * checksum if the packet is multirouted, as it will be 11122 * replicated via several interfaces, and not all of 11123 * them may have this capability. 
11124 */ 11125 if (cksum_request == 1 && 11126 !(ire->ire_flags & RTF_MULTIRT)) { 11127 /* Skip the transport checksum */ 11128 goto cksum_done; 11129 } 11130 /* 11131 * Do user-configured raw checksum. 11132 * Compute checksum and insert at offset "cksum_request" 11133 */ 11134 11135 /* check for enough headers for checksum */ 11136 cksum_request += hdr_length; /* offset from rptr */ 11137 if ((mp->b_wptr - mp->b_rptr) < 11138 (cksum_request + sizeof (int16_t))) { 11139 if (!pullupmsg(mp, 11140 cksum_request + sizeof (int16_t))) { 11141 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11142 " failed\n")); 11143 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11144 freemsg(first_mp); 11145 return; 11146 } 11147 ip6h = (ip6_t *)mp->b_rptr; 11148 } 11149 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11150 ASSERT(((uintptr_t)insp & 0x1) == 0); 11151 up = (uint16_t *)&ip6h->ip6_src; 11152 /* 11153 * icmp has placed length and routing 11154 * header adjustment in *insp. 11155 */ 11156 sum = htons(nexthdr) + 11157 up[0] + up[1] + up[2] + up[3] + 11158 up[4] + up[5] + up[6] + up[7] + 11159 up[8] + up[9] + up[10] + up[11] + 11160 up[12] + up[13] + up[14] + up[15]; 11161 sum = (sum & 0xffff) + (sum >> 16); 11162 *insp = IP_CSUM(mp, hdr_length, sum); 11163 } else if (nexthdr == IPPROTO_TCP) { 11164 uint16_t *up; 11165 11166 /* 11167 * Check for full IPv6 header + enough TCP header 11168 * to get at the checksum field. 
11169 */ 11170 if ((mp->b_wptr - mp->b_rptr) < 11171 (hdr_length + TCP_CHECKSUM_OFFSET + 11172 TCP_CHECKSUM_SIZE)) { 11173 if (!pullupmsg(mp, hdr_length + 11174 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11175 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11176 " failed\n")); 11177 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11178 freemsg(first_mp); 11179 return; 11180 } 11181 ip6h = (ip6_t *)mp->b_rptr; 11182 } 11183 11184 up = (uint16_t *)&ip6h->ip6_src; 11185 /* 11186 * Note: The TCP module has stored the length value 11187 * into the tcp checksum field, so we don't 11188 * need to explicitly sum it in here. 11189 */ 11190 sum = up[0] + up[1] + up[2] + up[3] + 11191 up[4] + up[5] + up[6] + up[7] + 11192 up[8] + up[9] + up[10] + up[11] + 11193 up[12] + up[13] + up[14] + up[15]; 11194 11195 /* Fold the initial sum */ 11196 sum = (sum & 0xffff) + (sum >> 16); 11197 11198 up = (uint16_t *)(((uchar_t *)ip6h) + 11199 hdr_length + TCP_CHECKSUM_OFFSET); 11200 11201 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11202 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11203 ire->ire_max_frag, mctl_present, sum); 11204 11205 /* Software checksum? 
*/ 11206 if (DB_CKSUMFLAGS(mp) == 0) { 11207 IP6_STAT(ipst, ip6_out_sw_cksum); 11208 IP6_STAT_UPDATE(ipst, 11209 ip6_tcp_out_sw_cksum_bytes, 11210 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11211 hdr_length); 11212 } 11213 } else if (nexthdr == IPPROTO_UDP) { 11214 uint16_t *up; 11215 11216 /* 11217 * check for full IPv6 header + enough UDP header 11218 * to get at the UDP checksum field 11219 */ 11220 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11221 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11222 if (!pullupmsg(mp, hdr_length + 11223 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11224 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11225 " failed\n")); 11226 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11227 freemsg(first_mp); 11228 return; 11229 } 11230 ip6h = (ip6_t *)mp->b_rptr; 11231 } 11232 up = (uint16_t *)&ip6h->ip6_src; 11233 /* 11234 * Note: The UDP module has stored the length value 11235 * into the udp checksum field, so we don't 11236 * need to explicitly sum it in here. 11237 */ 11238 sum = up[0] + up[1] + up[2] + up[3] + 11239 up[4] + up[5] + up[6] + up[7] + 11240 up[8] + up[9] + up[10] + up[11] + 11241 up[12] + up[13] + up[14] + up[15]; 11242 11243 /* Fold the initial sum */ 11244 sum = (sum & 0xffff) + (sum >> 16); 11245 11246 up = (uint16_t *)(((uchar_t *)ip6h) + 11247 hdr_length + UDP_CHECKSUM_OFFSET); 11248 11249 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11250 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11251 ire->ire_max_frag, mctl_present, sum); 11252 11253 /* Software checksum? 
*/ 11254 if (DB_CKSUMFLAGS(mp) == 0) { 11255 IP6_STAT(ipst, ip6_out_sw_cksum); 11256 IP6_STAT_UPDATE(ipst, 11257 ip6_udp_out_sw_cksum_bytes, 11258 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11259 hdr_length); 11260 } 11261 } else if (nexthdr == IPPROTO_ICMPV6) { 11262 uint16_t *up; 11263 icmp6_t *icmp6; 11264 11265 /* check for full IPv6+ICMPv6 header */ 11266 if ((mp->b_wptr - mp->b_rptr) < 11267 (hdr_length + ICMP6_MINLEN)) { 11268 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11269 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11270 " failed\n")); 11271 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11272 freemsg(first_mp); 11273 return; 11274 } 11275 ip6h = (ip6_t *)mp->b_rptr; 11276 } 11277 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11278 up = (uint16_t *)&ip6h->ip6_src; 11279 /* 11280 * icmp has placed length and routing 11281 * header adjustment in icmp6_cksum. 11282 */ 11283 sum = htons(IPPROTO_ICMPV6) + 11284 up[0] + up[1] + up[2] + up[3] + 11285 up[4] + up[5] + up[6] + up[7] + 11286 up[8] + up[9] + up[10] + up[11] + 11287 up[12] + up[13] + up[14] + up[15]; 11288 sum = (sum & 0xffff) + (sum >> 16); 11289 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11290 11291 /* Update output mib stats */ 11292 icmp_update_out_mib_v6(ill, icmp6); 11293 } else if (nexthdr == IPPROTO_SCTP) { 11294 sctp_hdr_t *sctph; 11295 11296 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11297 if (!pullupmsg(mp, hdr_length + 11298 sizeof (*sctph))) { 11299 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11300 " failed\n")); 11301 BUMP_MIB(ill->ill_ip_mib, 11302 ipIfStatsOutDiscards); 11303 freemsg(mp); 11304 return; 11305 } 11306 ip6h = (ip6_t *)mp->b_rptr; 11307 } 11308 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11309 sctph->sh_chksum = 0; 11310 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11311 } 11312 11313 cksum_done: 11314 /* 11315 * We force the insertion of a fragment header using the 11316 * IPH_FRAG_HDR flag in two cases: 11317 * - after reception of an ICMPv6 "packet too 
big" message 11318 * with a MTU < 1280 (cf. RFC 2460 section 5) 11319 * - for multirouted IPv6 packets, so that the receiver can 11320 * discard duplicates according to their fragment identifier 11321 * 11322 * Two flags modifed from the API can modify this behavior. 11323 * The first is IPV6_USE_MIN_MTU. With this API the user 11324 * can specify how to manage PMTUD for unicast and multicast. 11325 * 11326 * IPV6_DONTFRAG disallows fragmentation. 11327 */ 11328 max_frag = ire->ire_max_frag; 11329 switch (IP6I_USE_MIN_MTU_API(flags)) { 11330 case IPV6_USE_MIN_MTU_DEFAULT: 11331 case IPV6_USE_MIN_MTU_UNICAST: 11332 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11333 max_frag = IPV6_MIN_MTU; 11334 } 11335 break; 11336 11337 case IPV6_USE_MIN_MTU_NEVER: 11338 max_frag = IPV6_MIN_MTU; 11339 break; 11340 } 11341 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11342 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11343 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11344 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11345 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11346 return; 11347 } 11348 11349 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11350 (mp->b_cont ? 
msgdsize(mp) : 11351 mp->b_wptr - (uchar_t *)ip6h)) { 11352 ip0dbg(("Packet length mismatch: %d, %ld\n", 11353 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11354 msgdsize(mp))); 11355 freemsg(first_mp); 11356 return; 11357 } 11358 /* Do IPSEC processing first */ 11359 if (mctl_present) { 11360 if (attach_index != 0) 11361 ipsec_out_attach_if(io, attach_index); 11362 ipsec_out_process(q, first_mp, ire, ill_index); 11363 return; 11364 } 11365 ASSERT(mp->b_prev == NULL); 11366 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11367 ntohs(ip6h->ip6_plen) + 11368 IPV6_HDR_LEN, max_frag)); 11369 ASSERT(mp == first_mp); 11370 /* Initiate IPPF processing */ 11371 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11372 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11373 if (mp == NULL) { 11374 return; 11375 } 11376 } 11377 ip_wput_frag_v6(mp, ire, reachable, connp, 11378 caller, max_frag); 11379 return; 11380 } 11381 /* Do IPSEC processing first */ 11382 if (mctl_present) { 11383 int extra_len = ipsec_out_extra_length(first_mp); 11384 11385 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11386 max_frag) { 11387 /* 11388 * IPsec headers will push the packet over the 11389 * MTU limit. Issue an ICMPv6 Packet Too Big 11390 * message for this packet if the upper-layer 11391 * that issued this packet will be able to 11392 * react to the icmp_pkt2big_v6() that we'll 11393 * generate. 11394 */ 11395 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11396 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11397 return; 11398 } 11399 if (attach_index != 0) 11400 ipsec_out_attach_if(io, attach_index); 11401 ipsec_out_process(q, first_mp, ire, ill_index); 11402 return; 11403 } 11404 /* 11405 * XXX multicast: add ip_mforward_v6() here. 11406 * Check conn_dontroute 11407 */ 11408 #ifdef lint 11409 /* 11410 * XXX The only purpose of this statement is to avoid lint 11411 * errors. See the above "XXX multicast". When that gets 11412 * fixed, remove this whole #ifdef lint section. 
11413 */ 11414 ip3dbg(("multicast forward is %s.\n", 11415 (multicast_forward ? "TRUE" : "FALSE"))); 11416 #endif 11417 11418 UPDATE_OB_PKT_COUNT(ire); 11419 ire->ire_last_used_time = lbolt; 11420 ASSERT(mp == first_mp); 11421 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11422 } else { 11423 /* 11424 * DTrace this as ip:::send. A blocked packet will fire the 11425 * send probe, but not the receive probe. 11426 */ 11427 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11428 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11429 NULL, ip6_t *, ip6h, int, 1); 11430 DTRACE_PROBE4(ip6__loopback__out__start, 11431 ill_t *, NULL, ill_t *, ill, 11432 ip6_t *, ip6h, mblk_t *, first_mp); 11433 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11434 ipst->ips_ipv6firewall_loopback_out, 11435 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11436 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11437 if (first_mp != NULL) 11438 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11439 } 11440 } 11441 11442 /* 11443 * Outbound IPv6 fragmentation routine using MDT. 11444 */ 11445 static void 11446 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11447 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11448 { 11449 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11450 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11451 mblk_t *hdr_mp, *md_mp = NULL; 11452 int i1; 11453 multidata_t *mmd; 11454 unsigned char *hdr_ptr, *pld_ptr; 11455 ip_pdescinfo_t pdi; 11456 uint32_t ident; 11457 size_t len; 11458 uint16_t offset; 11459 queue_t *stq = ire->ire_stq; 11460 ill_t *ill = (ill_t *)stq->q_ptr; 11461 ip_stack_t *ipst = ill->ill_ipst; 11462 11463 ASSERT(DB_TYPE(mp) == M_DATA); 11464 ASSERT(MBLKL(mp) > unfragmentable_len); 11465 11466 /* 11467 * Move read ptr past unfragmentable portion, we don't want this part 11468 * of the data in our fragments. 
11469 */ 11470 mp->b_rptr += unfragmentable_len; 11471 11472 /* Calculate how many packets we will send out */ 11473 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11474 pkts = (i1 + max_chunk - 1) / max_chunk; 11475 ASSERT(pkts > 1); 11476 11477 /* Allocate a message block which will hold all the IP Headers. */ 11478 wroff = ipst->ips_ip_wroff_extra; 11479 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11480 11481 i1 = pkts * hdr_chunk_len; 11482 /* 11483 * Create the header buffer, Multidata and destination address 11484 * and SAP attribute that should be associated with it. 11485 */ 11486 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11487 ((hdr_mp->b_wptr += i1), 11488 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11489 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11490 freemsg(mp); 11491 if (md_mp == NULL) { 11492 freemsg(hdr_mp); 11493 } else { 11494 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11495 freemsg(md_mp); 11496 } 11497 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11499 return; 11500 } 11501 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11502 11503 /* 11504 * Add a payload buffer to the Multidata; this operation must not 11505 * fail, or otherwise our logic in this routine is broken. There 11506 * is no memory allocation done by the routine, so any returned 11507 * failure simply tells us that we've done something wrong. 11508 * 11509 * A failure tells us that either we're adding the same payload 11510 * buffer more than once, or we're trying to add more buffers than 11511 * allowed. None of the above cases should happen, and we panic 11512 * because either there's horrible heap corruption, and/or 11513 * programming mistake. 
11514 */ 11515 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11516 goto pbuf_panic; 11517 } 11518 11519 hdr_ptr = hdr_mp->b_rptr; 11520 pld_ptr = mp->b_rptr; 11521 11522 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11523 11524 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11525 11526 /* 11527 * len is the total length of the fragmentable data in this 11528 * datagram. For each fragment sent, we will decrement len 11529 * by the amount of fragmentable data sent in that fragment 11530 * until len reaches zero. 11531 */ 11532 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11533 11534 offset = 0; 11535 prev_nexthdr_offset += wroff; 11536 11537 while (len != 0) { 11538 size_t mlen; 11539 ip6_t *fip6h; 11540 ip6_frag_t *fraghdr; 11541 int error; 11542 11543 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11544 mlen = MIN(len, max_chunk); 11545 len -= mlen; 11546 11547 fip6h = (ip6_t *)(hdr_ptr + wroff); 11548 ASSERT(OK_32PTR(fip6h)); 11549 bcopy(ip6h, fip6h, unfragmentable_len); 11550 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11551 11552 fip6h->ip6_plen = htons((uint16_t)(mlen + 11553 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11554 11555 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11556 unfragmentable_len); 11557 fraghdr->ip6f_nxt = nexthdr; 11558 fraghdr->ip6f_reserved = 0; 11559 fraghdr->ip6f_offlg = htons(offset) | 11560 ((len != 0) ? IP6F_MORE_FRAG : 0); 11561 fraghdr->ip6f_ident = ident; 11562 11563 /* 11564 * Record offset and size of header and data of the next packet 11565 * in the multidata message. 
11566 */ 11567 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11568 unfragmentable_len + sizeof (ip6_frag_t), 0); 11569 PDESC_PLD_INIT(&pdi); 11570 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11571 ASSERT(i1 > 0); 11572 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11573 if (i1 == mlen) { 11574 pld_ptr += mlen; 11575 } else { 11576 i1 = mlen - i1; 11577 mp = mp->b_cont; 11578 ASSERT(mp != NULL); 11579 ASSERT(MBLKL(mp) >= i1); 11580 /* 11581 * Attach the next payload message block to the 11582 * multidata message. 11583 */ 11584 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11585 goto pbuf_panic; 11586 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11587 pld_ptr = mp->b_rptr + i1; 11588 } 11589 11590 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11591 KM_NOSLEEP)) == NULL) { 11592 /* 11593 * Any failure other than ENOMEM indicates that we 11594 * have passed in invalid pdesc info or parameters 11595 * to mmd_addpdesc, which must not happen. 11596 * 11597 * EINVAL is a result of failure on boundary checks 11598 * against the pdesc info contents. It should not 11599 * happen, and we panic because either there's 11600 * horrible heap corruption, and/or programming 11601 * mistake. 11602 */ 11603 if (error != ENOMEM) { 11604 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11605 "pdesc logic error detected for " 11606 "mmd %p pinfo %p (%d)\n", 11607 (void *)mmd, (void *)&pdi, error); 11608 /* NOTREACHED */ 11609 } 11610 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11611 /* Free unattached payload message blocks as well */ 11612 md_mp->b_cont = mp->b_cont; 11613 goto free_mmd; 11614 } 11615 11616 /* Advance fragment offset. */ 11617 offset += mlen; 11618 11619 /* Advance to location for next header in the buffer. */ 11620 hdr_ptr += hdr_chunk_len; 11621 11622 /* Did we reach the next payload message block? 
*/ 11623 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11624 mp = mp->b_cont; 11625 /* 11626 * Attach the next message block with payload 11627 * data to the multidata message. 11628 */ 11629 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11630 goto pbuf_panic; 11631 pld_ptr = mp->b_rptr; 11632 } 11633 } 11634 11635 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11636 ASSERT(mp->b_wptr == pld_ptr); 11637 11638 /* Update IP statistics */ 11639 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11640 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11641 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11642 /* 11643 * The ipv6 header len is accounted for in unfragmentable_len so 11644 * when calculating the fragmentation overhead just add the frag 11645 * header len. 11646 */ 11647 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11648 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11649 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11650 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11651 11652 ire->ire_ob_pkt_count += pkts; 11653 if (ire->ire_ipif != NULL) 11654 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11655 11656 ire->ire_last_used_time = lbolt; 11657 /* Send it down */ 11658 putnext(stq, md_mp); 11659 return; 11660 11661 pbuf_panic: 11662 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11663 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11664 pbuf_idx); 11665 /* NOTREACHED */ 11666 } 11667 11668 /* 11669 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11670 * We have not optimized this in terms of number of mblks 11671 * allocated. For instance, for each fragment sent we always allocate a 11672 * mblk to hold the IPv6 header and fragment header. 11673 * 11674 * Assumes that all the extension headers are contained in the first mblk. 
11675 * 11676 * The fragment header is inserted after an hop-by-hop options header 11677 * and after [an optional destinations header followed by] a routing header. 11678 * 11679 * NOTE : This function does not ire_refrele the ire passed in as 11680 * the argument. 11681 */ 11682 void 11683 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11684 int caller, int max_frag) 11685 { 11686 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11687 ip6_t *fip6h; 11688 mblk_t *hmp; 11689 mblk_t *hmp0; 11690 mblk_t *dmp; 11691 ip6_frag_t *fraghdr; 11692 size_t unfragmentable_len; 11693 size_t len; 11694 size_t mlen; 11695 size_t max_chunk; 11696 uint32_t ident; 11697 uint16_t off_flags; 11698 uint16_t offset = 0; 11699 ill_t *ill; 11700 uint8_t nexthdr; 11701 uint_t prev_nexthdr_offset; 11702 uint8_t *ptr; 11703 ip_stack_t *ipst = ire->ire_ipst; 11704 11705 ASSERT(ire->ire_type == IRE_CACHE); 11706 ill = (ill_t *)ire->ire_stq->q_ptr; 11707 11708 if (max_frag <= 0) { 11709 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11710 freemsg(mp); 11711 return; 11712 } 11713 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11714 11715 /* 11716 * Determine the length of the unfragmentable portion of this 11717 * datagram. This consists of the IPv6 header, a potential 11718 * hop-by-hop options header, a potential pre-routing-header 11719 * destination options header, and a potential routing header. 
11720 */ 11721 nexthdr = ip6h->ip6_nxt; 11722 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11723 ptr = (uint8_t *)&ip6h[1]; 11724 11725 if (nexthdr == IPPROTO_HOPOPTS) { 11726 ip6_hbh_t *hbh_hdr; 11727 uint_t hdr_len; 11728 11729 hbh_hdr = (ip6_hbh_t *)ptr; 11730 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11731 nexthdr = hbh_hdr->ip6h_nxt; 11732 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11733 - (uint8_t *)ip6h; 11734 ptr += hdr_len; 11735 } 11736 if (nexthdr == IPPROTO_DSTOPTS) { 11737 ip6_dest_t *dest_hdr; 11738 uint_t hdr_len; 11739 11740 dest_hdr = (ip6_dest_t *)ptr; 11741 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11742 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11743 nexthdr = dest_hdr->ip6d_nxt; 11744 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11745 - (uint8_t *)ip6h; 11746 ptr += hdr_len; 11747 } 11748 } 11749 if (nexthdr == IPPROTO_ROUTING) { 11750 ip6_rthdr_t *rthdr; 11751 uint_t hdr_len; 11752 11753 rthdr = (ip6_rthdr_t *)ptr; 11754 nexthdr = rthdr->ip6r_nxt; 11755 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11756 - (uint8_t *)ip6h; 11757 hdr_len = 8 * (rthdr->ip6r_len + 1); 11758 ptr += hdr_len; 11759 } 11760 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11761 11762 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11763 sizeof (ip6_frag_t)) & ~7; 11764 11765 /* Check if we can use MDT to send out the frags. */ 11766 ASSERT(!IRE_IS_LOCAL(ire)); 11767 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11768 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11769 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11770 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11771 nexthdr, prev_nexthdr_offset); 11772 return; 11773 } 11774 11775 /* 11776 * Allocate an mblk with enough room for the link-layer 11777 * header, the unfragmentable part of the datagram, and the 11778 * fragment header. 
This (or a copy) will be used as the 11779 * first mblk for each fragment we send. 11780 */ 11781 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11782 ipst->ips_ip_wroff_extra, BPRI_HI); 11783 if (hmp == NULL) { 11784 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11785 freemsg(mp); 11786 return; 11787 } 11788 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11789 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11790 11791 fip6h = (ip6_t *)hmp->b_rptr; 11792 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11793 11794 bcopy(ip6h, fip6h, unfragmentable_len); 11795 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11796 11797 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11798 11799 fraghdr->ip6f_nxt = nexthdr; 11800 fraghdr->ip6f_reserved = 0; 11801 fraghdr->ip6f_offlg = 0; 11802 fraghdr->ip6f_ident = htonl(ident); 11803 11804 /* 11805 * len is the total length of the fragmentable data in this 11806 * datagram. For each fragment sent, we will decrement len 11807 * by the amount of fragmentable data sent in that fragment 11808 * until len reaches zero. 11809 */ 11810 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11811 11812 /* 11813 * Move read ptr past unfragmentable portion, we don't want this part 11814 * of the data in our fragments. 
11815 */ 11816 mp->b_rptr += unfragmentable_len; 11817 11818 while (len != 0) { 11819 mlen = MIN(len, max_chunk); 11820 len -= mlen; 11821 if (len != 0) { 11822 /* Not last */ 11823 hmp0 = copyb(hmp); 11824 if (hmp0 == NULL) { 11825 freeb(hmp); 11826 freemsg(mp); 11827 BUMP_MIB(ill->ill_ip_mib, 11828 ipIfStatsOutFragFails); 11829 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11830 return; 11831 } 11832 off_flags = IP6F_MORE_FRAG; 11833 } else { 11834 /* Last fragment */ 11835 hmp0 = hmp; 11836 hmp = NULL; 11837 off_flags = 0; 11838 } 11839 fip6h = (ip6_t *)(hmp0->b_rptr); 11840 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11841 11842 fip6h->ip6_plen = htons((uint16_t)(mlen + 11843 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11844 /* 11845 * Note: Optimization alert. 11846 * In IPv6 (and IPv4) protocol header, Fragment Offset 11847 * ("offset") is 13 bits wide and in 8-octet units. 11848 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11849 * it occupies the most significant 13 bits. 11850 * (least significant 13 bits in IPv4). 11851 * We do not do any shifts here. Not shifting is same effect 11852 * as taking offset value in octet units, dividing by 8 and 11853 * then shifting 3 bits left to line it up in place in proper 11854 * place protocol header. 
11855 */ 11856 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11857 11858 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11859 /* mp has already been freed by ip_carve_mp() */ 11860 if (hmp != NULL) 11861 freeb(hmp); 11862 freeb(hmp0); 11863 ip1dbg(("ip_carve_mp: failed\n")); 11864 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11865 return; 11866 } 11867 hmp0->b_cont = dmp; 11868 /* Get the priority marking, if any */ 11869 hmp0->b_band = dmp->b_band; 11870 UPDATE_OB_PKT_COUNT(ire); 11871 ire->ire_last_used_time = lbolt; 11872 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11873 caller, NULL); 11874 reachable = 0; /* No need to redo state machine in loop */ 11875 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11876 offset += mlen; 11877 } 11878 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11879 } 11880 11881 /* 11882 * Determine if the ill and multicast aspects of that packets 11883 * "matches" the conn. 11884 */ 11885 boolean_t 11886 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11887 zoneid_t zoneid) 11888 { 11889 ill_t *in_ill; 11890 boolean_t wantpacket = B_TRUE; 11891 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11892 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11893 11894 /* 11895 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11896 * unicast and multicast reception to conn_incoming_ill. 11897 * conn_wantpacket_v6 is called both for unicast and 11898 * multicast. 11899 * 11900 * 1) The unicast copy of the packet can come anywhere in 11901 * the ill group if it is part of the group. Thus, we 11902 * need to check to see whether the ill group matches 11903 * if in_ill is part of a group. 11904 * 11905 * 2) ip_rput does not suppress duplicate multicast packets. 
11906 * If there are two interfaces in a ill group and we have 11907 * 2 applications (conns) joined a multicast group G on 11908 * both the interfaces, ilm_lookup_ill filter in ip_rput 11909 * will give us two packets because we join G on both the 11910 * interfaces rather than nominating just one interface 11911 * for receiving multicast like broadcast above. So, 11912 * we have to call ilg_lookup_ill to filter out duplicate 11913 * copies, if ill is part of a group, to supress duplicates. 11914 */ 11915 in_ill = connp->conn_incoming_ill; 11916 if (in_ill != NULL) { 11917 mutex_enter(&connp->conn_lock); 11918 in_ill = connp->conn_incoming_ill; 11919 mutex_enter(&ill->ill_lock); 11920 /* 11921 * No IPMP, and the packet did not arrive on conn_incoming_ill 11922 * OR, IPMP in use and the packet arrived on an IPMP group 11923 * different from the conn_incoming_ill's IPMP group. 11924 * Reject the packet. 11925 */ 11926 if ((in_ill->ill_group == NULL && in_ill != ill) || 11927 (in_ill->ill_group != NULL && 11928 in_ill->ill_group != ill->ill_group)) { 11929 wantpacket = B_FALSE; 11930 } 11931 mutex_exit(&ill->ill_lock); 11932 mutex_exit(&connp->conn_lock); 11933 if (!wantpacket) 11934 return (B_FALSE); 11935 } 11936 11937 if (connp->conn_multi_router) 11938 return (B_TRUE); 11939 11940 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11941 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11942 /* 11943 * Unicast case: we match the conn only if it's in the specified 11944 * zone. 11945 */ 11946 return (IPCL_ZONE_MATCH(connp, zoneid)); 11947 } 11948 11949 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11950 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11951 /* 11952 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11953 * disabled, therefore we don't dispatch the multicast packet to 11954 * the sending zone. 
11955 */ 11956 return (B_FALSE); 11957 } 11958 11959 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11960 zoneid != ALL_ZONES) { 11961 /* 11962 * Multicast packet on the loopback interface: we only match 11963 * conns who joined the group in the specified zone. 11964 */ 11965 return (B_FALSE); 11966 } 11967 11968 mutex_enter(&connp->conn_lock); 11969 wantpacket = 11970 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11971 mutex_exit(&connp->conn_lock); 11972 11973 return (wantpacket); 11974 } 11975 11976 11977 /* 11978 * Transmit a packet and update any NUD state based on the flags 11979 * XXX need to "recover" any ip6i_t when doing putq! 11980 * 11981 * NOTE : This function does not ire_refrele the ire passed in as the 11982 * argument. 11983 */ 11984 void 11985 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11986 int caller, ipsec_out_t *io) 11987 { 11988 mblk_t *mp1; 11989 nce_t *nce = ire->ire_nce; 11990 ill_t *ill; 11991 ill_t *out_ill; 11992 uint64_t delta; 11993 ip6_t *ip6h; 11994 queue_t *stq = ire->ire_stq; 11995 ire_t *ire1 = NULL; 11996 ire_t *save_ire = ire; 11997 boolean_t multirt_send = B_FALSE; 11998 mblk_t *next_mp = NULL; 11999 ip_stack_t *ipst = ire->ire_ipst; 12000 12001 ip6h = (ip6_t *)mp->b_rptr; 12002 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12003 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12004 ASSERT(nce != NULL); 12005 ASSERT(mp->b_datap->db_type == M_DATA); 12006 ASSERT(stq != NULL); 12007 12008 ill = ire_to_ill(ire); 12009 if (!ill) { 12010 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12011 freemsg(mp); 12012 return; 12013 } 12014 12015 /* 12016 * If a packet is to be sent out an interface that is a 6to4 12017 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12018 * destination, must be checked to have a 6to4 prefix 12019 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12020 * address configured on the sending interface. 
Otherwise, 12021 * the packet was delivered to this interface in error and the 12022 * packet must be dropped. 12023 */ 12024 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12025 ipif_t *ipif = ill->ill_ipif; 12026 12027 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12028 &ip6h->ip6_dst)) { 12029 if (ip_debug > 2) { 12030 /* ip1dbg */ 12031 pr_addr_dbg("ip_xmit_v6: attempting to " 12032 "send 6to4 addressed IPv6 " 12033 "destination (%s) out the wrong " 12034 "interface.\n", AF_INET6, 12035 &ip6h->ip6_dst); 12036 } 12037 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12038 freemsg(mp); 12039 return; 12040 } 12041 } 12042 12043 /* Flow-control check has been done in ip_wput_ire_v6 */ 12044 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12045 caller == IP_WSRV || canput(stq->q_next)) { 12046 uint32_t ill_index; 12047 12048 /* 12049 * In most cases, the emission loop below is entered only 12050 * once. Only in the case where the ire holds the 12051 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12052 * flagged ires in the bucket, and send the packet 12053 * through all crossed RTF_MULTIRT routes. 12054 */ 12055 if (ire->ire_flags & RTF_MULTIRT) { 12056 /* 12057 * Multirouting case. The bucket where ire is stored 12058 * probably holds other RTF_MULTIRT flagged ires 12059 * to the destination. In this call to ip_xmit_v6, 12060 * we attempt to send the packet through all 12061 * those ires. Thus, we first ensure that ire is the 12062 * first RTF_MULTIRT ire in the bucket, 12063 * before walking the ire list. 12064 */ 12065 ire_t *first_ire; 12066 irb_t *irb = ire->ire_bucket; 12067 ASSERT(irb != NULL); 12068 multirt_send = B_TRUE; 12069 12070 /* Make sure we do not omit any multiroute ire. 
*/ 12071 IRB_REFHOLD(irb); 12072 for (first_ire = irb->irb_ire; 12073 first_ire != NULL; 12074 first_ire = first_ire->ire_next) { 12075 if ((first_ire->ire_flags & RTF_MULTIRT) && 12076 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12077 &ire->ire_addr_v6)) && 12078 !(first_ire->ire_marks & 12079 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12080 break; 12081 } 12082 12083 if ((first_ire != NULL) && (first_ire != ire)) { 12084 IRE_REFHOLD(first_ire); 12085 /* ire will be released by the caller */ 12086 ire = first_ire; 12087 nce = ire->ire_nce; 12088 stq = ire->ire_stq; 12089 ill = ire_to_ill(ire); 12090 } 12091 IRB_REFRELE(irb); 12092 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12093 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12094 ILL_MDT_USABLE(ill)) { 12095 /* 12096 * This tcp connection was marked as MDT-capable, but 12097 * it has been turned off due changes in the interface. 12098 * Now that the interface support is back, turn it on 12099 * by notifying tcp. We don't directly modify tcp_mdt, 12100 * since we leave all the details to the tcp code that 12101 * knows better. 12102 */ 12103 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12104 12105 if (mdimp == NULL) { 12106 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12107 "connp %p (ENOMEM)\n", (void *)connp)); 12108 } else { 12109 CONN_INC_REF(connp); 12110 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12111 connp, SQTAG_TCP_INPUT_MCTL); 12112 } 12113 } 12114 12115 do { 12116 mblk_t *mp_ip6h; 12117 12118 if (multirt_send) { 12119 irb_t *irb; 12120 /* 12121 * We are in a multiple send case, need to get 12122 * the next ire and make a duplicate of the 12123 * packet. ire1 holds here the next ire to 12124 * process in the bucket. If multirouting is 12125 * expected, any non-RTF_MULTIRT ire that has 12126 * the right destination address is ignored. 
12127 */ 12128 irb = ire->ire_bucket; 12129 ASSERT(irb != NULL); 12130 12131 IRB_REFHOLD(irb); 12132 for (ire1 = ire->ire_next; 12133 ire1 != NULL; 12134 ire1 = ire1->ire_next) { 12135 if (!(ire1->ire_flags & RTF_MULTIRT)) 12136 continue; 12137 if (!IN6_ARE_ADDR_EQUAL( 12138 &ire1->ire_addr_v6, 12139 &ire->ire_addr_v6)) 12140 continue; 12141 if (ire1->ire_marks & 12142 (IRE_MARK_CONDEMNED| 12143 IRE_MARK_HIDDEN)) 12144 continue; 12145 12146 /* Got one */ 12147 if (ire1 != save_ire) { 12148 IRE_REFHOLD(ire1); 12149 } 12150 break; 12151 } 12152 IRB_REFRELE(irb); 12153 12154 if (ire1 != NULL) { 12155 next_mp = copyb(mp); 12156 if ((next_mp == NULL) || 12157 ((mp->b_cont != NULL) && 12158 ((next_mp->b_cont = 12159 dupmsg(mp->b_cont)) == NULL))) { 12160 freemsg(next_mp); 12161 next_mp = NULL; 12162 ire_refrele(ire1); 12163 ire1 = NULL; 12164 } 12165 } 12166 12167 /* Last multiroute ire; don't loop anymore. */ 12168 if (ire1 == NULL) { 12169 multirt_send = B_FALSE; 12170 } 12171 } 12172 12173 ill_index = 12174 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12175 12176 /* Initiate IPPF processing */ 12177 if (IP6_OUT_IPP(flags, ipst)) { 12178 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12179 if (mp == NULL) { 12180 BUMP_MIB(ill->ill_ip_mib, 12181 ipIfStatsOutDiscards); 12182 if (next_mp != NULL) 12183 freemsg(next_mp); 12184 if (ire != save_ire) { 12185 ire_refrele(ire); 12186 } 12187 return; 12188 } 12189 ip6h = (ip6_t *)mp->b_rptr; 12190 } 12191 mp_ip6h = mp; 12192 12193 /* 12194 * Check for fastpath, we need to hold nce_lock to 12195 * prevent fastpath update from chaining nce_fp_mp. 
12196 */ 12197 12198 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12199 mutex_enter(&nce->nce_lock); 12200 if ((mp1 = nce->nce_fp_mp) != NULL) { 12201 uint32_t hlen; 12202 uchar_t *rptr; 12203 12204 hlen = MBLKL(mp1); 12205 rptr = mp->b_rptr - hlen; 12206 /* 12207 * make sure there is room for the fastpath 12208 * datalink header 12209 */ 12210 if (rptr < mp->b_datap->db_base) { 12211 mp1 = copyb(mp1); 12212 mutex_exit(&nce->nce_lock); 12213 if (mp1 == NULL) { 12214 BUMP_MIB(ill->ill_ip_mib, 12215 ipIfStatsOutDiscards); 12216 freemsg(mp); 12217 if (next_mp != NULL) 12218 freemsg(next_mp); 12219 if (ire != save_ire) { 12220 ire_refrele(ire); 12221 } 12222 return; 12223 } 12224 mp1->b_cont = mp; 12225 12226 /* Get the priority marking, if any */ 12227 mp1->b_band = mp->b_band; 12228 mp = mp1; 12229 } else { 12230 mp->b_rptr = rptr; 12231 /* 12232 * fastpath - pre-pend datalink 12233 * header 12234 */ 12235 bcopy(mp1->b_rptr, rptr, hlen); 12236 mutex_exit(&nce->nce_lock); 12237 } 12238 } else { 12239 /* 12240 * Get the DL_UNITDATA_REQ. 12241 */ 12242 mp1 = nce->nce_res_mp; 12243 if (mp1 == NULL) { 12244 mutex_exit(&nce->nce_lock); 12245 ip1dbg(("ip_xmit_v6: No resolution " 12246 "block ire = %p\n", (void *)ire)); 12247 freemsg(mp); 12248 if (next_mp != NULL) 12249 freemsg(next_mp); 12250 if (ire != save_ire) { 12251 ire_refrele(ire); 12252 } 12253 return; 12254 } 12255 /* 12256 * Prepend the DL_UNITDATA_REQ. 
12257 */ 12258 mp1 = copyb(mp1); 12259 mutex_exit(&nce->nce_lock); 12260 if (mp1 == NULL) { 12261 BUMP_MIB(ill->ill_ip_mib, 12262 ipIfStatsOutDiscards); 12263 freemsg(mp); 12264 if (next_mp != NULL) 12265 freemsg(next_mp); 12266 if (ire != save_ire) { 12267 ire_refrele(ire); 12268 } 12269 return; 12270 } 12271 mp1->b_cont = mp; 12272 12273 /* Get the priority marking, if any */ 12274 mp1->b_band = mp->b_band; 12275 mp = mp1; 12276 } 12277 12278 out_ill = (ill_t *)stq->q_ptr; 12279 12280 DTRACE_PROBE4(ip6__physical__out__start, 12281 ill_t *, NULL, ill_t *, out_ill, 12282 ip6_t *, ip6h, mblk_t *, mp); 12283 12284 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12285 ipst->ips_ipv6firewall_physical_out, 12286 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12287 12288 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12289 12290 if (mp == NULL) { 12291 if (multirt_send) { 12292 ASSERT(ire1 != NULL); 12293 if (ire != save_ire) { 12294 ire_refrele(ire); 12295 } 12296 /* 12297 * Proceed with the next RTF_MULTIRT 12298 * ire, also set up the send-to queue 12299 * accordingly. 12300 */ 12301 ire = ire1; 12302 ire1 = NULL; 12303 stq = ire->ire_stq; 12304 nce = ire->ire_nce; 12305 ill = ire_to_ill(ire); 12306 mp = next_mp; 12307 next_mp = NULL; 12308 continue; 12309 } else { 12310 ASSERT(next_mp == NULL); 12311 ASSERT(ire1 == NULL); 12312 break; 12313 } 12314 } 12315 12316 /* 12317 * Update ire and MIB counters; for save_ire, this has 12318 * been done by the caller. 12319 */ 12320 if (ire != save_ire) { 12321 UPDATE_OB_PKT_COUNT(ire); 12322 ire->ire_last_used_time = lbolt; 12323 12324 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12325 BUMP_MIB(ill->ill_ip_mib, 12326 ipIfStatsHCOutMcastPkts); 12327 UPDATE_MIB(ill->ill_ip_mib, 12328 ipIfStatsHCOutMcastOctets, 12329 ntohs(ip6h->ip6_plen) + 12330 IPV6_HDR_LEN); 12331 } 12332 } 12333 12334 /* 12335 * Send it down. XXX Do we want to flow control AH/ESP 12336 * packets that carry TCP payloads? 
We don't flow 12337 * control TCP packets, but we should also not 12338 * flow-control TCP packets that have been protected. 12339 * We don't have an easy way to find out if an AH/ESP 12340 * packet was originally TCP or not currently. 12341 */ 12342 if (io == NULL) { 12343 BUMP_MIB(ill->ill_ip_mib, 12344 ipIfStatsHCOutTransmits); 12345 UPDATE_MIB(ill->ill_ip_mib, 12346 ipIfStatsHCOutOctets, 12347 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12348 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12349 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12350 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12351 int, 0); 12352 12353 putnext(stq, mp); 12354 } else { 12355 /* 12356 * Safety Pup says: make sure this is 12357 * going to the right interface! 12358 */ 12359 if (io->ipsec_out_capab_ill_index != 12360 ill_index) { 12361 /* IPsec kstats: bump lose counter */ 12362 freemsg(mp1); 12363 } else { 12364 BUMP_MIB(ill->ill_ip_mib, 12365 ipIfStatsHCOutTransmits); 12366 UPDATE_MIB(ill->ill_ip_mib, 12367 ipIfStatsHCOutOctets, 12368 ntohs(ip6h->ip6_plen) + 12369 IPV6_HDR_LEN); 12370 DTRACE_IP7(send, mblk_t *, mp, 12371 conn_t *, NULL, void_ip_t *, ip6h, 12372 __dtrace_ipsr_ill_t *, out_ill, 12373 ipha_t *, NULL, ip6_t *, ip6h, int, 12374 0); 12375 ipsec_hw_putnext(stq, mp); 12376 } 12377 } 12378 12379 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12380 if (ire != save_ire) { 12381 ire_refrele(ire); 12382 } 12383 if (multirt_send) { 12384 ASSERT(ire1 != NULL); 12385 /* 12386 * Proceed with the next RTF_MULTIRT 12387 * ire, also set up the send-to queue 12388 * accordingly. 
12389 */ 12390 ire = ire1; 12391 ire1 = NULL; 12392 stq = ire->ire_stq; 12393 nce = ire->ire_nce; 12394 ill = ire_to_ill(ire); 12395 mp = next_mp; 12396 next_mp = NULL; 12397 continue; 12398 } 12399 ASSERT(next_mp == NULL); 12400 ASSERT(ire1 == NULL); 12401 return; 12402 } 12403 12404 ASSERT(nce->nce_state != ND_INCOMPLETE); 12405 12406 /* 12407 * Check for upper layer advice 12408 */ 12409 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12410 /* 12411 * It should be o.k. to check the state without 12412 * a lock here, at most we lose an advice. 12413 */ 12414 nce->nce_last = TICK_TO_MSEC(lbolt64); 12415 if (nce->nce_state != ND_REACHABLE) { 12416 12417 mutex_enter(&nce->nce_lock); 12418 nce->nce_state = ND_REACHABLE; 12419 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12420 mutex_exit(&nce->nce_lock); 12421 (void) untimeout(nce->nce_timeout_id); 12422 if (ip_debug > 2) { 12423 /* ip1dbg */ 12424 pr_addr_dbg("ip_xmit_v6: state" 12425 " for %s changed to" 12426 " REACHABLE\n", AF_INET6, 12427 &ire->ire_addr_v6); 12428 } 12429 } 12430 if (ire != save_ire) { 12431 ire_refrele(ire); 12432 } 12433 if (multirt_send) { 12434 ASSERT(ire1 != NULL); 12435 /* 12436 * Proceed with the next RTF_MULTIRT 12437 * ire, also set up the send-to queue 12438 * accordingly. 12439 */ 12440 ire = ire1; 12441 ire1 = NULL; 12442 stq = ire->ire_stq; 12443 nce = ire->ire_nce; 12444 ill = ire_to_ill(ire); 12445 mp = next_mp; 12446 next_mp = NULL; 12447 continue; 12448 } 12449 ASSERT(next_mp == NULL); 12450 ASSERT(ire1 == NULL); 12451 return; 12452 } 12453 12454 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12455 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12456 " ill_reachable_time = %d \n", delta, 12457 ill->ill_reachable_time)); 12458 if (delta > (uint64_t)ill->ill_reachable_time) { 12459 nce = ire->ire_nce; 12460 mutex_enter(&nce->nce_lock); 12461 switch (nce->nce_state) { 12462 case ND_REACHABLE: 12463 case ND_STALE: 12464 /* 12465 * ND_REACHABLE is identical to 12466 * ND_STALE in this specific case. 
If 12467 * reachable time has expired for this 12468 * neighbor (delta is greater than 12469 * reachable time), conceptually, the 12470 * neighbor cache is no longer in 12471 * REACHABLE state, but already in 12472 * STALE state. So the correct 12473 * transition here is to ND_DELAY. 12474 */ 12475 nce->nce_state = ND_DELAY; 12476 mutex_exit(&nce->nce_lock); 12477 NDP_RESTART_TIMER(nce, 12478 ipst->ips_delay_first_probe_time); 12479 if (ip_debug > 3) { 12480 /* ip2dbg */ 12481 pr_addr_dbg("ip_xmit_v6: state" 12482 " for %s changed to" 12483 " DELAY\n", AF_INET6, 12484 &ire->ire_addr_v6); 12485 } 12486 break; 12487 case ND_DELAY: 12488 case ND_PROBE: 12489 mutex_exit(&nce->nce_lock); 12490 /* Timers have already started */ 12491 break; 12492 case ND_UNREACHABLE: 12493 /* 12494 * ndp timer has detected that this nce 12495 * is unreachable and initiated deleting 12496 * this nce and all its associated IREs. 12497 * This is a race where we found the 12498 * ire before it was deleted and have 12499 * just sent out a packet using this 12500 * unreachable nce. 12501 */ 12502 mutex_exit(&nce->nce_lock); 12503 break; 12504 default: 12505 ASSERT(0); 12506 } 12507 } 12508 12509 if (multirt_send) { 12510 ASSERT(ire1 != NULL); 12511 /* 12512 * Proceed with the next RTF_MULTIRT ire, 12513 * Also set up the send-to queue accordingly. 12514 */ 12515 if (ire != save_ire) { 12516 ire_refrele(ire); 12517 } 12518 ire = ire1; 12519 ire1 = NULL; 12520 stq = ire->ire_stq; 12521 nce = ire->ire_nce; 12522 ill = ire_to_ill(ire); 12523 mp = next_mp; 12524 next_mp = NULL; 12525 } 12526 } while (multirt_send); 12527 /* 12528 * In the multirouting case, release the last ire used for 12529 * emission. save_ire will be released by the caller. 12530 */ 12531 if (ire != save_ire) { 12532 ire_refrele(ire); 12533 } 12534 } else { 12535 /* 12536 * Queue packet if we have an conn to give back pressure. 
12537 * We can't queue packets intended for hardware acceleration 12538 * since we've tossed that state already. If the packet is 12539 * being fed back from ire_send_v6, we don't know the 12540 * position in the queue to enqueue the packet and we discard 12541 * the packet. 12542 */ 12543 if (ipst->ips_ip_output_queue && (connp != NULL) && 12544 (io == NULL) && (caller != IRE_SEND)) { 12545 if (caller == IP_WSRV) { 12546 connp->conn_did_putbq = 1; 12547 (void) putbq(connp->conn_wq, mp); 12548 conn_drain_insert(connp); 12549 /* 12550 * caller == IP_WSRV implies we are 12551 * the service thread, and the 12552 * queue is already noenabled. 12553 * The check for canput and 12554 * the putbq is not atomic. 12555 * So we need to check again. 12556 */ 12557 if (canput(stq->q_next)) 12558 connp->conn_did_putbq = 0; 12559 } else { 12560 (void) putq(connp->conn_wq, mp); 12561 } 12562 return; 12563 } 12564 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12565 freemsg(mp); 12566 return; 12567 } 12568 } 12569 12570 /* 12571 * pr_addr_dbg function provides the needed buffer space to call 12572 * inet_ntop() function's 3rd argument. This function should be 12573 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12574 * stack buffer space in it's own stack frame. This function uses 12575 * a buffer from it's own stack and prints the information. 12576 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12577 * 12578 * Note: This function can call inet_ntop() once. 12579 */ 12580 void 12581 pr_addr_dbg(char *fmt1, int af, const void *addr) 12582 { 12583 char buf[INET6_ADDRSTRLEN]; 12584 12585 if (fmt1 == NULL) { 12586 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12587 return; 12588 } 12589 12590 /* 12591 * This does not compare debug level and just prints 12592 * out. Thus it is the responsibility of the caller 12593 * to check the appropriate debug-level before calling 12594 * this function. 
12595 */ 12596 if (ip_debug > 0) { 12597 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12598 } 12599 12600 12601 } 12602 12603 12604 /* 12605 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12606 * if needed and extension headers) that will be needed based on the 12607 * ip6_pkt_t structure passed by the caller. 12608 * 12609 * The returned length does not include the length of the upper level 12610 * protocol (ULP) header. 12611 */ 12612 int 12613 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12614 { 12615 int len; 12616 12617 len = IPV6_HDR_LEN; 12618 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12619 len += sizeof (ip6i_t); 12620 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12621 ASSERT(ipp->ipp_hopoptslen != 0); 12622 len += ipp->ipp_hopoptslen; 12623 } 12624 if (ipp->ipp_fields & IPPF_RTHDR) { 12625 ASSERT(ipp->ipp_rthdrlen != 0); 12626 len += ipp->ipp_rthdrlen; 12627 } 12628 /* 12629 * En-route destination options 12630 * Only do them if there's a routing header as well 12631 */ 12632 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12633 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12634 ASSERT(ipp->ipp_rtdstoptslen != 0); 12635 len += ipp->ipp_rtdstoptslen; 12636 } 12637 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12638 ASSERT(ipp->ipp_dstoptslen != 0); 12639 len += ipp->ipp_dstoptslen; 12640 } 12641 return (len); 12642 } 12643 12644 /* 12645 * All-purpose routine to build a header chain of an IPv6 header 12646 * followed by any required extension headers and a proto header, 12647 * preceeded (where necessary) by an ip6i_t private header. 12648 * 12649 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12650 * will be filled in appropriately. 12651 * Thus the caller must fill in the rest of the IPv6 header, such as 12652 * traffic class/flowid, source address (if not set here), hoplimit (if not 12653 * set here) and destination address. 12654 * 12655 * The extension headers and ip6i_t header will all be fully filled in. 
12656 */ 12657 void 12658 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12659 ip6_pkt_t *ipp, uint8_t protocol) 12660 { 12661 uint8_t *nxthdr_ptr; 12662 uint8_t *cp; 12663 ip6i_t *ip6i; 12664 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12665 12666 /* 12667 * If sending private ip6i_t header down (checksum info, nexthop, 12668 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12669 * then fill it in. (The checksum info will be filled in by icmp). 12670 */ 12671 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12672 ip6i = (ip6i_t *)ip6h; 12673 ip6h = (ip6_t *)&ip6i[1]; 12674 12675 ip6i->ip6i_flags = 0; 12676 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12677 if (ipp->ipp_fields & IPPF_IFINDEX || 12678 ipp->ipp_fields & IPPF_SCOPE_ID) { 12679 ASSERT(ipp->ipp_ifindex != 0); 12680 ip6i->ip6i_flags |= IP6I_IFINDEX; 12681 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12682 } 12683 if (ipp->ipp_fields & IPPF_ADDR) { 12684 /* 12685 * Enable per-packet source address verification if 12686 * IPV6_PKTINFO specified the source address. 12687 * ip6_src is set in the transport's _wput function. 12688 */ 12689 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12690 &ipp->ipp_addr)); 12691 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12692 } 12693 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12694 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12695 /* 12696 * We need to set this flag so that IP doesn't 12697 * rewrite the IPv6 header's hoplimit with the 12698 * current default value. 
12699 */ 12700 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12701 } 12702 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12703 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12704 &ipp->ipp_nexthop)); 12705 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12706 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12707 } 12708 /* 12709 * tell IP this is an ip6i_t private header 12710 */ 12711 ip6i->ip6i_nxt = IPPROTO_RAW; 12712 } 12713 /* Initialize IPv6 header */ 12714 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12715 if (ipp->ipp_fields & IPPF_TCLASS) { 12716 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12717 (ipp->ipp_tclass << 20); 12718 } 12719 if (ipp->ipp_fields & IPPF_ADDR) 12720 ip6h->ip6_src = ipp->ipp_addr; 12721 12722 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12723 cp = (uint8_t *)&ip6h[1]; 12724 /* 12725 * Here's where we have to start stringing together 12726 * any extension headers in the right order: 12727 * Hop-by-hop, destination, routing, and final destination opts. 12728 */ 12729 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12730 /* Hop-by-hop options */ 12731 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12732 12733 *nxthdr_ptr = IPPROTO_HOPOPTS; 12734 nxthdr_ptr = &hbh->ip6h_nxt; 12735 12736 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12737 cp += ipp->ipp_hopoptslen; 12738 } 12739 /* 12740 * En-route destination options 12741 * Only do them if there's a routing header as well 12742 */ 12743 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12744 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12745 ip6_dest_t *dst = (ip6_dest_t *)cp; 12746 12747 *nxthdr_ptr = IPPROTO_DSTOPTS; 12748 nxthdr_ptr = &dst->ip6d_nxt; 12749 12750 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12751 cp += ipp->ipp_rtdstoptslen; 12752 } 12753 /* 12754 * Routing header next 12755 */ 12756 if (ipp->ipp_fields & IPPF_RTHDR) { 12757 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12758 12759 *nxthdr_ptr = IPPROTO_ROUTING; 12760 nxthdr_ptr = &rt->ip6r_nxt; 12761 12762 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12763 cp += ipp->ipp_rthdrlen; 12764 } 
12765 /* 12766 * Do ultimate destination options 12767 */ 12768 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12769 ip6_dest_t *dest = (ip6_dest_t *)cp; 12770 12771 *nxthdr_ptr = IPPROTO_DSTOPTS; 12772 nxthdr_ptr = &dest->ip6d_nxt; 12773 12774 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12775 cp += ipp->ipp_dstoptslen; 12776 } 12777 /* 12778 * Now set the last header pointer to the proto passed in 12779 */ 12780 *nxthdr_ptr = protocol; 12781 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12782 } 12783 12784 /* 12785 * Return a pointer to the routing header extension header 12786 * in the IPv6 header(s) chain passed in. 12787 * If none found, return NULL 12788 * Assumes that all extension headers are in same mblk as the v6 header 12789 */ 12790 ip6_rthdr_t * 12791 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12792 { 12793 ip6_dest_t *desthdr; 12794 ip6_frag_t *fraghdr; 12795 uint_t hdrlen; 12796 uint8_t nexthdr; 12797 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12798 12799 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12800 return ((ip6_rthdr_t *)ptr); 12801 12802 /* 12803 * The routing header will precede all extension headers 12804 * other than the hop-by-hop and destination options 12805 * extension headers, so if we see anything other than those, 12806 * we're done and didn't find it. 12807 * We could see a destination options header alone but no 12808 * routing header, in which case we'll return NULL as soon as 12809 * we see anything after that. 12810 * Hop-by-hop and destination option headers are identical, 12811 * so we can use either one we want as a template. 12812 */ 12813 nexthdr = ip6h->ip6_nxt; 12814 while (ptr < endptr) { 12815 /* Is there enough left for len + nexthdr? 
*/ 12816 if (ptr + MIN_EHDR_LEN > endptr) 12817 return (NULL); 12818 12819 switch (nexthdr) { 12820 case IPPROTO_HOPOPTS: 12821 case IPPROTO_DSTOPTS: 12822 /* Assumes the headers are identical for hbh and dst */ 12823 desthdr = (ip6_dest_t *)ptr; 12824 hdrlen = 8 * (desthdr->ip6d_len + 1); 12825 nexthdr = desthdr->ip6d_nxt; 12826 break; 12827 12828 case IPPROTO_ROUTING: 12829 return ((ip6_rthdr_t *)ptr); 12830 12831 case IPPROTO_FRAGMENT: 12832 fraghdr = (ip6_frag_t *)ptr; 12833 hdrlen = sizeof (ip6_frag_t); 12834 nexthdr = fraghdr->ip6f_nxt; 12835 break; 12836 12837 default: 12838 return (NULL); 12839 } 12840 ptr += hdrlen; 12841 } 12842 return (NULL); 12843 } 12844 12845 /* 12846 * Called for source-routed packets originating on this node. 12847 * Manipulates the original routing header by moving every entry up 12848 * one slot, placing the first entry in the v6 header's v6_dst field, 12849 * and placing the ultimate destination in the routing header's last 12850 * slot. 12851 * 12852 * Returns the checksum diference between the ultimate destination 12853 * (last hop in the routing header when the packet is sent) and 12854 * the first hop (ip6_dst when the packet is sent) 12855 */ 12856 /* ARGSUSED2 */ 12857 uint32_t 12858 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12859 { 12860 uint_t numaddr; 12861 uint_t i; 12862 in6_addr_t *addrptr; 12863 in6_addr_t tmp; 12864 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12865 uint32_t cksm; 12866 uint32_t addrsum = 0; 12867 uint16_t *ptr; 12868 12869 /* 12870 * Perform any processing needed for source routing. 12871 * We know that all extension headers will be in the same mblk 12872 * as the IPv6 header. 12873 */ 12874 12875 /* 12876 * If no segments left in header, or the header length field is zero, 12877 * don't move hop addresses around; 12878 * Checksum difference is zero. 
12879 */ 12880 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12881 return (0); 12882 12883 ptr = (uint16_t *)&ip6h->ip6_dst; 12884 cksm = 0; 12885 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12886 cksm += ptr[i]; 12887 } 12888 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12889 12890 /* 12891 * Here's where the fun begins - we have to 12892 * move all addresses up one spot, take the 12893 * first hop and make it our first ip6_dst, 12894 * and place the ultimate destination in the 12895 * newly-opened last slot. 12896 */ 12897 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12898 numaddr = rthdr->ip6r0_len / 2; 12899 tmp = *addrptr; 12900 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12901 *addrptr = addrptr[1]; 12902 } 12903 *addrptr = ip6h->ip6_dst; 12904 ip6h->ip6_dst = tmp; 12905 12906 /* 12907 * From the checksummed ultimate destination subtract the checksummed 12908 * current ip6_dst (the first hop address). Return that number. 12909 * (In the v4 case, the second part of this is done in each routine 12910 * that calls ip_massage_options(). We do it all in this one place 12911 * for v6). 12912 */ 12913 ptr = (uint16_t *)&ip6h->ip6_dst; 12914 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12915 addrsum += ptr[i]; 12916 } 12917 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12918 if ((int)cksm < 0) 12919 cksm--; 12920 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12921 12922 return (cksm); 12923 } 12924 12925 /* 12926 * Propagate a multicast group membership operation (join/leave) (*fn) on 12927 * all interfaces crossed by the related multirt routes. 12928 * The call is considered successful if the operation succeeds 12929 * on at least one interface. 12930 * The function is called if the destination address in the packet to send 12931 * is multirouted. 
12932 */ 12933 int 12934 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12935 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12936 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12937 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12938 { 12939 ire_t *ire_gw; 12940 irb_t *irb; 12941 int index, error = 0; 12942 opt_restart_t *or; 12943 ip_stack_t *ipst = ire->ire_ipst; 12944 12945 irb = ire->ire_bucket; 12946 ASSERT(irb != NULL); 12947 12948 ASSERT(DB_TYPE(first_mp) == M_CTL); 12949 or = (opt_restart_t *)first_mp->b_rptr; 12950 12951 IRB_REFHOLD(irb); 12952 for (; ire != NULL; ire = ire->ire_next) { 12953 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12954 continue; 12955 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12956 continue; 12957 12958 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12959 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12960 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12961 /* No resolver exists for the gateway; skip this ire. */ 12962 if (ire_gw == NULL) 12963 continue; 12964 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12965 /* 12966 * A resolver exists: we can get the interface on which we have 12967 * to apply the operation. 12968 */ 12969 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12970 first_mp); 12971 if (error == 0) 12972 or->or_private = CGTP_MCAST_SUCCESS; 12973 12974 if (ip_debug > 0) { 12975 ulong_t off; 12976 char *ksym; 12977 12978 ksym = kobj_getsymname((uintptr_t)fn, &off); 12979 ip2dbg(("ip_multirt_apply_membership_v6: " 12980 "called %s, multirt group 0x%08x via itf 0x%08x, " 12981 "error %d [success %u]\n", 12982 ksym ? 
ksym : "?", 12983 ntohl(V4_PART_OF_V6((*v6grp))), 12984 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12985 error, or->or_private)); 12986 } 12987 12988 ire_refrele(ire_gw); 12989 if (error == EINPROGRESS) { 12990 IRB_REFRELE(irb); 12991 return (error); 12992 } 12993 } 12994 IRB_REFRELE(irb); 12995 /* 12996 * Consider the call as successful if we succeeded on at least 12997 * one interface. Otherwise, return the last encountered error. 12998 */ 12999 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13000 } 13001 13002 void 13003 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13004 { 13005 kstat_t *ksp; 13006 13007 ip6_stat_t template = { 13008 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13009 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13010 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13011 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13012 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13013 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13014 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13015 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13016 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13017 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13018 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13019 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13020 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13021 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13022 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13023 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13024 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13025 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13026 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13027 }; 13028 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13029 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13030 KSTAT_FLAG_VIRTUAL, stackid); 13031 13032 if (ksp == NULL) 13033 return (NULL); 13034 13035 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13036 ksp->ks_data = (void *)ip6_statisticsp; 13037 ksp->ks_private = (void *)(uintptr_t)stackid; 13038 13039 kstat_install(ksp); 13040 return (ksp); 13041 } 13042 13043 void 13044 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13045 { 13046 if (ksp != NULL) { 13047 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13048 kstat_delete_netstack(ksp, stackid); 13049 } 13050 } 13051 13052 /* 13053 * The following two functions set and get the value for the 13054 * IPV6_SRC_PREFERENCES socket option. 13055 */ 13056 int 13057 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13058 { 13059 /* 13060 * We only support preferences that are covered by 13061 * IPV6_PREFER_SRC_MASK. 13062 */ 13063 if (prefs & ~IPV6_PREFER_SRC_MASK) 13064 return (EINVAL); 13065 13066 /* 13067 * Look for conflicting preferences or default preferences. If 13068 * both bits of a related pair are clear, the application wants the 13069 * system's default value for that pair. Both bits in a pair can't 13070 * be set. 
13071 */ 13072 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13073 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13074 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13075 IPV6_PREFER_SRC_MIPMASK) { 13076 return (EINVAL); 13077 } 13078 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13079 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13080 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13081 IPV6_PREFER_SRC_TMPMASK) { 13082 return (EINVAL); 13083 } 13084 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13085 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13086 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13087 IPV6_PREFER_SRC_CGAMASK) { 13088 return (EINVAL); 13089 } 13090 13091 connp->conn_src_preferences = prefs; 13092 return (0); 13093 } 13094 13095 size_t 13096 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13097 { 13098 *val = connp->conn_src_preferences; 13099 return (sizeof (connp->conn_src_preferences)); 13100 } 13101 13102 int 13103 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13104 { 13105 ill_t *ill; 13106 ire_t *ire; 13107 int error; 13108 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13109 13110 /* 13111 * Verify the source address and ifindex. Privileged users can use 13112 * any source address. For ancillary data the source address is 13113 * checked in ip_wput_v6. 13114 */ 13115 if (pkti->ipi6_ifindex != 0) { 13116 ASSERT(connp != NULL); 13117 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13118 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13119 if (ill == NULL) { 13120 /* 13121 * We just want to know if the interface exists, we 13122 * don't really care about the ill pointer itself. 
13123 */ 13124 if (error != EINPROGRESS) 13125 return (error); 13126 error = 0; /* Ensure we don't use it below */ 13127 } else { 13128 ill_refrele(ill); 13129 } 13130 } 13131 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13132 secpolicy_net_rawaccess(cr) != 0) { 13133 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13134 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13135 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13136 if (ire != NULL) 13137 ire_refrele(ire); 13138 else 13139 return (ENXIO); 13140 } 13141 return (0); 13142 } 13143 13144 /* 13145 * Get the size of the IP options (including the IP headers size) 13146 * without including the AH header's size. If till_ah is B_FALSE, 13147 * and if AH header is present, dest options beyond AH header will 13148 * also be included in the returned size. 13149 */ 13150 int 13151 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13152 { 13153 ip6_t *ip6h; 13154 uint8_t nexthdr; 13155 uint8_t *whereptr; 13156 ip6_hbh_t *hbhhdr; 13157 ip6_dest_t *dsthdr; 13158 ip6_rthdr_t *rthdr; 13159 int ehdrlen; 13160 int size; 13161 ah_t *ah; 13162 13163 ip6h = (ip6_t *)mp->b_rptr; 13164 size = IPV6_HDR_LEN; 13165 nexthdr = ip6h->ip6_nxt; 13166 whereptr = (uint8_t *)&ip6h[1]; 13167 for (;;) { 13168 /* Assume IP has already stripped it */ 13169 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13170 switch (nexthdr) { 13171 case IPPROTO_HOPOPTS: 13172 hbhhdr = (ip6_hbh_t *)whereptr; 13173 nexthdr = hbhhdr->ip6h_nxt; 13174 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13175 break; 13176 case IPPROTO_DSTOPTS: 13177 dsthdr = (ip6_dest_t *)whereptr; 13178 nexthdr = dsthdr->ip6d_nxt; 13179 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13180 break; 13181 case IPPROTO_ROUTING: 13182 rthdr = (ip6_rthdr_t *)whereptr; 13183 nexthdr = rthdr->ip6r_nxt; 13184 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13185 break; 13186 default : 13187 if (till_ah) { 13188 ASSERT(nexthdr == IPPROTO_AH); 13189 return (size); 13190 } 13191 /* 13192 * If we don't have a 
AH header to traverse, 13193 * return now. This happens normally for 13194 * outbound datagrams where we have not inserted 13195 * the AH header. 13196 */ 13197 if (nexthdr != IPPROTO_AH) { 13198 return (size); 13199 } 13200 13201 /* 13202 * We don't include the AH header's size 13203 * to be symmetrical with other cases where 13204 * we either don't have a AH header (outbound) 13205 * or peek into the AH header yet (inbound and 13206 * not pulled up yet). 13207 */ 13208 ah = (ah_t *)whereptr; 13209 nexthdr = ah->ah_nexthdr; 13210 ehdrlen = (ah->ah_length << 2) + 8; 13211 13212 if (nexthdr == IPPROTO_DSTOPTS) { 13213 if (whereptr + ehdrlen >= mp->b_wptr) { 13214 /* 13215 * The destination options header 13216 * is not part of the first mblk. 13217 */ 13218 whereptr = mp->b_cont->b_rptr; 13219 } else { 13220 whereptr += ehdrlen; 13221 } 13222 13223 dsthdr = (ip6_dest_t *)whereptr; 13224 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13225 size += ehdrlen; 13226 } 13227 return (size); 13228 } 13229 whereptr += ehdrlen; 13230 size += ehdrlen; 13231 } 13232 } 13233