1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 185 boolean_t, zoneid_t); 186 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 187 const 
in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 188 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 189 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 190 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 191 boolean_t, boolean_t, boolean_t, boolean_t); 192 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 193 iulp_t *, ip_stack_t *); 194 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 195 uint16_t, boolean_t, boolean_t, boolean_t); 196 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 197 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 198 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 199 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 200 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 201 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 202 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 203 uint8_t *, uint_t, uint8_t, ip_stack_t *); 204 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 205 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 206 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 207 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 208 conn_t *, int, int, int, zoneid_t); 209 210 void ip_rput_v6(queue_t *, mblk_t *); 211 static void ip_wput_v6(queue_t *, mblk_t *); 212 213 /* 214 * A template for an IPv6 AR_ENTRY_QUERY 215 */ 216 static areq_t ipv6_areq_template = { 217 AR_ENTRY_QUERY, /* cmd */ 218 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 219 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 220 IP6_DL_SAP, /* protocol, from arps perspective */ 221 sizeof (areq_t), /* target addr offset */ 222 IPV6_ADDR_LEN, /* target addr_length */ 223 0, /* flags */ 224 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 225 IPV6_ADDR_LEN, /* sender addr length */ 226 6, /* xmit_count */ 227 1000, /* 
(re)xmit_interval in milliseconds */ 228 4 /* max # of requests to buffer */ 229 /* anything else filled in by the code */ 230 }; 231 232 struct qinit rinit_ipv6 = { 233 (pfi_t)ip_rput_v6, 234 NULL, 235 ip_open, 236 ip_close, 237 NULL, 238 &ip_mod_info 239 }; 240 241 struct qinit winit_ipv6 = { 242 (pfi_t)ip_wput_v6, 243 (pfi_t)ip_wsrv, 244 ip_open, 245 ip_close, 246 NULL, 247 &ip_mod_info 248 }; 249 250 /* 251 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 252 * The message has already been checksummed and if needed, 253 * a copy has been made to be sent any interested ICMP client (conn) 254 * Note that this is different than icmp_inbound() which does the fanout 255 * to conn's as well as local processing of the ICMP packets. 256 * 257 * All error messages are passed to the matching transport stream. 258 * 259 * Zones notes: 260 * The packet is only processed in the context of the specified zone: typically 261 * only this zone will reply to an echo request. This means that the caller must 262 * call icmp_inbound_v6() for each relevant zone. 
263 */ 264 static void 265 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 266 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 267 { 268 icmp6_t *icmp6; 269 ip6_t *ip6h; 270 boolean_t interested; 271 ip6i_t *ip6i; 272 in6_addr_t origsrc; 273 ire_t *ire; 274 mblk_t *first_mp; 275 ipsec_in_t *ii; 276 ip_stack_t *ipst = ill->ill_ipst; 277 278 ASSERT(ill != NULL); 279 first_mp = mp; 280 if (mctl_present) { 281 mp = first_mp->b_cont; 282 ASSERT(mp != NULL); 283 284 ii = (ipsec_in_t *)first_mp->b_rptr; 285 ASSERT(ii->ipsec_in_type == IPSEC_IN); 286 } 287 288 ip6h = (ip6_t *)mp->b_rptr; 289 290 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 291 292 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 293 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 294 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 295 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 296 freemsg(first_mp); 297 return; 298 } 299 ip6h = (ip6_t *)mp->b_rptr; 300 } 301 if (ipst->ips_icmp_accept_clear_messages == 0) { 302 first_mp = ipsec_check_global_policy(first_mp, NULL, 303 NULL, ip6h, mctl_present, ipst->ips_netstack); 304 if (first_mp == NULL) 305 return; 306 } 307 308 /* 309 * On a labeled system, we have to check whether the zone itself is 310 * permitted to receive raw traffic. 
311 */ 312 if (is_system_labeled()) { 313 if (zoneid == ALL_ZONES) 314 zoneid = tsol_packet_to_zoneid(mp); 315 if (!tsol_can_accept_raw(mp, B_FALSE)) { 316 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 317 zoneid)); 318 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 319 freemsg(first_mp); 320 return; 321 } 322 } 323 324 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 325 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 326 icmp6->icmp6_code)); 327 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 328 329 /* Initiate IPPF processing here */ 330 if (IP6_IN_IPP(flags, ipst)) { 331 332 /* 333 * If the ifindex changes due to SIOCSLIFINDEX 334 * packet may return to IP on the wrong ill. 335 */ 336 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 337 if (mp == NULL) { 338 if (mctl_present) { 339 freeb(first_mp); 340 } 341 return; 342 } 343 } 344 345 switch (icmp6->icmp6_type) { 346 case ICMP6_DST_UNREACH: 347 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 348 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 350 break; 351 352 case ICMP6_TIME_EXCEEDED: 353 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 354 break; 355 356 case ICMP6_PARAM_PROB: 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 358 break; 359 360 case ICMP6_PACKET_TOO_BIG: 361 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 362 zoneid); 363 return; 364 case ICMP6_ECHO_REQUEST: 365 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 366 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 367 !ipst->ips_ipv6_resp_echo_mcast) 368 break; 369 370 /* 371 * We must have exclusive use of the mblk to convert it to 372 * a response. 373 * If not, we copy it. 
374 */ 375 if (mp->b_datap->db_ref > 1) { 376 mblk_t *mp1; 377 378 mp1 = copymsg(mp); 379 freemsg(mp); 380 if (mp1 == NULL) { 381 BUMP_MIB(ill->ill_icmp6_mib, 382 ipv6IfIcmpInErrors); 383 if (mctl_present) 384 freeb(first_mp); 385 return; 386 } 387 mp = mp1; 388 ip6h = (ip6_t *)mp->b_rptr; 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 if (mctl_present) 391 first_mp->b_cont = mp; 392 else 393 first_mp = mp; 394 } 395 396 /* 397 * Turn the echo into an echo reply. 398 * Remove any extension headers (do not reverse a source route) 399 * and clear the flow id (keep traffic class for now). 400 */ 401 if (hdr_length != IPV6_HDR_LEN) { 402 int i; 403 404 for (i = 0; i < IPV6_HDR_LEN; i++) 405 mp->b_rptr[hdr_length - i - 1] = 406 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 407 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 408 ip6h = (ip6_t *)mp->b_rptr; 409 ip6h->ip6_nxt = IPPROTO_ICMPV6; 410 hdr_length = IPV6_HDR_LEN; 411 } 412 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 413 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 414 415 ip6h->ip6_plen = 416 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 417 origsrc = ip6h->ip6_src; 418 /* 419 * Reverse the source and destination addresses. 420 * If the return address is a multicast, zero out the source 421 * (ip_wput_v6 will set an address). 422 */ 423 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 424 ip6h->ip6_src = ipv6_all_zeros; 425 ip6h->ip6_dst = origsrc; 426 } else { 427 ip6h->ip6_src = ip6h->ip6_dst; 428 ip6h->ip6_dst = origsrc; 429 } 430 431 /* set the hop limit */ 432 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 433 434 /* 435 * Prepare for checksum by putting icmp length in the icmp 436 * checksum field. The checksum is calculated in ip_wput_v6. 437 */ 438 icmp6->icmp6_cksum = ip6h->ip6_plen; 439 /* 440 * ICMP echo replies should go out on the same interface 441 * the request came on as probes used by in.mpathd for 442 * detecting NIC failures are ECHO packets. 
We turn-off load 443 * spreading by allocating a ip6i and setting ip6i_attach_if 444 * to B_TRUE which is handled both by ip_wput_v6 and 445 * ip_newroute_v6. If we don't turnoff load spreading, 446 * the packets might get dropped if there are no 447 * non-FAILED/INACTIVE interfaces for it to go out on and 448 * in.mpathd would wrongly detect a failure or mis-detect 449 * a NIC failure as a link failure. As load spreading can 450 * happen only if ill_group is not NULL, we do only for 451 * that case and this does not affect the normal case. 452 * 453 * We force this only on echo packets that came from on-link 454 * hosts. We restrict this to link-local addresses which 455 * is used by in.mpathd for probing. In the IPv6 case, 456 * default routes typically have an ire_ipif pointer and 457 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 458 * might work. As a default route out of this interface 459 * may not be present, enforcing this packet to go out in 460 * this case may not work. 461 */ 462 if (ill->ill_group != NULL && 463 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 464 /* 465 * If we are sending replies to ourselves, don't 466 * set ATTACH_IF as we may not be able to find 467 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 468 * causes ip_wput_v6 to look for an IRE_LOCAL on 469 * "ill" which it may not find and will try to 470 * create an IRE_CACHE for our local address. Once 471 * we do this, we will try to forward all packets 472 * meant to our LOCAL address. 
473 */ 474 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 475 NULL, ipst); 476 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 477 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 478 if (mp == NULL) { 479 BUMP_MIB(ill->ill_icmp6_mib, 480 ipv6IfIcmpInErrors); 481 if (ire != NULL) 482 ire_refrele(ire); 483 if (mctl_present) 484 freeb(first_mp); 485 return; 486 } else if (mctl_present) { 487 first_mp->b_cont = mp; 488 } else { 489 first_mp = mp; 490 } 491 ip6i = (ip6i_t *)mp->b_rptr; 492 ip6i->ip6i_flags = IP6I_ATTACH_IF; 493 ip6i->ip6i_ifindex = 494 ill->ill_phyint->phyint_ifindex; 495 } 496 if (ire != NULL) 497 ire_refrele(ire); 498 } 499 500 if (!mctl_present) { 501 /* 502 * This packet should go out the same way as it 503 * came in i.e in clear. To make sure that global 504 * policy will not be applied to this in ip_wput, 505 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 506 */ 507 ASSERT(first_mp == mp); 508 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 509 if (first_mp == NULL) { 510 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 511 freemsg(mp); 512 return; 513 } 514 ii = (ipsec_in_t *)first_mp->b_rptr; 515 516 /* This is not a secure packet */ 517 ii->ipsec_in_secure = B_FALSE; 518 first_mp->b_cont = mp; 519 } 520 ii->ipsec_in_zoneid = zoneid; 521 ASSERT(zoneid != ALL_ZONES); 522 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 523 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 524 return; 525 } 526 put(WR(q), first_mp); 527 return; 528 529 case ICMP6_ECHO_REPLY: 530 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 531 break; 532 533 case ND_ROUTER_SOLICIT: 534 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 535 break; 536 537 case ND_ROUTER_ADVERT: 538 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 539 break; 540 541 case ND_NEIGHBOR_SOLICIT: 542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 543 if (mctl_present) 544 freeb(first_mp); 545 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 546 ndp_input(ill, mp, dl_mp); 547 return; 548 549 case ND_NEIGHBOR_ADVERT: 550 BUMP_MIB(ill->ill_icmp6_mib, 551 ipv6IfIcmpInNeighborAdvertisements); 552 if (mctl_present) 553 freeb(first_mp); 554 /* XXX may wish to pass first_mp up to ndp_input someday. */ 555 ndp_input(ill, mp, dl_mp); 556 return; 557 558 case ND_REDIRECT: { 559 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 560 561 if (ipst->ips_ipv6_ignore_redirect) 562 break; 563 564 /* 565 * As there is no upper client to deliver, we don't 566 * need the first_mp any more. 567 */ 568 if (mctl_present) 569 freeb(first_mp); 570 if (!pullupmsg(mp, -1)) { 571 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 572 break; 573 } 574 icmp_redirect_v6(q, mp, ill); 575 return; 576 } 577 578 /* 579 * The next three icmp messages will be handled by MLD. 580 * Pass all valid MLD packets up to any process(es) 581 * listening on a raw ICMP socket. MLD messages are 582 * freed by mld_input function. 583 */ 584 case MLD_LISTENER_QUERY: 585 case MLD_LISTENER_REPORT: 586 case MLD_LISTENER_REDUCTION: 587 if (mctl_present) 588 freeb(first_mp); 589 mld_input(q, mp, ill); 590 return; 591 default: 592 break; 593 } 594 if (interested) { 595 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 596 mctl_present, zoneid); 597 } else { 598 freemsg(first_mp); 599 } 600 } 601 602 /* 603 * Process received IPv6 ICMP Packet too big. 604 * After updating any IRE it does the fanout to any matching transport streams. 605 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
606 */ 607 /* ARGSUSED */ 608 static void 609 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 610 boolean_t mctl_present, zoneid_t zoneid) 611 { 612 ip6_t *ip6h; 613 ip6_t *inner_ip6h; 614 icmp6_t *icmp6; 615 uint16_t hdr_length; 616 uint32_t mtu; 617 ire_t *ire, *first_ire; 618 mblk_t *first_mp; 619 ip_stack_t *ipst = ill->ill_ipst; 620 621 first_mp = mp; 622 if (mctl_present) 623 mp = first_mp->b_cont; 624 /* 625 * We must have exclusive use of the mblk to update the MTU 626 * in the packet. 627 * If not, we copy it. 628 * 629 * If there's an M_CTL present, we know that allocated first_mp 630 * earlier in this function, so we know first_mp has refcnt of one. 631 */ 632 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 633 if (mp->b_datap->db_ref > 1) { 634 mblk_t *mp1; 635 636 mp1 = copymsg(mp); 637 freemsg(mp); 638 if (mp1 == NULL) { 639 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 640 if (mctl_present) 641 freeb(first_mp); 642 return; 643 } 644 mp = mp1; 645 if (mctl_present) 646 first_mp->b_cont = mp; 647 else 648 first_mp = mp; 649 } 650 ip6h = (ip6_t *)mp->b_rptr; 651 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 652 hdr_length = ip_hdr_length_v6(mp, ip6h); 653 else 654 hdr_length = IPV6_HDR_LEN; 655 656 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 657 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 658 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 659 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 660 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 661 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 662 freemsg(first_mp); 663 return; 664 } 665 ip6h = (ip6_t *)mp->b_rptr; 666 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 667 inner_ip6h = (ip6_t *)&icmp6[1]; 668 } 669 670 /* 671 * For link local destinations matching simply on IRE type is not 672 * sufficient. Same link local addresses for different ILL's is 673 * possible. 
674 */ 675 676 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 677 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 678 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 679 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 680 681 if (first_ire == NULL) { 682 if (ip_debug > 2) { 683 /* ip1dbg */ 684 pr_addr_dbg("icmp_inbound_too_big_v6:" 685 "no ire for dst %s\n", AF_INET6, 686 &inner_ip6h->ip6_dst); 687 } 688 freemsg(first_mp); 689 return; 690 } 691 692 mtu = ntohl(icmp6->icmp6_mtu); 693 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 694 for (ire = first_ire; ire != NULL && 695 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 696 ire = ire->ire_next) { 697 mutex_enter(&ire->ire_lock); 698 if (mtu < IPV6_MIN_MTU) { 699 ip1dbg(("Received mtu less than IPv6 " 700 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 701 mtu = IPV6_MIN_MTU; 702 /* 703 * If an mtu less than IPv6 min mtu is received, 704 * we must include a fragment header in 705 * subsequent packets. 706 */ 707 ire->ire_frag_flag |= IPH_FRAG_HDR; 708 } 709 ip1dbg(("Received mtu from router: %d\n", mtu)); 710 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 711 /* Record the new max frag size for the ULP. */ 712 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 713 /* 714 * If we need a fragment header in every packet 715 * (above case or multirouting), make sure the 716 * ULP takes it into account when computing the 717 * payload size. 
718 */ 719 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 720 sizeof (ip6_frag_t)); 721 } else { 722 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 723 } 724 mutex_exit(&ire->ire_lock); 725 } 726 rw_exit(&first_ire->ire_bucket->irb_lock); 727 ire_refrele(first_ire); 728 } else { 729 irb_t *irb = NULL; 730 /* 731 * for non-link local destinations we match only on the IRE type 732 */ 733 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 734 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 735 ipst); 736 if (ire == NULL) { 737 if (ip_debug > 2) { 738 /* ip1dbg */ 739 pr_addr_dbg("icmp_inbound_too_big_v6:" 740 "no ire for dst %s\n", 741 AF_INET6, &inner_ip6h->ip6_dst); 742 } 743 freemsg(first_mp); 744 return; 745 } 746 irb = ire->ire_bucket; 747 ire_refrele(ire); 748 rw_enter(&irb->irb_lock, RW_READER); 749 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 750 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 751 &inner_ip6h->ip6_dst)) { 752 mtu = ntohl(icmp6->icmp6_mtu); 753 mutex_enter(&ire->ire_lock); 754 if (mtu < IPV6_MIN_MTU) { 755 ip1dbg(("Received mtu less than IPv6" 756 "min mtu %d: %d\n", 757 IPV6_MIN_MTU, mtu)); 758 mtu = IPV6_MIN_MTU; 759 /* 760 * If an mtu less than IPv6 min mtu is 761 * received, we must include a fragment 762 * header in subsequent packets. 763 */ 764 ire->ire_frag_flag |= IPH_FRAG_HDR; 765 } 766 767 ip1dbg(("Received mtu from router: %d\n", mtu)); 768 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 769 /* Record the new max frag size for the ULP. */ 770 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 771 /* 772 * If we need a fragment header in 773 * every packet (above case or 774 * multirouting), make sure the ULP 775 * takes it into account when computing 776 * the payload size. 
777 */ 778 icmp6->icmp6_mtu = 779 htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = 783 htonl(ire->ire_max_frag); 784 } 785 mutex_exit(&ire->ire_lock); 786 } 787 } 788 rw_exit(&irb->irb_lock); 789 } 790 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 791 mctl_present, zoneid); 792 } 793 794 static void 795 pkt_too_big(conn_t *connp, void *arg) 796 { 797 mblk_t *mp; 798 799 if (!connp->conn_ipv6_recvpathmtu) 800 return; 801 802 /* create message and drop it on this connections read queue */ 803 if ((mp = dupb((mblk_t *)arg)) == NULL) { 804 return; 805 } 806 mp->b_datap->db_type = M_CTL; 807 808 putnext(connp->conn_rq, mp); 809 } 810 811 /* 812 * Fanout received ICMPv6 error packets to the transports. 813 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 814 */ 815 void 816 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 817 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 818 { 819 uint16_t *up; /* Pointer to ports in ULP header */ 820 uint32_t ports; /* reversed ports for fanout */ 821 ip6_t rip6h; /* With reversed addresses */ 822 uint16_t hdr_length; 823 uint8_t *nexthdrp; 824 uint8_t nexthdr; 825 mblk_t *first_mp; 826 ipsec_in_t *ii; 827 tcpha_t *tcpha; 828 conn_t *connp; 829 ip_stack_t *ipst = ill->ill_ipst; 830 831 first_mp = mp; 832 if (mctl_present) { 833 mp = first_mp->b_cont; 834 ASSERT(mp != NULL); 835 836 ii = (ipsec_in_t *)first_mp->b_rptr; 837 ASSERT(ii->ipsec_in_type == IPSEC_IN); 838 } else { 839 ii = NULL; 840 } 841 842 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 843 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 844 845 /* 846 * Need to pullup everything in order to use 847 * ip_hdr_length_nexthdr_v6() 848 */ 849 if (mp->b_cont != NULL) { 850 if (!pullupmsg(mp, -1)) { 851 ip1dbg(("icmp_inbound_error_fanout_v6: " 852 "pullupmsg failed\n")); 853 goto drop_pkt; 854 } 855 ip6h = (ip6_t *)mp->b_rptr; 856 
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 857 } 858 859 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 860 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 861 goto drop_pkt; 862 863 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 864 goto drop_pkt; 865 nexthdr = *nexthdrp; 866 867 /* Set message type, must be done after pullups */ 868 mp->b_datap->db_type = M_CTL; 869 870 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 871 /* 872 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 873 * sockets. 874 * 875 * Note I don't like walking every connection to deliver 876 * this information to a set of listeners. A separate 877 * list could be kept to keep the cost of this down. 878 */ 879 ipcl_walk(pkt_too_big, (void *)mp, ipst); 880 } 881 882 /* Try to pass the ICMP message to clients who need it */ 883 switch (nexthdr) { 884 case IPPROTO_UDP: { 885 /* 886 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 887 * UDP header to get the port information. 888 */ 889 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 890 mp->b_wptr) { 891 break; 892 } 893 /* 894 * Attempt to find a client stream based on port. 895 * Note that we do a reverse lookup since the header is 896 * in the form we sent it out. 897 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 898 * and we only set the src and dst addresses and nexthdr. 899 */ 900 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 901 rip6h.ip6_src = ip6h->ip6_dst; 902 rip6h.ip6_dst = ip6h->ip6_src; 903 rip6h.ip6_nxt = nexthdr; 904 ((uint16_t *)&ports)[0] = up[1]; 905 ((uint16_t *)&ports)[1] = up[0]; 906 907 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 908 IP6_NO_IPPOLICY, mctl_present, zoneid); 909 return; 910 } 911 case IPPROTO_TCP: { 912 /* 913 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 914 * the TCP header to get the port information. 
915 */ 916 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 917 mp->b_wptr) { 918 break; 919 } 920 921 /* 922 * Attempt to find a client stream based on port. 923 * Note that we do a reverse lookup since the header is 924 * in the form we sent it out. 925 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 926 * we only set the src and dst addresses and nexthdr. 927 */ 928 929 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 930 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 931 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 932 if (connp == NULL) { 933 goto drop_pkt; 934 } 935 936 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 937 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 938 return; 939 940 } 941 case IPPROTO_SCTP: 942 /* 943 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 944 * the SCTP header to get the port information. 945 */ 946 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 947 mp->b_wptr) { 948 break; 949 } 950 951 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 952 ((uint16_t *)&ports)[0] = up[1]; 953 ((uint16_t *)&ports)[1] = up[0]; 954 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 955 IP6_NO_IPPOLICY, zoneid); 956 return; 957 case IPPROTO_ESP: 958 case IPPROTO_AH: { 959 int ipsec_rc; 960 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 961 962 /* 963 * We need a IPSEC_IN in the front to fanout to AH/ESP. 964 * We will re-use the IPSEC_IN if it is already present as 965 * AH/ESP will not affect any fields in the IPSEC_IN for 966 * ICMP errors. If there is no IPSEC_IN, allocate a new 967 * one and attach it in the front. 968 */ 969 if (ii != NULL) { 970 /* 971 * ip_fanout_proto_again converts the ICMP errors 972 * that come back from AH/ESP to M_DATA so that 973 * if it is non-AH/ESP and we do a pullupmsg in 974 * this function, it would work. Convert it back 975 * to M_CTL before we send up as this is a ICMP 976 * error. This could have been generated locally or 977 * by some router. 
Validate the inner IPSEC 978 * headers. 979 * 980 * NOTE : ill_index is used by ip_fanout_proto_again 981 * to locate the ill. 982 */ 983 ASSERT(ill != NULL); 984 ii->ipsec_in_ill_index = 985 ill->ill_phyint->phyint_ifindex; 986 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 987 first_mp->b_cont->b_datap->db_type = M_CTL; 988 } else { 989 /* 990 * IPSEC_IN is not present. We attach a ipsec_in 991 * message and send up to IPSEC for validating 992 * and removing the IPSEC headers. Clear 993 * ipsec_in_secure so that when we return 994 * from IPSEC, we don't mistakenly think that this 995 * is a secure packet came from the network. 996 * 997 * NOTE : ill_index is used by ip_fanout_proto_again 998 * to locate the ill. 999 */ 1000 ASSERT(first_mp == mp); 1001 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1002 ASSERT(ill != NULL); 1003 if (first_mp == NULL) { 1004 freemsg(mp); 1005 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1006 return; 1007 } 1008 ii = (ipsec_in_t *)first_mp->b_rptr; 1009 1010 /* This is not a secure packet */ 1011 ii->ipsec_in_secure = B_FALSE; 1012 first_mp->b_cont = mp; 1013 mp->b_datap->db_type = M_CTL; 1014 ii->ipsec_in_ill_index = 1015 ill->ill_phyint->phyint_ifindex; 1016 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1017 } 1018 1019 if (!ipsec_loaded(ipss)) { 1020 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 1021 return; 1022 } 1023 1024 if (nexthdr == IPPROTO_ESP) 1025 ipsec_rc = ipsecesp_icmp_error(first_mp); 1026 else 1027 ipsec_rc = ipsecah_icmp_error(first_mp); 1028 if (ipsec_rc == IPSEC_STATUS_FAILED) 1029 return; 1030 1031 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1032 return; 1033 } 1034 case IPPROTO_ENCAP: 1035 case IPPROTO_IPV6: 1036 if ((uint8_t *)ip6h + hdr_length + 1037 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1038 sizeof (ip6_t)) > mp->b_wptr) { 1039 goto drop_pkt; 1040 } 1041 1042 if (nexthdr == IPPROTO_ENCAP || 1043 !IN6_ARE_ADDR_EQUAL( 1044 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1045 &ip6h->ip6_src) || 1046 !IN6_ARE_ADDR_EQUAL( 1047 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1048 &ip6h->ip6_dst)) { 1049 /* 1050 * For tunnels that have used IPsec protection, 1051 * we need to adjust the MTU to take into account 1052 * the IPsec overhead. 1053 */ 1054 if (ii != NULL) 1055 icmp6->icmp6_mtu = htonl( 1056 ntohl(icmp6->icmp6_mtu) - 1057 ipsec_in_extra_length(first_mp)); 1058 } else { 1059 /* 1060 * Self-encapsulated case. As in the ipv4 case, 1061 * we need to strip the 2nd IP header. Since mp 1062 * is already pulled-up, we can simply bcopy 1063 * the 3rd header + data over the 2nd header. 1064 */ 1065 uint16_t unused_len; 1066 ip6_t *inner_ip6h = (ip6_t *) 1067 ((uchar_t *)ip6h + hdr_length); 1068 1069 /* 1070 * Make sure we don't do recursion more than once. 1071 */ 1072 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1073 &unused_len, &nexthdrp) || 1074 *nexthdrp == IPPROTO_IPV6) { 1075 goto drop_pkt; 1076 } 1077 1078 /* 1079 * We are about to modify the packet. Make a copy if 1080 * someone else has a reference to it. 1081 */ 1082 if (DB_REF(mp) > 1) { 1083 mblk_t *mp1; 1084 uint16_t icmp6_offset; 1085 1086 mp1 = copymsg(mp); 1087 if (mp1 == NULL) { 1088 goto drop_pkt; 1089 } 1090 icmp6_offset = (uint16_t) 1091 ((uchar_t *)icmp6 - mp->b_rptr); 1092 freemsg(mp); 1093 mp = mp1; 1094 1095 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1096 ip6h = (ip6_t *)&icmp6[1]; 1097 inner_ip6h = (ip6_t *) 1098 ((uchar_t *)ip6h + hdr_length); 1099 1100 if (mctl_present) 1101 first_mp->b_cont = mp; 1102 else 1103 first_mp = mp; 1104 } 1105 1106 /* 1107 * Need to set db_type back to M_DATA before 1108 * refeeding mp into this function. 
1109 */ 1110 DB_TYPE(mp) = M_DATA; 1111 1112 /* 1113 * Copy the 3rd header + remaining data on top 1114 * of the 2nd header. 1115 */ 1116 bcopy(inner_ip6h, ip6h, 1117 mp->b_wptr - (uchar_t *)inner_ip6h); 1118 1119 /* 1120 * Subtract length of the 2nd header. 1121 */ 1122 mp->b_wptr -= hdr_length; 1123 1124 /* 1125 * Now recurse, and see what I _really_ should be 1126 * doing here. 1127 */ 1128 icmp_inbound_error_fanout_v6(q, first_mp, 1129 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1130 zoneid); 1131 return; 1132 } 1133 /* FALLTHRU */ 1134 default: 1135 /* 1136 * The rip6h header is only used for the lookup and we 1137 * only set the src and dst addresses and nexthdr. 1138 */ 1139 rip6h.ip6_src = ip6h->ip6_dst; 1140 rip6h.ip6_dst = ip6h->ip6_src; 1141 rip6h.ip6_nxt = nexthdr; 1142 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1143 IP6_NO_IPPOLICY, mctl_present, zoneid); 1144 return; 1145 } 1146 /* NOTREACHED */ 1147 drop_pkt: 1148 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1149 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1150 freemsg(first_mp); 1151 } 1152 1153 /* 1154 * Process received IPv6 ICMP Redirect messages. 
1155 */ 1156 /* ARGSUSED */ 1157 static void 1158 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1159 { 1160 ip6_t *ip6h; 1161 uint16_t hdr_length; 1162 nd_redirect_t *rd; 1163 ire_t *ire; 1164 ire_t *prev_ire; 1165 ire_t *redir_ire; 1166 in6_addr_t *src, *dst, *gateway; 1167 nd_opt_hdr_t *opt; 1168 nce_t *nce; 1169 int nce_flags = 0; 1170 int err = 0; 1171 boolean_t redirect_to_router = B_FALSE; 1172 int len; 1173 int optlen; 1174 iulp_t ulp_info = { 0 }; 1175 ill_t *prev_ire_ill; 1176 ipif_t *ipif; 1177 ip_stack_t *ipst = ill->ill_ipst; 1178 1179 ip6h = (ip6_t *)mp->b_rptr; 1180 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1181 hdr_length = ip_hdr_length_v6(mp, ip6h); 1182 else 1183 hdr_length = IPV6_HDR_LEN; 1184 1185 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1186 len = mp->b_wptr - mp->b_rptr - hdr_length; 1187 src = &ip6h->ip6_src; 1188 dst = &rd->nd_rd_dst; 1189 gateway = &rd->nd_rd_target; 1190 1191 /* Verify if it is a valid redirect */ 1192 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1193 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1194 (rd->nd_rd_code != 0) || 1195 (len < sizeof (nd_redirect_t)) || 1196 (IN6_IS_ADDR_V4MAPPED(dst)) || 1197 (IN6_IS_ADDR_MULTICAST(dst))) { 1198 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1199 freemsg(mp); 1200 return; 1201 } 1202 1203 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1204 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1205 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1206 freemsg(mp); 1207 return; 1208 } 1209 1210 if (len > sizeof (nd_redirect_t)) { 1211 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1212 len - sizeof (nd_redirect_t))) { 1213 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1214 freemsg(mp); 1215 return; 1216 } 1217 } 1218 1219 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1220 redirect_to_router = B_TRUE; 1221 nce_flags |= NCE_F_ISROUTER; 1222 } 1223 1224 /* ipif will be refreleased afterwards */ 1225 ipif = ipif_get_next_ipif(NULL, ill); 1226 if (ipif == NULL) { 1227 freemsg(mp); 1228 return; 1229 
} 1230 1231 /* 1232 * Verify that the IP source address of the redirect is 1233 * the same as the current first-hop router for the specified 1234 * ICMP destination address. 1235 * Also, Make sure we had a route for the dest in question and 1236 * that route was pointing to the old gateway (the source of the 1237 * redirect packet.) 1238 */ 1239 1240 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1241 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1242 MATCH_IRE_DEFAULT, ipst); 1243 1244 /* 1245 * Check that 1246 * the redirect was not from ourselves 1247 * old gateway is still directly reachable 1248 */ 1249 if (prev_ire == NULL || 1250 prev_ire->ire_type == IRE_LOCAL) { 1251 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1252 ipif_refrele(ipif); 1253 goto fail_redirect; 1254 } 1255 prev_ire_ill = ire_to_ill(prev_ire); 1256 ASSERT(prev_ire_ill != NULL); 1257 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1258 nce_flags |= NCE_F_NONUD; 1259 1260 /* 1261 * Should we use the old ULP info to create the new gateway? From 1262 * a user's perspective, we should inherit the info so that it 1263 * is a "smooth" transition. If we do not do that, then new 1264 * connections going thru the new gateway will have no route metrics, 1265 * which is counter-intuitive to user. From a network point of 1266 * view, this may or may not make sense even though the new gateway 1267 * is still directly connected to us so the route metrics should not 1268 * change much. 1269 * 1270 * But if the old ire_uinfo is not initialized, we do another 1271 * recursive lookup on the dest using the new gateway. There may 1272 * be a route to that. If so, use it to initialize the redirect 1273 * route. 1274 */ 1275 if (prev_ire->ire_uinfo.iulp_set) { 1276 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1277 } else if (redirect_to_router) { 1278 /* 1279 * Only do the following if the redirection is really to 1280 * a router. 
1281 */ 1282 ire_t *tmp_ire; 1283 ire_t *sire; 1284 1285 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1286 ALL_ZONES, 0, NULL, 1287 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1288 ipst); 1289 if (sire != NULL) { 1290 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1291 ASSERT(tmp_ire != NULL); 1292 ire_refrele(tmp_ire); 1293 ire_refrele(sire); 1294 } else if (tmp_ire != NULL) { 1295 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1296 sizeof (iulp_t)); 1297 ire_refrele(tmp_ire); 1298 } 1299 } 1300 1301 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1302 opt = (nd_opt_hdr_t *)&rd[1]; 1303 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1304 if (opt != NULL) { 1305 err = ndp_lookup_then_add(ill, 1306 (uchar_t *)&opt[1], /* Link layer address */ 1307 gateway, 1308 &ipv6_all_ones, /* prefix mask */ 1309 &ipv6_all_zeros, /* Mapping mask */ 1310 0, 1311 nce_flags, 1312 ND_STALE, 1313 &nce, 1314 NULL, 1315 NULL); 1316 switch (err) { 1317 case 0: 1318 NCE_REFRELE(nce); 1319 break; 1320 case EEXIST: 1321 /* 1322 * Check to see if link layer address has changed and 1323 * process the nce_state accordingly. 1324 */ 1325 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1326 NCE_REFRELE(nce); 1327 break; 1328 default: 1329 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1330 err)); 1331 ipif_refrele(ipif); 1332 goto fail_redirect; 1333 } 1334 } 1335 if (redirect_to_router) { 1336 /* icmp_redirect_ok_v6() must have already verified this */ 1337 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1338 1339 /* 1340 * Create a Route Association. This will allow us to remember 1341 * a router told us to use the particular gateway. 
1342 */ 1343 ire = ire_create_v6( 1344 dst, 1345 &ipv6_all_ones, /* mask */ 1346 &prev_ire->ire_src_addr_v6, /* source addr */ 1347 gateway, /* gateway addr */ 1348 &prev_ire->ire_max_frag, /* max frag */ 1349 NULL, /* Fast Path header */ 1350 NULL, /* no rfq */ 1351 NULL, /* no stq */ 1352 IRE_HOST, 1353 NULL, 1354 prev_ire->ire_ipif, 1355 NULL, 1356 0, 1357 0, 1358 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1359 &ulp_info, 1360 NULL, 1361 NULL, 1362 ipst); 1363 } else { 1364 queue_t *stq; 1365 1366 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1367 ? ipif->ipif_rq : ipif->ipif_wq; 1368 1369 /* 1370 * Just create an on link entry, i.e. interface route. 1371 */ 1372 ire = ire_create_v6( 1373 dst, /* gateway == dst */ 1374 &ipv6_all_ones, /* mask */ 1375 &prev_ire->ire_src_addr_v6, /* source addr */ 1376 &ipv6_all_zeros, /* gateway addr */ 1377 &prev_ire->ire_max_frag, /* max frag */ 1378 NULL, /* Fast Path header */ 1379 NULL, /* ire rfq */ 1380 stq, /* ire stq */ 1381 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1382 NULL, 1383 prev_ire->ire_ipif, 1384 &ipv6_all_ones, 1385 0, 1386 0, 1387 (RTF_DYNAMIC | RTF_HOST), 1388 &ulp_info, 1389 NULL, 1390 NULL, 1391 ipst); 1392 } 1393 1394 /* Release reference from earlier ipif_get_next_ipif() */ 1395 ipif_refrele(ipif); 1396 1397 if (ire == NULL) 1398 goto fail_redirect; 1399 1400 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1401 1402 /* tell routing sockets that we received a redirect */ 1403 ip_rts_change_v6(RTM_REDIRECT, 1404 &rd->nd_rd_dst, 1405 &rd->nd_rd_target, 1406 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1407 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1408 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1409 1410 /* 1411 * Delete any existing IRE_HOST type ires for this destination. 1412 * This together with the added IRE has the effect of 1413 * modifying an existing redirect. 
1414 */ 1415 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1416 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1417 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1418 ipst); 1419 1420 ire_refrele(ire); /* Held in ire_add_v6 */ 1421 1422 if (redir_ire != NULL) { 1423 if (redir_ire->ire_flags & RTF_DYNAMIC) 1424 ire_delete(redir_ire); 1425 ire_refrele(redir_ire); 1426 } 1427 } 1428 1429 if (prev_ire->ire_type == IRE_CACHE) 1430 ire_delete(prev_ire); 1431 ire_refrele(prev_ire); 1432 prev_ire = NULL; 1433 1434 fail_redirect: 1435 if (prev_ire != NULL) 1436 ire_refrele(prev_ire); 1437 freemsg(mp); 1438 } 1439 1440 static ill_t * 1441 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1442 { 1443 ill_t *ill; 1444 1445 ASSERT(WR(q) == q); 1446 1447 if (q->q_next != NULL) { 1448 ill = (ill_t *)q->q_ptr; 1449 if (ILL_CAN_LOOKUP(ill)) 1450 ill_refhold(ill); 1451 else 1452 ill = NULL; 1453 } else { 1454 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1455 NULL, NULL, NULL, NULL, NULL, ipst); 1456 } 1457 if (ill == NULL) 1458 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1459 return (ill); 1460 } 1461 1462 /* 1463 * Assigns an appropriate source address to the packet. 1464 * If origdst is one of our IP addresses that use it as the source. 1465 * If the queue is an ill queue then select a source from that ill. 1466 * Otherwise pick a source based on a route lookup back to the origsrc. 1467 * 1468 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1469 */ 1470 static in6_addr_t * 1471 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1472 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1473 { 1474 ill_t *ill; 1475 ire_t *ire; 1476 ipif_t *ipif; 1477 1478 ASSERT(!(wq->q_flag & QREADR)); 1479 if (wq->q_next != NULL) { 1480 ill = (ill_t *)wq->q_ptr; 1481 } else { 1482 ill = NULL; 1483 } 1484 1485 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1486 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1487 ipst); 1488 if (ire != NULL) { 1489 /* Destined to one of our addresses */ 1490 *src = *origdst; 1491 ire_refrele(ire); 1492 return (src); 1493 } 1494 if (ire != NULL) { 1495 ire_refrele(ire); 1496 ire = NULL; 1497 } 1498 if (ill == NULL) { 1499 /* What is the route back to the original source? */ 1500 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1501 NULL, NULL, zoneid, NULL, 1502 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1503 if (ire == NULL) { 1504 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1505 return (NULL); 1506 } 1507 /* 1508 * Does not matter whether we use ire_stq or ire_ipif here. 1509 * Just pick an ill for ICMP replies. 1510 */ 1511 ASSERT(ire->ire_ipif != NULL); 1512 ill = ire->ire_ipif->ipif_ill; 1513 ire_refrele(ire); 1514 } 1515 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1516 IPV6_PREFER_SRC_DEFAULT, zoneid); 1517 if (ipif != NULL) { 1518 *src = ipif->ipif_v6src_addr; 1519 ipif_refrele(ipif); 1520 return (src); 1521 } 1522 /* 1523 * Unusual case - can't find a usable source address to reach the 1524 * original source. Use what in the route to the source. 
1525 */ 1526 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1527 NULL, NULL, zoneid, NULL, 1528 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1529 if (ire == NULL) { 1530 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1531 return (NULL); 1532 } 1533 ASSERT(ire != NULL); 1534 *src = ire->ire_src_addr_v6; 1535 ire_refrele(ire); 1536 return (src); 1537 } 1538 1539 /* 1540 * Build and ship an IPv6 ICMP message using the packet data in mp, 1541 * and the ICMP header pointed to by "stuff". (May be called as 1542 * writer.) 1543 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1544 * verify that an icmp error packet can be sent. 1545 * 1546 * If q is an ill write side queue (which is the case when packets 1547 * arrive from ip_rput) then ip_wput code will ensure that packets to 1548 * link-local destinations are sent out that ill. 1549 * 1550 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1551 * source address (see above function). 1552 */ 1553 static void 1554 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1555 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1556 ip_stack_t *ipst) 1557 { 1558 ip6_t *ip6h; 1559 in6_addr_t v6dst; 1560 size_t len_needed; 1561 size_t msg_len; 1562 mblk_t *mp1; 1563 icmp6_t *icmp6; 1564 ill_t *ill; 1565 in6_addr_t v6src; 1566 mblk_t *ipsec_mp; 1567 ipsec_out_t *io; 1568 1569 ill = ip_queue_to_ill_v6(q, ipst); 1570 if (ill == NULL) { 1571 freemsg(mp); 1572 return; 1573 } 1574 1575 if (mctl_present) { 1576 /* 1577 * If it is : 1578 * 1579 * 1) a IPSEC_OUT, then this is caused by outbound 1580 * datagram originating on this host. IPSEC processing 1581 * may or may not have been done. Refer to comments above 1582 * icmp_inbound_error_fanout for details. 1583 * 1584 * 2) a IPSEC_IN if we are generating a icmp_message 1585 * for an incoming datagram destined for us i.e called 1586 * from ip_fanout_send_icmp. 
1587 */ 1588 ipsec_info_t *in; 1589 1590 ipsec_mp = mp; 1591 mp = ipsec_mp->b_cont; 1592 1593 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1594 ip6h = (ip6_t *)mp->b_rptr; 1595 1596 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1597 in->ipsec_info_type == IPSEC_IN); 1598 1599 if (in->ipsec_info_type == IPSEC_IN) { 1600 /* 1601 * Convert the IPSEC_IN to IPSEC_OUT. 1602 */ 1603 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1604 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1605 ill_refrele(ill); 1606 return; 1607 } 1608 } else { 1609 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1610 io = (ipsec_out_t *)in; 1611 /* 1612 * Clear out ipsec_out_proc_begin, so we do a fresh 1613 * ire lookup. 1614 */ 1615 io->ipsec_out_proc_begin = B_FALSE; 1616 } 1617 } else { 1618 /* 1619 * This is in clear. The icmp message we are building 1620 * here should go out in clear. 1621 */ 1622 ipsec_in_t *ii; 1623 ASSERT(mp->b_datap->db_type == M_DATA); 1624 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1625 if (ipsec_mp == NULL) { 1626 freemsg(mp); 1627 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1628 ill_refrele(ill); 1629 return; 1630 } 1631 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1632 1633 /* This is not a secure packet */ 1634 ii->ipsec_in_secure = B_FALSE; 1635 /* 1636 * For trusted extensions using a shared IP address we can 1637 * send using any zoneid. 1638 */ 1639 if (zoneid == ALL_ZONES) 1640 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1641 else 1642 ii->ipsec_in_zoneid = zoneid; 1643 ipsec_mp->b_cont = mp; 1644 ip6h = (ip6_t *)mp->b_rptr; 1645 /* 1646 * Convert the IPSEC_IN to IPSEC_OUT. 
1647 */ 1648 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1649 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1650 ill_refrele(ill); 1651 return; 1652 } 1653 } 1654 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1655 1656 if (v6src_ptr != NULL) { 1657 v6src = *v6src_ptr; 1658 } else { 1659 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1660 &v6src, zoneid, ipst) == NULL) { 1661 freemsg(ipsec_mp); 1662 ill_refrele(ill); 1663 return; 1664 } 1665 } 1666 v6dst = ip6h->ip6_src; 1667 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1668 msg_len = msgdsize(mp); 1669 if (msg_len > len_needed) { 1670 if (!adjmsg(mp, len_needed - msg_len)) { 1671 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1672 freemsg(ipsec_mp); 1673 ill_refrele(ill); 1674 return; 1675 } 1676 msg_len = len_needed; 1677 } 1678 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1679 if (mp1 == NULL) { 1680 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1681 freemsg(ipsec_mp); 1682 ill_refrele(ill); 1683 return; 1684 } 1685 ill_refrele(ill); 1686 mp1->b_cont = mp; 1687 mp = mp1; 1688 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1689 io->ipsec_out_type == IPSEC_OUT); 1690 ipsec_mp->b_cont = mp; 1691 1692 /* 1693 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1694 * node generates be accepted in peace by all on-host destinations. 1695 * If we do NOT assume that all on-host destinations trust 1696 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1697 * (Look for ipsec_out_icmp_loopback). 
1698 */ 1699 io->ipsec_out_icmp_loopback = B_TRUE; 1700 1701 ip6h = (ip6_t *)mp->b_rptr; 1702 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1703 1704 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1705 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1706 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1707 ip6h->ip6_dst = v6dst; 1708 ip6h->ip6_src = v6src; 1709 msg_len += IPV6_HDR_LEN + len; 1710 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1711 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1712 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1713 } 1714 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1715 icmp6 = (icmp6_t *)&ip6h[1]; 1716 bcopy(stuff, (char *)icmp6, len); 1717 /* 1718 * Prepare for checksum by putting icmp length in the icmp 1719 * checksum field. The checksum is calculated in ip_wput_v6. 1720 */ 1721 icmp6->icmp6_cksum = ip6h->ip6_plen; 1722 if (icmp6->icmp6_type == ND_REDIRECT) { 1723 ip6h->ip6_hops = IPV6_MAX_HOPS; 1724 } 1725 /* Send to V6 writeside put routine */ 1726 put(q, ipsec_mp); 1727 } 1728 1729 /* 1730 * Update the output mib when ICMPv6 packets are sent. 
1731 */ 1732 static void 1733 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1734 { 1735 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1736 1737 switch (icmp6->icmp6_type) { 1738 case ICMP6_DST_UNREACH: 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1740 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1741 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1742 break; 1743 1744 case ICMP6_TIME_EXCEEDED: 1745 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1746 break; 1747 1748 case ICMP6_PARAM_PROB: 1749 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1750 break; 1751 1752 case ICMP6_PACKET_TOO_BIG: 1753 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1754 break; 1755 1756 case ICMP6_ECHO_REQUEST: 1757 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1758 break; 1759 1760 case ICMP6_ECHO_REPLY: 1761 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1762 break; 1763 1764 case ND_ROUTER_SOLICIT: 1765 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1766 break; 1767 1768 case ND_ROUTER_ADVERT: 1769 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1770 break; 1771 1772 case ND_NEIGHBOR_SOLICIT: 1773 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1774 break; 1775 1776 case ND_NEIGHBOR_ADVERT: 1777 BUMP_MIB(ill->ill_icmp6_mib, 1778 ipv6IfIcmpOutNeighborAdvertisements); 1779 break; 1780 1781 case ND_REDIRECT: 1782 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1783 break; 1784 1785 case MLD_LISTENER_QUERY: 1786 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1787 break; 1788 1789 case MLD_LISTENER_REPORT: 1790 case MLD_V2_LISTENER_REPORT: 1791 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1792 break; 1793 1794 case MLD_LISTENER_REDUCTION: 1795 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1796 break; 1797 } 1798 } 1799 1800 /* 1801 * Check if it is ok to send an ICMPv6 error packet in 1802 * response to the IP packet in 
mp. 1803 * Free the message and return null if no 1804 * ICMP error packet should be sent. 1805 */ 1806 static mblk_t * 1807 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1808 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1809 { 1810 ip6_t *ip6h; 1811 1812 if (!mp) 1813 return (NULL); 1814 1815 ip6h = (ip6_t *)mp->b_rptr; 1816 1817 /* Check if source address uniquely identifies the host */ 1818 1819 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1820 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1821 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1822 freemsg(mp); 1823 return (NULL); 1824 } 1825 1826 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1827 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1828 icmp6_t *icmp6; 1829 1830 if (mp->b_wptr - mp->b_rptr < len_needed) { 1831 if (!pullupmsg(mp, len_needed)) { 1832 ill_t *ill; 1833 1834 ill = ip_queue_to_ill_v6(q, ipst); 1835 if (ill == NULL) { 1836 BUMP_MIB(&ipst->ips_icmp6_mib, 1837 ipv6IfIcmpInErrors); 1838 } else { 1839 BUMP_MIB(ill->ill_icmp6_mib, 1840 ipv6IfIcmpInErrors); 1841 ill_refrele(ill); 1842 } 1843 freemsg(mp); 1844 return (NULL); 1845 } 1846 ip6h = (ip6_t *)mp->b_rptr; 1847 } 1848 icmp6 = (icmp6_t *)&ip6h[1]; 1849 /* Explicitly do not generate errors in response to redirects */ 1850 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1851 icmp6->icmp6_type == ND_REDIRECT) { 1852 freemsg(mp); 1853 return (NULL); 1854 } 1855 } 1856 /* 1857 * Check that the destination is not multicast and that the packet 1858 * was not sent on link layer broadcast or multicast. (Exception 1859 * is Packet too big message as per the draft - when mcast_ok is set.) 1860 */ 1861 if (!mcast_ok && 1862 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1863 freemsg(mp); 1864 return (NULL); 1865 } 1866 if (icmp_err_rate_limit(ipst)) { 1867 /* 1868 * Only send ICMP error packets every so often. 1869 * This should be done on a per port/source basis, 1870 * but for now this will suffice. 
1871 */ 1872 freemsg(mp); 1873 return (NULL); 1874 } 1875 return (mp); 1876 } 1877 1878 /* 1879 * Generate an ICMPv6 redirect message. 1880 * Include target link layer address option if it exits. 1881 * Always include redirect header. 1882 */ 1883 static void 1884 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1885 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1886 { 1887 nd_redirect_t *rd; 1888 nd_opt_rd_hdr_t *rdh; 1889 uchar_t *buf; 1890 nce_t *nce = NULL; 1891 nd_opt_hdr_t *opt; 1892 int len; 1893 int ll_opt_len = 0; 1894 int max_redir_hdr_data_len; 1895 int pkt_len; 1896 in6_addr_t *srcp; 1897 ip_stack_t *ipst = ill->ill_ipst; 1898 1899 /* 1900 * We are called from ip_rput where we could 1901 * not have attached an IPSEC_IN. 1902 */ 1903 ASSERT(mp->b_datap->db_type == M_DATA); 1904 1905 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1906 if (mp == NULL) 1907 return; 1908 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1909 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1910 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1911 ill->ill_phys_addr_length + 7)/8 * 8; 1912 } 1913 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1914 ASSERT(len % 4 == 0); 1915 buf = kmem_alloc(len, KM_NOSLEEP); 1916 if (buf == NULL) { 1917 if (nce != NULL) 1918 NCE_REFRELE(nce); 1919 freemsg(mp); 1920 return; 1921 } 1922 1923 rd = (nd_redirect_t *)buf; 1924 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1925 rd->nd_rd_code = 0; 1926 rd->nd_rd_reserved = 0; 1927 rd->nd_rd_target = *targetp; 1928 rd->nd_rd_dst = *dest; 1929 1930 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1931 if (nce != NULL && ll_opt_len != 0) { 1932 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1933 opt->nd_opt_len = ll_opt_len/8; 1934 bcopy((char *)nce->nce_res_mp->b_rptr + 1935 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1936 ill->ill_phys_addr_length); 1937 } 1938 if (nce != NULL) 1939 NCE_REFRELE(nce); 1940 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1941 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1942 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1943 max_redir_hdr_data_len = 1944 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1945 pkt_len = msgdsize(mp); 1946 /* Make sure mp is 8 byte aligned */ 1947 if (pkt_len > max_redir_hdr_data_len) { 1948 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1949 sizeof (nd_opt_rd_hdr_t))/8; 1950 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1951 } else { 1952 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1953 (void) adjmsg(mp, -(pkt_len % 8)); 1954 } 1955 rdh->nd_opt_rh_reserved1 = 0; 1956 rdh->nd_opt_rh_reserved2 = 0; 1957 /* ipif_v6src_addr contains the link-local source address */ 1958 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1959 if (ill->ill_group != NULL) { 1960 /* 1961 * The receiver of the redirect will verify whether it 1962 * had a route through us (srcp that we will use in 1963 * the redirect) or not. As we load spread even link-locals, 1964 * we don't know which source address the receiver of 1965 * redirect has in its route for communicating with us. 1966 * Thus we randomly choose a source here and finally we 1967 * should get to the right one and it will eventually 1968 * accept the redirect from us. We can't call 1969 * ip_lookup_scope_v6 because we don't have the right 1970 * link-local address here. Thus we randomly choose one. 
1971 */ 1972 int cnt = ill->ill_group->illgrp_ill_count; 1973 1974 ill = ill->ill_group->illgrp_ill; 1975 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1976 while (cnt--) 1977 ill = ill->ill_group_next; 1978 srcp = &ill->ill_ipif->ipif_v6src_addr; 1979 } else { 1980 srcp = &ill->ill_ipif->ipif_v6src_addr; 1981 } 1982 rw_exit(&ipst->ips_ill_g_lock); 1983 /* Redirects sent by router, and router is global zone */ 1984 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1985 kmem_free(buf, len); 1986 } 1987 1988 1989 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1990 void 1991 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1992 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1993 ip_stack_t *ipst) 1994 { 1995 icmp6_t icmp6; 1996 boolean_t mctl_present; 1997 mblk_t *first_mp; 1998 1999 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2000 2001 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2002 if (mp == NULL) { 2003 if (mctl_present) 2004 freeb(first_mp); 2005 return; 2006 } 2007 bzero(&icmp6, sizeof (icmp6_t)); 2008 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2009 icmp6.icmp6_code = code; 2010 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2011 zoneid, ipst); 2012 } 2013 2014 /* 2015 * Generate an ICMP unreachable message. 
2016 */ 2017 void 2018 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2019 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2020 ip_stack_t *ipst) 2021 { 2022 icmp6_t icmp6; 2023 boolean_t mctl_present; 2024 mblk_t *first_mp; 2025 2026 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2027 2028 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2029 if (mp == NULL) { 2030 if (mctl_present) 2031 freeb(first_mp); 2032 return; 2033 } 2034 bzero(&icmp6, sizeof (icmp6_t)); 2035 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2036 icmp6.icmp6_code = code; 2037 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2038 zoneid, ipst); 2039 } 2040 2041 /* 2042 * Generate an ICMP pkt too big message. 2043 */ 2044 static void 2045 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2046 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 2047 { 2048 icmp6_t icmp6; 2049 mblk_t *first_mp; 2050 boolean_t mctl_present; 2051 2052 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2053 2054 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2055 if (mp == NULL) { 2056 if (mctl_present) 2057 freeb(first_mp); 2058 return; 2059 } 2060 bzero(&icmp6, sizeof (icmp6_t)); 2061 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2062 icmp6.icmp6_code = 0; 2063 icmp6.icmp6_mtu = htonl(mtu); 2064 2065 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2066 zoneid, ipst); 2067 } 2068 2069 /* 2070 * Generate an ICMP parameter problem message. (May be called as writer.) 2071 * 'offset' is the offset from the beginning of the packet in error. 
2072 */ 2073 static void 2074 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2075 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2076 ip_stack_t *ipst) 2077 { 2078 icmp6_t icmp6; 2079 boolean_t mctl_present; 2080 mblk_t *first_mp; 2081 2082 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2083 2084 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2085 if (mp == NULL) { 2086 if (mctl_present) 2087 freeb(first_mp); 2088 return; 2089 } 2090 bzero((char *)&icmp6, sizeof (icmp6_t)); 2091 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2092 icmp6.icmp6_code = code; 2093 icmp6.icmp6_pptr = htonl(offset); 2094 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2095 zoneid, ipst); 2096 } 2097 2098 /* 2099 * This code will need to take into account the possibility of binding 2100 * to a link local address on a multi-homed host, in which case the 2101 * outgoing interface (from the conn) will need to be used when getting 2102 * an ire for the dst. Going through proper outgoing interface and 2103 * choosing the source address corresponding to the outgoing interface 2104 * is necessary when the destination address is a link-local address and 2105 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2106 * This can happen when active connection is setup; thus ipp pointer 2107 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2108 * pointer is passed as ipp pointer. 
2109 */ 2110 mblk_t * 2111 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2112 { 2113 ssize_t len; 2114 int protocol; 2115 struct T_bind_req *tbr; 2116 sin6_t *sin6; 2117 ipa6_conn_t *ac6; 2118 in6_addr_t *v6srcp; 2119 in6_addr_t *v6dstp; 2120 uint16_t lport; 2121 uint16_t fport; 2122 uchar_t *ucp; 2123 mblk_t *mp1; 2124 boolean_t ire_requested; 2125 boolean_t ipsec_policy_set; 2126 int error = 0; 2127 boolean_t local_bind; 2128 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2129 ipa6_conn_x_t *acx6; 2130 boolean_t verify_dst; 2131 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2132 2133 ASSERT(connp->conn_af_isv6); 2134 len = mp->b_wptr - mp->b_rptr; 2135 if (len < (sizeof (*tbr) + 1)) { 2136 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2137 "ip_bind_v6: bogus msg, len %ld", len); 2138 goto bad_addr; 2139 } 2140 /* Back up and extract the protocol identifier. */ 2141 mp->b_wptr--; 2142 tbr = (struct T_bind_req *)mp->b_rptr; 2143 /* Reset the message type in preparation for shipping it back. */ 2144 mp->b_datap->db_type = M_PCPROTO; 2145 2146 protocol = *mp->b_wptr & 0xFF; 2147 connp->conn_ulp = (uint8_t)protocol; 2148 2149 /* 2150 * Check for a zero length address. This is from a protocol that 2151 * wants to register to receive all packets of its type. 2152 */ 2153 if (tbr->ADDR_length == 0) { 2154 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2155 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2156 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2157 NULL) { 2158 /* 2159 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2160 * Do not allow others to bind to these. 2161 */ 2162 goto bad_addr; 2163 } 2164 2165 /* 2166 * 2167 * The udp module never sends down a zero-length address, 2168 * and allowing this on a labeled system will break MLP 2169 * functionality. 
2170 */ 2171 if (is_system_labeled() && protocol == IPPROTO_UDP) 2172 goto bad_addr; 2173 2174 /* Allow ipsec plumbing */ 2175 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2176 protocol != IPPROTO_ESP) 2177 goto bad_addr; 2178 2179 connp->conn_srcv6 = ipv6_all_zeros; 2180 ipcl_proto_insert_v6(connp, protocol); 2181 2182 tbr->PRIM_type = T_BIND_ACK; 2183 return (mp); 2184 } 2185 2186 /* Extract the address pointer from the message. */ 2187 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2188 tbr->ADDR_length); 2189 if (ucp == NULL) { 2190 ip1dbg(("ip_bind_v6: no address\n")); 2191 goto bad_addr; 2192 } 2193 if (!OK_32PTR(ucp)) { 2194 ip1dbg(("ip_bind_v6: unaligned address\n")); 2195 goto bad_addr; 2196 } 2197 mp1 = mp->b_cont; /* trailing mp if any */ 2198 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2199 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2200 2201 switch (tbr->ADDR_length) { 2202 default: 2203 ip1dbg(("ip_bind_v6: bad address length %d\n", 2204 (int)tbr->ADDR_length)); 2205 goto bad_addr; 2206 2207 case IPV6_ADDR_LEN: 2208 /* Verification of local address only */ 2209 v6srcp = (in6_addr_t *)ucp; 2210 lport = 0; 2211 local_bind = B_TRUE; 2212 break; 2213 2214 case sizeof (sin6_t): 2215 sin6 = (sin6_t *)ucp; 2216 v6srcp = &sin6->sin6_addr; 2217 lport = sin6->sin6_port; 2218 local_bind = B_TRUE; 2219 break; 2220 2221 case sizeof (ipa6_conn_t): 2222 /* 2223 * Verify that both the source and destination addresses 2224 * are valid. 2225 * Note that we allow connect to broadcast and multicast 2226 * addresses when ire_requested is set. Thus the ULP 2227 * has to check for IRE_BROADCAST and multicast. 2228 */ 2229 ac6 = (ipa6_conn_t *)ucp; 2230 v6srcp = &ac6->ac6_laddr; 2231 v6dstp = &ac6->ac6_faddr; 2232 fport = ac6->ac6_fport; 2233 /* For raw socket, the local port is not set. */ 2234 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2235 connp->conn_lport; 2236 local_bind = B_FALSE; 2237 /* Always verify destination reachability. */ 2238 verify_dst = B_TRUE; 2239 break; 2240 2241 case sizeof (ipa6_conn_x_t): 2242 /* 2243 * Verify that the source address is valid. 2244 * Note that we allow connect to broadcast and multicast 2245 * addresses when ire_requested is set. Thus the ULP 2246 * has to check for IRE_BROADCAST and multicast. 2247 */ 2248 acx6 = (ipa6_conn_x_t *)ucp; 2249 ac6 = &acx6->ac6x_conn; 2250 v6srcp = &ac6->ac6_laddr; 2251 v6dstp = &ac6->ac6_faddr; 2252 fport = ac6->ac6_fport; 2253 lport = ac6->ac6_lport; 2254 local_bind = B_FALSE; 2255 /* 2256 * Client that passed ipa6_conn_x_t to us specifies whether to 2257 * verify destination reachability. 2258 */ 2259 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2260 break; 2261 } 2262 if (local_bind) { 2263 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2264 /* Bind to IPv4 address */ 2265 ipaddr_t v4src; 2266 2267 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2268 2269 error = ip_bind_laddr(connp, mp, v4src, lport, 2270 ire_requested, ipsec_policy_set, 2271 tbr->ADDR_length != IPV6_ADDR_LEN); 2272 if (error != 0) 2273 goto bad_addr; 2274 connp->conn_pkt_isv6 = B_FALSE; 2275 } else { 2276 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2277 error = 0; 2278 goto bad_addr; 2279 } 2280 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2281 ire_requested, ipsec_policy_set, 2282 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2283 if (error != 0) 2284 goto bad_addr; 2285 connp->conn_pkt_isv6 = B_TRUE; 2286 } 2287 } else { 2288 /* 2289 * Bind to local and remote address. Local might be 2290 * unspecified in which case it will be extracted from 2291 * ire_src_addr_v6 2292 */ 2293 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2294 /* Connect to IPv4 address */ 2295 ipaddr_t v4src; 2296 ipaddr_t v4dst; 2297 2298 /* Is the source unspecified or mapped? 
*/ 2299 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2300 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2301 ip1dbg(("ip_bind_v6: " 2302 "dst is mapped, but not the src\n")); 2303 goto bad_addr; 2304 } 2305 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2306 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2307 2308 /* 2309 * XXX Fix needed. Need to pass ipsec_policy_set 2310 * instead of B_FALSE. 2311 */ 2312 2313 /* Always verify destination reachability. */ 2314 error = ip_bind_connected(connp, mp, &v4src, lport, 2315 v4dst, fport, ire_requested, ipsec_policy_set, 2316 B_TRUE, B_TRUE); 2317 if (error != 0) 2318 goto bad_addr; 2319 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2320 connp->conn_pkt_isv6 = B_FALSE; 2321 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2322 ip1dbg(("ip_bind_v6: " 2323 "src is mapped, but not the dst\n")); 2324 goto bad_addr; 2325 } else { 2326 error = ip_bind_connected_v6(connp, mp, v6srcp, 2327 lport, v6dstp, ipp, fport, ire_requested, 2328 ipsec_policy_set, B_TRUE, verify_dst); 2329 if (error != 0) 2330 goto bad_addr; 2331 connp->conn_pkt_isv6 = B_TRUE; 2332 } 2333 } 2334 /* Update qinfo if v4/v6 changed */ 2335 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2336 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2337 if (connp->conn_pkt_isv6) 2338 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE, ipst); 2339 else 2340 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 2341 } 2342 2343 /* 2344 * Pass the IPSEC headers size in ire_ipsec_overhead. 2345 * We can't do this in ip_bind_insert_ire because the policy 2346 * may not have been inherited at that point in time and hence 2347 * conn_out_enforce_policy may not be set. 2348 */ 2349 mp1 = mp->b_cont; 2350 if (ire_requested && connp->conn_out_enforce_policy && 2351 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2352 ire_t *ire = (ire_t *)mp1->b_rptr; 2353 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2354 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2355 } 2356 2357 /* Send it home. 
*/ 2358 mp->b_datap->db_type = M_PCPROTO; 2359 tbr->PRIM_type = T_BIND_ACK; 2360 return (mp); 2361 2362 bad_addr: 2363 if (error == EINPROGRESS) 2364 return (NULL); 2365 if (error > 0) 2366 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2367 else 2368 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2369 return (mp); 2370 } 2371 2372 /* 2373 * Here address is verified to be a valid local address. 2374 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2375 * address is also considered a valid local address. 2376 * In the case of a multicast address, however, the 2377 * upper protocol is expected to reset the src address 2378 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2379 * no packets are emitted with multicast address as 2380 * source address. 2381 * The addresses valid for bind are: 2382 * (1) - in6addr_any 2383 * (2) - IP address of an UP interface 2384 * (3) - IP address of a DOWN interface 2385 * (4) - a multicast address. In this case 2386 * the conn will only receive packets destined to 2387 * the specified multicast address. Note: the 2388 * application still has to issue an 2389 * IPV6_JOIN_GROUP socket option. 2390 * 2391 * In all the above cases, the bound address must be valid in the current zone. 2392 * When the address is loopback or multicast, there might be many matching IREs 2393 * so bind has to look up based on the zone. 2394 */ 2395 static int 2396 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2397 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2398 boolean_t fanout_insert) 2399 { 2400 int error = 0; 2401 ire_t *src_ire = NULL; 2402 ipif_t *ipif = NULL; 2403 mblk_t *policy_mp; 2404 zoneid_t zoneid; 2405 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2406 2407 if (ipsec_policy_set) 2408 policy_mp = mp->b_cont; 2409 2410 /* 2411 * If it was previously connected, conn_fully_bound would have 2412 * been set. 
2413 */ 2414 connp->conn_fully_bound = B_FALSE; 2415 2416 zoneid = connp->conn_zoneid; 2417 2418 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2419 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2420 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2421 /* 2422 * If an address other than in6addr_any is requested, 2423 * we verify that it is a valid address for bind 2424 * Note: Following code is in if-else-if form for 2425 * readability compared to a condition check. 2426 */ 2427 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2428 if (IRE_IS_LOCAL(src_ire)) { 2429 /* 2430 * (2) Bind to address of local UP interface 2431 */ 2432 ipif = src_ire->ire_ipif; 2433 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2434 ipif_t *multi_ipif = NULL; 2435 ire_t *save_ire; 2436 /* 2437 * (4) bind to multicast address. 2438 * Fake out the IRE returned to upper 2439 * layer to be a broadcast IRE in 2440 * ip_bind_insert_ire_v6(). 2441 * Pass other information that matches 2442 * the ipif (e.g. the source address). 
2443 * conn_multicast_ill is only used for 2444 * IPv6 packets 2445 */ 2446 mutex_enter(&connp->conn_lock); 2447 if (connp->conn_multicast_ill != NULL) { 2448 (void) ipif_lookup_zoneid( 2449 connp->conn_multicast_ill, zoneid, 0, 2450 &multi_ipif); 2451 } else { 2452 /* 2453 * Look for default like 2454 * ip_wput_v6 2455 */ 2456 multi_ipif = ipif_lookup_group_v6( 2457 &ipv6_unspecified_group, zoneid, ipst); 2458 } 2459 mutex_exit(&connp->conn_lock); 2460 save_ire = src_ire; 2461 src_ire = NULL; 2462 if (multi_ipif == NULL || !ire_requested || 2463 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2464 src_ire = save_ire; 2465 error = EADDRNOTAVAIL; 2466 } else { 2467 ASSERT(src_ire != NULL); 2468 if (save_ire != NULL) 2469 ire_refrele(save_ire); 2470 } 2471 if (multi_ipif != NULL) 2472 ipif_refrele(multi_ipif); 2473 } else { 2474 *mp->b_wptr++ = (char)connp->conn_ulp; 2475 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2476 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2477 ipst); 2478 if (ipif == NULL) { 2479 if (error == EINPROGRESS) { 2480 if (src_ire != NULL) 2481 ire_refrele(src_ire); 2482 return (error); 2483 } 2484 /* 2485 * Not a valid address for bind 2486 */ 2487 error = EADDRNOTAVAIL; 2488 } else { 2489 ipif_refrele(ipif); 2490 } 2491 /* 2492 * Just to keep it consistent with the processing in 2493 * ip_bind_v6(). 2494 */ 2495 mp->b_wptr--; 2496 } 2497 2498 if (error != 0) { 2499 /* Red Alert! Attempting to be a bogon! */ 2500 if (ip_debug > 2) { 2501 /* ip1dbg */ 2502 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2503 " address %s\n", AF_INET6, v6src); 2504 } 2505 goto bad_addr; 2506 } 2507 } 2508 2509 /* 2510 * Allow setting new policies. For example, disconnects come 2511 * down as ipa_t bind. As we would have set conn_policy_cached 2512 * to B_TRUE before, we should set it to B_FALSE, so that policy 2513 * can change after the disconnect. 
2514 */ 2515 connp->conn_policy_cached = B_FALSE; 2516 2517 /* If not fanout_insert this was just an address verification */ 2518 if (fanout_insert) { 2519 /* 2520 * The addresses have been verified. Time to insert in 2521 * the correct fanout list. 2522 */ 2523 connp->conn_srcv6 = *v6src; 2524 connp->conn_remv6 = ipv6_all_zeros; 2525 connp->conn_lport = lport; 2526 connp->conn_fport = 0; 2527 2528 /* 2529 * We need to make sure that the conn_recv is set to a non-null 2530 * value before we insert the conn_t into the classifier table. 2531 * This is to avoid a race with an incoming packet which does 2532 * an ipcl_classify(). 2533 */ 2534 if (*mp->b_wptr == IPPROTO_TCP) 2535 connp->conn_recv = tcp_conn_request; 2536 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2537 } 2538 if (error == 0) { 2539 if (ire_requested) { 2540 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2541 ipst)) { 2542 error = -1; 2543 goto bad_addr; 2544 } 2545 } else if (ipsec_policy_set) { 2546 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2547 error = -1; 2548 goto bad_addr; 2549 } 2550 } 2551 } else if (connp->conn_ulp == IPPROTO_TCP) { 2552 connp->conn_recv = tcp_input; 2553 } 2554 bad_addr: 2555 if (error != 0) { 2556 if (connp->conn_anon_port) { 2557 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2558 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2559 B_FALSE); 2560 } 2561 connp->conn_mlp_type = mlptSingle; 2562 } 2563 2564 if (src_ire != NULL) 2565 ire_refrele(src_ire); 2566 2567 if (ipsec_policy_set) { 2568 ASSERT(policy_mp != NULL); 2569 freeb(policy_mp); 2570 /* 2571 * As of now assume that nothing else accompanies 2572 * IPSEC_POLICY_SET. 
2573 */ 2574 mp->b_cont = NULL; 2575 } 2576 return (error); 2577 } 2578 2579 /* ARGSUSED */ 2580 static void 2581 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2582 void *dummy_arg) 2583 { 2584 conn_t *connp = NULL; 2585 t_scalar_t prim; 2586 2587 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2588 2589 if (CONN_Q(q)) 2590 connp = Q_TO_CONN(q); 2591 ASSERT(connp != NULL); 2592 2593 prim = ((union T_primitives *)mp->b_rptr)->type; 2594 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2595 2596 if (IPCL_IS_TCP(connp)) { 2597 /* Pass sticky_ipp for scope_id and pktinfo */ 2598 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2599 } else { 2600 /* For UDP and ICMP */ 2601 mp = ip_bind_v6(q, mp, connp, NULL); 2602 } 2603 if (mp != NULL) { 2604 if (IPCL_IS_TCP(connp)) { 2605 CONN_INC_REF(connp); 2606 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2607 connp, SQTAG_TCP_RPUTOTHER); 2608 } else if (IPCL_IS_UDP(connp)) { 2609 udp_resume_bind(connp, mp); 2610 } else { 2611 qreply(q, mp); 2612 CONN_OPER_PENDING_DONE(connp); 2613 } 2614 } 2615 } 2616 2617 /* 2618 * Verify that both the source and destination addresses 2619 * are valid. If verify_dst, then destination address must also be reachable, 2620 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2621 * It takes ip6_pkt_t * as one of the arguments to determine correct 2622 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2623 * destination address. Note that parameter ipp is only useful for TCP connect 2624 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2625 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2626 * 2627 */ 2628 static int 2629 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2630 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2631 boolean_t ire_requested, boolean_t ipsec_policy_set, 2632 boolean_t fanout_insert, boolean_t verify_dst) 2633 { 2634 ire_t *src_ire; 2635 ire_t *dst_ire; 2636 int error = 0; 2637 int protocol; 2638 mblk_t *policy_mp; 2639 ire_t *sire = NULL; 2640 ire_t *md_dst_ire = NULL; 2641 ill_t *md_ill = NULL; 2642 ill_t *dst_ill = NULL; 2643 ipif_t *src_ipif = NULL; 2644 zoneid_t zoneid; 2645 boolean_t ill_held = B_FALSE; 2646 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2647 2648 src_ire = dst_ire = NULL; 2649 /* 2650 * NOTE: The protocol is beyond the wptr because that's how 2651 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2652 */ 2653 protocol = *mp->b_wptr & 0xFF; 2654 2655 /* 2656 * If we never got a disconnect before, clear it now. 2657 */ 2658 connp->conn_fully_bound = B_FALSE; 2659 2660 if (ipsec_policy_set) { 2661 policy_mp = mp->b_cont; 2662 } 2663 2664 zoneid = connp->conn_zoneid; 2665 2666 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2667 ipif_t *ipif; 2668 2669 /* 2670 * Use an "emulated" IRE_BROADCAST to tell the transport it 2671 * is a multicast. 2672 * Pass other information that matches 2673 * the ipif (e.g. the source address). 
2674 * 2675 * conn_multicast_ill is only used for IPv6 packets 2676 */ 2677 mutex_enter(&connp->conn_lock); 2678 if (connp->conn_multicast_ill != NULL) { 2679 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2680 zoneid, 0, &ipif); 2681 } else { 2682 /* Look for default like ip_wput_v6 */ 2683 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2684 } 2685 mutex_exit(&connp->conn_lock); 2686 if (ipif == NULL || !ire_requested || 2687 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2688 if (ipif != NULL) 2689 ipif_refrele(ipif); 2690 if (ip_debug > 2) { 2691 /* ip1dbg */ 2692 pr_addr_dbg("ip_bind_connected_v6: bad " 2693 "connected multicast %s\n", AF_INET6, 2694 v6dst); 2695 } 2696 error = ENETUNREACH; 2697 goto bad_addr; 2698 } 2699 if (ipif != NULL) 2700 ipif_refrele(ipif); 2701 } else { 2702 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2703 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2704 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2705 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2706 ipst); 2707 /* 2708 * We also prevent ire's with src address INADDR_ANY to 2709 * be used, which are created temporarily for 2710 * sending out packets from endpoints that have 2711 * conn_unspec_src set. 2712 */ 2713 if (dst_ire == NULL || 2714 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2715 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2716 /* 2717 * When verifying destination reachability, we always 2718 * complain. 2719 * 2720 * When not verifying destination reachability but we 2721 * found an IRE, i.e. the destination is reachable, 2722 * then the other tests still apply and we complain. 
2723 */ 2724 if (verify_dst || (dst_ire != NULL)) { 2725 if (ip_debug > 2) { 2726 /* ip1dbg */ 2727 pr_addr_dbg("ip_bind_connected_v6: bad" 2728 " connected dst %s\n", AF_INET6, 2729 v6dst); 2730 } 2731 if (dst_ire == NULL || 2732 !(dst_ire->ire_type & IRE_HOST)) { 2733 error = ENETUNREACH; 2734 } else { 2735 error = EHOSTUNREACH; 2736 } 2737 goto bad_addr; 2738 } 2739 } 2740 } 2741 2742 /* 2743 * We now know that routing will allow us to reach the destination. 2744 * Check whether Trusted Solaris policy allows communication with this 2745 * host, and pretend that the destination is unreachable if not. 2746 * 2747 * This is never a problem for TCP, since that transport is known to 2748 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2749 * handling. If the remote is unreachable, it will be detected at that 2750 * point, so there's no reason to check it here. 2751 * 2752 * Note that for sendto (and other datagram-oriented friends), this 2753 * check is done as part of the data path label computation instead. 2754 * The check here is just to make non-TCP connect() report the right 2755 * error. 2756 */ 2757 if (dst_ire != NULL && is_system_labeled() && 2758 !IPCL_IS_TCP(connp) && 2759 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2760 connp->conn_mac_exempt, ipst) != 0) { 2761 error = EHOSTUNREACH; 2762 if (ip_debug > 2) { 2763 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2764 AF_INET6, v6dst); 2765 } 2766 goto bad_addr; 2767 } 2768 2769 /* 2770 * If the app does a connect(), it means that it will most likely 2771 * send more than 1 packet to the destination. It makes sense 2772 * to clear the temporary flag. 
2773 */ 2774 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2775 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2776 irb_t *irb = dst_ire->ire_bucket; 2777 2778 rw_enter(&irb->irb_lock, RW_WRITER); 2779 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2780 irb->irb_tmp_ire_cnt--; 2781 rw_exit(&irb->irb_lock); 2782 } 2783 2784 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2785 2786 /* 2787 * See if we should notify ULP about MDT; we do this whether or not 2788 * ire_requested is TRUE, in order to handle active connects; MDT 2789 * eligibility tests for passive connects are handled separately 2790 * through tcp_adapt_ire(). We do this before the source address 2791 * selection, because dst_ire may change after a call to 2792 * ipif_select_source_v6(). This is a best-effort check, as the 2793 * packet for this connection may not actually go through 2794 * dst_ire->ire_stq, and the exact IRE can only be known after 2795 * calling ip_newroute_v6(). This is why we further check on the 2796 * IRE during Multidata packet transmission in tcp_multisend(). 
2797 */ 2798 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2799 dst_ire != NULL && 2800 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2801 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2802 ILL_MDT_CAPABLE(md_ill)) { 2803 md_dst_ire = dst_ire; 2804 IRE_REFHOLD(md_dst_ire); 2805 } 2806 2807 if (dst_ire != NULL && 2808 dst_ire->ire_type == IRE_LOCAL && 2809 dst_ire->ire_zoneid != zoneid && 2810 dst_ire->ire_zoneid != ALL_ZONES) { 2811 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2812 zoneid, 0, NULL, 2813 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2814 MATCH_IRE_RJ_BHOLE, ipst); 2815 if (src_ire == NULL) { 2816 error = EHOSTUNREACH; 2817 goto bad_addr; 2818 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2819 if (!(src_ire->ire_type & IRE_HOST)) 2820 error = ENETUNREACH; 2821 else 2822 error = EHOSTUNREACH; 2823 goto bad_addr; 2824 } 2825 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2826 src_ipif = src_ire->ire_ipif; 2827 ipif_refhold(src_ipif); 2828 *v6src = src_ipif->ipif_v6lcl_addr; 2829 } 2830 ire_refrele(src_ire); 2831 src_ire = NULL; 2832 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2833 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2834 *v6src = sire->ire_src_addr_v6; 2835 ire_refrele(dst_ire); 2836 dst_ire = sire; 2837 sire = NULL; 2838 } else if (dst_ire->ire_type == IRE_CACHE && 2839 (dst_ire->ire_flags & RTF_SETSRC)) { 2840 ASSERT(dst_ire->ire_zoneid == zoneid || 2841 dst_ire->ire_zoneid == ALL_ZONES); 2842 *v6src = dst_ire->ire_src_addr_v6; 2843 } else { 2844 /* 2845 * Pick a source address so that a proper inbound load 2846 * spreading would happen. Use dst_ill specified by the 2847 * app. when socket option or scopeid is set. 
2848 */ 2849 int err; 2850 2851 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2852 uint_t if_index; 2853 2854 /* 2855 * Scope id or IPV6_PKTINFO 2856 */ 2857 2858 if_index = ipp->ipp_ifindex; 2859 dst_ill = ill_lookup_on_ifindex( 2860 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2861 ipst); 2862 if (dst_ill == NULL) { 2863 ip1dbg(("ip_bind_connected_v6:" 2864 " bad ifindex %d\n", if_index)); 2865 error = EADDRNOTAVAIL; 2866 goto bad_addr; 2867 } 2868 ill_held = B_TRUE; 2869 } else if (connp->conn_outgoing_ill != NULL) { 2870 /* 2871 * For IPV6_BOUND_IF socket option, 2872 * conn_outgoing_ill should be set 2873 * already in TCP or UDP/ICMP. 2874 */ 2875 dst_ill = conn_get_held_ill(connp, 2876 &connp->conn_outgoing_ill, &err); 2877 if (err == ILL_LOOKUP_FAILED) { 2878 ip1dbg(("ip_bind_connected_v6:" 2879 "no ill for bound_if\n")); 2880 error = EADDRNOTAVAIL; 2881 goto bad_addr; 2882 } 2883 ill_held = B_TRUE; 2884 } else if (dst_ire->ire_stq != NULL) { 2885 /* No need to hold ill here */ 2886 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2887 } else { 2888 /* No need to hold ill here */ 2889 dst_ill = dst_ire->ire_ipif->ipif_ill; 2890 } 2891 if (!ip6_asp_can_lookup(ipst)) { 2892 *mp->b_wptr++ = (char)protocol; 2893 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2894 ip_bind_connected_resume_v6); 2895 error = EINPROGRESS; 2896 goto refrele_and_quit; 2897 } 2898 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2899 RESTRICT_TO_NONE, connp->conn_src_preferences, 2900 zoneid); 2901 ip6_asp_table_refrele(ipst); 2902 if (src_ipif == NULL) { 2903 pr_addr_dbg("ip_bind_connected_v6: " 2904 "no usable source address for " 2905 "connection to %s\n", AF_INET6, v6dst); 2906 error = EADDRNOTAVAIL; 2907 goto bad_addr; 2908 } 2909 *v6src = src_ipif->ipif_v6lcl_addr; 2910 } 2911 } 2912 2913 /* 2914 * We do ire_route_lookup_v6() here (and not an interface lookup) 2915 * as we assert that v6src should only come from an 2916 * UP interface for hard binding. 
2917 */ 2918 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2919 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2920 2921 /* src_ire must be a local|loopback */ 2922 if (!IRE_IS_LOCAL(src_ire)) { 2923 if (ip_debug > 2) { 2924 /* ip1dbg */ 2925 pr_addr_dbg("ip_bind_connected_v6: bad " 2926 "connected src %s\n", AF_INET6, v6src); 2927 } 2928 error = EADDRNOTAVAIL; 2929 goto bad_addr; 2930 } 2931 2932 /* 2933 * If the source address is a loopback address, the 2934 * destination had best be local or multicast. 2935 * The transports that can't handle multicast will reject 2936 * those addresses. 2937 */ 2938 if (src_ire->ire_type == IRE_LOOPBACK && 2939 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2940 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2941 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2942 error = -1; 2943 goto bad_addr; 2944 } 2945 /* 2946 * Allow setting new policies. For example, disconnects come 2947 * down as ipa_t bind. As we would have set conn_policy_cached 2948 * to B_TRUE before, we should set it to B_FALSE, so that policy 2949 * can change after the disconnect. 2950 */ 2951 connp->conn_policy_cached = B_FALSE; 2952 2953 /* 2954 * The addresses have been verified. Initialize the conn 2955 * before calling the policy as they expect the conns 2956 * initialized. 2957 */ 2958 connp->conn_srcv6 = *v6src; 2959 connp->conn_remv6 = *v6dst; 2960 connp->conn_lport = lport; 2961 connp->conn_fport = fport; 2962 2963 ASSERT(!(ipsec_policy_set && ire_requested)); 2964 if (ire_requested) { 2965 iulp_t *ulp_info = NULL; 2966 2967 /* 2968 * Note that sire will not be NULL if this is an off-link 2969 * connection and there is not cache for that dest yet. 2970 * 2971 * XXX Because of an existing bug, if there are multiple 2972 * default routes, the IRE returned now may not be the actual 2973 * default route used (default routes are chosen in a 2974 * round robin fashion). 
So if the metrics for different 2975 * default routes are different, we may return the wrong 2976 * metrics. This will not be a problem if the existing 2977 * bug is fixed. 2978 */ 2979 if (sire != NULL) 2980 ulp_info = &(sire->ire_uinfo); 2981 2982 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2983 ipst)) { 2984 error = -1; 2985 goto bad_addr; 2986 } 2987 } else if (ipsec_policy_set) { 2988 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2989 error = -1; 2990 goto bad_addr; 2991 } 2992 } 2993 2994 /* 2995 * Cache IPsec policy in this conn. If we have per-socket policy, 2996 * we'll cache that. If we don't, we'll inherit global policy. 2997 * 2998 * We can't insert until the conn reflects the policy. Note that 2999 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3000 * connections where we don't have a policy. This is to prevent 3001 * global policy lookups in the inbound path. 3002 * 3003 * If we insert before we set conn_policy_cached, 3004 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3005 * because global policy cound be non-empty. We normally call 3006 * ipsec_check_policy() for conn_policy_cached connections only if 3007 * conn_in_enforce_policy is set. But in this case, 3008 * conn_policy_cached can get set anytime since we made the 3009 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3010 * is called, which will make the above assumption false. Thus, we 3011 * need to insert after we set conn_policy_cached. 3012 */ 3013 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3014 goto bad_addr; 3015 3016 /* If not fanout_insert this was just an address verification */ 3017 if (fanout_insert) { 3018 /* 3019 * The addresses have been verified. Time to insert in 3020 * the correct fanout list. 3021 * We need to make sure that the conn_recv is set to a non-null 3022 * value before we insert the conn_t into the classifier table. 
3023 * This is to avoid a race with an incoming packet which does 3024 * an ipcl_classify(). 3025 */ 3026 if (protocol == IPPROTO_TCP) 3027 connp->conn_recv = tcp_input; 3028 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3029 connp->conn_ports, 3030 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 3031 } 3032 if (error == 0) { 3033 connp->conn_fully_bound = B_TRUE; 3034 /* 3035 * Our initial checks for MDT have passed; the IRE is not 3036 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3037 * be supporting MDT. Pass the IRE, IPC and ILL into 3038 * ip_mdinfo_return(), which performs further checks 3039 * against them and upon success, returns the MDT info 3040 * mblk which we will attach to the bind acknowledgment. 3041 */ 3042 if (md_dst_ire != NULL) { 3043 mblk_t *mdinfo_mp; 3044 3045 ASSERT(md_ill != NULL); 3046 ASSERT(md_ill->ill_mdt_capab != NULL); 3047 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3048 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3049 linkb(mp, mdinfo_mp); 3050 } 3051 } 3052 bad_addr: 3053 if (ipsec_policy_set) { 3054 ASSERT(policy_mp != NULL); 3055 freeb(policy_mp); 3056 /* 3057 * As of now assume that nothing else accompanies 3058 * IPSEC_POLICY_SET. 3059 */ 3060 mp->b_cont = NULL; 3061 } 3062 refrele_and_quit: 3063 if (src_ire != NULL) 3064 IRE_REFRELE(src_ire); 3065 if (dst_ire != NULL) 3066 IRE_REFRELE(dst_ire); 3067 if (sire != NULL) 3068 IRE_REFRELE(sire); 3069 if (src_ipif != NULL) 3070 ipif_refrele(src_ipif); 3071 if (md_dst_ire != NULL) 3072 IRE_REFRELE(md_dst_ire); 3073 if (ill_held && dst_ill != NULL) 3074 ill_refrele(dst_ill); 3075 return (error); 3076 } 3077 3078 /* 3079 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3080 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
3081 */ 3082 /* ARGSUSED4 */ 3083 static boolean_t 3084 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3085 iulp_t *ulp_info, ip_stack_t *ipst) 3086 { 3087 mblk_t *mp1; 3088 ire_t *ret_ire; 3089 3090 mp1 = mp->b_cont; 3091 ASSERT(mp1 != NULL); 3092 3093 if (ire != NULL) { 3094 /* 3095 * mp1 initialized above to IRE_DB_REQ_TYPE 3096 * appended mblk. Its <upper protocol>'s 3097 * job to make sure there is room. 3098 */ 3099 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3100 return (B_FALSE); 3101 3102 mp1->b_datap->db_type = IRE_DB_TYPE; 3103 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3104 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3105 ret_ire = (ire_t *)mp1->b_rptr; 3106 if (IN6_IS_ADDR_MULTICAST(dst) || 3107 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3108 ret_ire->ire_type = IRE_BROADCAST; 3109 ret_ire->ire_addr_v6 = *dst; 3110 } 3111 if (ulp_info != NULL) { 3112 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3113 sizeof (iulp_t)); 3114 } 3115 ret_ire->ire_mp = mp1; 3116 } else { 3117 /* 3118 * No IRE was found. Remove IRE mblk. 3119 */ 3120 mp->b_cont = mp1->b_cont; 3121 freeb(mp1); 3122 } 3123 return (B_TRUE); 3124 } 3125 3126 /* 3127 * Add an ip6i_t header to the front of the mblk. 3128 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3129 * Returns NULL if allocation fails (and frees original message). 3130 * Used in outgoing path when going through ip_newroute_*v6(). 3131 * Used in incoming path to pass ifindex to transports. 
3132 */ 3133 mblk_t * 3134 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3135 { 3136 mblk_t *mp1; 3137 ip6i_t *ip6i; 3138 ip6_t *ip6h; 3139 3140 ip6h = (ip6_t *)mp->b_rptr; 3141 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3142 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3143 mp->b_datap->db_ref > 1) { 3144 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3145 if (mp1 == NULL) { 3146 freemsg(mp); 3147 return (NULL); 3148 } 3149 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3150 mp1->b_cont = mp; 3151 mp = mp1; 3152 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3153 } 3154 mp->b_rptr = (uchar_t *)ip6i; 3155 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3156 ip6i->ip6i_nxt = IPPROTO_RAW; 3157 if (ill != NULL) { 3158 ip6i->ip6i_flags = IP6I_IFINDEX; 3159 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3160 } else { 3161 ip6i->ip6i_flags = 0; 3162 } 3163 ip6i->ip6i_nexthop = *dst; 3164 return (mp); 3165 } 3166 3167 /* 3168 * Handle protocols with which IP is less intimate. There 3169 * can be more than one stream bound to a particular 3170 * protocol. When this is the case, normally each one gets a copy 3171 * of any incoming packets. 3172 * However, if the packet was tunneled and not multicast we only send to it 3173 * the first match. 3174 * 3175 * Zones notes: 3176 * Packets will be distributed to streams in all zones. This is really only 3177 * useful for ICMPv6 as only applications in the global zone can create raw 3178 * sockets for other protocols. 
3179 */ 3180 static void 3181 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3182 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3183 boolean_t mctl_present, zoneid_t zoneid) 3184 { 3185 queue_t *rq; 3186 mblk_t *mp1, *first_mp1; 3187 in6_addr_t dst = ip6h->ip6_dst; 3188 in6_addr_t src = ip6h->ip6_src; 3189 boolean_t one_only; 3190 mblk_t *first_mp = mp; 3191 boolean_t secure, shared_addr; 3192 conn_t *connp, *first_connp, *next_connp; 3193 connf_t *connfp; 3194 ip_stack_t *ipst = inill->ill_ipst; 3195 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3196 3197 if (mctl_present) { 3198 mp = first_mp->b_cont; 3199 secure = ipsec_in_is_secure(first_mp); 3200 ASSERT(mp != NULL); 3201 } else { 3202 secure = B_FALSE; 3203 } 3204 3205 /* 3206 * If the packet was tunneled and not multicast we only send to it 3207 * the first match. 3208 */ 3209 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3210 !IN6_IS_ADDR_MULTICAST(&dst)); 3211 3212 shared_addr = (zoneid == ALL_ZONES); 3213 if (shared_addr) { 3214 /* 3215 * We don't allow multilevel ports for raw IP, so no need to 3216 * check for that here. 3217 */ 3218 zoneid = tsol_packet_to_zoneid(mp); 3219 } 3220 3221 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3222 mutex_enter(&connfp->connf_lock); 3223 connp = connfp->connf_head; 3224 for (connp = connfp->connf_head; connp != NULL; 3225 connp = connp->conn_next) { 3226 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3227 zoneid) && 3228 (!is_system_labeled() || 3229 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3230 connp))) 3231 break; 3232 } 3233 3234 if (connp == NULL || connp->conn_upq == NULL) { 3235 /* 3236 * No one bound to this port. Is 3237 * there a client that wants all 3238 * unclaimed datagrams? 
3239 */ 3240 mutex_exit(&connfp->connf_lock); 3241 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3242 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3243 nexthdr_offset, mctl_present, zoneid, ipst)) { 3244 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3245 } 3246 3247 return; 3248 } 3249 3250 CONN_INC_REF(connp); 3251 first_connp = connp; 3252 3253 /* 3254 * XXX: Fix the multiple protocol listeners case. We should not 3255 * be walking the conn->next list here. 3256 */ 3257 if (one_only) { 3258 /* 3259 * Only send message to one tunnel driver by immediately 3260 * terminating the loop. 3261 */ 3262 connp = NULL; 3263 } else { 3264 connp = connp->conn_next; 3265 3266 } 3267 for (;;) { 3268 while (connp != NULL) { 3269 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3270 flags, zoneid) && 3271 (!is_system_labeled() || 3272 tsol_receive_local(mp, &dst, IPV6_VERSION, 3273 shared_addr, connp))) 3274 break; 3275 connp = connp->conn_next; 3276 } 3277 3278 /* 3279 * Just copy the data part alone. The mctl part is 3280 * needed just for verifying policy and it is never 3281 * sent up. 3282 */ 3283 if (connp == NULL || connp->conn_upq == NULL || 3284 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3285 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3286 /* 3287 * No more intested clients or memory 3288 * allocation failed 3289 */ 3290 connp = first_connp; 3291 break; 3292 } 3293 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3294 CONN_INC_REF(connp); 3295 mutex_exit(&connfp->connf_lock); 3296 rq = connp->conn_rq; 3297 /* 3298 * For link-local always add ifindex so that transport can set 3299 * sin6_scope_id. Avoid it for ICMP error fanout. 
3300 */ 3301 if ((connp->conn_ip_recvpktinfo || 3302 IN6_IS_ADDR_LINKLOCAL(&src)) && 3303 (flags & IP_FF_IPINFO)) { 3304 /* Add header */ 3305 mp1 = ip_add_info_v6(mp1, inill, &dst); 3306 } 3307 if (mp1 == NULL) { 3308 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3309 } else if (!canputnext(rq)) { 3310 if (flags & IP_FF_RAWIP) { 3311 BUMP_MIB(ill->ill_ip_mib, 3312 rawipIfStatsInOverflows); 3313 } else { 3314 BUMP_MIB(ill->ill_icmp6_mib, 3315 ipv6IfIcmpInOverflows); 3316 } 3317 3318 freemsg(mp1); 3319 } else { 3320 /* 3321 * Don't enforce here if we're a tunnel - let "tun" do 3322 * it instead. 3323 */ 3324 if (!IPCL_IS_IPTUN(connp) && 3325 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3326 secure)) { 3327 first_mp1 = ipsec_check_inbound_policy 3328 (first_mp1, connp, NULL, ip6h, 3329 mctl_present); 3330 } 3331 if (first_mp1 != NULL) { 3332 if (mctl_present) 3333 freeb(first_mp1); 3334 BUMP_MIB(ill->ill_ip_mib, 3335 ipIfStatsHCInDelivers); 3336 putnext(rq, mp1); 3337 } 3338 } 3339 mutex_enter(&connfp->connf_lock); 3340 /* Follow the next pointer before releasing the conn. */ 3341 next_connp = connp->conn_next; 3342 CONN_DEC_REF(connp); 3343 connp = next_connp; 3344 } 3345 3346 /* Last one. Send it upstream. */ 3347 mutex_exit(&connfp->connf_lock); 3348 3349 /* Initiate IPPF processing */ 3350 if (IP6_IN_IPP(flags, ipst)) { 3351 uint_t ifindex; 3352 3353 mutex_enter(&ill->ill_lock); 3354 ifindex = ill->ill_phyint->phyint_ifindex; 3355 mutex_exit(&ill->ill_lock); 3356 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3357 if (mp == NULL) { 3358 CONN_DEC_REF(connp); 3359 if (mctl_present) 3360 freeb(first_mp); 3361 return; 3362 } 3363 } 3364 3365 /* 3366 * For link-local always add ifindex so that transport can set 3367 * sin6_scope_id. Avoid it for ICMP error fanout. 
3368 */ 3369 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3370 (flags & IP_FF_IPINFO)) { 3371 /* Add header */ 3372 mp = ip_add_info_v6(mp, inill, &dst); 3373 if (mp == NULL) { 3374 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3375 CONN_DEC_REF(connp); 3376 if (mctl_present) 3377 freeb(first_mp); 3378 return; 3379 } else if (mctl_present) { 3380 first_mp->b_cont = mp; 3381 } else { 3382 first_mp = mp; 3383 } 3384 } 3385 3386 rq = connp->conn_rq; 3387 if (!canputnext(rq)) { 3388 if (flags & IP_FF_RAWIP) { 3389 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3390 } else { 3391 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3392 } 3393 3394 freemsg(first_mp); 3395 } else { 3396 if (IPCL_IS_IPTUN(connp)) { 3397 /* 3398 * Tunneled packet. We enforce policy in the tunnel 3399 * module itself. 3400 * 3401 * Send the WHOLE packet up (incl. IPSEC_IN) without 3402 * a policy check. 3403 */ 3404 putnext(rq, first_mp); 3405 CONN_DEC_REF(connp); 3406 return; 3407 } 3408 /* 3409 * Don't enforce here if we're a tunnel - let "tun" do 3410 * it instead. 3411 */ 3412 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3413 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3414 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3415 NULL, ip6h, mctl_present); 3416 if (first_mp == NULL) { 3417 CONN_DEC_REF(connp); 3418 return; 3419 } 3420 } 3421 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3422 putnext(rq, mp); 3423 if (mctl_present) 3424 freeb(first_mp); 3425 } 3426 CONN_DEC_REF(connp); 3427 } 3428 3429 /* 3430 * Send an ICMP error after patching up the packet appropriately. Returns 3431 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3432 */ 3433 int 3434 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3435 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3436 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3437 { 3438 ip6_t *ip6h; 3439 mblk_t *first_mp; 3440 boolean_t secure; 3441 unsigned char db_type; 3442 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3443 3444 first_mp = mp; 3445 if (mctl_present) { 3446 mp = mp->b_cont; 3447 secure = ipsec_in_is_secure(first_mp); 3448 ASSERT(mp != NULL); 3449 } else { 3450 /* 3451 * If this is an ICMP error being reported - which goes 3452 * up as M_CTLs, we need to convert them to M_DATA till 3453 * we finish checking with global policy because 3454 * ipsec_check_global_policy() assumes M_DATA as clear 3455 * and M_CTL as secure. 3456 */ 3457 db_type = mp->b_datap->db_type; 3458 mp->b_datap->db_type = M_DATA; 3459 secure = B_FALSE; 3460 } 3461 /* 3462 * We are generating an icmp error for some inbound packet. 3463 * Called from all ip_fanout_(udp, tcp, proto) functions. 3464 * Before we generate an error, check with global policy 3465 * to see whether this is allowed to enter the system. As 3466 * there is no "conn", we are checking with global policy. 
3467 */ 3468 ip6h = (ip6_t *)mp->b_rptr; 3469 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3470 first_mp = ipsec_check_global_policy(first_mp, NULL, 3471 NULL, ip6h, mctl_present, ipst->ips_netstack); 3472 if (first_mp == NULL) 3473 return (0); 3474 } 3475 3476 if (!mctl_present) 3477 mp->b_datap->db_type = db_type; 3478 3479 if (flags & IP_FF_SEND_ICMP) { 3480 if (flags & IP_FF_HDR_COMPLETE) { 3481 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3482 freemsg(first_mp); 3483 return (1); 3484 } 3485 } 3486 switch (icmp_type) { 3487 case ICMP6_DST_UNREACH: 3488 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3489 B_FALSE, B_FALSE, zoneid, ipst); 3490 break; 3491 case ICMP6_PARAM_PROB: 3492 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3493 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3494 break; 3495 default: 3496 #ifdef DEBUG 3497 panic("ip_fanout_send_icmp_v6: wrong type"); 3498 /*NOTREACHED*/ 3499 #else 3500 freemsg(first_mp); 3501 break; 3502 #endif 3503 } 3504 } else { 3505 freemsg(first_mp); 3506 return (0); 3507 } 3508 3509 return (1); 3510 } 3511 3512 3513 /* 3514 * Fanout for TCP packets 3515 * The caller puts <fport, lport> in the ports parameter. 
3516 */ 3517 static void 3518 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3519 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3520 { 3521 mblk_t *first_mp; 3522 boolean_t secure; 3523 conn_t *connp; 3524 tcph_t *tcph; 3525 boolean_t syn_present = B_FALSE; 3526 ip_stack_t *ipst = inill->ill_ipst; 3527 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3528 3529 first_mp = mp; 3530 if (mctl_present) { 3531 mp = first_mp->b_cont; 3532 secure = ipsec_in_is_secure(first_mp); 3533 ASSERT(mp != NULL); 3534 } else { 3535 secure = B_FALSE; 3536 } 3537 3538 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3539 3540 if (connp == NULL || 3541 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3542 /* 3543 * No hard-bound match. Send Reset. 3544 */ 3545 dblk_t *dp = mp->b_datap; 3546 uint32_t ill_index; 3547 3548 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3549 3550 /* Initiate IPPf processing, if needed. */ 3551 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3552 (flags & IP6_NO_IPPOLICY)) { 3553 ill_index = ill->ill_phyint->phyint_ifindex; 3554 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3555 if (first_mp == NULL) { 3556 if (connp != NULL) 3557 CONN_DEC_REF(connp); 3558 return; 3559 } 3560 } 3561 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3562 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3563 ipst->ips_netstack->netstack_tcp); 3564 if (connp != NULL) 3565 CONN_DEC_REF(connp); 3566 return; 3567 } 3568 3569 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3570 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3571 if (connp->conn_flags & IPCL_TCP) { 3572 squeue_t *sqp; 3573 3574 /* 3575 * For fused tcp loopback, assign the eager's 3576 * squeue to be that of the active connect's. 
3577 */ 3578 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3579 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3580 !secure && 3581 !IP6_IN_IPP(flags, ipst)) { 3582 ASSERT(Q_TO_CONN(q) != NULL); 3583 sqp = Q_TO_CONN(q)->conn_sqp; 3584 } else { 3585 sqp = IP_SQUEUE_GET(lbolt); 3586 } 3587 3588 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3589 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3590 3591 /* 3592 * db_cksumstuff is unused in the incoming 3593 * path; Thus store the ifindex here. It will 3594 * be cleared in tcp_conn_create_v6(). 3595 */ 3596 DB_CKSUMSTUFF(mp) = 3597 (intptr_t)ill->ill_phyint->phyint_ifindex; 3598 syn_present = B_TRUE; 3599 } 3600 } 3601 3602 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3603 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3604 if ((flags & TH_RST) || (flags & TH_URG)) { 3605 CONN_DEC_REF(connp); 3606 freemsg(first_mp); 3607 return; 3608 } 3609 if (flags & TH_ACK) { 3610 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3611 ipst->ips_netstack->netstack_tcp); 3612 CONN_DEC_REF(connp); 3613 return; 3614 } 3615 3616 CONN_DEC_REF(connp); 3617 freemsg(first_mp); 3618 return; 3619 } 3620 3621 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3622 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3623 NULL, ip6h, mctl_present); 3624 if (first_mp == NULL) { 3625 CONN_DEC_REF(connp); 3626 return; 3627 } 3628 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3629 ASSERT(syn_present); 3630 if (mctl_present) { 3631 ASSERT(first_mp != mp); 3632 first_mp->b_datap->db_struioflag |= 3633 STRUIO_POLICY; 3634 } else { 3635 ASSERT(first_mp == mp); 3636 mp->b_datap->db_struioflag &= 3637 ~STRUIO_EAGER; 3638 mp->b_datap->db_struioflag |= 3639 STRUIO_POLICY; 3640 } 3641 } else { 3642 /* 3643 * Discard first_mp early since we're dealing with a 3644 * fully-connected conn_t and tcp doesn't do policy in 3645 * this case. 
Also, if someone is bound to IPPROTO_TCP 3646 * over raw IP, they don't expect to see a M_CTL. 3647 */ 3648 if (mctl_present) { 3649 freeb(first_mp); 3650 mctl_present = B_FALSE; 3651 } 3652 first_mp = mp; 3653 } 3654 } 3655 3656 /* Initiate IPPF processing */ 3657 if (IP6_IN_IPP(flags, ipst)) { 3658 uint_t ifindex; 3659 3660 mutex_enter(&ill->ill_lock); 3661 ifindex = ill->ill_phyint->phyint_ifindex; 3662 mutex_exit(&ill->ill_lock); 3663 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3664 if (mp == NULL) { 3665 CONN_DEC_REF(connp); 3666 if (mctl_present) { 3667 freeb(first_mp); 3668 } 3669 return; 3670 } else if (mctl_present) { 3671 /* 3672 * ip_add_info_v6 might return a new mp. 3673 */ 3674 ASSERT(first_mp != mp); 3675 first_mp->b_cont = mp; 3676 } else { 3677 first_mp = mp; 3678 } 3679 } 3680 3681 /* 3682 * For link-local always add ifindex so that TCP can bind to that 3683 * interface. Avoid it for ICMP error fanout. 3684 */ 3685 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3686 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3687 (flags & IP_FF_IPINFO))) { 3688 /* Add header */ 3689 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3690 if (mp == NULL) { 3691 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3692 CONN_DEC_REF(connp); 3693 if (mctl_present) 3694 freeb(first_mp); 3695 return; 3696 } else if (mctl_present) { 3697 ASSERT(first_mp != mp); 3698 first_mp->b_cont = mp; 3699 } else { 3700 first_mp = mp; 3701 } 3702 } 3703 3704 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3705 if (IPCL_IS_TCP(connp)) { 3706 (*ip_input_proc)(connp->conn_sqp, first_mp, 3707 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3708 } else { 3709 putnext(connp->conn_rq, first_mp); 3710 CONN_DEC_REF(connp); 3711 } 3712 } 3713 3714 /* 3715 * Fanout for UDP packets. 3716 * The caller puts <fport, lport> in the ports parameter. 3717 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3718 * 3719 * If SO_REUSEADDR is set all multicast and broadcast packets 3720 * will be delivered to all streams bound to the same port. 3721 * 3722 * Zones notes: 3723 * Multicast packets will be distributed to streams in all zones. 3724 */ 3725 static void 3726 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3727 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3728 zoneid_t zoneid) 3729 { 3730 uint32_t dstport, srcport; 3731 in6_addr_t dst; 3732 mblk_t *first_mp; 3733 boolean_t secure; 3734 conn_t *connp; 3735 connf_t *connfp; 3736 conn_t *first_conn; 3737 conn_t *next_conn; 3738 mblk_t *mp1, *first_mp1; 3739 in6_addr_t src; 3740 boolean_t shared_addr; 3741 ip_stack_t *ipst = inill->ill_ipst; 3742 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3743 3744 first_mp = mp; 3745 if (mctl_present) { 3746 mp = first_mp->b_cont; 3747 secure = ipsec_in_is_secure(first_mp); 3748 ASSERT(mp != NULL); 3749 } else { 3750 secure = B_FALSE; 3751 } 3752 3753 /* Extract ports in net byte order */ 3754 dstport = htons(ntohl(ports) & 0xFFFF); 3755 srcport = htons(ntohl(ports) >> 16); 3756 dst = ip6h->ip6_dst; 3757 src = ip6h->ip6_src; 3758 3759 shared_addr = (zoneid == ALL_ZONES); 3760 if (shared_addr) { 3761 /* 3762 * No need to handle exclusive-stack zones since ALL_ZONES 3763 * only applies to the shared stack. 3764 */ 3765 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3766 /* 3767 * If no shared MLP is found, tsol_mlp_findzone returns 3768 * ALL_ZONES. In that case, we assume it's SLP, and 3769 * search for the zone based on the packet label. 3770 * That will also return ALL_ZONES on failure, but 3771 * we never allow conn_zoneid to be set to ALL_ZONES. 3772 */ 3773 if (zoneid == ALL_ZONES) 3774 zoneid = tsol_packet_to_zoneid(mp); 3775 } 3776 3777 /* Attempt to find a client stream based on destination port. 
*/ 3778 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3779 mutex_enter(&connfp->connf_lock); 3780 connp = connfp->connf_head; 3781 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3782 /* 3783 * Not multicast. Send to the one (first) client we find. 3784 */ 3785 while (connp != NULL) { 3786 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3787 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3788 conn_wantpacket_v6(connp, ill, ip6h, 3789 flags, zoneid)) { 3790 break; 3791 } 3792 connp = connp->conn_next; 3793 } 3794 if (connp == NULL || connp->conn_upq == NULL) 3795 goto notfound; 3796 3797 if (is_system_labeled() && 3798 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3799 connp)) 3800 goto notfound; 3801 3802 /* Found a client */ 3803 CONN_INC_REF(connp); 3804 mutex_exit(&connfp->connf_lock); 3805 3806 if (CONN_UDP_FLOWCTLD(connp)) { 3807 freemsg(first_mp); 3808 CONN_DEC_REF(connp); 3809 return; 3810 } 3811 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3812 first_mp = ipsec_check_inbound_policy(first_mp, 3813 connp, NULL, ip6h, mctl_present); 3814 if (first_mp == NULL) { 3815 CONN_DEC_REF(connp); 3816 return; 3817 } 3818 } 3819 /* Initiate IPPF processing */ 3820 if (IP6_IN_IPP(flags, ipst)) { 3821 uint_t ifindex; 3822 3823 mutex_enter(&ill->ill_lock); 3824 ifindex = ill->ill_phyint->phyint_ifindex; 3825 mutex_exit(&ill->ill_lock); 3826 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3827 if (mp == NULL) { 3828 CONN_DEC_REF(connp); 3829 if (mctl_present) 3830 freeb(first_mp); 3831 return; 3832 } 3833 } 3834 /* 3835 * For link-local always add ifindex so that 3836 * transport can set sin6_scope_id. Avoid it for 3837 * ICMP error fanout. 
3838 */ 3839 if ((connp->conn_ip_recvpktinfo || 3840 IN6_IS_ADDR_LINKLOCAL(&src)) && 3841 (flags & IP_FF_IPINFO)) { 3842 /* Add header */ 3843 mp = ip_add_info_v6(mp, inill, &dst); 3844 if (mp == NULL) { 3845 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3846 CONN_DEC_REF(connp); 3847 if (mctl_present) 3848 freeb(first_mp); 3849 return; 3850 } else if (mctl_present) { 3851 first_mp->b_cont = mp; 3852 } else { 3853 first_mp = mp; 3854 } 3855 } 3856 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3857 3858 /* Send it upstream */ 3859 CONN_UDP_RECV(connp, mp); 3860 3861 IP6_STAT(ipst, ip6_udp_fannorm); 3862 CONN_DEC_REF(connp); 3863 if (mctl_present) 3864 freeb(first_mp); 3865 return; 3866 } 3867 3868 while (connp != NULL) { 3869 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3870 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3871 (!is_system_labeled() || 3872 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3873 connp))) 3874 break; 3875 connp = connp->conn_next; 3876 } 3877 3878 if (connp == NULL || connp->conn_upq == NULL) 3879 goto notfound; 3880 3881 first_conn = connp; 3882 3883 CONN_INC_REF(connp); 3884 connp = connp->conn_next; 3885 for (;;) { 3886 while (connp != NULL) { 3887 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3888 src) && conn_wantpacket_v6(connp, ill, ip6h, 3889 flags, zoneid) && 3890 (!is_system_labeled() || 3891 tsol_receive_local(mp, &dst, IPV6_VERSION, 3892 shared_addr, connp))) 3893 break; 3894 connp = connp->conn_next; 3895 } 3896 /* 3897 * Just copy the data part alone. The mctl part is 3898 * needed just for verifying policy and it is never 3899 * sent up. 3900 */ 3901 if (connp == NULL || 3902 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3903 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3904 /* 3905 * No more interested clients or memory 3906 * allocation failed 3907 */ 3908 connp = first_conn; 3909 break; 3910 } 3911 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3912 CONN_INC_REF(connp); 3913 mutex_exit(&connfp->connf_lock); 3914 /* 3915 * For link-local always add ifindex so that transport 3916 * can set sin6_scope_id. Avoid it for ICMP error 3917 * fanout. 3918 */ 3919 if ((connp->conn_ip_recvpktinfo || 3920 IN6_IS_ADDR_LINKLOCAL(&src)) && 3921 (flags & IP_FF_IPINFO)) { 3922 /* Add header */ 3923 mp1 = ip_add_info_v6(mp1, inill, &dst); 3924 } 3925 /* mp1 could have changed */ 3926 if (mctl_present) 3927 first_mp1->b_cont = mp1; 3928 else 3929 first_mp1 = mp1; 3930 if (mp1 == NULL) { 3931 if (mctl_present) 3932 freeb(first_mp1); 3933 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3934 goto next_one; 3935 } 3936 if (CONN_UDP_FLOWCTLD(connp)) { 3937 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3938 freemsg(first_mp1); 3939 goto next_one; 3940 } 3941 3942 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3943 first_mp1 = ipsec_check_inbound_policy 3944 (first_mp1, connp, NULL, ip6h, 3945 mctl_present); 3946 } 3947 if (first_mp1 != NULL) { 3948 if (mctl_present) 3949 freeb(first_mp1); 3950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3951 3952 /* Send it upstream */ 3953 CONN_UDP_RECV(connp, mp1); 3954 } 3955 next_one: 3956 mutex_enter(&connfp->connf_lock); 3957 /* Follow the next pointer before releasing the conn. */ 3958 next_conn = connp->conn_next; 3959 IP6_STAT(ipst, ip6_udp_fanmb); 3960 CONN_DEC_REF(connp); 3961 connp = next_conn; 3962 } 3963 3964 /* Last one. Send it upstream. 
*/ 3965 mutex_exit(&connfp->connf_lock); 3966 3967 /* Initiate IPPF processing */ 3968 if (IP6_IN_IPP(flags, ipst)) { 3969 uint_t ifindex; 3970 3971 mutex_enter(&ill->ill_lock); 3972 ifindex = ill->ill_phyint->phyint_ifindex; 3973 mutex_exit(&ill->ill_lock); 3974 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3975 if (mp == NULL) { 3976 CONN_DEC_REF(connp); 3977 if (mctl_present) { 3978 freeb(first_mp); 3979 } 3980 return; 3981 } 3982 } 3983 3984 /* 3985 * For link-local always add ifindex so that transport can set 3986 * sin6_scope_id. Avoid it for ICMP error fanout. 3987 */ 3988 if ((connp->conn_ip_recvpktinfo || 3989 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3990 /* Add header */ 3991 mp = ip_add_info_v6(mp, inill, &dst); 3992 if (mp == NULL) { 3993 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3994 CONN_DEC_REF(connp); 3995 if (mctl_present) 3996 freeb(first_mp); 3997 return; 3998 } else if (mctl_present) { 3999 first_mp->b_cont = mp; 4000 } else { 4001 first_mp = mp; 4002 } 4003 } 4004 if (CONN_UDP_FLOWCTLD(connp)) { 4005 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 4006 freemsg(mp); 4007 } else { 4008 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 4009 first_mp = ipsec_check_inbound_policy(first_mp, 4010 connp, NULL, ip6h, mctl_present); 4011 if (first_mp == NULL) { 4012 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 4013 CONN_DEC_REF(connp); 4014 return; 4015 } 4016 } 4017 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 4018 4019 /* Send it upstream */ 4020 CONN_UDP_RECV(connp, mp); 4021 } 4022 IP6_STAT(ipst, ip6_udp_fanmb); 4023 CONN_DEC_REF(connp); 4024 if (mctl_present) 4025 freeb(first_mp); 4026 return; 4027 4028 notfound: 4029 mutex_exit(&connfp->connf_lock); 4030 /* 4031 * No one bound to this port. Is 4032 * there a client that wants all 4033 * unclaimed datagrams? 
4034 */ 4035 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4036 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4037 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 4038 zoneid); 4039 } else { 4040 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4041 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4042 mctl_present, zoneid, ipst)) { 4043 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 4044 } 4045 } 4046 } 4047 4048 /* 4049 * int ip_find_hdr_v6() 4050 * 4051 * This routine is used by the upper layer protocols and the IP tunnel 4052 * module to: 4053 * - Set extension header pointers to appropriate locations 4054 * - Determine IPv6 header length and return it 4055 * - Return a pointer to the last nexthdr value 4056 * 4057 * The caller must initialize ipp_fields. 4058 * 4059 * NOTE: If multiple extension headers of the same type are present, 4060 * ip_find_hdr_v6() will set the respective extension header pointers 4061 * to the first one that it encounters in the IPv6 header. It also 4062 * skips fragment headers. This routine deals with malformed packets 4063 * of various sorts in which case the returned length is up to the 4064 * malformed part. 4065 */ 4066 int 4067 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4068 { 4069 uint_t length, ehdrlen; 4070 uint8_t nexthdr; 4071 uint8_t *whereptr, *endptr; 4072 ip6_dest_t *tmpdstopts; 4073 ip6_rthdr_t *tmprthdr; 4074 ip6_hbh_t *tmphopopts; 4075 ip6_frag_t *tmpfraghdr; 4076 4077 length = IPV6_HDR_LEN; 4078 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4079 endptr = mp->b_wptr; 4080 4081 nexthdr = ip6h->ip6_nxt; 4082 while (whereptr < endptr) { 4083 /* Is there enough left for len + nexthdr? 
*/ 4084 if (whereptr + MIN_EHDR_LEN > endptr) 4085 goto done; 4086 4087 switch (nexthdr) { 4088 case IPPROTO_HOPOPTS: 4089 tmphopopts = (ip6_hbh_t *)whereptr; 4090 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4091 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4092 goto done; 4093 nexthdr = tmphopopts->ip6h_nxt; 4094 /* return only 1st hbh */ 4095 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4096 ipp->ipp_fields |= IPPF_HOPOPTS; 4097 ipp->ipp_hopopts = tmphopopts; 4098 ipp->ipp_hopoptslen = ehdrlen; 4099 } 4100 break; 4101 case IPPROTO_DSTOPTS: 4102 tmpdstopts = (ip6_dest_t *)whereptr; 4103 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4104 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4105 goto done; 4106 nexthdr = tmpdstopts->ip6d_nxt; 4107 /* 4108 * ipp_dstopts is set to the destination header after a 4109 * routing header. 4110 * Assume it is a post-rthdr destination header 4111 * and adjust when we find an rthdr. 4112 */ 4113 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4114 ipp->ipp_fields |= IPPF_DSTOPTS; 4115 ipp->ipp_dstopts = tmpdstopts; 4116 ipp->ipp_dstoptslen = ehdrlen; 4117 } 4118 break; 4119 case IPPROTO_ROUTING: 4120 tmprthdr = (ip6_rthdr_t *)whereptr; 4121 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4122 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4123 goto done; 4124 nexthdr = tmprthdr->ip6r_nxt; 4125 /* return only 1st rthdr */ 4126 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4127 ipp->ipp_fields |= IPPF_RTHDR; 4128 ipp->ipp_rthdr = tmprthdr; 4129 ipp->ipp_rthdrlen = ehdrlen; 4130 } 4131 /* 4132 * Make any destination header we've seen be a 4133 * pre-rthdr destination header. 
4134 */ 4135 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4136 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4137 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4138 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4139 ipp->ipp_dstopts = NULL; 4140 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4141 ipp->ipp_dstoptslen = 0; 4142 } 4143 break; 4144 case IPPROTO_FRAGMENT: 4145 tmpfraghdr = (ip6_frag_t *)whereptr; 4146 ehdrlen = sizeof (ip6_frag_t); 4147 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4148 goto done; 4149 nexthdr = tmpfraghdr->ip6f_nxt; 4150 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4151 ipp->ipp_fields |= IPPF_FRAGHDR; 4152 ipp->ipp_fraghdr = tmpfraghdr; 4153 ipp->ipp_fraghdrlen = ehdrlen; 4154 } 4155 break; 4156 case IPPROTO_NONE: 4157 default: 4158 goto done; 4159 } 4160 length += ehdrlen; 4161 whereptr += ehdrlen; 4162 } 4163 done: 4164 if (nexthdrp != NULL) 4165 *nexthdrp = nexthdr; 4166 return (length); 4167 } 4168 4169 int 4170 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4171 { 4172 ire_t *ire; 4173 4174 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4175 ire = ire_lookup_local_v6(zoneid, ipst); 4176 if (ire == NULL) { 4177 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4178 return (1); 4179 } 4180 ip6h->ip6_src = ire->ire_addr_v6; 4181 ire_refrele(ire); 4182 } 4183 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4184 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4185 return (0); 4186 } 4187 4188 /* 4189 * Try to determine where and what are the IPv6 header length and 4190 * pointer to nexthdr value for the upper layer protocol (or an 4191 * unknown next hdr). 4192 * 4193 * Parameters returns a pointer to the nexthdr value; 4194 * Must handle malformed packets of various sorts. 4195 * Function returns failure for malformed cases. 
4196 */ 4197 boolean_t 4198 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4199 uint8_t **nexthdrpp) 4200 { 4201 uint16_t length; 4202 uint_t ehdrlen; 4203 uint8_t *nexthdrp; 4204 uint8_t *whereptr; 4205 uint8_t *endptr; 4206 ip6_dest_t *desthdr; 4207 ip6_rthdr_t *rthdr; 4208 ip6_frag_t *fraghdr; 4209 4210 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4211 length = IPV6_HDR_LEN; 4212 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4213 endptr = mp->b_wptr; 4214 4215 nexthdrp = &ip6h->ip6_nxt; 4216 while (whereptr < endptr) { 4217 /* Is there enough left for len + nexthdr? */ 4218 if (whereptr + MIN_EHDR_LEN > endptr) 4219 break; 4220 4221 switch (*nexthdrp) { 4222 case IPPROTO_HOPOPTS: 4223 case IPPROTO_DSTOPTS: 4224 /* Assumes the headers are identical for hbh and dst */ 4225 desthdr = (ip6_dest_t *)whereptr; 4226 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4227 if ((uchar_t *)desthdr + ehdrlen > endptr) 4228 return (B_FALSE); 4229 nexthdrp = &desthdr->ip6d_nxt; 4230 break; 4231 case IPPROTO_ROUTING: 4232 rthdr = (ip6_rthdr_t *)whereptr; 4233 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4234 if ((uchar_t *)rthdr + ehdrlen > endptr) 4235 return (B_FALSE); 4236 nexthdrp = &rthdr->ip6r_nxt; 4237 break; 4238 case IPPROTO_FRAGMENT: 4239 fraghdr = (ip6_frag_t *)whereptr; 4240 ehdrlen = sizeof (ip6_frag_t); 4241 if ((uchar_t *)&fraghdr[1] > endptr) 4242 return (B_FALSE); 4243 nexthdrp = &fraghdr->ip6f_nxt; 4244 break; 4245 case IPPROTO_NONE: 4246 /* No next header means we're finished */ 4247 default: 4248 *hdr_length_ptr = length; 4249 *nexthdrpp = nexthdrp; 4250 return (B_TRUE); 4251 } 4252 length += ehdrlen; 4253 whereptr += ehdrlen; 4254 *hdr_length_ptr = length; 4255 *nexthdrpp = nexthdrp; 4256 } 4257 switch (*nexthdrp) { 4258 case IPPROTO_HOPOPTS: 4259 case IPPROTO_DSTOPTS: 4260 case IPPROTO_ROUTING: 4261 case IPPROTO_FRAGMENT: 4262 /* 4263 * If any know extension headers are still to be processed, 4264 * the 
packet's malformed (or at least all the IP header(s) are 4265 * not in the same mblk - and that should never happen. 4266 */ 4267 return (B_FALSE); 4268 4269 default: 4270 /* 4271 * If we get here, we know that all of the IP headers were in 4272 * the same mblk, even if the ULP header is in the next mblk. 4273 */ 4274 *hdr_length_ptr = length; 4275 *nexthdrpp = nexthdrp; 4276 return (B_TRUE); 4277 } 4278 } 4279 4280 /* 4281 * Return the length of the IPv6 related headers (including extension headers) 4282 * Returns a length even if the packet is malformed. 4283 */ 4284 int 4285 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4286 { 4287 uint16_t hdr_len; 4288 uint8_t *nexthdrp; 4289 4290 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4291 return (hdr_len); 4292 } 4293 4294 /* 4295 * Select an ill for the packet by considering load spreading across 4296 * a different ill in the group if dst_ill is part of some group. 4297 */ 4298 static ill_t * 4299 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4300 { 4301 ill_t *ill; 4302 4303 /* 4304 * We schedule irrespective of whether the source address is 4305 * INADDR_UNSPECIED or not. 4306 */ 4307 ill = illgrp_scheduler(dst_ill); 4308 if (ill == NULL) 4309 return (NULL); 4310 4311 /* 4312 * For groups with names ip_sioctl_groupname ensures that all 4313 * ills are of same type. For groups without names, ifgrp_insert 4314 * ensures this. 4315 */ 4316 ASSERT(dst_ill->ill_type == ill->ill_type); 4317 4318 return (ill); 4319 } 4320 4321 /* 4322 * IPv6 - 4323 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4324 * to send out a packet to a destination address for which we do not have 4325 * specific routing information. 4326 * 4327 * Handle non-multicast packets. If ill is non-NULL the match is done 4328 * for that ill. 
4329 * 4330 * When a specific ill is specified (using IPV6_PKTINFO, 4331 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4332 * on routing entries (ftable and ctable) that have a matching 4333 * ire->ire_ipif->ipif_ill. Thus this can only be used 4334 * for destinations that are on-link for the specific ill 4335 * and that can appear on multiple links. Thus it is useful 4336 * for multicast destinations, link-local destinations, and 4337 * at some point perhaps for site-local destinations (if the 4338 * node sits at a site boundary). 4339 * We create the cache entries in the regular ctable since 4340 * it can not "confuse" things for other destinations. 4341 * table. 4342 * 4343 * When ill is part of a ill group, we subject the packets 4344 * to load spreading even if the ill is specified by the 4345 * means described above. We disable only for IPV6_BOUND_PIF 4346 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4347 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4348 * set. 4349 * 4350 * NOTE : These are the scopes of some of the variables that point at IRE, 4351 * which needs to be followed while making any future modifications 4352 * to avoid memory leaks. 4353 * 4354 * - ire and sire are the entries looked up initially by 4355 * ire_ftable_lookup_v6. 4356 * - ipif_ire is used to hold the interface ire associated with 4357 * the new cache ire. But it's scope is limited, so we always REFRELE 4358 * it before branching out to error paths. 4359 * - save_ire is initialized before ire_create, so that ire returned 4360 * by ire_create will not over-write the ire. We REFRELE save_ire 4361 * before breaking out of the switch. 4362 * 4363 * Thus on failures, we have to REFRELE only ire and sire, if they 4364 * are not NULL. 4365 * 4366 * v6srcp may be used in the future. Currently unused. 
4367 */ 4368 /* ARGSUSED */ 4369 void 4370 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4371 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4372 { 4373 in6_addr_t v6gw; 4374 in6_addr_t dst; 4375 ire_t *ire = NULL; 4376 ipif_t *src_ipif = NULL; 4377 ill_t *dst_ill = NULL; 4378 ire_t *sire = NULL; 4379 ire_t *save_ire; 4380 mblk_t *dlureq_mp; 4381 ip6_t *ip6h; 4382 int err = 0; 4383 mblk_t *first_mp; 4384 ipsec_out_t *io; 4385 ill_t *attach_ill = NULL; 4386 ushort_t ire_marks = 0; 4387 int match_flags; 4388 boolean_t ip6i_present; 4389 ire_t *first_sire = NULL; 4390 mblk_t *copy_mp = NULL; 4391 mblk_t *xmit_mp = NULL; 4392 in6_addr_t save_dst; 4393 uint32_t multirt_flags = 4394 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4395 boolean_t multirt_is_resolvable; 4396 boolean_t multirt_resolve_next; 4397 boolean_t need_rele = B_FALSE; 4398 boolean_t do_attach_ill = B_FALSE; 4399 boolean_t ip6_asp_table_held = B_FALSE; 4400 tsol_ire_gw_secattr_t *attrp = NULL; 4401 tsol_gcgrp_t *gcgrp = NULL; 4402 tsol_gcgrp_addr_t ga; 4403 4404 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4405 4406 first_mp = mp; 4407 if (mp->b_datap->db_type == M_CTL) { 4408 mp = mp->b_cont; 4409 io = (ipsec_out_t *)first_mp->b_rptr; 4410 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4411 } else { 4412 io = NULL; 4413 } 4414 4415 /* 4416 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4417 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4418 * could be NULL. 4419 * 4420 * This information can appear either in an ip6i_t or an IPSEC_OUT 4421 * message. 4422 */ 4423 ip6h = (ip6_t *)mp->b_rptr; 4424 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4425 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4426 if (!ip6i_present || 4427 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4428 attach_ill = ip_grab_attach_ill(ill, first_mp, 4429 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4430 io->ipsec_out_ill_index), B_TRUE, ipst); 4431 /* Failure case frees things for us. */ 4432 if (attach_ill == NULL) 4433 return; 4434 4435 /* 4436 * Check if we need an ire that will not be 4437 * looked up by anybody else i.e. HIDDEN. 4438 */ 4439 if (ill_is_probeonly(attach_ill)) 4440 ire_marks = IRE_MARK_HIDDEN; 4441 } 4442 } 4443 4444 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4445 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4446 goto icmp_err_ret; 4447 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4448 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4449 goto icmp_err_ret; 4450 } 4451 4452 /* 4453 * If this IRE is created for forwarding or it is not for 4454 * TCP traffic, mark it as temporary. 4455 * 4456 * Is it sufficient just to check the next header?? 4457 */ 4458 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4459 ire_marks |= IRE_MARK_TEMPORARY; 4460 4461 /* 4462 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4463 * chain until it gets the most specific information available. 4464 * For example, we know that there is no IRE_CACHE for this dest, 4465 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4466 * ire_ftable_lookup_v6 will look up the gateway, etc. 4467 */ 4468 4469 if (ill == NULL) { 4470 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4471 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4472 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4473 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4474 match_flags, ipst); 4475 /* 4476 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4477 * in a NULL ill, but the packet could be a neighbor 4478 * solicitation/advertisment and could have a valid attach_ill. 4479 */ 4480 if (attach_ill != NULL) 4481 ill_refrele(attach_ill); 4482 } else { 4483 if (attach_ill != NULL) { 4484 /* 4485 * attach_ill is set only for communicating with 4486 * on-link hosts. 
So, don't look for DEFAULT. 4487 * ip_wput_v6 passes the right ill in this case and 4488 * hence we can assert. 4489 */ 4490 ASSERT(ill == attach_ill); 4491 ill_refrele(attach_ill); 4492 do_attach_ill = B_TRUE; 4493 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4494 } else { 4495 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4496 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4497 } 4498 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4499 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4500 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4501 } 4502 4503 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4504 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4505 4506 if (zoneid == ALL_ZONES && ire != NULL) { 4507 /* 4508 * In the forwarding case, we can use a route from any zone 4509 * since we won't change the source address. We can easily 4510 * assert that the source address is already set when there's no 4511 * ip6_info header - otherwise we'd have to call pullupmsg(). 4512 */ 4513 ASSERT(ip6i_present || 4514 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4515 zoneid = ire->ire_zoneid; 4516 } 4517 4518 /* 4519 * We enter a loop that will be run only once in most cases. 4520 * The loop is re-entered in the case where the destination 4521 * can be reached through multiple RTF_MULTIRT-flagged routes. 4522 * The intention is to compute multiple routes to a single 4523 * destination in a single ip_newroute_v6 call. 4524 * The information is contained in sire->ire_flags. 
4525 */ 4526 do { 4527 multirt_resolve_next = B_FALSE; 4528 4529 if (dst_ill != NULL) { 4530 ill_refrele(dst_ill); 4531 dst_ill = NULL; 4532 } 4533 if (src_ipif != NULL) { 4534 ipif_refrele(src_ipif); 4535 src_ipif = NULL; 4536 } 4537 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4538 ip3dbg(("ip_newroute_v6: starting new resolution " 4539 "with first_mp %p, tag %d\n", 4540 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4541 4542 /* 4543 * We check if there are trailing unresolved routes for 4544 * the destination contained in sire. 4545 */ 4546 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4547 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4548 4549 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4550 "ire %p, sire %p\n", 4551 multirt_is_resolvable, (void *)ire, (void *)sire)); 4552 4553 if (!multirt_is_resolvable) { 4554 /* 4555 * No more multirt routes to resolve; give up 4556 * (all routes resolved or no more resolvable 4557 * routes). 4558 */ 4559 if (ire != NULL) { 4560 ire_refrele(ire); 4561 ire = NULL; 4562 } 4563 } else { 4564 ASSERT(sire != NULL); 4565 ASSERT(ire != NULL); 4566 /* 4567 * We simply use first_sire as a flag that 4568 * indicates if a resolvable multirt route has 4569 * already been found during the preceding 4570 * loops. If it is not the case, we may have 4571 * to send an ICMP error to report that the 4572 * destination is unreachable. We do not 4573 * IRE_REFHOLD first_sire. 4574 */ 4575 if (first_sire == NULL) { 4576 first_sire = sire; 4577 } 4578 } 4579 } 4580 if ((ire == NULL) || (ire == sire)) { 4581 /* 4582 * either ire == NULL (the destination cannot be 4583 * resolved) or ire == sire (the gateway cannot be 4584 * resolved). At this point, there are no more routes 4585 * to resolve for the destination, thus we exit. 
4586 */ 4587 if (ip_debug > 3) { 4588 /* ip2dbg */ 4589 pr_addr_dbg("ip_newroute_v6: " 4590 "can't resolve %s\n", AF_INET6, v6dstp); 4591 } 4592 ip3dbg(("ip_newroute_v6: " 4593 "ire %p, sire %p, first_sire %p\n", 4594 (void *)ire, (void *)sire, (void *)first_sire)); 4595 4596 if (sire != NULL) { 4597 ire_refrele(sire); 4598 sire = NULL; 4599 } 4600 4601 if (first_sire != NULL) { 4602 /* 4603 * At least one multirt route has been found 4604 * in the same ip_newroute() call; there is no 4605 * need to report an ICMP error. 4606 * first_sire was not IRE_REFHOLDed. 4607 */ 4608 MULTIRT_DEBUG_UNTAG(first_mp); 4609 freemsg(first_mp); 4610 return; 4611 } 4612 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4613 RTA_DST, ipst); 4614 goto icmp_err_ret; 4615 } 4616 4617 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4618 4619 /* 4620 * Verify that the returned IRE does not have either the 4621 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4622 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4623 */ 4624 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4625 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4626 goto icmp_err_ret; 4627 4628 /* 4629 * Increment the ire_ob_pkt_count field for ire if it is an 4630 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4631 * increment the same for the parent IRE, sire, if it is some 4632 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4633 */ 4634 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4635 UPDATE_OB_PKT_COUNT(ire); 4636 ire->ire_last_used_time = lbolt; 4637 } 4638 4639 if (sire != NULL) { 4640 mutex_enter(&sire->ire_lock); 4641 v6gw = sire->ire_gateway_addr_v6; 4642 mutex_exit(&sire->ire_lock); 4643 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4644 IRE_INTERFACE)) == 0); 4645 UPDATE_OB_PKT_COUNT(sire); 4646 sire->ire_last_used_time = lbolt; 4647 } else { 4648 v6gw = ipv6_all_zeros; 4649 } 4650 4651 /* 4652 * We have a route to reach the destination. 
4653 * 4654 * 1) If the interface is part of ill group, try to get a new 4655 * ill taking load spreading into account. 4656 * 4657 * 2) After selecting the ill, get a source address that might 4658 * create good inbound load spreading and that matches the 4659 * right scope. ipif_select_source_v6 does this for us. 4660 * 4661 * If the application specified the ill (ifindex), we still 4662 * load spread. Only if the packets needs to go out specifically 4663 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4664 * IPV6_BOUND_PIF we don't try to use a different ill for load 4665 * spreading. 4666 */ 4667 if (!do_attach_ill) { 4668 /* 4669 * If the interface belongs to an interface group, 4670 * make sure the next possible interface in the group 4671 * is used. This encourages load spreading among 4672 * peers in an interface group. However, in the case 4673 * of multirouting, load spreading is not used, as we 4674 * actually want to replicate outgoing packets through 4675 * particular interfaces. 4676 * 4677 * Note: While we pick a dst_ill we are really only 4678 * interested in the ill for load spreading. 4679 * The source ipif is determined by source address 4680 * selection below. 4681 */ 4682 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4683 dst_ill = ire->ire_ipif->ipif_ill; 4684 /* For uniformity do a refhold */ 4685 ill_refhold(dst_ill); 4686 } else { 4687 /* 4688 * If we are here trying to create an IRE_CACHE 4689 * for an offlink destination and have the 4690 * IRE_CACHE for the next hop and the latter is 4691 * using virtual IP source address selection i.e 4692 * it's ire->ire_ipif is pointing to a virtual 4693 * network interface (vni) then 4694 * ip_newroute_get_dst_ll() will return the vni 4695 * interface as the dst_ill. 
Since the vni is 4696 * virtual i.e not associated with any physical 4697 * interface, it cannot be the dst_ill, hence 4698 * in such a case call ip_newroute_get_dst_ll() 4699 * with the stq_ill instead of the ire_ipif ILL. 4700 * The function returns a refheld ill. 4701 */ 4702 if ((ire->ire_type == IRE_CACHE) && 4703 IS_VNI(ire->ire_ipif->ipif_ill)) 4704 dst_ill = ip_newroute_get_dst_ill_v6( 4705 ire->ire_stq->q_ptr); 4706 else 4707 dst_ill = ip_newroute_get_dst_ill_v6( 4708 ire->ire_ipif->ipif_ill); 4709 } 4710 if (dst_ill == NULL) { 4711 if (ip_debug > 2) { 4712 pr_addr_dbg("ip_newroute_v6 : no dst " 4713 "ill for dst %s\n", 4714 AF_INET6, v6dstp); 4715 } 4716 goto icmp_err_ret; 4717 } else if (dst_ill->ill_group == NULL && ill != NULL && 4718 dst_ill != ill) { 4719 /* 4720 * If "ill" is not part of any group, we should 4721 * have found a route matching "ill" as we 4722 * called ire_ftable_lookup_v6 with 4723 * MATCH_IRE_ILL_GROUP. 4724 * Rather than asserting when there is a 4725 * mismatch, we just drop the packet. 4726 */ 4727 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4728 "dst_ill %s ill %s\n", 4729 dst_ill->ill_name, 4730 ill->ill_name)); 4731 goto icmp_err_ret; 4732 } 4733 } else { 4734 dst_ill = ire->ire_ipif->ipif_ill; 4735 /* For uniformity do refhold */ 4736 ill_refhold(dst_ill); 4737 /* 4738 * We should have found a route matching ill as we 4739 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4740 * Rather than asserting, while there is a mismatch, 4741 * we just drop the packet. 4742 */ 4743 if (dst_ill != ill) { 4744 ip0dbg(("ip_newroute_v6: Packet dropped as " 4745 "IP6I_ATTACH_IF ill is %s, " 4746 "ire->ire_ipif->ipif_ill is %s\n", 4747 ill->ill_name, 4748 dst_ill->ill_name)); 4749 goto icmp_err_ret; 4750 } 4751 } 4752 /* 4753 * Pick a source address which matches the scope of the 4754 * destination address. 4755 * For RTF_SETSRC routes, the source address is imposed by the 4756 * parent ire (sire). 
4757 */ 4758 ASSERT(src_ipif == NULL); 4759 if (ire->ire_type == IRE_IF_RESOLVER && 4760 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4761 ip6_asp_can_lookup(ipst)) { 4762 /* 4763 * The ire cache entry we're adding is for the 4764 * gateway itself. The source address in this case 4765 * is relative to the gateway's address. 4766 */ 4767 ip6_asp_table_held = B_TRUE; 4768 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4769 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4770 if (src_ipif != NULL) 4771 ire_marks |= IRE_MARK_USESRC_CHECK; 4772 } else { 4773 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4774 /* 4775 * Check that the ipif matching the requested 4776 * source address still exists. 4777 */ 4778 src_ipif = ipif_lookup_addr_v6( 4779 &sire->ire_src_addr_v6, NULL, zoneid, 4780 NULL, NULL, NULL, NULL, ipst); 4781 } 4782 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4783 uint_t restrict_ill = RESTRICT_TO_NONE; 4784 4785 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4786 & IP6I_ATTACH_IF) 4787 restrict_ill = RESTRICT_TO_ILL; 4788 ip6_asp_table_held = B_TRUE; 4789 src_ipif = ipif_select_source_v6(dst_ill, 4790 v6dstp, restrict_ill, 4791 IPV6_PREFER_SRC_DEFAULT, zoneid); 4792 if (src_ipif != NULL) 4793 ire_marks |= IRE_MARK_USESRC_CHECK; 4794 } 4795 } 4796 4797 if (src_ipif == NULL) { 4798 if (ip_debug > 2) { 4799 /* ip1dbg */ 4800 pr_addr_dbg("ip_newroute_v6: no src for " 4801 "dst %s\n, ", AF_INET6, v6dstp); 4802 printf("ip_newroute_v6: interface name %s\n", 4803 dst_ill->ill_name); 4804 } 4805 goto icmp_err_ret; 4806 } 4807 4808 if (ip_debug > 3) { 4809 /* ip2dbg */ 4810 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4811 AF_INET6, &v6gw); 4812 } 4813 ip2dbg(("\tire type %s (%d)\n", 4814 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4815 4816 /* 4817 * At this point in ip_newroute_v6(), ire is either the 4818 * IRE_CACHE of the next-hop gateway for an off-subnet 4819 * destination or an IRE_INTERFACE type that should be used 4820 * to 
resolve an on-subnet destination or an on-subnet 4821 * next-hop gateway. 4822 * 4823 * In the IRE_CACHE case, we have the following : 4824 * 4825 * 1) src_ipif - used for getting a source address. 4826 * 4827 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4828 * means packets using this IRE_CACHE will go out on dst_ill. 4829 * 4830 * 3) The IRE sire will point to the prefix that is the longest 4831 * matching route for the destination. These prefix types 4832 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4833 * 4834 * The newly created IRE_CACHE entry for the off-subnet 4835 * destination is tied to both the prefix route and the 4836 * interface route used to resolve the next-hop gateway 4837 * via the ire_phandle and ire_ihandle fields, respectively. 4838 * 4839 * In the IRE_INTERFACE case, we have the following : 4840 * 4841 * 1) src_ipif - used for getting a source address. 4842 * 4843 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4844 * means packets using the IRE_CACHE that we will build 4845 * here will go out on dst_ill. 4846 * 4847 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4848 * to be created will only be tied to the IRE_INTERFACE that 4849 * was derived from the ire_ihandle field. 4850 * 4851 * If sire is non-NULL, it means the destination is off-link 4852 * and we will first create the IRE_CACHE for the gateway. 4853 * Next time through ip_newroute_v6, we will create the 4854 * IRE_CACHE for the final destination as described above. 4855 */ 4856 save_ire = ire; 4857 switch (ire->ire_type) { 4858 case IRE_CACHE: { 4859 ire_t *ipif_ire; 4860 4861 ASSERT(sire != NULL); 4862 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4863 mutex_enter(&ire->ire_lock); 4864 v6gw = ire->ire_gateway_addr_v6; 4865 mutex_exit(&ire->ire_lock); 4866 } 4867 /* 4868 * We need 3 ire's to create a new cache ire for an 4869 * off-link destination from the cache ire of the 4870 * gateway. 4871 * 4872 * 1. The prefix ire 'sire' 4873 * 2. 
The cache ire of the gateway 'ire' 4874 * 3. The interface ire 'ipif_ire' 4875 * 4876 * We have (1) and (2). We lookup (3) below. 4877 * 4878 * If there is no interface route to the gateway, 4879 * it is a race condition, where we found the cache 4880 * but the inteface route has been deleted. 4881 */ 4882 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4883 if (ipif_ire == NULL) { 4884 ip1dbg(("ip_newroute_v6:" 4885 "ire_ihandle_lookup_offlink_v6 failed\n")); 4886 goto icmp_err_ret; 4887 } 4888 /* 4889 * Assume DL_UNITDATA_REQ is same for all physical 4890 * interfaces in the ifgrp. If it isn't, this code will 4891 * have to be seriously rewhacked to allow the 4892 * fastpath probing (such that I cache the link 4893 * header in the IRE_CACHE) to work over ifgrps. 4894 * We have what we need to build an IRE_CACHE. 4895 */ 4896 /* 4897 * Note: the new ire inherits RTF_SETSRC 4898 * and RTF_MULTIRT to propagate these flags from prefix 4899 * to cache. 4900 */ 4901 4902 /* 4903 * Check cached gateway IRE for any security 4904 * attributes; if found, associate the gateway 4905 * credentials group to the destination IRE. 
4906 */ 4907 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4908 mutex_enter(&attrp->igsa_lock); 4909 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4910 GCGRP_REFHOLD(gcgrp); 4911 mutex_exit(&attrp->igsa_lock); 4912 } 4913 4914 ire = ire_create_v6( 4915 v6dstp, /* dest address */ 4916 &ipv6_all_ones, /* mask */ 4917 &src_ipif->ipif_v6src_addr, /* source address */ 4918 &v6gw, /* gateway address */ 4919 &save_ire->ire_max_frag, 4920 NULL, /* Fast Path header */ 4921 dst_ill->ill_rq, /* recv-from queue */ 4922 dst_ill->ill_wq, /* send-to queue */ 4923 IRE_CACHE, 4924 NULL, 4925 src_ipif, 4926 &sire->ire_mask_v6, /* Parent mask */ 4927 sire->ire_phandle, /* Parent handle */ 4928 ipif_ire->ire_ihandle, /* Interface handle */ 4929 sire->ire_flags & /* flags if any */ 4930 (RTF_SETSRC | RTF_MULTIRT), 4931 &(sire->ire_uinfo), 4932 NULL, 4933 gcgrp, 4934 ipst); 4935 4936 if (ire == NULL) { 4937 if (gcgrp != NULL) { 4938 GCGRP_REFRELE(gcgrp); 4939 gcgrp = NULL; 4940 } 4941 ire_refrele(save_ire); 4942 ire_refrele(ipif_ire); 4943 break; 4944 } 4945 4946 /* reference now held by IRE */ 4947 gcgrp = NULL; 4948 4949 ire->ire_marks |= ire_marks; 4950 4951 /* 4952 * Prevent sire and ipif_ire from getting deleted. The 4953 * newly created ire is tied to both of them via the 4954 * phandle and ihandle respectively. 4955 */ 4956 IRB_REFHOLD(sire->ire_bucket); 4957 /* Has it been removed already ? */ 4958 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4959 IRB_REFRELE(sire->ire_bucket); 4960 ire_refrele(ipif_ire); 4961 ire_refrele(save_ire); 4962 break; 4963 } 4964 4965 IRB_REFHOLD(ipif_ire->ire_bucket); 4966 /* Has it been removed already ? 
*/ 4967 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4968 IRB_REFRELE(ipif_ire->ire_bucket); 4969 IRB_REFRELE(sire->ire_bucket); 4970 ire_refrele(ipif_ire); 4971 ire_refrele(save_ire); 4972 break; 4973 } 4974 4975 xmit_mp = first_mp; 4976 if (ire->ire_flags & RTF_MULTIRT) { 4977 copy_mp = copymsg(first_mp); 4978 if (copy_mp != NULL) { 4979 xmit_mp = copy_mp; 4980 MULTIRT_DEBUG_TAG(first_mp); 4981 } 4982 } 4983 ire_add_then_send(q, ire, xmit_mp); 4984 if (ip6_asp_table_held) { 4985 ip6_asp_table_refrele(ipst); 4986 ip6_asp_table_held = B_FALSE; 4987 } 4988 ire_refrele(save_ire); 4989 4990 /* Assert that sire is not deleted yet. */ 4991 ASSERT(sire->ire_ptpn != NULL); 4992 IRB_REFRELE(sire->ire_bucket); 4993 4994 /* Assert that ipif_ire is not deleted yet. */ 4995 ASSERT(ipif_ire->ire_ptpn != NULL); 4996 IRB_REFRELE(ipif_ire->ire_bucket); 4997 ire_refrele(ipif_ire); 4998 4999 if (copy_mp != NULL) { 5000 /* 5001 * Search for the next unresolved 5002 * multirt route. 5003 */ 5004 copy_mp = NULL; 5005 ipif_ire = NULL; 5006 ire = NULL; 5007 /* re-enter the loop */ 5008 multirt_resolve_next = B_TRUE; 5009 continue; 5010 } 5011 ire_refrele(sire); 5012 ill_refrele(dst_ill); 5013 ipif_refrele(src_ipif); 5014 return; 5015 } 5016 case IRE_IF_NORESOLVER: 5017 /* 5018 * We have what we need to build an IRE_CACHE. 5019 * 5020 * Create a new dlureq_mp with the IPv6 gateway 5021 * address in destination address in the DLPI hdr 5022 * if the physical length is exactly 16 bytes. 5023 */ 5024 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5025 const in6_addr_t *addr; 5026 5027 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5028 addr = &v6gw; 5029 else 5030 addr = v6dstp; 5031 5032 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5033 dst_ill->ill_phys_addr_length, 5034 dst_ill->ill_sap, 5035 dst_ill->ill_sap_length); 5036 } else { 5037 /* 5038 * handle the Gated case, where we create 5039 * a NORESOLVER route for loopback. 
5040 */ 5041 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5042 break; 5043 dlureq_mp = ill_dlur_gen(NULL, 5044 dst_ill->ill_phys_addr_length, 5045 dst_ill->ill_sap, 5046 dst_ill->ill_sap_length); 5047 } 5048 if (dlureq_mp == NULL) 5049 break; 5050 /* 5051 * TSol note: We are creating the ire cache for the 5052 * destination 'dst'. If 'dst' is offlink, going 5053 * through the first hop 'gw', the security attributes 5054 * of 'dst' must be set to point to the gateway 5055 * credentials of gateway 'gw'. If 'dst' is onlink, it 5056 * is possible that 'dst' is a potential gateway that is 5057 * referenced by some route that has some security 5058 * attributes. Thus in the former case, we need to do a 5059 * gcgrp_lookup of 'gw' while in the latter case we 5060 * need to do gcgrp_lookup of 'dst' itself. 5061 */ 5062 ga.ga_af = AF_INET6; 5063 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5064 ga.ga_addr = v6gw; 5065 else 5066 ga.ga_addr = *v6dstp; 5067 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5068 5069 /* 5070 * Note: the new ire inherits sire flags RTF_SETSRC 5071 * and RTF_MULTIRT to propagate those rules from prefix 5072 * to cache. 5073 */ 5074 ire = ire_create_v6( 5075 v6dstp, /* dest address */ 5076 &ipv6_all_ones, /* mask */ 5077 &src_ipif->ipif_v6src_addr, /* source address */ 5078 &v6gw, /* gateway address */ 5079 &save_ire->ire_max_frag, 5080 NULL, /* Fast Path header */ 5081 dst_ill->ill_rq, /* recv-from queue */ 5082 dst_ill->ill_wq, /* send-to queue */ 5083 IRE_CACHE, 5084 dlureq_mp, 5085 src_ipif, 5086 &save_ire->ire_mask_v6, /* Parent mask */ 5087 (sire != NULL) ? /* Parent handle */ 5088 sire->ire_phandle : 0, 5089 save_ire->ire_ihandle, /* Interface handle */ 5090 (sire != NULL) ? 
/* flags if any */ 5091 sire->ire_flags & 5092 (RTF_SETSRC | RTF_MULTIRT) : 0, 5093 &(save_ire->ire_uinfo), 5094 NULL, 5095 gcgrp, 5096 ipst); 5097 5098 freeb(dlureq_mp); 5099 5100 if (ire == NULL) { 5101 if (gcgrp != NULL) { 5102 GCGRP_REFRELE(gcgrp); 5103 gcgrp = NULL; 5104 } 5105 ire_refrele(save_ire); 5106 break; 5107 } 5108 5109 /* reference now held by IRE */ 5110 gcgrp = NULL; 5111 5112 ire->ire_marks |= ire_marks; 5113 5114 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5115 dst = v6gw; 5116 else 5117 dst = *v6dstp; 5118 err = ndp_noresolver(dst_ill, &dst); 5119 if (err != 0) { 5120 ire_refrele(save_ire); 5121 break; 5122 } 5123 5124 /* Prevent save_ire from getting deleted */ 5125 IRB_REFHOLD(save_ire->ire_bucket); 5126 /* Has it been removed already ? */ 5127 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5128 IRB_REFRELE(save_ire->ire_bucket); 5129 ire_refrele(save_ire); 5130 break; 5131 } 5132 5133 xmit_mp = first_mp; 5134 /* 5135 * In case of MULTIRT, a copy of the current packet 5136 * to send is made to further re-enter the 5137 * loop and attempt another route resolution 5138 */ 5139 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5140 copy_mp = copymsg(first_mp); 5141 if (copy_mp != NULL) { 5142 xmit_mp = copy_mp; 5143 MULTIRT_DEBUG_TAG(first_mp); 5144 } 5145 } 5146 ire_add_then_send(q, ire, xmit_mp); 5147 if (ip6_asp_table_held) { 5148 ip6_asp_table_refrele(ipst); 5149 ip6_asp_table_held = B_FALSE; 5150 } 5151 5152 /* Assert that it is not deleted yet. */ 5153 ASSERT(save_ire->ire_ptpn != NULL); 5154 IRB_REFRELE(save_ire->ire_bucket); 5155 ire_refrele(save_ire); 5156 5157 if (copy_mp != NULL) { 5158 /* 5159 * If we found a (no)resolver, we ignore any 5160 * trailing top priority IRE_CACHE in 5161 * further loops. This ensures that we do not 5162 * omit any (no)resolver despite the priority 5163 * in this call. 5164 * IRE_CACHE, if any, will be processed 5165 * by another thread entering ip_newroute(), 5166 * (on resolver response, for example). 
5167 * We use this to force multiple parallel 5168 * resolution as soon as a packet needs to be 5169 * sent. The result is, after one packet 5170 * emission all reachable routes are generally 5171 * resolved. 5172 * Otherwise, complete resolution of MULTIRT 5173 * routes would require several emissions as 5174 * side effect. 5175 */ 5176 multirt_flags &= ~MULTIRT_CACHEGW; 5177 5178 /* 5179 * Search for the next unresolved multirt 5180 * route. 5181 */ 5182 copy_mp = NULL; 5183 save_ire = NULL; 5184 ire = NULL; 5185 /* re-enter the loop */ 5186 multirt_resolve_next = B_TRUE; 5187 continue; 5188 } 5189 5190 /* Don't need sire anymore */ 5191 if (sire != NULL) 5192 ire_refrele(sire); 5193 ill_refrele(dst_ill); 5194 ipif_refrele(src_ipif); 5195 return; 5196 5197 case IRE_IF_RESOLVER: 5198 /* 5199 * We can't build an IRE_CACHE yet, but at least we 5200 * found a resolver that can help. 5201 */ 5202 dst = *v6dstp; 5203 5204 /* 5205 * To be at this point in the code with a non-zero gw 5206 * means that dst is reachable through a gateway that 5207 * we have never resolved. By changing dst to the gw 5208 * addr we resolve the gateway first. When 5209 * ire_add_then_send() tries to put the IP dg to dst, 5210 * it will reenter ip_newroute() at which time we will 5211 * find the IRE_CACHE for the gw and create another 5212 * IRE_CACHE above (for dst itself). 5213 */ 5214 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5215 save_dst = dst; 5216 dst = v6gw; 5217 v6gw = ipv6_all_zeros; 5218 } 5219 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5220 /* 5221 * Ask the external resolver to do its thing. 
5222 * Make an mblk chain in the following form: 5223 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5224 */ 5225 mblk_t *ire_mp; 5226 mblk_t *areq_mp; 5227 areq_t *areq; 5228 in6_addr_t *addrp; 5229 5230 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5231 if (ip6_asp_table_held) { 5232 ip6_asp_table_refrele(ipst); 5233 ip6_asp_table_held = B_FALSE; 5234 } 5235 ire = ire_create_mp_v6( 5236 &dst, /* dest address */ 5237 &ipv6_all_ones, /* mask */ 5238 &src_ipif->ipif_v6src_addr, 5239 /* source address */ 5240 &v6gw, /* gateway address */ 5241 NULL, /* Fast Path header */ 5242 dst_ill->ill_rq, /* recv-from queue */ 5243 dst_ill->ill_wq, /* send-to queue */ 5244 IRE_CACHE, 5245 NULL, 5246 src_ipif, 5247 &save_ire->ire_mask_v6, 5248 /* Parent mask */ 5249 0, 5250 save_ire->ire_ihandle, 5251 /* Interface handle */ 5252 0, /* flags if any */ 5253 &(save_ire->ire_uinfo), 5254 NULL, 5255 NULL, 5256 ipst); 5257 5258 ire_refrele(save_ire); 5259 if (ire == NULL) { 5260 ip1dbg(("ip_newroute_v6:" 5261 "ire is NULL\n")); 5262 break; 5263 } 5264 5265 if ((sire != NULL) && 5266 (sire->ire_flags & RTF_MULTIRT)) { 5267 /* 5268 * processing a copy of the packet to 5269 * send for further resolution loops 5270 */ 5271 copy_mp = copymsg(first_mp); 5272 if (copy_mp != NULL) 5273 MULTIRT_DEBUG_TAG(copy_mp); 5274 } 5275 ire->ire_marks |= ire_marks; 5276 ire_mp = ire->ire_mp; 5277 /* 5278 * Now create or find an nce for this interface. 5279 * The hw addr will need to to be set from 5280 * the reply to the AR_ENTRY_QUERY that 5281 * we're about to send. This will be done in 5282 * ire_add_v6(). 5283 */ 5284 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5285 switch (err) { 5286 case 0: 5287 /* 5288 * New cache entry created. 5289 * Break, then ask the external 5290 * resolver. 5291 */ 5292 break; 5293 case EINPROGRESS: 5294 /* 5295 * Resolution in progress; 5296 * packet has been queued by 5297 * ndp_resolver(). 
5298 */ 5299 ire_delete(ire); 5300 ire = NULL; 5301 /* 5302 * Check if another multirt 5303 * route must be resolved. 5304 */ 5305 if (copy_mp != NULL) { 5306 /* 5307 * If we found a resolver, we 5308 * ignore any trailing top 5309 * priority IRE_CACHE in 5310 * further loops. The reason is 5311 * the same as for noresolver. 5312 */ 5313 multirt_flags &= 5314 ~MULTIRT_CACHEGW; 5315 /* 5316 * Search for the next 5317 * unresolved multirt route. 5318 */ 5319 first_mp = copy_mp; 5320 copy_mp = NULL; 5321 mp = first_mp; 5322 if (mp->b_datap->db_type == 5323 M_CTL) { 5324 mp = mp->b_cont; 5325 } 5326 ASSERT(sire != NULL); 5327 dst = save_dst; 5328 /* 5329 * re-enter the loop 5330 */ 5331 multirt_resolve_next = 5332 B_TRUE; 5333 continue; 5334 } 5335 5336 if (sire != NULL) 5337 ire_refrele(sire); 5338 ill_refrele(dst_ill); 5339 ipif_refrele(src_ipif); 5340 return; 5341 default: 5342 /* 5343 * Transient error; packet will be 5344 * freed. 5345 */ 5346 ire_delete(ire); 5347 ire = NULL; 5348 break; 5349 } 5350 if (err != 0) 5351 break; 5352 /* 5353 * Now set up the AR_ENTRY_QUERY and send it. 5354 */ 5355 areq_mp = ill_arp_alloc(dst_ill, 5356 (uchar_t *)&ipv6_areq_template, 5357 (caddr_t)&dst); 5358 if (areq_mp == NULL) { 5359 ip1dbg(("ip_newroute_v6:" 5360 "areq_mp is NULL\n")); 5361 freemsg(ire_mp); 5362 break; 5363 } 5364 areq = (areq_t *)areq_mp->b_rptr; 5365 addrp = (in6_addr_t *)((char *)areq + 5366 areq->areq_target_addr_offset); 5367 *addrp = dst; 5368 addrp = (in6_addr_t *)((char *)areq + 5369 areq->areq_sender_addr_offset); 5370 *addrp = src_ipif->ipif_v6src_addr; 5371 /* 5372 * link the chain, then send up to the resolver. 5373 */ 5374 linkb(areq_mp, ire_mp); 5375 linkb(areq_mp, mp); 5376 ip1dbg(("ip_newroute_v6:" 5377 "putnext to resolver\n")); 5378 putnext(dst_ill->ill_rq, areq_mp); 5379 /* 5380 * Check if another multirt route 5381 * must be resolved. 
5382 */ 5383 ire = NULL; 5384 if (copy_mp != NULL) { 5385 /* 5386 * If we find a resolver, we ignore any 5387 * trailing top priority IRE_CACHE in 5388 * further loops. The reason is the 5389 * same as for noresolver. 5390 */ 5391 multirt_flags &= ~MULTIRT_CACHEGW; 5392 /* 5393 * Search for the next unresolved 5394 * multirt route. 5395 */ 5396 first_mp = copy_mp; 5397 copy_mp = NULL; 5398 mp = first_mp; 5399 if (mp->b_datap->db_type == M_CTL) { 5400 mp = mp->b_cont; 5401 } 5402 ASSERT(sire != NULL); 5403 dst = save_dst; 5404 /* 5405 * re-enter the loop 5406 */ 5407 multirt_resolve_next = B_TRUE; 5408 continue; 5409 } 5410 5411 if (sire != NULL) 5412 ire_refrele(sire); 5413 ill_refrele(dst_ill); 5414 ipif_refrele(src_ipif); 5415 return; 5416 } 5417 /* 5418 * Non-external resolver case. 5419 * 5420 * TSol note: Please see the note above the 5421 * IRE_IF_NORESOLVER case. 5422 */ 5423 ga.ga_af = AF_INET6; 5424 ga.ga_addr = dst; 5425 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5426 5427 ire = ire_create_v6( 5428 &dst, /* dest address */ 5429 &ipv6_all_ones, /* mask */ 5430 &src_ipif->ipif_v6src_addr, /* source address */ 5431 &v6gw, /* gateway address */ 5432 &save_ire->ire_max_frag, 5433 NULL, /* Fast Path header */ 5434 dst_ill->ill_rq, /* recv-from queue */ 5435 dst_ill->ill_wq, /* send-to queue */ 5436 IRE_CACHE, 5437 NULL, 5438 src_ipif, 5439 &save_ire->ire_mask_v6, /* Parent mask */ 5440 0, 5441 save_ire->ire_ihandle, /* Interface handle */ 5442 0, /* flags if any */ 5443 &(save_ire->ire_uinfo), 5444 NULL, 5445 gcgrp, 5446 ipst); 5447 5448 if (ire == NULL) { 5449 if (gcgrp != NULL) { 5450 GCGRP_REFRELE(gcgrp); 5451 gcgrp = NULL; 5452 } 5453 ire_refrele(save_ire); 5454 break; 5455 } 5456 5457 /* reference now held by IRE */ 5458 gcgrp = NULL; 5459 5460 if ((sire != NULL) && 5461 (sire->ire_flags & RTF_MULTIRT)) { 5462 copy_mp = copymsg(first_mp); 5463 if (copy_mp != NULL) 5464 MULTIRT_DEBUG_TAG(copy_mp); 5465 } 5466 5467 ire->ire_marks |= ire_marks; 5468 err = 
ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5469 switch (err) { 5470 case 0: 5471 /* Prevent save_ire from getting deleted */ 5472 IRB_REFHOLD(save_ire->ire_bucket); 5473 /* Has it been removed already ? */ 5474 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5475 IRB_REFRELE(save_ire->ire_bucket); 5476 ire_refrele(save_ire); 5477 break; 5478 } 5479 5480 /* 5481 * We have a resolved cache entry, 5482 * add in the IRE. 5483 */ 5484 ire_add_then_send(q, ire, first_mp); 5485 if (ip6_asp_table_held) { 5486 ip6_asp_table_refrele(ipst); 5487 ip6_asp_table_held = B_FALSE; 5488 } 5489 5490 /* Assert that it is not deleted yet. */ 5491 ASSERT(save_ire->ire_ptpn != NULL); 5492 IRB_REFRELE(save_ire->ire_bucket); 5493 ire_refrele(save_ire); 5494 /* 5495 * Check if another multirt route 5496 * must be resolved. 5497 */ 5498 ire = NULL; 5499 if (copy_mp != NULL) { 5500 /* 5501 * If we find a resolver, we ignore any 5502 * trailing top priority IRE_CACHE in 5503 * further loops. The reason is the 5504 * same as for noresolver. 5505 */ 5506 multirt_flags &= ~MULTIRT_CACHEGW; 5507 /* 5508 * Search for the next unresolved 5509 * multirt route. 5510 */ 5511 first_mp = copy_mp; 5512 copy_mp = NULL; 5513 mp = first_mp; 5514 if (mp->b_datap->db_type == M_CTL) { 5515 mp = mp->b_cont; 5516 } 5517 ASSERT(sire != NULL); 5518 dst = save_dst; 5519 /* 5520 * re-enter the loop 5521 */ 5522 multirt_resolve_next = B_TRUE; 5523 continue; 5524 } 5525 5526 if (sire != NULL) 5527 ire_refrele(sire); 5528 ill_refrele(dst_ill); 5529 ipif_refrele(src_ipif); 5530 return; 5531 5532 case EINPROGRESS: 5533 /* 5534 * mp was consumed - presumably queued. 5535 * No need for ire, presumably resolution is 5536 * in progress, and ire will be added when the 5537 * address is resolved. 
5538 */ 5539 if (ip6_asp_table_held) { 5540 ip6_asp_table_refrele(ipst); 5541 ip6_asp_table_held = B_FALSE; 5542 } 5543 ASSERT(ire->ire_nce == NULL); 5544 ire_delete(ire); 5545 ire_refrele(save_ire); 5546 /* 5547 * Check if another multirt route 5548 * must be resolved. 5549 */ 5550 ire = NULL; 5551 if (copy_mp != NULL) { 5552 /* 5553 * If we find a resolver, we ignore any 5554 * trailing top priority IRE_CACHE in 5555 * further loops. The reason is the 5556 * same as for noresolver. 5557 */ 5558 multirt_flags &= ~MULTIRT_CACHEGW; 5559 /* 5560 * Search for the next unresolved 5561 * multirt route. 5562 */ 5563 first_mp = copy_mp; 5564 copy_mp = NULL; 5565 mp = first_mp; 5566 if (mp->b_datap->db_type == M_CTL) { 5567 mp = mp->b_cont; 5568 } 5569 ASSERT(sire != NULL); 5570 dst = save_dst; 5571 /* 5572 * re-enter the loop 5573 */ 5574 multirt_resolve_next = B_TRUE; 5575 continue; 5576 } 5577 if (sire != NULL) 5578 ire_refrele(sire); 5579 ill_refrele(dst_ill); 5580 ipif_refrele(src_ipif); 5581 return; 5582 default: 5583 /* Some transient error */ 5584 ASSERT(ire->ire_nce == NULL); 5585 ire_refrele(save_ire); 5586 break; 5587 } 5588 break; 5589 default: 5590 break; 5591 } 5592 if (ip6_asp_table_held) { 5593 ip6_asp_table_refrele(ipst); 5594 ip6_asp_table_held = B_FALSE; 5595 } 5596 } while (multirt_resolve_next); 5597 5598 err_ret: 5599 ip1dbg(("ip_newroute_v6: dropped\n")); 5600 if (src_ipif != NULL) 5601 ipif_refrele(src_ipif); 5602 if (dst_ill != NULL) { 5603 need_rele = B_TRUE; 5604 ill = dst_ill; 5605 } 5606 if (ill != NULL) { 5607 if (mp->b_prev != NULL) { 5608 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5609 } else { 5610 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5611 } 5612 5613 if (need_rele) 5614 ill_refrele(ill); 5615 } else { 5616 if (mp->b_prev != NULL) { 5617 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5618 } else { 5619 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5620 } 5621 } 5622 /* Did this packet originate externally? 
*/ 5623 if (mp->b_prev) { 5624 mp->b_next = NULL; 5625 mp->b_prev = NULL; 5626 } 5627 if (copy_mp != NULL) { 5628 MULTIRT_DEBUG_UNTAG(copy_mp); 5629 freemsg(copy_mp); 5630 } 5631 MULTIRT_DEBUG_UNTAG(first_mp); 5632 freemsg(first_mp); 5633 if (ire != NULL) 5634 ire_refrele(ire); 5635 if (sire != NULL) 5636 ire_refrele(sire); 5637 return; 5638 5639 icmp_err_ret: 5640 if (ip6_asp_table_held) 5641 ip6_asp_table_refrele(ipst); 5642 if (src_ipif != NULL) 5643 ipif_refrele(src_ipif); 5644 if (dst_ill != NULL) { 5645 need_rele = B_TRUE; 5646 ill = dst_ill; 5647 } 5648 ip1dbg(("ip_newroute_v6: no route\n")); 5649 if (sire != NULL) 5650 ire_refrele(sire); 5651 /* 5652 * We need to set sire to NULL to avoid double freeing if we 5653 * ever goto err_ret from below. 5654 */ 5655 sire = NULL; 5656 ip6h = (ip6_t *)mp->b_rptr; 5657 /* Skip ip6i_t header if present */ 5658 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5659 /* Make sure the IPv6 header is present */ 5660 if ((mp->b_wptr - (uchar_t *)ip6h) < 5661 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5662 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5663 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5664 goto err_ret; 5665 } 5666 } 5667 mp->b_rptr += sizeof (ip6i_t); 5668 ip6h = (ip6_t *)mp->b_rptr; 5669 } 5670 /* Did this packet originate externally? 
*/ 5671 if (mp->b_prev) { 5672 if (ill != NULL) { 5673 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5674 } else { 5675 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5676 } 5677 mp->b_next = NULL; 5678 mp->b_prev = NULL; 5679 q = WR(q); 5680 } else { 5681 if (ill != NULL) { 5682 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5683 } else { 5684 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5685 } 5686 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5687 /* Failed */ 5688 if (copy_mp != NULL) { 5689 MULTIRT_DEBUG_UNTAG(copy_mp); 5690 freemsg(copy_mp); 5691 } 5692 MULTIRT_DEBUG_UNTAG(first_mp); 5693 freemsg(first_mp); 5694 if (ire != NULL) 5695 ire_refrele(ire); 5696 if (need_rele) 5697 ill_refrele(ill); 5698 return; 5699 } 5700 } 5701 5702 if (need_rele) 5703 ill_refrele(ill); 5704 5705 /* 5706 * At this point we will have ire only if RTF_BLACKHOLE 5707 * or RTF_REJECT flags are set on the IRE. It will not 5708 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5709 */ 5710 if (ire != NULL) { 5711 if (ire->ire_flags & RTF_BLACKHOLE) { 5712 ire_refrele(ire); 5713 if (copy_mp != NULL) { 5714 MULTIRT_DEBUG_UNTAG(copy_mp); 5715 freemsg(copy_mp); 5716 } 5717 MULTIRT_DEBUG_UNTAG(first_mp); 5718 freemsg(first_mp); 5719 return; 5720 } 5721 ire_refrele(ire); 5722 } 5723 if (ip_debug > 3) { 5724 /* ip2dbg */ 5725 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5726 AF_INET6, v6dstp); 5727 } 5728 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5729 B_FALSE, B_FALSE, zoneid, ipst); 5730 } 5731 5732 /* 5733 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5734 * we need to send out a packet to a destination address for which we do not 5735 * have specific routing information. It is only used for multicast packets. 5736 * 5737 * If unspec_src we allow creating an IRE with source address zero. 5738 * ire_send_v6() will delete it after the packet is sent. 
5739 */ 5740 void 5741 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5742 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5743 { 5744 ire_t *ire = NULL; 5745 ipif_t *src_ipif = NULL; 5746 int err = 0; 5747 ill_t *dst_ill = NULL; 5748 ire_t *save_ire; 5749 ushort_t ire_marks = 0; 5750 ipsec_out_t *io; 5751 ill_t *attach_ill = NULL; 5752 ill_t *ill; 5753 ip6_t *ip6h; 5754 mblk_t *first_mp; 5755 boolean_t ip6i_present; 5756 ire_t *fire = NULL; 5757 mblk_t *copy_mp = NULL; 5758 boolean_t multirt_resolve_next; 5759 in6_addr_t *v6dstp = &v6dst; 5760 boolean_t ipif_held = B_FALSE; 5761 boolean_t ill_held = B_FALSE; 5762 boolean_t ip6_asp_table_held = B_FALSE; 5763 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5764 5765 /* 5766 * This loop is run only once in most cases. 5767 * We loop to resolve further routes only when the destination 5768 * can be reached through multiple RTF_MULTIRT-flagged ires. 5769 */ 5770 do { 5771 multirt_resolve_next = B_FALSE; 5772 if (dst_ill != NULL) { 5773 ill_refrele(dst_ill); 5774 dst_ill = NULL; 5775 } 5776 5777 if (src_ipif != NULL) { 5778 ipif_refrele(src_ipif); 5779 src_ipif = NULL; 5780 } 5781 ASSERT(ipif != NULL); 5782 ill = ipif->ipif_ill; 5783 5784 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5785 if (ip_debug > 2) { 5786 /* ip1dbg */ 5787 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5788 AF_INET6, v6dstp); 5789 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5790 ill->ill_name, ipif->ipif_isv6); 5791 } 5792 5793 first_mp = mp; 5794 if (mp->b_datap->db_type == M_CTL) { 5795 mp = mp->b_cont; 5796 io = (ipsec_out_t *)first_mp->b_rptr; 5797 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5798 } else { 5799 io = NULL; 5800 } 5801 5802 /* 5803 * If the interface is a pt-pt interface we look for an 5804 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5805 * local_address and the pt-pt destination address. 5806 * Otherwise we just match the local address. 
5807 */ 5808 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5809 goto err_ret; 5810 } 5811 /* 5812 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5813 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5814 * as it could be NULL. 5815 * 5816 * This information can appear either in an ip6i_t or an 5817 * IPSEC_OUT message. 5818 */ 5819 ip6h = (ip6_t *)mp->b_rptr; 5820 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5821 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5822 if (!ip6i_present || 5823 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5824 attach_ill = ip_grab_attach_ill(ill, first_mp, 5825 (ip6i_present ? 5826 ((ip6i_t *)ip6h)->ip6i_ifindex : 5827 io->ipsec_out_ill_index), B_TRUE, ipst); 5828 /* Failure case frees things for us. */ 5829 if (attach_ill == NULL) 5830 return; 5831 5832 /* 5833 * Check if we need an ire that will not be 5834 * looked up by anybody else i.e. HIDDEN. 5835 */ 5836 if (ill_is_probeonly(attach_ill)) 5837 ire_marks = IRE_MARK_HIDDEN; 5838 } 5839 } 5840 5841 /* 5842 * We check if an IRE_OFFSUBNET for the addr that goes through 5843 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5844 * RTF_MULTIRT flags must be honored. 5845 */ 5846 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5847 ip2dbg(("ip_newroute_ipif_v6: " 5848 "ipif_lookup_multi_ire_v6(" 5849 "ipif %p, dst %08x) = fire %p\n", 5850 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5851 (void *)fire)); 5852 5853 /* 5854 * If the application specified the ill (ifindex), we still 5855 * load spread. Only if the packets needs to go out specifically 5856 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5857 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5858 * multirouting, then we don't try to use a different ill for 5859 * load spreading. 
5860 */ 5861 if (attach_ill == NULL) { 5862 /* 5863 * If the interface belongs to an interface group, 5864 * make sure the next possible interface in the group 5865 * is used. This encourages load spreading among peers 5866 * in an interface group. 5867 * 5868 * Note: While we pick a dst_ill we are really only 5869 * interested in the ill for load spreading. The source 5870 * ipif is determined by source address selection below. 5871 */ 5872 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5873 dst_ill = ipif->ipif_ill; 5874 /* For uniformity do a refhold */ 5875 ill_refhold(dst_ill); 5876 } else { 5877 /* refheld by ip_newroute_get_dst_ill_v6 */ 5878 dst_ill = 5879 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5880 } 5881 if (dst_ill == NULL) { 5882 if (ip_debug > 2) { 5883 pr_addr_dbg("ip_newroute_ipif_v6: " 5884 "no dst ill for dst %s\n", 5885 AF_INET6, v6dstp); 5886 } 5887 goto err_ret; 5888 } 5889 } else { 5890 dst_ill = ipif->ipif_ill; 5891 /* 5892 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5893 * and IPV6_BOUND_PIF case. 5894 */ 5895 ASSERT(dst_ill == attach_ill); 5896 /* attach_ill is already refheld */ 5897 } 5898 /* 5899 * Pick a source address which matches the scope of the 5900 * destination address. 5901 * For RTF_SETSRC routes, the source address is imposed by the 5902 * parent ire (fire). 5903 */ 5904 ASSERT(src_ipif == NULL); 5905 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5906 /* 5907 * Check that the ipif matching the requested source 5908 * address still exists. 
5909 */ 5910 src_ipif = 5911 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5912 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5913 } 5914 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5915 ip6_asp_table_held = B_TRUE; 5916 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5917 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5918 } 5919 5920 if (src_ipif == NULL) { 5921 if (!unspec_src) { 5922 if (ip_debug > 2) { 5923 /* ip1dbg */ 5924 pr_addr_dbg("ip_newroute_ipif_v6: " 5925 "no src for dst %s\n,", 5926 AF_INET6, v6dstp); 5927 printf(" through interface %s\n", 5928 dst_ill->ill_name); 5929 } 5930 goto err_ret; 5931 } 5932 src_ipif = ipif; 5933 ipif_refhold(src_ipif); 5934 } 5935 ire = ipif_to_ire_v6(ipif); 5936 if (ire == NULL) { 5937 if (ip_debug > 2) { 5938 /* ip1dbg */ 5939 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5940 AF_INET6, &ipif->ipif_v6lcl_addr); 5941 printf("ip_newroute_ipif_v6: " 5942 "if %s\n", dst_ill->ill_name); 5943 } 5944 goto err_ret; 5945 } 5946 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5947 goto err_ret; 5948 5949 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5950 5951 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5952 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5953 if (ip_debug > 2) { 5954 /* ip1dbg */ 5955 pr_addr_dbg(" address %s\n", 5956 AF_INET6, &ire->ire_src_addr_v6); 5957 } 5958 save_ire = ire; 5959 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5960 (void *)ire, (void *)ipif)); 5961 5962 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5963 /* 5964 * an IRE_OFFSUBET was looked up 5965 * on that interface. 5966 * this ire has RTF_MULTIRT flag, 5967 * so the resolution loop 5968 * will be re-entered to resolve 5969 * additional routes on other 5970 * interfaces. For that purpose, 5971 * a copy of the packet is 5972 * made at this point. 
5973 */ 5974 fire->ire_last_used_time = lbolt; 5975 copy_mp = copymsg(first_mp); 5976 if (copy_mp) { 5977 MULTIRT_DEBUG_TAG(copy_mp); 5978 } 5979 } 5980 5981 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5982 switch (ire->ire_type) { 5983 case IRE_IF_NORESOLVER: { 5984 /* We have what we need to build an IRE_CACHE. */ 5985 mblk_t *dlureq_mp; 5986 5987 /* 5988 * Create a new dlureq_mp with the 5989 * IPv6 gateway address in destination address in the 5990 * DLPI hdr if the physical length is exactly 16 bytes. 5991 */ 5992 ASSERT(dst_ill->ill_isv6); 5993 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5994 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5995 dst_ill->ill_phys_addr_length, 5996 dst_ill->ill_sap, 5997 dst_ill->ill_sap_length); 5998 } else { 5999 /* 6000 * handle the Gated case, where we create 6001 * a NORESOLVER route for loopback. 6002 */ 6003 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 6004 break; 6005 dlureq_mp = ill_dlur_gen(NULL, 6006 dst_ill->ill_phys_addr_length, 6007 dst_ill->ill_sap, 6008 dst_ill->ill_sap_length); 6009 } 6010 6011 if (dlureq_mp == NULL) 6012 break; 6013 /* 6014 * The newly created ire will inherit the flags of the 6015 * parent ire, if any. 6016 */ 6017 ire = ire_create_v6( 6018 v6dstp, /* dest address */ 6019 &ipv6_all_ones, /* mask */ 6020 &src_ipif->ipif_v6src_addr, /* source address */ 6021 NULL, /* gateway address */ 6022 &save_ire->ire_max_frag, 6023 NULL, /* Fast Path header */ 6024 dst_ill->ill_rq, /* recv-from queue */ 6025 dst_ill->ill_wq, /* send-to queue */ 6026 IRE_CACHE, 6027 dlureq_mp, 6028 src_ipif, 6029 NULL, 6030 (fire != NULL) ? /* Parent handle */ 6031 fire->ire_phandle : 0, 6032 save_ire->ire_ihandle, /* Interface handle */ 6033 (fire != NULL) ? 
6034 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6035 0, 6036 &ire_uinfo_null, 6037 NULL, 6038 NULL, 6039 ipst); 6040 6041 freeb(dlureq_mp); 6042 6043 if (ire == NULL) { 6044 ire_refrele(save_ire); 6045 break; 6046 } 6047 6048 ire->ire_marks |= ire_marks; 6049 6050 err = ndp_noresolver(dst_ill, v6dstp); 6051 if (err != 0) { 6052 ire_refrele(save_ire); 6053 break; 6054 } 6055 6056 /* Prevent save_ire from getting deleted */ 6057 IRB_REFHOLD(save_ire->ire_bucket); 6058 /* Has it been removed already ? */ 6059 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6060 IRB_REFRELE(save_ire->ire_bucket); 6061 ire_refrele(save_ire); 6062 break; 6063 } 6064 6065 ire_add_then_send(q, ire, first_mp); 6066 if (ip6_asp_table_held) { 6067 ip6_asp_table_refrele(ipst); 6068 ip6_asp_table_held = B_FALSE; 6069 } 6070 6071 /* Assert that it is not deleted yet. */ 6072 ASSERT(save_ire->ire_ptpn != NULL); 6073 IRB_REFRELE(save_ire->ire_bucket); 6074 ire_refrele(save_ire); 6075 if (fire != NULL) { 6076 ire_refrele(fire); 6077 fire = NULL; 6078 } 6079 6080 /* 6081 * The resolution loop is re-entered if we 6082 * actually are in a multirouting case. 6083 */ 6084 if (copy_mp != NULL) { 6085 boolean_t need_resolve = 6086 ire_multirt_need_resolve_v6(v6dstp, 6087 MBLK_GETLABEL(copy_mp), ipst); 6088 if (!need_resolve) { 6089 MULTIRT_DEBUG_UNTAG(copy_mp); 6090 freemsg(copy_mp); 6091 copy_mp = NULL; 6092 } else { 6093 /* 6094 * ipif_lookup_group_v6() calls 6095 * ire_lookup_multi_v6() that uses 6096 * ire_ftable_lookup_v6() to find 6097 * an IRE_INTERFACE for the group. 6098 * In the multirt case, 6099 * ire_lookup_multi_v6() then invokes 6100 * ire_multirt_lookup_v6() to find 6101 * the next resolvable ire. 6102 * As a result, we obtain a new 6103 * interface, derived from the 6104 * next ire. 
6105 */ 6106 if (ipif_held) { 6107 ipif_refrele(ipif); 6108 ipif_held = B_FALSE; 6109 } 6110 ipif = ipif_lookup_group_v6(v6dstp, 6111 zoneid, ipst); 6112 ip2dbg(("ip_newroute_ipif: " 6113 "multirt dst %08x, ipif %p\n", 6114 ntohl(V4_PART_OF_V6((*v6dstp))), 6115 (void *)ipif)); 6116 if (ipif != NULL) { 6117 ipif_held = B_TRUE; 6118 mp = copy_mp; 6119 copy_mp = NULL; 6120 multirt_resolve_next = 6121 B_TRUE; 6122 continue; 6123 } else { 6124 freemsg(copy_mp); 6125 } 6126 } 6127 } 6128 ill_refrele(dst_ill); 6129 if (ipif_held) { 6130 ipif_refrele(ipif); 6131 ipif_held = B_FALSE; 6132 } 6133 if (src_ipif != NULL) 6134 ipif_refrele(src_ipif); 6135 return; 6136 } 6137 case IRE_IF_RESOLVER: { 6138 6139 ASSERT(dst_ill->ill_isv6); 6140 6141 /* 6142 * We obtain a partial IRE_CACHE which we will pass 6143 * along with the resolver query. When the response 6144 * comes back it will be there ready for us to add. 6145 */ 6146 /* 6147 * the newly created ire will inherit the flags of the 6148 * parent ire, if any. 6149 */ 6150 ire = ire_create_v6( 6151 v6dstp, /* dest address */ 6152 &ipv6_all_ones, /* mask */ 6153 &src_ipif->ipif_v6src_addr, /* source address */ 6154 NULL, /* gateway address */ 6155 &save_ire->ire_max_frag, 6156 NULL, /* Fast Path header */ 6157 dst_ill->ill_rq, /* recv-from queue */ 6158 dst_ill->ill_wq, /* send-to queue */ 6159 IRE_CACHE, 6160 NULL, 6161 src_ipif, 6162 NULL, 6163 (fire != NULL) ? /* Parent handle */ 6164 fire->ire_phandle : 0, 6165 save_ire->ire_ihandle, /* Interface handle */ 6166 (fire != NULL) ? 
6167 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6168 0, 6169 &ire_uinfo_null, 6170 NULL, 6171 NULL, 6172 ipst); 6173 6174 if (ire == NULL) { 6175 ire_refrele(save_ire); 6176 break; 6177 } 6178 6179 ire->ire_marks |= ire_marks; 6180 6181 /* Resolve and add ire to the ctable */ 6182 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6183 switch (err) { 6184 case 0: 6185 /* Prevent save_ire from getting deleted */ 6186 IRB_REFHOLD(save_ire->ire_bucket); 6187 /* Has it been removed already ? */ 6188 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6189 IRB_REFRELE(save_ire->ire_bucket); 6190 ire_refrele(save_ire); 6191 break; 6192 } 6193 /* 6194 * We have a resolved cache entry, 6195 * add in the IRE. 6196 */ 6197 ire_add_then_send(q, ire, first_mp); 6198 if (ip6_asp_table_held) { 6199 ip6_asp_table_refrele(ipst); 6200 ip6_asp_table_held = B_FALSE; 6201 } 6202 6203 /* Assert that it is not deleted yet. */ 6204 ASSERT(save_ire->ire_ptpn != NULL); 6205 IRB_REFRELE(save_ire->ire_bucket); 6206 ire_refrele(save_ire); 6207 if (fire != NULL) { 6208 ire_refrele(fire); 6209 fire = NULL; 6210 } 6211 6212 /* 6213 * The resolution loop is re-entered if we 6214 * actually are in a multirouting case. 6215 */ 6216 if (copy_mp != NULL) { 6217 boolean_t need_resolve = 6218 ire_multirt_need_resolve_v6(v6dstp, 6219 MBLK_GETLABEL(copy_mp), ipst); 6220 if (!need_resolve) { 6221 MULTIRT_DEBUG_UNTAG(copy_mp); 6222 freemsg(copy_mp); 6223 copy_mp = NULL; 6224 } else { 6225 /* 6226 * ipif_lookup_group_v6() calls 6227 * ire_lookup_multi_v6() that 6228 * uses ire_ftable_lookup_v6() 6229 * to find an IRE_INTERFACE for 6230 * the group. In the multirt 6231 * case, ire_lookup_multi_v6() 6232 * then invokes 6233 * ire_multirt_lookup_v6() to 6234 * find the next resolvable ire. 6235 * As a result, we obtain a new 6236 * interface, derived from the 6237 * next ire. 
6238 */ 6239 if (ipif_held) { 6240 ipif_refrele(ipif); 6241 ipif_held = B_FALSE; 6242 } 6243 ipif = ipif_lookup_group_v6( 6244 v6dstp, zoneid, ipst); 6245 ip2dbg(("ip_newroute_ipif: " 6246 "multirt dst %08x, " 6247 "ipif %p\n", 6248 ntohl(V4_PART_OF_V6( 6249 (*v6dstp))), 6250 (void *)ipif)); 6251 if (ipif != NULL) { 6252 ipif_held = B_TRUE; 6253 mp = copy_mp; 6254 copy_mp = NULL; 6255 multirt_resolve_next = 6256 B_TRUE; 6257 continue; 6258 } else { 6259 freemsg(copy_mp); 6260 } 6261 } 6262 } 6263 ill_refrele(dst_ill); 6264 if (ipif_held) { 6265 ipif_refrele(ipif); 6266 ipif_held = B_FALSE; 6267 } 6268 if (src_ipif != NULL) 6269 ipif_refrele(src_ipif); 6270 return; 6271 6272 case EINPROGRESS: 6273 /* 6274 * mp was consumed - presumably queued. 6275 * No need for ire, presumably resolution is 6276 * in progress, and ire will be added when the 6277 * address is resolved. 6278 */ 6279 if (ip6_asp_table_held) { 6280 ip6_asp_table_refrele(ipst); 6281 ip6_asp_table_held = B_FALSE; 6282 } 6283 ire_delete(ire); 6284 ire_refrele(save_ire); 6285 if (fire != NULL) { 6286 ire_refrele(fire); 6287 fire = NULL; 6288 } 6289 6290 /* 6291 * The resolution loop is re-entered if we 6292 * actually are in a multirouting case. 6293 */ 6294 if (copy_mp != NULL) { 6295 boolean_t need_resolve = 6296 ire_multirt_need_resolve_v6(v6dstp, 6297 MBLK_GETLABEL(copy_mp), ipst); 6298 if (!need_resolve) { 6299 MULTIRT_DEBUG_UNTAG(copy_mp); 6300 freemsg(copy_mp); 6301 copy_mp = NULL; 6302 } else { 6303 /* 6304 * ipif_lookup_group_v6() calls 6305 * ire_lookup_multi_v6() that 6306 * uses ire_ftable_lookup_v6() 6307 * to find an IRE_INTERFACE for 6308 * the group. In the multirt 6309 * case, ire_lookup_multi_v6() 6310 * then invokes 6311 * ire_multirt_lookup_v6() to 6312 * find the next resolvable ire. 6313 * As a result, we obtain a new 6314 * interface, derived from the 6315 * next ire. 
6316 */ 6317 if (ipif_held) { 6318 ipif_refrele(ipif); 6319 ipif_held = B_FALSE; 6320 } 6321 ipif = ipif_lookup_group_v6( 6322 v6dstp, zoneid, ipst); 6323 ip2dbg(("ip_newroute_ipif: " 6324 "multirt dst %08x, " 6325 "ipif %p\n", 6326 ntohl(V4_PART_OF_V6( 6327 (*v6dstp))), 6328 (void *)ipif)); 6329 if (ipif != NULL) { 6330 ipif_held = B_TRUE; 6331 mp = copy_mp; 6332 copy_mp = NULL; 6333 multirt_resolve_next = 6334 B_TRUE; 6335 continue; 6336 } else { 6337 freemsg(copy_mp); 6338 } 6339 } 6340 } 6341 ill_refrele(dst_ill); 6342 if (ipif_held) { 6343 ipif_refrele(ipif); 6344 ipif_held = B_FALSE; 6345 } 6346 if (src_ipif != NULL) 6347 ipif_refrele(src_ipif); 6348 return; 6349 default: 6350 /* Some transient error */ 6351 ire_refrele(save_ire); 6352 break; 6353 } 6354 break; 6355 } 6356 default: 6357 break; 6358 } 6359 if (ip6_asp_table_held) { 6360 ip6_asp_table_refrele(ipst); 6361 ip6_asp_table_held = B_FALSE; 6362 } 6363 } while (multirt_resolve_next); 6364 6365 err_ret: 6366 if (ip6_asp_table_held) 6367 ip6_asp_table_refrele(ipst); 6368 if (ire != NULL) 6369 ire_refrele(ire); 6370 if (fire != NULL) 6371 ire_refrele(fire); 6372 if (ipif != NULL && ipif_held) 6373 ipif_refrele(ipif); 6374 if (src_ipif != NULL) 6375 ipif_refrele(src_ipif); 6376 /* Multicast - no point in trying to generate ICMP error */ 6377 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6378 if (dst_ill != NULL) { 6379 ill = dst_ill; 6380 ill_held = B_TRUE; 6381 } 6382 if (mp->b_prev || mp->b_next) { 6383 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6384 } else { 6385 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6386 } 6387 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6388 mp->b_next = NULL; 6389 mp->b_prev = NULL; 6390 freemsg(first_mp); 6391 if (ill_held) 6392 ill_refrele(ill); 6393 } 6394 6395 /* 6396 * Parse and process any hop-by-hop or destination options. 
6397 * 6398 * Assumes that q is an ill read queue so that ICMP errors for link-local 6399 * destinations are sent out the correct interface. 6400 * 6401 * Returns -1 if there was an error and mp has been consumed. 6402 * Returns 0 if no special action is needed. 6403 * Returns 1 if the packet contained a router alert option for this node 6404 * which is verified to be "interesting/known" for our implementation. 6405 * 6406 * XXX Note: In future as more hbh or dest options are defined, 6407 * it may be better to have different routines for hbh and dest 6408 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6409 * may have same value in different namespaces. Or is it same namespace ?? 6410 * Current code checks for each opt_type (other than pads) if it is in 6411 * the expected nexthdr (hbh or dest) 6412 */ 6413 static int 6414 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6415 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6416 { 6417 uint8_t opt_type; 6418 uint_t optused; 6419 int ret = 0; 6420 mblk_t *first_mp; 6421 const char *errtype; 6422 zoneid_t zoneid; 6423 ill_t *ill = q->q_ptr; 6424 6425 first_mp = mp; 6426 if (mp->b_datap->db_type == M_CTL) { 6427 mp = mp->b_cont; 6428 } 6429 6430 while (optlen != 0) { 6431 opt_type = *optptr; 6432 if (opt_type == IP6OPT_PAD1) { 6433 optused = 1; 6434 } else { 6435 if (optlen < 2) 6436 goto bad_opt; 6437 errtype = "malformed"; 6438 if (opt_type == ip6opt_ls) { 6439 optused = 2 + optptr[1]; 6440 if (optused > optlen) 6441 goto bad_opt; 6442 } else switch (opt_type) { 6443 case IP6OPT_PADN: 6444 /* 6445 * Note:We don't verify that (N-2) pad octets 6446 * are zero as required by spec. Adhere to 6447 * "be liberal in what you accept..." 
part of 6448 * implementation philosophy (RFC791,RFC1122) 6449 */ 6450 optused = 2 + optptr[1]; 6451 if (optused > optlen) 6452 goto bad_opt; 6453 break; 6454 6455 case IP6OPT_JUMBO: 6456 if (hdr_type != IPPROTO_HOPOPTS) 6457 goto opt_error; 6458 goto opt_error; /* XXX Not implemented! */ 6459 6460 case IP6OPT_ROUTER_ALERT: { 6461 struct ip6_opt_router *or; 6462 6463 if (hdr_type != IPPROTO_HOPOPTS) 6464 goto opt_error; 6465 optused = 2 + optptr[1]; 6466 if (optused > optlen) 6467 goto bad_opt; 6468 or = (struct ip6_opt_router *)optptr; 6469 /* Check total length and alignment */ 6470 if (optused != sizeof (*or) || 6471 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6472 goto opt_error; 6473 /* Check value */ 6474 switch (*((uint16_t *)or->ip6or_value)) { 6475 case IP6_ALERT_MLD: 6476 case IP6_ALERT_RSVP: 6477 ret = 1; 6478 } 6479 break; 6480 } 6481 case IP6OPT_HOME_ADDRESS: { 6482 /* 6483 * Minimal support for the home address option 6484 * (which is required by all IPv6 nodes). 6485 * Implement by just swapping the home address 6486 * and source address. 6487 * XXX Note: this has IPsec implications since 6488 * AH needs to take this into account. 6489 * Also, when IPsec is used we need to ensure 6490 * that this is only processed once 6491 * in the received packet (to avoid swapping 6492 * back and forth). 6493 * NOTE:This option processing is considered 6494 * to be unsafe and prone to a denial of 6495 * service attack. 6496 * The current processing is not safe even with 6497 * IPsec secured IP packets. Since the home 6498 * address option processing requirement still 6499 * is in the IETF draft and in the process of 6500 * being redefined for its usage, it has been 6501 * decided to turn off the option by default. 6502 * If this section of code needs to be executed, 6503 * ndd variable ip6_ignore_home_address_opt 6504 * should be set to 0 at the user's own risk. 
6505 */ 6506 struct ip6_opt_home_address *oh; 6507 in6_addr_t tmp; 6508 6509 if (ipst->ips_ipv6_ignore_home_address_opt) 6510 goto opt_error; 6511 6512 if (hdr_type != IPPROTO_DSTOPTS) 6513 goto opt_error; 6514 optused = 2 + optptr[1]; 6515 if (optused > optlen) 6516 goto bad_opt; 6517 6518 /* 6519 * We did this dest. opt the first time 6520 * around (i.e. before AH processing). 6521 * If we've done AH... stop now. 6522 */ 6523 if (first_mp != mp) { 6524 ipsec_in_t *ii; 6525 6526 ii = (ipsec_in_t *)first_mp->b_rptr; 6527 if (ii->ipsec_in_ah_sa != NULL) 6528 break; 6529 } 6530 6531 oh = (struct ip6_opt_home_address *)optptr; 6532 /* Check total length and alignment */ 6533 if (optused < sizeof (*oh) || 6534 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6535 goto opt_error; 6536 /* Swap ip6_src and the home address */ 6537 tmp = ip6h->ip6_src; 6538 /* XXX Note: only 8 byte alignment option */ 6539 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6540 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6541 break; 6542 } 6543 6544 case IP6OPT_TUNNEL_LIMIT: 6545 if (hdr_type != IPPROTO_DSTOPTS) { 6546 goto opt_error; 6547 } 6548 optused = 2 + optptr[1]; 6549 if (optused > optlen) { 6550 goto bad_opt; 6551 } 6552 if (optused != 3) { 6553 goto opt_error; 6554 } 6555 break; 6556 6557 default: 6558 errtype = "unknown"; 6559 /* FALLTHROUGH */ 6560 opt_error: 6561 /* Determine which zone should send error */ 6562 zoneid = ipif_lookup_addr_zoneid_v6( 6563 &ip6h->ip6_dst, ill, ipst); 6564 switch (IP6OPT_TYPE(opt_type)) { 6565 case IP6OPT_TYPE_SKIP: 6566 optused = 2 + optptr[1]; 6567 if (optused > optlen) 6568 goto bad_opt; 6569 ip1dbg(("ip_process_options_v6: %s " 6570 "opt 0x%x skipped\n", 6571 errtype, opt_type)); 6572 break; 6573 case IP6OPT_TYPE_DISCARD: 6574 ip1dbg(("ip_process_options_v6: %s " 6575 "opt 0x%x; packet dropped\n", 6576 errtype, opt_type)); 6577 freemsg(first_mp); 6578 return (-1); 6579 case IP6OPT_TYPE_ICMP: 6580 if (zoneid == ALL_ZONES) { 6581 freemsg(first_mp); 6582 return 
(-1); 6583 } 6584 icmp_param_problem_v6(WR(q), first_mp, 6585 ICMP6_PARAMPROB_OPTION, 6586 (uint32_t)(optptr - 6587 (uint8_t *)ip6h), 6588 B_FALSE, B_FALSE, zoneid, ipst); 6589 return (-1); 6590 case IP6OPT_TYPE_FORCEICMP: 6591 if (zoneid == ALL_ZONES) { 6592 freemsg(first_mp); 6593 return (-1); 6594 } 6595 icmp_param_problem_v6(WR(q), first_mp, 6596 ICMP6_PARAMPROB_OPTION, 6597 (uint32_t)(optptr - 6598 (uint8_t *)ip6h), 6599 B_FALSE, B_TRUE, zoneid, ipst); 6600 return (-1); 6601 default: 6602 ASSERT(0); 6603 } 6604 } 6605 } 6606 optlen -= optused; 6607 optptr += optused; 6608 } 6609 return (ret); 6610 6611 bad_opt: 6612 /* Determine which zone should send error */ 6613 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6614 if (zoneid == ALL_ZONES) { 6615 freemsg(first_mp); 6616 } else { 6617 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6618 (uint32_t)(optptr - (uint8_t *)ip6h), 6619 B_FALSE, B_FALSE, zoneid, ipst); 6620 } 6621 return (-1); 6622 } 6623 6624 /* 6625 * Process a routing header that is not yet empty. 6626 * Only handles type 0 routing headers. 6627 */ 6628 static void 6629 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6630 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6631 { 6632 ip6_rthdr0_t *rthdr; 6633 uint_t ehdrlen; 6634 uint_t numaddr; 6635 in6_addr_t *addrptr; 6636 in6_addr_t tmp; 6637 ip_stack_t *ipst = ill->ill_ipst; 6638 6639 ASSERT(rth->ip6r_segleft != 0); 6640 6641 if (!ipst->ips_ipv6_forward_src_routed) { 6642 /* XXX Check for source routed out same interface? 
*/ 6643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6645 freemsg(hada_mp); 6646 freemsg(mp); 6647 return; 6648 } 6649 6650 if (rth->ip6r_type != 0) { 6651 if (hada_mp != NULL) 6652 goto hada_drop; 6653 /* Sent by forwarding path, and router is global zone */ 6654 icmp_param_problem_v6(WR(q), mp, 6655 ICMP6_PARAMPROB_HEADER, 6656 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6657 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6658 return; 6659 } 6660 rthdr = (ip6_rthdr0_t *)rth; 6661 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6662 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6663 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6664 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6665 if (rthdr->ip6r0_len & 0x1) { 6666 /* An odd length is impossible */ 6667 if (hada_mp != NULL) 6668 goto hada_drop; 6669 /* Sent by forwarding path, and router is global zone */ 6670 icmp_param_problem_v6(WR(q), mp, 6671 ICMP6_PARAMPROB_HEADER, 6672 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6673 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6674 return; 6675 } 6676 numaddr = rthdr->ip6r0_len / 2; 6677 if (rthdr->ip6r0_segleft > numaddr) { 6678 /* segleft exceeds number of addresses in routing header */ 6679 if (hada_mp != NULL) 6680 goto hada_drop; 6681 /* Sent by forwarding path, and router is global zone */ 6682 icmp_param_problem_v6(WR(q), mp, 6683 ICMP6_PARAMPROB_HEADER, 6684 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6685 (uchar_t *)ip6h), 6686 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6687 return; 6688 } 6689 addrptr += (numaddr - rthdr->ip6r0_segleft); 6690 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6691 IN6_IS_ADDR_MULTICAST(addrptr)) { 6692 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6693 freemsg(hada_mp); 6694 freemsg(mp); 6695 return; 6696 } 6697 /* Swap */ 6698 tmp = *addrptr; 6699 *addrptr = ip6h->ip6_dst; 6700 ip6h->ip6_dst = tmp; 6701 rthdr->ip6r0_segleft--; 
6702 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6703 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6704 if (hada_mp != NULL) 6705 goto hada_drop; 6706 /* Sent by forwarding path, and router is global zone */ 6707 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6708 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6709 return; 6710 } 6711 if (ip_check_v6_mblk(mp, ill) == 0) { 6712 ip6h = (ip6_t *)mp->b_rptr; 6713 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6714 } 6715 return; 6716 hada_drop: 6717 /* IPsec kstats: bean counter? */ 6718 freemsg(hada_mp); 6719 freemsg(mp); 6720 } 6721 6722 /* 6723 * Read side put procedure for IPv6 module. 6724 */ 6725 void 6726 ip_rput_v6(queue_t *q, mblk_t *mp) 6727 { 6728 mblk_t *first_mp; 6729 mblk_t *hada_mp = NULL; 6730 ip6_t *ip6h; 6731 boolean_t ll_multicast = B_FALSE; 6732 boolean_t mctl_present = B_FALSE; 6733 ill_t *ill; 6734 struct iocblk *iocp; 6735 uint_t flags = 0; 6736 mblk_t *dl_mp; 6737 ip_stack_t *ipst; 6738 6739 ill = (ill_t *)q->q_ptr; 6740 ipst = ill->ill_ipst; 6741 if (ill->ill_state_flags & ILL_CONDEMNED) { 6742 union DL_primitives *dl; 6743 6744 dl = (union DL_primitives *)mp->b_rptr; 6745 /* 6746 * Things are opening or closing - only accept DLPI 6747 * ack messages. If the stream is closing and ip_wsrv 6748 * has completed, ip_close is out of the qwait, but has 6749 * not yet completed qprocsoff. Don't proceed any further 6750 * because the ill has been cleaned up and things hanging 6751 * off the ill have been freed. 6752 */ 6753 if ((mp->b_datap->db_type != M_PCPROTO) || 6754 (dl->dl_primitive == DL_UNITDATA_IND)) { 6755 inet_freemsg(mp); 6756 return; 6757 } 6758 } 6759 6760 dl_mp = NULL; 6761 switch (mp->b_datap->db_type) { 6762 case M_DATA: { 6763 int hlen; 6764 uchar_t *ucp; 6765 struct ether_header *eh; 6766 dl_unitdata_ind_t *dui; 6767 6768 /* 6769 * This is a work-around for CR 6451644, a bug in Nemo. It 6770 * should be removed when that problem is fixed. 
6771 */ 6772 if (ill->ill_mactype == DL_ETHER && 6773 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6774 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6775 ucp[-2] == (IP6_DL_SAP >> 8)) { 6776 if (hlen >= sizeof (struct ether_vlan_header) && 6777 ucp[-5] == 0 && ucp[-6] == 0x81) 6778 ucp -= sizeof (struct ether_vlan_header); 6779 else 6780 ucp -= sizeof (struct ether_header); 6781 /* 6782 * If it's a group address, then fabricate a 6783 * DL_UNITDATA_IND message. 6784 */ 6785 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6786 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6787 BPRI_HI)) != NULL) { 6788 eh = (struct ether_header *)ucp; 6789 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6790 DB_TYPE(dl_mp) = M_PROTO; 6791 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6792 dui->dl_primitive = DL_UNITDATA_IND; 6793 dui->dl_dest_addr_length = 8; 6794 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6795 dui->dl_src_addr_length = 8; 6796 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6797 8; 6798 dui->dl_group_address = 1; 6799 ucp = (uchar_t *)(dui + 1); 6800 if (ill->ill_sap_length > 0) 6801 ucp += ill->ill_sap_length; 6802 bcopy(&eh->ether_dhost, ucp, 6); 6803 bcopy(&eh->ether_shost, ucp + 8, 6); 6804 ucp = (uchar_t *)(dui + 1); 6805 if (ill->ill_sap_length < 0) 6806 ucp += 8 + ill->ill_sap_length; 6807 bcopy(&eh->ether_type, ucp, 2); 6808 bcopy(&eh->ether_type, ucp + 8, 2); 6809 } 6810 } 6811 break; 6812 } 6813 6814 case M_PROTO: 6815 case M_PCPROTO: 6816 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6817 DL_UNITDATA_IND) { 6818 /* Go handle anything other than data elsewhere. */ 6819 ip_rput_dlpi(q, mp); 6820 return; 6821 } 6822 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6823 ll_multicast = dlur->dl_group_address; 6824 #undef dlur 6825 /* Save the DLPI header. 
*/ 6826 dl_mp = mp; 6827 mp = mp->b_cont; 6828 dl_mp->b_cont = NULL; 6829 break; 6830 case M_BREAK: 6831 panic("ip_rput_v6: got an M_BREAK"); 6832 /*NOTREACHED*/ 6833 case M_IOCACK: 6834 iocp = (struct iocblk *)mp->b_rptr; 6835 switch (iocp->ioc_cmd) { 6836 case DL_IOC_HDR_INFO: 6837 ill = (ill_t *)q->q_ptr; 6838 ill_fastpath_ack(ill, mp); 6839 return; 6840 6841 case SIOCGTUNPARAM: 6842 case OSIOCGTUNPARAM: 6843 ip_rput_other(NULL, q, mp, NULL); 6844 return; 6845 6846 case SIOCSTUNPARAM: 6847 case OSIOCSTUNPARAM: 6848 /* Go through qwriter */ 6849 break; 6850 default: 6851 putnext(q, mp); 6852 return; 6853 } 6854 /* FALLTHRU */ 6855 case M_ERROR: 6856 case M_HANGUP: 6857 mutex_enter(&ill->ill_lock); 6858 if (ill->ill_state_flags & ILL_CONDEMNED) { 6859 mutex_exit(&ill->ill_lock); 6860 freemsg(mp); 6861 return; 6862 } 6863 ill_refhold_locked(ill); 6864 mutex_exit(&ill->ill_lock); 6865 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6866 return; 6867 case M_CTL: 6868 if ((MBLKL(mp) > sizeof (int)) && 6869 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6870 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6871 mctl_present = B_TRUE; 6872 break; 6873 } 6874 putnext(q, mp); 6875 return; 6876 case M_IOCNAK: 6877 iocp = (struct iocblk *)mp->b_rptr; 6878 switch (iocp->ioc_cmd) { 6879 case DL_IOC_HDR_INFO: 6880 case SIOCGTUNPARAM: 6881 case OSIOCGTUNPARAM: 6882 ip_rput_other(NULL, q, mp, NULL); 6883 return; 6884 6885 case SIOCSTUNPARAM: 6886 case OSIOCSTUNPARAM: 6887 mutex_enter(&ill->ill_lock); 6888 if (ill->ill_state_flags & ILL_CONDEMNED) { 6889 mutex_exit(&ill->ill_lock); 6890 freemsg(mp); 6891 return; 6892 } 6893 ill_refhold_locked(ill); 6894 mutex_exit(&ill->ill_lock); 6895 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6896 return; 6897 default: 6898 break; 6899 } 6900 /* FALLTHRU */ 6901 default: 6902 putnext(q, mp); 6903 return; 6904 } 6905 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6906 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6907 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6908 /* 6909 * if db_ref > 1 then copymsg and free original. Packet may be 6910 * changed and do not want other entity who has a reference to this 6911 * message to trip over the changes. This is a blind change because 6912 * trying to catch all places that might change packet is too 6913 * difficult (since it may be a module above this one). 6914 */ 6915 if (mp->b_datap->db_ref > 1) { 6916 mblk_t *mp1; 6917 6918 mp1 = copymsg(mp); 6919 freemsg(mp); 6920 if (mp1 == NULL) { 6921 first_mp = NULL; 6922 goto discard; 6923 } 6924 mp = mp1; 6925 } 6926 first_mp = mp; 6927 if (mctl_present) { 6928 hada_mp = first_mp; 6929 mp = first_mp->b_cont; 6930 } 6931 6932 if (ip_check_v6_mblk(mp, ill) == -1) 6933 return; 6934 6935 ip6h = (ip6_t *)mp->b_rptr; 6936 6937 DTRACE_PROBE4(ip6__physical__in__start, 6938 ill_t *, ill, ill_t *, NULL, 6939 ip6_t *, ip6h, mblk_t *, first_mp); 6940 6941 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6942 ipst->ips_ipv6firewall_physical_in, 6943 ill, NULL, ip6h, first_mp, mp, ipst); 6944 6945 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6946 6947 if (first_mp == NULL) 6948 return; 6949 6950 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6951 IPV6_DEFAULT_VERS_AND_FLOW) { 6952 /* 6953 * It may be a bit too expensive to do this mapped address 6954 * check here, but in the interest of robustness, it seems 6955 * like the correct place. 6956 * TODO: Avoid this check for e.g. connected TCP sockets 6957 */ 6958 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6959 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6960 goto discard; 6961 } 6962 6963 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6964 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6965 goto discard; 6966 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6967 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6968 goto discard; 6969 } 6970 6971 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6972 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6973 } else { 6974 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6975 goto discard; 6976 } 6977 freemsg(dl_mp); 6978 return; 6979 6980 discard: 6981 if (dl_mp != NULL) 6982 freeb(dl_mp); 6983 freemsg(first_mp); 6984 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6985 } 6986 6987 /* 6988 * Walk through the IPv6 packet in mp and see if there's an AH header 6989 * in it. See if the AH header needs to get done before other headers in 6990 * the packet. (Worker function for ipsec_early_ah_v6().) 6991 */ 6992 #define IPSEC_HDR_DONT_PROCESS 0 6993 #define IPSEC_HDR_PROCESS 1 6994 #define IPSEC_MEMORY_ERROR 2 6995 static int 6996 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6997 { 6998 uint_t length; 6999 uint_t ehdrlen; 7000 uint8_t *whereptr; 7001 uint8_t *endptr; 7002 uint8_t *nexthdrp; 7003 ip6_dest_t *desthdr; 7004 ip6_rthdr_t *rthdr; 7005 ip6_t *ip6h; 7006 7007 /* 7008 * For now just pullup everything. In general, the less pullups, 7009 * the better, but there's so much squirrelling through anyway, 7010 * it's just easier this way. 7011 */ 7012 if (!pullupmsg(mp, -1)) { 7013 return (IPSEC_MEMORY_ERROR); 7014 } 7015 7016 ip6h = (ip6_t *)mp->b_rptr; 7017 length = IPV6_HDR_LEN; 7018 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 7019 endptr = mp->b_wptr; 7020 7021 /* 7022 * We can't just use the argument nexthdr in the place 7023 * of nexthdrp becaue we don't dereference nexthdrp 7024 * till we confirm whether it is a valid address. 7025 */ 7026 nexthdrp = &ip6h->ip6_nxt; 7027 while (whereptr < endptr) { 7028 /* Is there enough left for len + nexthdr? 
*/ 7029 if (whereptr + MIN_EHDR_LEN > endptr) 7030 return (IPSEC_MEMORY_ERROR); 7031 7032 switch (*nexthdrp) { 7033 case IPPROTO_HOPOPTS: 7034 case IPPROTO_DSTOPTS: 7035 /* Assumes the headers are identical for hbh and dst */ 7036 desthdr = (ip6_dest_t *)whereptr; 7037 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7038 if ((uchar_t *)desthdr + ehdrlen > endptr) 7039 return (IPSEC_MEMORY_ERROR); 7040 /* 7041 * Return DONT_PROCESS because of potential Mobile IPv6 7042 * cruft for destination options. 7043 */ 7044 if (*nexthdrp == IPPROTO_DSTOPTS) 7045 return (IPSEC_HDR_DONT_PROCESS); 7046 nexthdrp = &desthdr->ip6d_nxt; 7047 break; 7048 case IPPROTO_ROUTING: 7049 rthdr = (ip6_rthdr_t *)whereptr; 7050 7051 /* 7052 * If there's more hops left on the routing header, 7053 * return now with DON'T PROCESS. 7054 */ 7055 if (rthdr->ip6r_segleft > 0) 7056 return (IPSEC_HDR_DONT_PROCESS); 7057 7058 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7059 if ((uchar_t *)rthdr + ehdrlen > endptr) 7060 return (IPSEC_MEMORY_ERROR); 7061 nexthdrp = &rthdr->ip6r_nxt; 7062 break; 7063 case IPPROTO_FRAGMENT: 7064 /* Wait for reassembly */ 7065 return (IPSEC_HDR_DONT_PROCESS); 7066 case IPPROTO_AH: 7067 *nexthdr = IPPROTO_AH; 7068 return (IPSEC_HDR_PROCESS); 7069 case IPPROTO_NONE: 7070 /* No next header means we're finished */ 7071 default: 7072 return (IPSEC_HDR_DONT_PROCESS); 7073 } 7074 length += ehdrlen; 7075 whereptr += ehdrlen; 7076 } 7077 panic("ipsec_needs_processing_v6"); 7078 /*NOTREACHED*/ 7079 } 7080 7081 /* 7082 * Path for AH if options are present. If this is the first time we are 7083 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7084 * Otherwise, just fanout. Return value answers the boolean question: 7085 * "Did I consume the mblk you sent me?" 7086 * 7087 * Sometimes AH needs to be done before other IPv6 headers for security 7088 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7089 * indicates if that is so, and fans out to the appropriate IPsec protocol 7090 * for the datagram passed in. 7091 */ 7092 static boolean_t 7093 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7094 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7095 { 7096 mblk_t *mp; 7097 uint8_t nexthdr; 7098 ipsec_in_t *ii = NULL; 7099 ah_t *ah; 7100 ipsec_status_t ipsec_rc; 7101 ip_stack_t *ipst = ill->ill_ipst; 7102 netstack_t *ns = ipst->ips_netstack; 7103 ipsec_stack_t *ipss = ns->netstack_ipsec; 7104 7105 ASSERT((hada_mp == NULL) || (!mctl_present)); 7106 7107 switch (ipsec_needs_processing_v6( 7108 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7109 case IPSEC_MEMORY_ERROR: 7110 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7111 freemsg(hada_mp); 7112 freemsg(first_mp); 7113 return (B_TRUE); 7114 case IPSEC_HDR_DONT_PROCESS: 7115 return (B_FALSE); 7116 } 7117 7118 /* Default means send it to AH! */ 7119 ASSERT(nexthdr == IPPROTO_AH); 7120 if (!mctl_present) { 7121 mp = first_mp; 7122 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7123 if (first_mp == NULL) { 7124 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7125 "allocation failure.\n")); 7126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7127 freemsg(hada_mp); 7128 freemsg(mp); 7129 return (B_TRUE); 7130 } 7131 /* 7132 * Store the ill_index so that when we come back 7133 * from IPSEC we ride on the same queue. 7134 */ 7135 ii = (ipsec_in_t *)first_mp->b_rptr; 7136 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7137 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7138 first_mp->b_cont = mp; 7139 } 7140 /* 7141 * Cache hardware acceleration info. 
7142 */ 7143 if (hada_mp != NULL) { 7144 ASSERT(ii != NULL); 7145 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7146 "caching data attr.\n")); 7147 ii->ipsec_in_accelerated = B_TRUE; 7148 ii->ipsec_in_da = hada_mp; 7149 } 7150 7151 if (!ipsec_loaded(ipss)) { 7152 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7153 return (B_TRUE); 7154 } 7155 7156 ah = ipsec_inbound_ah_sa(first_mp, ns); 7157 if (ah == NULL) 7158 return (B_TRUE); 7159 ASSERT(ii->ipsec_in_ah_sa != NULL); 7160 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7161 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7162 7163 switch (ipsec_rc) { 7164 case IPSEC_STATUS_SUCCESS: 7165 /* we're done with IPsec processing, send it up */ 7166 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7167 break; 7168 case IPSEC_STATUS_FAILED: 7169 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7170 break; 7171 case IPSEC_STATUS_PENDING: 7172 /* no action needed */ 7173 break; 7174 } 7175 return (B_TRUE); 7176 } 7177 7178 /* 7179 * Validate the IPv6 mblk for alignment. 7180 */ 7181 int 7182 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7183 { 7184 int pkt_len, ip6_len; 7185 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7186 7187 /* check for alignment and full IPv6 header */ 7188 if (!OK_32PTR((uchar_t *)ip6h) || 7189 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7190 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7191 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7192 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7193 freemsg(mp); 7194 return (-1); 7195 } 7196 ip6h = (ip6_t *)mp->b_rptr; 7197 } 7198 7199 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7200 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7201 7202 if (mp->b_cont == NULL) 7203 pkt_len = mp->b_wptr - mp->b_rptr; 7204 else 7205 pkt_len = msgdsize(mp); 7206 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7207 7208 /* 7209 * Check for bogus (too short packet) and packet which 7210 * was padded by the link layer. 
7211 */ 7212 if (ip6_len != pkt_len) { 7213 ssize_t diff; 7214 7215 if (ip6_len > pkt_len) { 7216 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7217 ip6_len, pkt_len)); 7218 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7219 freemsg(mp); 7220 return (-1); 7221 } 7222 diff = (ssize_t)(pkt_len - ip6_len); 7223 7224 if (!adjmsg(mp, -diff)) { 7225 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7226 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7227 freemsg(mp); 7228 return (-1); 7229 } 7230 } 7231 return (0); 7232 } 7233 7234 /* 7235 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7236 * ip_rput_v6 has already verified alignment, the min length, the version, 7237 * and db_ref = 1. 7238 * 7239 * The ill passed in (the arg named inill) is the ill that the packet 7240 * actually arrived on. We need to remember this when saving the 7241 * input interface index into potential IPV6_PKTINFO data in 7242 * ip_add_info_v6(). 7243 * 7244 * This routine doesn't free dl_mp; that's the caller's responsibility on 7245 * return. (Note that the callers are complex enough that there's no tail 7246 * recursion here anyway.) 
7247 */ 7248 void 7249 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7250 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7251 { 7252 ire_t *ire = NULL; 7253 ill_t *ill = inill; 7254 ill_t *outill; 7255 ipif_t *ipif; 7256 uint8_t *whereptr; 7257 uint8_t nexthdr; 7258 uint16_t remlen; 7259 uint_t prev_nexthdr_offset; 7260 uint_t used; 7261 size_t old_pkt_len; 7262 size_t pkt_len; 7263 uint16_t ip6_len; 7264 uint_t hdr_len; 7265 boolean_t mctl_present; 7266 mblk_t *first_mp; 7267 mblk_t *first_mp1; 7268 boolean_t no_forward; 7269 ip6_hbh_t *hbhhdr; 7270 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7271 conn_t *connp; 7272 ilm_t *ilm; 7273 uint32_t ports; 7274 zoneid_t zoneid = GLOBAL_ZONEID; 7275 uint16_t hck_flags, reass_hck_flags; 7276 uint32_t reass_sum; 7277 boolean_t cksum_err; 7278 mblk_t *mp1; 7279 ip_stack_t *ipst = inill->ill_ipst; 7280 7281 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7282 7283 if (hada_mp != NULL) { 7284 /* 7285 * It's an IPsec accelerated packet. 7286 * Keep a pointer to the data attributes around until 7287 * we allocate the ipsecinfo structure. 7288 */ 7289 IPSECHW_DEBUG(IPSECHW_PKT, 7290 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7291 hada_mp->b_cont = NULL; 7292 /* 7293 * Since it is accelerated, it came directly from 7294 * the ill. 
7295 */ 7296 ASSERT(mctl_present == B_FALSE); 7297 ASSERT(mp->b_datap->db_type != M_CTL); 7298 } 7299 7300 ip6h = (ip6_t *)mp->b_rptr; 7301 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7302 old_pkt_len = pkt_len = ip6_len; 7303 7304 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7305 hck_flags = DB_CKSUMFLAGS(mp); 7306 else 7307 hck_flags = 0; 7308 7309 /* Clear checksum flags in case we need to forward */ 7310 DB_CKSUMFLAGS(mp) = 0; 7311 reass_sum = reass_hck_flags = 0; 7312 7313 nexthdr = ip6h->ip6_nxt; 7314 7315 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7316 (uchar_t *)ip6h); 7317 whereptr = (uint8_t *)&ip6h[1]; 7318 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7319 7320 /* Process hop by hop header options */ 7321 if (nexthdr == IPPROTO_HOPOPTS) { 7322 uint_t ehdrlen; 7323 uint8_t *optptr; 7324 7325 if (remlen < MIN_EHDR_LEN) 7326 goto pkt_too_short; 7327 if (mp->b_cont != NULL && 7328 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7329 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7330 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7331 freemsg(hada_mp); 7332 freemsg(first_mp); 7333 return; 7334 } 7335 ip6h = (ip6_t *)mp->b_rptr; 7336 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7337 } 7338 hbhhdr = (ip6_hbh_t *)whereptr; 7339 nexthdr = hbhhdr->ip6h_nxt; 7340 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7341 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7342 7343 if (remlen < ehdrlen) 7344 goto pkt_too_short; 7345 if (mp->b_cont != NULL && 7346 whereptr + ehdrlen > mp->b_wptr) { 7347 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7349 freemsg(hada_mp); 7350 freemsg(first_mp); 7351 return; 7352 } 7353 ip6h = (ip6_t *)mp->b_rptr; 7354 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7355 hbhhdr = (ip6_hbh_t *)whereptr; 7356 } 7357 7358 optptr = whereptr + 2; 7359 whereptr += ehdrlen; 7360 remlen -= ehdrlen; 7361 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7362 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7363 case -1: 7364 /* 7365 * Packet has been consumed and any 7366 * needed ICMP messages sent. 7367 */ 7368 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7369 freemsg(hada_mp); 7370 return; 7371 case 0: 7372 /* no action needed */ 7373 break; 7374 case 1: 7375 /* Known router alert */ 7376 goto ipv6forus; 7377 } 7378 } 7379 7380 /* 7381 * Attach any necessary label information to this packet. 7382 */ 7383 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7384 if (ip6opt_ls != 0) 7385 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7386 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7387 freemsg(hada_mp); 7388 freemsg(first_mp); 7389 return; 7390 } 7391 7392 /* 7393 * On incoming v6 multicast packets we will bypass the ire table, 7394 * and assume that the read queue corresponds to the targetted 7395 * interface. 7396 * 7397 * The effect of this is the same as the IPv4 original code, but is 7398 * much cleaner I think. See ip_rput for how that was done. 7399 */ 7400 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7401 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7402 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7403 /* 7404 * XXX TODO Give to mrouted to for multicast forwarding. 
7405 */ 7406 ILM_WALKER_HOLD(ill); 7407 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7408 ILM_WALKER_RELE(ill); 7409 if (ilm == NULL) { 7410 if (ip_debug > 3) { 7411 /* ip2dbg */ 7412 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7413 " which is not for us: %s\n", AF_INET6, 7414 &ip6h->ip6_dst); 7415 } 7416 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7417 freemsg(hada_mp); 7418 freemsg(first_mp); 7419 return; 7420 } 7421 if (ip_debug > 3) { 7422 /* ip2dbg */ 7423 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7424 AF_INET6, &ip6h->ip6_dst); 7425 } 7426 zoneid = GLOBAL_ZONEID; 7427 goto ipv6forus; 7428 } 7429 7430 ipif = ill->ill_ipif; 7431 7432 /* 7433 * If a packet was received on an interface that is a 6to4 tunnel, 7434 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7435 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7436 * the 6to4 prefix of the address configured on the receiving interface. 7437 * Otherwise, the packet was delivered to this interface in error and 7438 * the packet must be dropped. 7439 */ 7440 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7441 7442 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7443 &ip6h->ip6_dst)) { 7444 if (ip_debug > 2) { 7445 /* ip1dbg */ 7446 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7447 "addressed packet which is not for us: " 7448 "%s\n", AF_INET6, &ip6h->ip6_dst); 7449 } 7450 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7451 freemsg(first_mp); 7452 return; 7453 } 7454 } 7455 7456 /* 7457 * Find an ire that matches destination. For link-local addresses 7458 * we have to match the ill. 7459 * TBD for site local addresses. 
7460 */ 7461 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7462 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7463 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7464 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7465 } else { 7466 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7467 MBLK_GETLABEL(mp), ipst); 7468 } 7469 if (ire == NULL) { 7470 /* 7471 * No matching IRE found. Mark this packet as having 7472 * originated externally. 7473 */ 7474 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7475 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7476 if (!(ill->ill_flags & ILLF_ROUTER)) { 7477 BUMP_MIB(ill->ill_ip_mib, 7478 ipIfStatsInAddrErrors); 7479 } 7480 freemsg(hada_mp); 7481 freemsg(first_mp); 7482 return; 7483 } 7484 if (ip6h->ip6_hops <= 1) { 7485 if (hada_mp != NULL) 7486 goto hada_drop; 7487 /* Sent by forwarding path, and router is global zone */ 7488 icmp_time_exceeded_v6(WR(q), first_mp, 7489 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7490 GLOBAL_ZONEID, ipst); 7491 return; 7492 } 7493 /* 7494 * Per RFC 3513 section 2.5.2, we must not forward packets with 7495 * an unspecified source address. 7496 */ 7497 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7499 freemsg(hada_mp); 7500 freemsg(first_mp); 7501 return; 7502 } 7503 mp->b_prev = (mblk_t *)(uintptr_t) 7504 ill->ill_phyint->phyint_ifindex; 7505 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7506 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7507 ALL_ZONES, ipst); 7508 return; 7509 } 7510 /* we have a matching IRE */ 7511 if (ire->ire_stq != NULL) { 7512 ill_group_t *ill_group; 7513 ill_group_t *ire_group; 7514 7515 /* 7516 * To be quicker, we may wish not to chase pointers 7517 * (ire->ire_ipif->ipif_ill...) and instead store the 7518 * forwarding policy in the ire. An unfortunate side- 7519 * effect of this would be requiring an ire flush whenever 7520 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7521 * once and store in the boolean no_forward. 7522 * 7523 * This appears twice to keep it out of the non-forwarding, 7524 * yes-it's-for-us-on-the-right-interface case. 7525 */ 7526 no_forward = ((ill->ill_flags & 7527 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7528 7529 7530 ASSERT(first_mp == mp); 7531 /* 7532 * This ire has a send-to queue - forward the packet. 7533 */ 7534 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7535 freemsg(hada_mp); 7536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7537 if (no_forward) { 7538 BUMP_MIB(ill->ill_ip_mib, 7539 ipIfStatsInAddrErrors); 7540 } 7541 freemsg(mp); 7542 ire_refrele(ire); 7543 return; 7544 } 7545 /* 7546 * ipIfStatsHCInForwDatagrams should only be increment if there 7547 * will be an attempt to forward the packet, which is why we 7548 * increment after the above condition has been checked. 7549 */ 7550 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7551 if (ip6h->ip6_hops <= 1) { 7552 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7553 /* Sent by forwarding path, and router is global zone */ 7554 icmp_time_exceeded_v6(WR(q), mp, 7555 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7556 GLOBAL_ZONEID, ipst); 7557 ire_refrele(ire); 7558 return; 7559 } 7560 /* 7561 * Per RFC 3513 section 2.5.2, we must not forward packets with 7562 * an unspecified source address. 
7563 */ 7564 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7566 freemsg(mp); 7567 ire_refrele(ire); 7568 return; 7569 } 7570 7571 if (is_system_labeled()) { 7572 mblk_t *mp1; 7573 7574 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7575 BUMP_MIB(ill->ill_ip_mib, 7576 ipIfStatsForwProhibits); 7577 freemsg(mp); 7578 ire_refrele(ire); 7579 return; 7580 } 7581 /* Size may have changed */ 7582 mp = mp1; 7583 ip6h = (ip6_t *)mp->b_rptr; 7584 pkt_len = msgdsize(mp); 7585 } 7586 7587 if (pkt_len > ire->ire_max_frag) { 7588 int max_frag = ire->ire_max_frag; 7589 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7590 /* 7591 * Handle labeled packet resizing. 7592 */ 7593 if (is_system_labeled()) { 7594 max_frag = tsol_pmtu_adjust(mp, max_frag, 7595 pkt_len - old_pkt_len, AF_INET6); 7596 } 7597 7598 /* Sent by forwarding path, and router is global zone */ 7599 icmp_pkt2big_v6(WR(q), mp, max_frag, 7600 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7601 ire_refrele(ire); 7602 return; 7603 } 7604 7605 /* 7606 * Check to see if we're forwarding the packet to a 7607 * different link from which it came. If so, check the 7608 * source and destination addresses since routers must not 7609 * forward any packets with link-local source or 7610 * destination addresses to other links. Otherwise (if 7611 * we're forwarding onto the same link), conditionally send 7612 * a redirect message. 7613 */ 7614 ill_group = ill->ill_group; 7615 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7616 if (ire->ire_rfq != q && (ill_group == NULL || 7617 ill_group != ire_group)) { 7618 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7619 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7620 BUMP_MIB(ill->ill_ip_mib, 7621 ipIfStatsInAddrErrors); 7622 freemsg(mp); 7623 ire_refrele(ire); 7624 return; 7625 } 7626 /* TBD add site-local check at site boundary? 
*/ 7627 } else if (ipst->ips_ipv6_send_redirects) { 7628 in6_addr_t *v6targ; 7629 in6_addr_t gw_addr_v6; 7630 ire_t *src_ire_v6 = NULL; 7631 7632 /* 7633 * Don't send a redirect when forwarding a source 7634 * routed packet. 7635 */ 7636 if (ip_source_routed_v6(ip6h, mp, ipst)) 7637 goto forward; 7638 7639 mutex_enter(&ire->ire_lock); 7640 gw_addr_v6 = ire->ire_gateway_addr_v6; 7641 mutex_exit(&ire->ire_lock); 7642 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7643 v6targ = &gw_addr_v6; 7644 /* 7645 * We won't send redirects to a router 7646 * that doesn't have a link local 7647 * address, but will forward. 7648 */ 7649 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7650 BUMP_MIB(ill->ill_ip_mib, 7651 ipIfStatsInAddrErrors); 7652 goto forward; 7653 } 7654 } else { 7655 v6targ = &ip6h->ip6_dst; 7656 } 7657 7658 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7659 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7660 ALL_ZONES, 0, NULL, 7661 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7662 ipst); 7663 7664 if (src_ire_v6 != NULL) { 7665 /* 7666 * The source is directly connected. 
7667 */ 7668 mp1 = copymsg(mp); 7669 if (mp1 != NULL) { 7670 icmp_send_redirect_v6(WR(q), 7671 mp1, v6targ, &ip6h->ip6_dst, 7672 ill, B_FALSE); 7673 } 7674 ire_refrele(src_ire_v6); 7675 } 7676 } 7677 7678 forward: 7679 /* Hoplimit verified above */ 7680 ip6h->ip6_hops--; 7681 7682 outill = ire->ire_ipif->ipif_ill; 7683 7684 DTRACE_PROBE4(ip6__forwarding__start, 7685 ill_t *, inill, ill_t *, outill, 7686 ip6_t *, ip6h, mblk_t *, mp); 7687 7688 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7689 ipst->ips_ipv6firewall_forwarding, 7690 inill, outill, ip6h, mp, mp, ipst); 7691 7692 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7693 7694 if (mp != NULL) { 7695 UPDATE_IB_PKT_COUNT(ire); 7696 ire->ire_last_used_time = lbolt; 7697 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7698 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7699 } 7700 IRE_REFRELE(ire); 7701 return; 7702 } 7703 7704 /* 7705 * Need to put on correct queue for reassembly to find it. 7706 * No need to use put() since reassembly has its own locks. 7707 * Note: multicast packets and packets destined to addresses 7708 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7709 * the arriving ill. Unlike the IPv4 case, enabling strict 7710 * destination multihoming will prevent accepting packets 7711 * addressed to an IRE_LOCAL on lo0. 7712 */ 7713 if (ire->ire_rfq != q) { 7714 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7715 == NULL) { 7716 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7717 freemsg(hada_mp); 7718 freemsg(first_mp); 7719 return; 7720 } 7721 if (ire->ire_rfq != NULL) { 7722 q = ire->ire_rfq; 7723 ill = (ill_t *)q->q_ptr; 7724 ASSERT(ill != NULL); 7725 } 7726 } 7727 7728 zoneid = ire->ire_zoneid; 7729 UPDATE_IB_PKT_COUNT(ire); 7730 ire->ire_last_used_time = lbolt; 7731 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7732 ire_refrele(ire); 7733 ire = NULL; 7734 ipv6forus: 7735 /* 7736 * Looks like this packet is for us one way or another. 
7737 * This is where we'll process destination headers etc. 7738 */ 7739 for (; ; ) { 7740 switch (nexthdr) { 7741 case IPPROTO_TCP: { 7742 uint16_t *up; 7743 uint32_t sum; 7744 int offset; 7745 7746 hdr_len = pkt_len - remlen; 7747 7748 if (hada_mp != NULL) { 7749 ip0dbg(("tcp hada drop\n")); 7750 goto hada_drop; 7751 } 7752 7753 7754 /* TCP needs all of the TCP header */ 7755 if (remlen < TCP_MIN_HEADER_LENGTH) 7756 goto pkt_too_short; 7757 if (mp->b_cont != NULL && 7758 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7759 if (!pullupmsg(mp, 7760 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7761 BUMP_MIB(ill->ill_ip_mib, 7762 ipIfStatsInDiscards); 7763 freemsg(first_mp); 7764 return; 7765 } 7766 hck_flags = 0; 7767 ip6h = (ip6_t *)mp->b_rptr; 7768 whereptr = (uint8_t *)ip6h + hdr_len; 7769 } 7770 /* 7771 * Extract the offset field from the TCP header. 7772 */ 7773 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7774 if (offset != 5) { 7775 if (offset < 5) { 7776 ip1dbg(("ip_rput_data_v6: short " 7777 "TCP data offset")); 7778 BUMP_MIB(ill->ill_ip_mib, 7779 ipIfStatsInDiscards); 7780 freemsg(first_mp); 7781 return; 7782 } 7783 /* 7784 * There must be TCP options. 7785 * Make sure we can grab them. 7786 */ 7787 offset <<= 2; 7788 if (remlen < offset) 7789 goto pkt_too_short; 7790 if (mp->b_cont != NULL && 7791 whereptr + offset > mp->b_wptr) { 7792 if (!pullupmsg(mp, 7793 hdr_len + offset)) { 7794 BUMP_MIB(ill->ill_ip_mib, 7795 ipIfStatsInDiscards); 7796 freemsg(first_mp); 7797 return; 7798 } 7799 hck_flags = 0; 7800 ip6h = (ip6_t *)mp->b_rptr; 7801 whereptr = (uint8_t *)ip6h + hdr_len; 7802 } 7803 } 7804 7805 up = (uint16_t *)&ip6h->ip6_src; 7806 /* 7807 * TCP checksum calculation. 
First sum up the 7808 * pseudo-header fields: 7809 * - Source IPv6 address 7810 * - Destination IPv6 address 7811 * - TCP payload length 7812 * - TCP protocol ID 7813 */ 7814 sum = htons(IPPROTO_TCP + remlen) + 7815 up[0] + up[1] + up[2] + up[3] + 7816 up[4] + up[5] + up[6] + up[7] + 7817 up[8] + up[9] + up[10] + up[11] + 7818 up[12] + up[13] + up[14] + up[15]; 7819 7820 /* Fold initial sum */ 7821 sum = (sum & 0xffff) + (sum >> 16); 7822 7823 mp1 = mp->b_cont; 7824 7825 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7826 IP6_STAT(ipst, ip6_in_sw_cksum); 7827 7828 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7829 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7830 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7831 mp, mp1, cksum_err); 7832 7833 if (cksum_err) { 7834 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7835 7836 if (hck_flags & HCK_FULLCKSUM) { 7837 IP6_STAT(ipst, 7838 ip6_tcp_in_full_hw_cksum_err); 7839 } else if (hck_flags & HCK_PARTIALCKSUM) { 7840 IP6_STAT(ipst, 7841 ip6_tcp_in_part_hw_cksum_err); 7842 } else { 7843 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7844 } 7845 freemsg(first_mp); 7846 return; 7847 } 7848 tcp_fanout: 7849 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7850 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7851 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7852 return; 7853 } 7854 case IPPROTO_SCTP: 7855 { 7856 sctp_hdr_t *sctph; 7857 uint32_t calcsum, pktsum; 7858 uint_t hdr_len = pkt_len - remlen; 7859 sctp_stack_t *sctps; 7860 7861 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7862 7863 /* SCTP needs all of the SCTP header */ 7864 if (remlen < sizeof (*sctph)) { 7865 goto pkt_too_short; 7866 } 7867 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7868 ASSERT(mp->b_cont != NULL); 7869 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7870 BUMP_MIB(ill->ill_ip_mib, 7871 ipIfStatsInDiscards); 7872 freemsg(mp); 7873 return; 7874 } 7875 ip6h = (ip6_t *)mp->b_rptr; 7876 whereptr = (uint8_t *)ip6h + hdr_len; 7877 } 7878 7879 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7880 /* checksum */ 7881 pktsum = sctph->sh_chksum; 7882 sctph->sh_chksum = 0; 7883 calcsum = sctp_cksum(mp, hdr_len); 7884 if (calcsum != pktsum) { 7885 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7886 freemsg(mp); 7887 return; 7888 } 7889 sctph->sh_chksum = pktsum; 7890 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7891 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7892 ports, zoneid, mp, sctps)) == NULL) { 7893 ip_fanout_sctp_raw(first_mp, ill, 7894 (ipha_t *)ip6h, B_FALSE, ports, 7895 mctl_present, 7896 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7897 B_TRUE, zoneid); 7898 return; 7899 } 7900 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7901 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7902 B_FALSE, mctl_present); 7903 return; 7904 } 7905 case IPPROTO_UDP: { 7906 uint16_t *up; 7907 uint32_t sum; 7908 7909 hdr_len = pkt_len - remlen; 7910 7911 if (hada_mp != NULL) { 7912 ip0dbg(("udp hada drop\n")); 7913 goto hada_drop; 7914 } 7915 7916 /* Verify that at least the ports are present */ 7917 if (remlen < UDPH_SIZE) 7918 goto pkt_too_short; 7919 if (mp->b_cont != NULL && 7920 whereptr + UDPH_SIZE > mp->b_wptr) { 7921 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7922 BUMP_MIB(ill->ill_ip_mib, 7923 ipIfStatsInDiscards); 7924 freemsg(first_mp); 7925 return; 7926 } 7927 hck_flags = 0; 7928 ip6h = (ip6_t *)mp->b_rptr; 7929 whereptr = (uint8_t *)ip6h + hdr_len; 7930 } 7931 7932 /* 7933 * Before going through the regular checksum 7934 * calculation, make sure the received checksum 7935 * is non-zero. RFC 2460 says, a 0x0000 checksum 7936 * in a UDP packet (within IPv6 packet) is invalid 7937 * and should be replaced by 0xffff. This makes 7938 * sense as regular checksum calculation will 7939 * pass for both the cases i.e. 0x0000 and 0xffff. 7940 * Removing one of the case makes error detection 7941 * stronger. 
7942 */ 7943 7944 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7945 /* 0x0000 checksum is invalid */ 7946 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7947 "checksum value 0x0000\n")); 7948 BUMP_MIB(ill->ill_ip_mib, 7949 udpIfStatsInCksumErrs); 7950 freemsg(first_mp); 7951 return; 7952 } 7953 7954 up = (uint16_t *)&ip6h->ip6_src; 7955 7956 /* 7957 * UDP checksum calculation. First sum up the 7958 * pseudo-header fields: 7959 * - Source IPv6 address 7960 * - Destination IPv6 address 7961 * - UDP payload length 7962 * - UDP protocol ID 7963 */ 7964 7965 sum = htons(IPPROTO_UDP + remlen) + 7966 up[0] + up[1] + up[2] + up[3] + 7967 up[4] + up[5] + up[6] + up[7] + 7968 up[8] + up[9] + up[10] + up[11] + 7969 up[12] + up[13] + up[14] + up[15]; 7970 7971 /* Fold initial sum */ 7972 sum = (sum & 0xffff) + (sum >> 16); 7973 7974 if (reass_hck_flags != 0) { 7975 hck_flags = reass_hck_flags; 7976 7977 IP_CKSUM_RECV_REASS(hck_flags, 7978 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7979 sum, reass_sum, cksum_err); 7980 } else { 7981 mp1 = mp->b_cont; 7982 7983 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7984 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7985 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7986 mp, mp1, cksum_err); 7987 } 7988 7989 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7990 IP6_STAT(ipst, ip6_in_sw_cksum); 7991 7992 if (cksum_err) { 7993 BUMP_MIB(ill->ill_ip_mib, 7994 udpIfStatsInCksumErrs); 7995 7996 if (hck_flags & HCK_FULLCKSUM) 7997 IP6_STAT(ipst, 7998 ip6_udp_in_full_hw_cksum_err); 7999 else if (hck_flags & HCK_PARTIALCKSUM) 8000 IP6_STAT(ipst, 8001 ip6_udp_in_part_hw_cksum_err); 8002 else 8003 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 8004 8005 freemsg(first_mp); 8006 return; 8007 } 8008 goto udp_fanout; 8009 } 8010 case IPPROTO_ICMPV6: { 8011 uint16_t *up; 8012 uint32_t sum; 8013 uint_t hdr_len = pkt_len - remlen; 8014 8015 if (hada_mp != NULL) { 8016 ip0dbg(("icmp hada drop\n")); 8017 goto hada_drop; 8018 } 8019 8020 up = (uint16_t 
*)&ip6h->ip6_src; 8021 sum = htons(IPPROTO_ICMPV6 + remlen) + 8022 up[0] + up[1] + up[2] + up[3] + 8023 up[4] + up[5] + up[6] + up[7] + 8024 up[8] + up[9] + up[10] + up[11] + 8025 up[12] + up[13] + up[14] + up[15]; 8026 sum = (sum & 0xffff) + (sum >> 16); 8027 sum = IP_CSUM(mp, hdr_len, sum); 8028 if (sum != 0) { 8029 /* IPv6 ICMP checksum failed */ 8030 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8031 "failed %x\n", 8032 sum)); 8033 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8034 BUMP_MIB(ill->ill_icmp6_mib, 8035 ipv6IfIcmpInErrors); 8036 freemsg(first_mp); 8037 return; 8038 } 8039 8040 icmp_fanout: 8041 /* Check variable for testing applications */ 8042 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 8043 freemsg(first_mp); 8044 return; 8045 } 8046 /* 8047 * Assume that there is always at least one conn for 8048 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8049 * where there is no conn. 8050 */ 8051 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8052 ASSERT(!IS_LOOPBACK((ill))); 8053 /* 8054 * In the multicast case, applications may have 8055 * joined the group from different zones, so we 8056 * need to deliver the packet to each of them. 8057 * Loop through the multicast memberships 8058 * structures (ilm) on the receive ill and send 8059 * a copy of the packet up each matching one. 
8060 */ 8061 ILM_WALKER_HOLD(ill); 8062 for (ilm = ill->ill_ilm; ilm != NULL; 8063 ilm = ilm->ilm_next) { 8064 if (ilm->ilm_flags & ILM_DELETED) 8065 continue; 8066 if (!IN6_ARE_ADDR_EQUAL( 8067 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8068 continue; 8069 if (!ipif_lookup_zoneid(ill, 8070 ilm->ilm_zoneid, IPIF_UP, NULL)) 8071 continue; 8072 8073 first_mp1 = ip_copymsg(first_mp); 8074 if (first_mp1 == NULL) 8075 continue; 8076 icmp_inbound_v6(q, first_mp1, ill, 8077 hdr_len, mctl_present, 0, 8078 ilm->ilm_zoneid, dl_mp); 8079 } 8080 ILM_WALKER_RELE(ill); 8081 } else { 8082 first_mp1 = ip_copymsg(first_mp); 8083 if (first_mp1 != NULL) 8084 icmp_inbound_v6(q, first_mp1, ill, 8085 hdr_len, mctl_present, 0, zoneid, 8086 dl_mp); 8087 } 8088 /* FALLTHRU */ 8089 default: { 8090 /* 8091 * Handle protocols with which IPv6 is less intimate. 8092 */ 8093 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8094 8095 if (hada_mp != NULL) { 8096 ip0dbg(("default hada drop\n")); 8097 goto hada_drop; 8098 } 8099 8100 /* 8101 * Enable sending ICMP for "Unknown" nexthdr 8102 * case. i.e. where we did not FALLTHRU from 8103 * IPPROTO_ICMPV6 processing case above. 8104 * If we did FALLTHRU, then the packet has already been 8105 * processed for IPPF, don't process it again in 8106 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8107 * flags 8108 */ 8109 if (nexthdr != IPPROTO_ICMPV6) 8110 proto_flags |= IP_FF_SEND_ICMP; 8111 else 8112 proto_flags |= IP6_NO_IPPOLICY; 8113 8114 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8115 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8116 mctl_present, zoneid); 8117 return; 8118 } 8119 8120 case IPPROTO_DSTOPTS: { 8121 uint_t ehdrlen; 8122 uint8_t *optptr; 8123 ip6_dest_t *desthdr; 8124 8125 /* Check if AH is present. 
*/ 8126 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8127 hada_mp, zoneid)) { 8128 ip0dbg(("dst early hada drop\n")); 8129 return; 8130 } 8131 8132 /* 8133 * Reinitialize pointers, as ipsec_early_ah_v6() does 8134 * complete pullups. We don't have to do more pullups 8135 * as a result. 8136 */ 8137 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8138 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8139 ip6h = (ip6_t *)mp->b_rptr; 8140 8141 if (remlen < MIN_EHDR_LEN) 8142 goto pkt_too_short; 8143 8144 desthdr = (ip6_dest_t *)whereptr; 8145 nexthdr = desthdr->ip6d_nxt; 8146 prev_nexthdr_offset = (uint_t)(whereptr - 8147 (uint8_t *)ip6h); 8148 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8149 if (remlen < ehdrlen) 8150 goto pkt_too_short; 8151 optptr = whereptr + 2; 8152 /* 8153 * Note: XXX This code does not seem to make 8154 * distinction between Destination Options Header 8155 * being before/after Routing Header which can 8156 * happen if we are at the end of source route. 8157 * This may become significant in future. 8158 * (No real significant Destination Options are 8159 * defined/implemented yet ). 8160 */ 8161 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8162 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8163 case -1: 8164 /* 8165 * Packet has been consumed and any needed 8166 * ICMP errors sent. 
8167 */ 8168 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8169 freemsg(hada_mp); 8170 return; 8171 case 0: 8172 /* No action needed; continue */ 8173 break; 8174 case 1: 8175 /* 8176 * Unexpected return value 8177 * (Router alert is a Hop-by-Hop option) 8178 */ 8179 #ifdef DEBUG 8180 panic("ip_rput_data_v6: router " 8181 "alert hbh opt indication in dest opt"); 8182 /*NOTREACHED*/ 8183 #else 8184 freemsg(hada_mp); 8185 freemsg(first_mp); 8186 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8187 return; 8188 #endif 8189 } 8190 used = ehdrlen; 8191 break; 8192 } 8193 case IPPROTO_FRAGMENT: { 8194 ip6_frag_t *fraghdr; 8195 size_t no_frag_hdr_len; 8196 8197 if (hada_mp != NULL) { 8198 ip0dbg(("frag hada drop\n")); 8199 goto hada_drop; 8200 } 8201 8202 ASSERT(first_mp == mp); 8203 if (remlen < sizeof (ip6_frag_t)) 8204 goto pkt_too_short; 8205 8206 if (mp->b_cont != NULL && 8207 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8208 if (!pullupmsg(mp, 8209 pkt_len - remlen + sizeof (ip6_frag_t))) { 8210 BUMP_MIB(ill->ill_ip_mib, 8211 ipIfStatsInDiscards); 8212 freemsg(mp); 8213 return; 8214 } 8215 hck_flags = 0; 8216 ip6h = (ip6_t *)mp->b_rptr; 8217 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8218 } 8219 8220 fraghdr = (ip6_frag_t *)whereptr; 8221 used = (uint_t)sizeof (ip6_frag_t); 8222 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8223 8224 /* 8225 * Invoke the CGTP (multirouting) filtering module to 8226 * process the incoming packet. Packets identified as 8227 * duplicates must be discarded. Filtering is active 8228 * only if the ip_cgtp_filter ndd variable is 8229 * non-zero. 8230 * 8231 * Only applies to the shared stack since the 8232 * filter_ops do not carry an ip_stack_t or zoneid. 
8233 */ 8234 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL) && 8235 ipst->ips_netstack->netstack_stackid == 8236 GLOBAL_NETSTACKID) { 8237 int cgtp_flt_pkt = 8238 ip_cgtp_filter_ops->cfo_filter_v6( 8239 inill->ill_rq, ip6h, fraghdr); 8240 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8241 freemsg(mp); 8242 return; 8243 } 8244 } 8245 8246 /* Restore the flags */ 8247 DB_CKSUMFLAGS(mp) = hck_flags; 8248 8249 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8250 remlen - used, &prev_nexthdr_offset, 8251 &reass_sum, &reass_hck_flags); 8252 if (mp == NULL) { 8253 /* Reassembly is still pending */ 8254 return; 8255 } 8256 /* The first mblk are the headers before the frag hdr */ 8257 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8258 8259 first_mp = mp; /* mp has most likely changed! */ 8260 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8261 ip6h = (ip6_t *)mp->b_rptr; 8262 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8263 whereptr = mp->b_rptr + no_frag_hdr_len; 8264 remlen = ntohs(ip6h->ip6_plen) + 8265 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8266 pkt_len = msgdsize(mp); 8267 used = 0; 8268 break; 8269 } 8270 case IPPROTO_HOPOPTS: 8271 if (hada_mp != NULL) { 8272 ip0dbg(("hop hada drop\n")); 8273 goto hada_drop; 8274 } 8275 /* 8276 * Illegal header sequence. 8277 * (Hop-by-hop headers are processed above 8278 * and required to immediately follow IPv6 header) 8279 */ 8280 icmp_param_problem_v6(WR(q), first_mp, 8281 ICMP6_PARAMPROB_NEXTHEADER, 8282 prev_nexthdr_offset, 8283 B_FALSE, B_FALSE, zoneid, ipst); 8284 return; 8285 } 8286 case IPPROTO_ROUTING: { 8287 uint_t ehdrlen; 8288 ip6_rthdr_t *rthdr; 8289 8290 /* Check if AH is present. */ 8291 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8292 hada_mp, zoneid)) { 8293 ip0dbg(("routing hada drop\n")); 8294 return; 8295 } 8296 8297 /* 8298 * Reinitialize pointers, as ipsec_early_ah_v6() does 8299 * complete pullups. We don't have to do more pullups 8300 * as a result. 
8301 */ 8302 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8303 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8304 ip6h = (ip6_t *)mp->b_rptr; 8305 8306 if (remlen < MIN_EHDR_LEN) 8307 goto pkt_too_short; 8308 rthdr = (ip6_rthdr_t *)whereptr; 8309 nexthdr = rthdr->ip6r_nxt; 8310 prev_nexthdr_offset = (uint_t)(whereptr - 8311 (uint8_t *)ip6h); 8312 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8313 if (remlen < ehdrlen) 8314 goto pkt_too_short; 8315 if (rthdr->ip6r_segleft != 0) { 8316 /* Not end of source route */ 8317 if (ll_multicast) { 8318 BUMP_MIB(ill->ill_ip_mib, 8319 ipIfStatsForwProhibits); 8320 freemsg(hada_mp); 8321 freemsg(mp); 8322 return; 8323 } 8324 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8325 flags, hada_mp, dl_mp); 8326 return; 8327 } 8328 used = ehdrlen; 8329 break; 8330 } 8331 case IPPROTO_AH: 8332 case IPPROTO_ESP: { 8333 /* 8334 * Fast path for AH/ESP. If this is the first time 8335 * we are sending a datagram to AH/ESP, allocate 8336 * a IPSEC_IN message and prepend it. Otherwise, 8337 * just fanout. 8338 */ 8339 8340 ipsec_in_t *ii; 8341 int ipsec_rc; 8342 ipsec_stack_t *ipss; 8343 8344 ipss = ipst->ips_netstack->netstack_ipsec; 8345 if (!mctl_present) { 8346 ASSERT(first_mp == mp); 8347 first_mp = ipsec_in_alloc(B_FALSE, 8348 ipst->ips_netstack); 8349 if (first_mp == NULL) { 8350 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8351 "allocation failure.\n")); 8352 BUMP_MIB(ill->ill_ip_mib, 8353 ipIfStatsInDiscards); 8354 freemsg(mp); 8355 return; 8356 } 8357 /* 8358 * Store the ill_index so that when we come back 8359 * from IPSEC we ride on the same queue. 8360 */ 8361 ii = (ipsec_in_t *)first_mp->b_rptr; 8362 ii->ipsec_in_ill_index = 8363 ill->ill_phyint->phyint_ifindex; 8364 ii->ipsec_in_rill_index = 8365 ii->ipsec_in_ill_index; 8366 first_mp->b_cont = mp; 8367 /* 8368 * Cache hardware acceleration info. 
8369 */ 8370 if (hada_mp != NULL) { 8371 IPSECHW_DEBUG(IPSECHW_PKT, 8372 ("ip_rput_data_v6: " 8373 "caching data attr.\n")); 8374 ii->ipsec_in_accelerated = B_TRUE; 8375 ii->ipsec_in_da = hada_mp; 8376 hada_mp = NULL; 8377 } 8378 } else { 8379 ii = (ipsec_in_t *)first_mp->b_rptr; 8380 } 8381 8382 if (!ipsec_loaded(ipss)) { 8383 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8384 zoneid, ipst); 8385 return; 8386 } 8387 8388 /* select inbound SA and have IPsec process the pkt */ 8389 if (nexthdr == IPPROTO_ESP) { 8390 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8391 ipst->ips_netstack); 8392 if (esph == NULL) 8393 return; 8394 ASSERT(ii->ipsec_in_esp_sa != NULL); 8395 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8396 NULL); 8397 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8398 first_mp, esph); 8399 } else { 8400 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8401 ipst->ips_netstack); 8402 if (ah == NULL) 8403 return; 8404 ASSERT(ii->ipsec_in_ah_sa != NULL); 8405 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8406 NULL); 8407 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8408 first_mp, ah); 8409 } 8410 8411 switch (ipsec_rc) { 8412 case IPSEC_STATUS_SUCCESS: 8413 break; 8414 case IPSEC_STATUS_FAILED: 8415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8416 /* FALLTHRU */ 8417 case IPSEC_STATUS_PENDING: 8418 return; 8419 } 8420 /* we're done with IPsec processing, send it up */ 8421 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8422 return; 8423 } 8424 case IPPROTO_NONE: 8425 /* All processing is done. Count as "delivered". 
*/ 8426 freemsg(hada_mp); 8427 freemsg(first_mp); 8428 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8429 return; 8430 } 8431 whereptr += used; 8432 ASSERT(remlen >= used); 8433 remlen -= used; 8434 } 8435 /* NOTREACHED */ 8436 8437 pkt_too_short: 8438 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8439 ip6_len, pkt_len, remlen)); 8440 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8441 freemsg(hada_mp); 8442 freemsg(first_mp); 8443 return; 8444 udp_fanout: 8445 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8446 connp = NULL; 8447 } else { 8448 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8449 ipst); 8450 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8451 CONN_DEC_REF(connp); 8452 connp = NULL; 8453 } 8454 } 8455 8456 if (connp == NULL) { 8457 uint32_t ports; 8458 8459 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8460 UDP_PORTS_OFFSET); 8461 IP6_STAT(ipst, ip6_udp_slow_path); 8462 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8463 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8464 zoneid); 8465 return; 8466 } 8467 8468 if (CONN_UDP_FLOWCTLD(connp)) { 8469 freemsg(first_mp); 8470 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8471 CONN_DEC_REF(connp); 8472 return; 8473 } 8474 8475 /* Initiate IPPF processing */ 8476 if (IP6_IN_IPP(flags, ipst)) { 8477 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8478 if (mp == NULL) { 8479 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8480 CONN_DEC_REF(connp); 8481 return; 8482 } 8483 } 8484 8485 if (connp->conn_ip_recvpktinfo || 8486 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8487 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8488 if (mp == NULL) { 8489 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8490 CONN_DEC_REF(connp); 8491 return; 8492 } 8493 } 8494 8495 IP6_STAT(ipst, ip6_udp_fast_path); 8496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8497 8498 /* Send it upstream */ 8499 CONN_UDP_RECV(connp, mp); 8500 8501 
CONN_DEC_REF(connp); 8502 freemsg(hada_mp); 8503 return; 8504 8505 hada_drop: 8506 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8507 /* IPsec kstats: bump counter here */ 8508 freemsg(hada_mp); 8509 freemsg(first_mp); 8510 } 8511 8512 /* 8513 * Reassemble fragment. 8514 * When it returns a completed message the first mblk will only contain 8515 * the headers prior to the fragment header. 8516 * 8517 * prev_nexthdr_offset gives the offset of the nexthdr field in the 8518 * preceding header. This is needed to patch the previous header's 8519 * nexthdr field when reassembly completes. 8520 */ 8521 static mblk_t * 8522 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8523 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8524 uint32_t *cksum_val, uint16_t *cksum_flags) 8525 { 8526 ill_t *ill = (ill_t *)q->q_ptr; 8527 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8528 uint16_t offset; 8529 boolean_t more_frags; 8530 uint8_t nexthdr = fraghdr->ip6f_nxt; 8531 in6_addr_t *v6dst_ptr; 8532 in6_addr_t *v6src_ptr; 8533 uint_t end; 8534 uint_t hdr_length; 8535 size_t count; 8536 ipf_t *ipf; 8537 ipf_t **ipfp; 8538 ipfb_t *ipfb; 8539 mblk_t *mp1; 8540 uint8_t ecn_info = 0; 8541 size_t msg_len; 8542 mblk_t *tail_mp; 8543 mblk_t *t_mp; 8544 boolean_t pruned = B_FALSE; 8545 uint32_t sum_val; 8546 uint16_t sum_flags; 8547 ip_stack_t *ipst = ill->ill_ipst; 8548 8549 if (cksum_val != NULL) 8550 *cksum_val = 0; 8551 if (cksum_flags != NULL) 8552 *cksum_flags = 0; 8553 8554 /* 8555 * We utilize hardware computed checksum info only for UDP since 8556 * IP fragmentation is a normal occurrence for the protocol. In 8557 * addition, checksum offload support for IP fragments carrying 8558 * UDP payload is commonly implemented across network adapters. 
8559 */ 8560 ASSERT(ill != NULL); 8561 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8562 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8563 mblk_t *mp1 = mp->b_cont; 8564 int32_t len; 8565 8566 /* Record checksum information from the packet */ 8567 sum_val = (uint32_t)DB_CKSUM16(mp); 8568 sum_flags = DB_CKSUMFLAGS(mp); 8569 8570 /* fragmented payload offset from beginning of mblk */ 8571 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8572 8573 if ((sum_flags & HCK_PARTIALCKSUM) && 8574 (mp1 == NULL || mp1->b_cont == NULL) && 8575 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8576 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8577 uint32_t adj; 8578 /* 8579 * Partial checksum has been calculated by hardware 8580 * and attached to the packet; in addition, any 8581 * prepended extraneous data is even byte aligned. 8582 * If any such data exists, we adjust the checksum; 8583 * this would also handle any postpended data. 8584 */ 8585 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8586 mp, mp1, len, adj); 8587 8588 /* One's complement subtract extraneous checksum */ 8589 if (adj >= sum_val) 8590 sum_val = ~(adj - sum_val) & 0xFFFF; 8591 else 8592 sum_val -= adj; 8593 } 8594 } else { 8595 sum_val = 0; 8596 sum_flags = 0; 8597 } 8598 8599 /* Clear hardware checksumming flag */ 8600 DB_CKSUMFLAGS(mp) = 0; 8601 8602 /* 8603 * Note: Fragment offset in header is in 8-octet units. 8604 * Clearing least significant 3 bits not only extracts 8605 * it but also gets it in units of octets. 8606 */ 8607 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8608 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8609 8610 /* 8611 * Is the more frags flag on and the payload length not a multiple 8612 * of eight? 
8613 */ 8614 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8615 zoneid_t zoneid; 8616 8617 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8618 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8619 if (zoneid == ALL_ZONES) { 8620 freemsg(mp); 8621 return (NULL); 8622 } 8623 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8624 (uint32_t)((char *)&ip6h->ip6_plen - 8625 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8626 return (NULL); 8627 } 8628 8629 v6src_ptr = &ip6h->ip6_src; 8630 v6dst_ptr = &ip6h->ip6_dst; 8631 end = remlen; 8632 8633 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8634 end += offset; 8635 8636 /* 8637 * Would fragment cause reassembled packet to have a payload length 8638 * greater than IP_MAXPACKET - the max payload size? 8639 */ 8640 if (end > IP_MAXPACKET) { 8641 zoneid_t zoneid; 8642 8643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8644 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8645 if (zoneid == ALL_ZONES) { 8646 freemsg(mp); 8647 return (NULL); 8648 } 8649 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8650 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8651 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8652 return (NULL); 8653 } 8654 8655 /* 8656 * This packet just has one fragment. Reassembly not 8657 * needed. 8658 */ 8659 if (!more_frags && offset == 0) { 8660 goto reass_done; 8661 } 8662 8663 /* 8664 * Drop the fragment as early as possible, if 8665 * we don't have resource(s) to re-assemble. 8666 */ 8667 if (ipst->ips_ip_reass_queue_bytes == 0) { 8668 freemsg(mp); 8669 return (NULL); 8670 } 8671 8672 /* Record the ECN field info. */ 8673 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8674 /* 8675 * If this is not the first fragment, dump the unfragmentable 8676 * portion of the packet. 8677 */ 8678 if (offset) 8679 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8680 8681 /* 8682 * Fragmentation reassembly. 
Each ILL has a hash table for 8683 * queueing packets undergoing reassembly for all IPIFs 8684 * associated with the ILL. The hash is based on the packet 8685 * IP ident field. The ILL frag hash table was allocated 8686 * as a timer block at the time the ILL was created. Whenever 8687 * there is anything on the reassembly queue, the timer will 8688 * be running. 8689 */ 8690 msg_len = MBLKSIZE(mp); 8691 tail_mp = mp; 8692 while (tail_mp->b_cont != NULL) { 8693 tail_mp = tail_mp->b_cont; 8694 msg_len += MBLKSIZE(tail_mp); 8695 } 8696 /* 8697 * If the reassembly list for this ILL will get too big 8698 * prune it. 8699 */ 8700 8701 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8702 ipst->ips_ip_reass_queue_bytes) { 8703 ill_frag_prune(ill, 8704 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8705 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8706 pruned = B_TRUE; 8707 } 8708 8709 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8710 mutex_enter(&ipfb->ipfb_lock); 8711 8712 ipfp = &ipfb->ipfb_ipf; 8713 /* Try to find an existing fragment queue for this packet. */ 8714 for (;;) { 8715 ipf = ipfp[0]; 8716 if (ipf) { 8717 /* 8718 * It has to match on ident, source address, and 8719 * dest address. 8720 */ 8721 if (ipf->ipf_ident == ident && 8722 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8723 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8724 8725 /* 8726 * If we have received too many 8727 * duplicate fragments for this packet 8728 * free it. 8729 */ 8730 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8731 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8732 freemsg(mp); 8733 mutex_exit(&ipfb->ipfb_lock); 8734 return (NULL); 8735 } 8736 8737 break; 8738 } 8739 ipfp = &ipf->ipf_hash_next; 8740 continue; 8741 } 8742 8743 8744 /* 8745 * If we pruned the list, do we want to store this new 8746 * fragment?. We apply an optimization here based on the 8747 * fact that most fragments will be received in order. 
8748 * So if the offset of this incoming fragment is zero, 8749 * it is the first fragment of a new packet. We will 8750 * keep it. Otherwise drop the fragment, as we have 8751 * probably pruned the packet already (since the 8752 * packet cannot be found). 8753 */ 8754 8755 if (pruned && offset != 0) { 8756 mutex_exit(&ipfb->ipfb_lock); 8757 freemsg(mp); 8758 return (NULL); 8759 } 8760 8761 /* New guy. Allocate a frag message. */ 8762 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8763 if (!mp1) { 8764 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8765 freemsg(mp); 8766 partial_reass_done: 8767 mutex_exit(&ipfb->ipfb_lock); 8768 return (NULL); 8769 } 8770 8771 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8772 /* 8773 * Too many fragmented packets in this hash bucket. 8774 * Free the oldest. 8775 */ 8776 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8777 } 8778 8779 mp1->b_cont = mp; 8780 8781 /* Initialize the fragment header. */ 8782 ipf = (ipf_t *)mp1->b_rptr; 8783 ipf->ipf_mp = mp1; 8784 ipf->ipf_ptphn = ipfp; 8785 ipfp[0] = ipf; 8786 ipf->ipf_hash_next = NULL; 8787 ipf->ipf_ident = ident; 8788 ipf->ipf_v6src = *v6src_ptr; 8789 ipf->ipf_v6dst = *v6dst_ptr; 8790 /* Record reassembly start time. 
*/ 8791 ipf->ipf_timestamp = gethrestime_sec(); 8792 /* Record ipf generation and account for frag header */ 8793 ipf->ipf_gen = ill->ill_ipf_gen++; 8794 ipf->ipf_count = MBLKSIZE(mp1); 8795 ipf->ipf_protocol = nexthdr; 8796 ipf->ipf_nf_hdr_len = 0; 8797 ipf->ipf_prev_nexthdr_offset = 0; 8798 ipf->ipf_last_frag_seen = B_FALSE; 8799 ipf->ipf_ecn = ecn_info; 8800 ipf->ipf_num_dups = 0; 8801 ipfb->ipfb_frag_pkts++; 8802 ipf->ipf_checksum = 0; 8803 ipf->ipf_checksum_flags = 0; 8804 8805 /* Store checksum value in fragment header */ 8806 if (sum_flags != 0) { 8807 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8808 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8809 ipf->ipf_checksum = sum_val; 8810 ipf->ipf_checksum_flags = sum_flags; 8811 } 8812 8813 /* 8814 * We handle reassembly two ways. In the easy case, 8815 * where all the fragments show up in order, we do 8816 * minimal bookkeeping, and just clip new pieces on 8817 * the end. If we ever see a hole, then we go off 8818 * to ip_reassemble which has to mark the pieces and 8819 * keep track of the number of holes, etc. Obviously, 8820 * the point of having both mechanisms is so we can 8821 * handle the easy case as efficiently as possible. 8822 */ 8823 if (offset == 0) { 8824 /* Easy case, in-order reassembly so far. */ 8825 /* Update the byte count */ 8826 ipf->ipf_count += msg_len; 8827 ipf->ipf_tail_mp = tail_mp; 8828 /* 8829 * Keep track of next expected offset in 8830 * ipf_end. 8831 */ 8832 ipf->ipf_end = end; 8833 ipf->ipf_nf_hdr_len = hdr_length; 8834 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8835 } else { 8836 /* Hard case, hole at the beginning. */ 8837 ipf->ipf_tail_mp = NULL; 8838 /* 8839 * ipf_end == 0 means that we have given up 8840 * on easy reassembly. 8841 */ 8842 ipf->ipf_end = 0; 8843 8844 /* Forget checksum offload from now on */ 8845 ipf->ipf_checksum_flags = 0; 8846 8847 /* 8848 * ipf_hole_cnt is set by ip_reassemble. 8849 * ipf_count is updated by ip_reassemble. 
8850 * No need to check for return value here 8851 * as we don't expect reassembly to complete or 8852 * fail for the first fragment itself. 8853 */ 8854 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8855 msg_len); 8856 } 8857 /* Update per ipfb and ill byte counts */ 8858 ipfb->ipfb_count += ipf->ipf_count; 8859 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8860 ill->ill_frag_count += ipf->ipf_count; 8861 /* If the frag timer wasn't already going, start it. */ 8862 mutex_enter(&ill->ill_lock); 8863 ill_frag_timer_start(ill); 8864 mutex_exit(&ill->ill_lock); 8865 goto partial_reass_done; 8866 } 8867 8868 /* 8869 * If the packet's flag has changed (it could be coming up 8870 * from an interface different than the previous, therefore 8871 * possibly different checksum capability), then forget about 8872 * any stored checksum states. Otherwise add the value to 8873 * the existing one stored in the fragment header. 8874 */ 8875 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8876 sum_val += ipf->ipf_checksum; 8877 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8878 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8879 ipf->ipf_checksum = sum_val; 8880 } else if (ipf->ipf_checksum_flags != 0) { 8881 /* Forget checksum offload from now on */ 8882 ipf->ipf_checksum_flags = 0; 8883 } 8884 8885 /* 8886 * We have a new piece of a datagram which is already being 8887 * reassembled. Update the ECN info if all IP fragments 8888 * are ECN capable. If there is one which is not, clear 8889 * all the info. If there is at least one which has CE 8890 * code point, IP needs to report that up to transport. 
8891 */ 8892 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8893 if (ecn_info == IPH_ECN_CE) 8894 ipf->ipf_ecn = IPH_ECN_CE; 8895 } else { 8896 ipf->ipf_ecn = IPH_ECN_NECT; 8897 } 8898 8899 if (offset && ipf->ipf_end == offset) { 8900 /* The new fragment fits at the end */ 8901 ipf->ipf_tail_mp->b_cont = mp; 8902 /* Update the byte count */ 8903 ipf->ipf_count += msg_len; 8904 /* Update per ipfb and ill byte counts */ 8905 ipfb->ipfb_count += msg_len; 8906 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8907 ill->ill_frag_count += msg_len; 8908 if (more_frags) { 8909 /* More to come. */ 8910 ipf->ipf_end = end; 8911 ipf->ipf_tail_mp = tail_mp; 8912 goto partial_reass_done; 8913 } 8914 } else { 8915 /* 8916 * Go do the hard cases. 8917 * Call ip_reassemble(). 8918 */ 8919 int ret; 8920 8921 if (offset == 0) { 8922 if (ipf->ipf_prev_nexthdr_offset == 0) { 8923 ipf->ipf_nf_hdr_len = hdr_length; 8924 ipf->ipf_prev_nexthdr_offset = 8925 *prev_nexthdr_offset; 8926 } 8927 } 8928 /* Save current byte count */ 8929 count = ipf->ipf_count; 8930 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8931 8932 /* Count of bytes added and subtracted (freeb()ed) */ 8933 count = ipf->ipf_count - count; 8934 if (count) { 8935 /* Update per ipfb and ill byte counts */ 8936 ipfb->ipfb_count += count; 8937 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8938 ill->ill_frag_count += count; 8939 } 8940 if (ret == IP_REASS_PARTIAL) { 8941 goto partial_reass_done; 8942 } else if (ret == IP_REASS_FAILED) { 8943 /* Reassembly failed. Free up all resources */ 8944 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8945 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8946 IP_REASS_SET_START(t_mp, 0); 8947 IP_REASS_SET_END(t_mp, 0); 8948 } 8949 freemsg(mp); 8950 goto partial_reass_done; 8951 } 8952 8953 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8954 } 8955 /* 8956 * We have completed reassembly. Unhook the frag header from 8957 * the reassembly list. 
8958 * 8959 * Grab the unfragmentable header length next header value out 8960 * of the first fragment 8961 */ 8962 ASSERT(ipf->ipf_nf_hdr_len != 0); 8963 hdr_length = ipf->ipf_nf_hdr_len; 8964 8965 /* 8966 * Before we free the frag header, record the ECN info 8967 * to report back to the transport. 8968 */ 8969 ecn_info = ipf->ipf_ecn; 8970 8971 /* 8972 * Store the nextheader field in the header preceding the fragment 8973 * header 8974 */ 8975 nexthdr = ipf->ipf_protocol; 8976 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8977 ipfp = ipf->ipf_ptphn; 8978 8979 /* We need to supply these to caller */ 8980 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8981 sum_val = ipf->ipf_checksum; 8982 else 8983 sum_val = 0; 8984 8985 mp1 = ipf->ipf_mp; 8986 count = ipf->ipf_count; 8987 ipf = ipf->ipf_hash_next; 8988 if (ipf) 8989 ipf->ipf_ptphn = ipfp; 8990 ipfp[0] = ipf; 8991 ill->ill_frag_count -= count; 8992 ASSERT(ipfb->ipfb_count >= count); 8993 ipfb->ipfb_count -= count; 8994 ipfb->ipfb_frag_pkts--; 8995 mutex_exit(&ipfb->ipfb_lock); 8996 /* Ditch the frag header. */ 8997 mp = mp1->b_cont; 8998 freeb(mp1); 8999 9000 /* 9001 * Make sure the packet is good by doing some sanity 9002 * check. If bad we can silentely drop the packet. 9003 */ 9004 reass_done: 9005 if (hdr_length < sizeof (ip6_frag_t)) { 9006 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 9007 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 9008 freemsg(mp); 9009 return (NULL); 9010 } 9011 9012 /* 9013 * Remove the fragment header from the initial header by 9014 * splitting the mblk into the non-fragmentable header and 9015 * everthing after the fragment extension header. This has the 9016 * side effect of putting all the headers that need destination 9017 * processing into the b_cont block-- on return this fact is 9018 * used in order to avoid having to look at the extensions 9019 * already processed. 
9020 * 9021 * Note that this code assumes that the unfragmentable portion 9022 * of the header is in the first mblk and increments 9023 * the read pointer past it. If this assumption is broken 9024 * this code fails badly. 9025 */ 9026 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9027 mblk_t *nmp; 9028 9029 if (!(nmp = dupb(mp))) { 9030 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 9031 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9032 freemsg(mp); 9033 return (NULL); 9034 } 9035 nmp->b_cont = mp->b_cont; 9036 mp->b_cont = nmp; 9037 nmp->b_rptr += hdr_length; 9038 } 9039 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9040 9041 ip6h = (ip6_t *)mp->b_rptr; 9042 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9043 9044 /* Restore original IP length in header. */ 9045 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9046 /* Record the ECN info. */ 9047 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9048 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9049 9050 /* Reassembly is successful; return checksum information if needed */ 9051 if (cksum_val != NULL) 9052 *cksum_val = sum_val; 9053 if (cksum_flags != NULL) 9054 *cksum_flags = sum_flags; 9055 9056 return (mp); 9057 } 9058 9059 /* 9060 * Walk through the options to see if there is a routing header. 9061 * If present get the destination which is the last address of 9062 * the option. 
9063 */ 9064 in6_addr_t 9065 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9066 { 9067 uint8_t nexthdr; 9068 uint8_t *whereptr; 9069 ip6_hbh_t *hbhhdr; 9070 ip6_dest_t *dsthdr; 9071 ip6_rthdr0_t *rthdr; 9072 ip6_frag_t *fraghdr; 9073 int ehdrlen; 9074 int left; 9075 in6_addr_t *ap, rv; 9076 9077 if (is_fragment != NULL) 9078 *is_fragment = B_FALSE; 9079 9080 rv = ip6h->ip6_dst; 9081 9082 nexthdr = ip6h->ip6_nxt; 9083 whereptr = (uint8_t *)&ip6h[1]; 9084 for (;;) { 9085 9086 ASSERT(nexthdr != IPPROTO_RAW); 9087 switch (nexthdr) { 9088 case IPPROTO_HOPOPTS: 9089 hbhhdr = (ip6_hbh_t *)whereptr; 9090 nexthdr = hbhhdr->ip6h_nxt; 9091 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9092 break; 9093 case IPPROTO_DSTOPTS: 9094 dsthdr = (ip6_dest_t *)whereptr; 9095 nexthdr = dsthdr->ip6d_nxt; 9096 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9097 break; 9098 case IPPROTO_ROUTING: 9099 rthdr = (ip6_rthdr0_t *)whereptr; 9100 nexthdr = rthdr->ip6r0_nxt; 9101 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9102 9103 left = rthdr->ip6r0_segleft; 9104 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9105 rv = *(ap + left - 1); 9106 /* 9107 * If the caller doesn't care whether the packet 9108 * is a fragment or not, we can stop here since 9109 * we have our destination. 9110 */ 9111 if (is_fragment == NULL) 9112 goto done; 9113 break; 9114 case IPPROTO_FRAGMENT: 9115 fraghdr = (ip6_frag_t *)whereptr; 9116 nexthdr = fraghdr->ip6f_nxt; 9117 ehdrlen = sizeof (ip6_frag_t); 9118 if (is_fragment != NULL) 9119 *is_fragment = B_TRUE; 9120 goto done; 9121 default : 9122 goto done; 9123 } 9124 whereptr += ehdrlen; 9125 } 9126 9127 done: 9128 return (rv); 9129 } 9130 9131 /* 9132 * ip_source_routed_v6: 9133 * This function is called by redirect code in ip_rput_data_v6 to 9134 * know whether this packet is source routed through this node i.e 9135 * whether this node (router) is part of the journey. 
This 9136 * function is called under two cases : 9137 * 9138 * case 1 : Routing header was processed by this node and 9139 * ip_process_rthdr replaced ip6_dst with the next hop 9140 * and we are forwarding the packet to the next hop. 9141 * 9142 * case 2 : Routing header was not processed by this node and we 9143 * are just forwarding the packet. 9144 * 9145 * For case (1) we don't want to send redirects. For case(2) we 9146 * want to send redirects. 9147 */ 9148 static boolean_t 9149 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9150 { 9151 uint8_t nexthdr; 9152 in6_addr_t *addrptr; 9153 ip6_rthdr0_t *rthdr; 9154 uint8_t numaddr; 9155 ip6_hbh_t *hbhhdr; 9156 uint_t ehdrlen; 9157 uint8_t *byteptr; 9158 9159 ip2dbg(("ip_source_routed_v6\n")); 9160 nexthdr = ip6h->ip6_nxt; 9161 ehdrlen = IPV6_HDR_LEN; 9162 9163 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9164 while (nexthdr == IPPROTO_HOPOPTS || 9165 nexthdr == IPPROTO_DSTOPTS) { 9166 byteptr = (uint8_t *)ip6h + ehdrlen; 9167 /* 9168 * Check if we have already processed 9169 * packets or we are just a forwarding 9170 * router which only pulled up msgs up 9171 * to IPV6HDR and one HBH ext header 9172 */ 9173 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9174 ip2dbg(("ip_source_routed_v6: Extension" 9175 " headers not processed\n")); 9176 return (B_FALSE); 9177 } 9178 hbhhdr = (ip6_hbh_t *)byteptr; 9179 nexthdr = hbhhdr->ip6h_nxt; 9180 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9181 } 9182 switch (nexthdr) { 9183 case IPPROTO_ROUTING: 9184 byteptr = (uint8_t *)ip6h + ehdrlen; 9185 /* 9186 * If for some reason, we haven't pulled up 9187 * the routing hdr data mblk, then we must 9188 * not have processed it at all. So for sure 9189 * we are not part of the source routed journey. 
9190 */ 9191 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9192 ip2dbg(("ip_source_routed_v6: Routing" 9193 " header not processed\n")); 9194 return (B_FALSE); 9195 } 9196 rthdr = (ip6_rthdr0_t *)byteptr; 9197 /* 9198 * Either we are an intermediate router or the 9199 * last hop before destination and we have 9200 * already processed the routing header. 9201 * If segment_left is greater than or equal to zero, 9202 * then we must be the (numaddr - segleft) entry 9203 * of the routing header. Although ip6r0_segleft 9204 * is a unit8_t variable, we still check for zero 9205 * or greater value, if in case the data type 9206 * is changed someday in future. 9207 */ 9208 if (rthdr->ip6r0_segleft > 0 || 9209 rthdr->ip6r0_segleft == 0) { 9210 ire_t *ire = NULL; 9211 9212 numaddr = rthdr->ip6r0_len / 2; 9213 addrptr = (in6_addr_t *)((char *)rthdr + 9214 sizeof (*rthdr)); 9215 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9216 if (addrptr != NULL) { 9217 ire = ire_ctable_lookup_v6(addrptr, NULL, 9218 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9219 MATCH_IRE_TYPE, 9220 ipst); 9221 if (ire != NULL) { 9222 ire_refrele(ire); 9223 return (B_TRUE); 9224 } 9225 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9226 } 9227 } 9228 /* FALLTHRU */ 9229 default: 9230 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9231 return (B_FALSE); 9232 } 9233 } 9234 9235 /* 9236 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9237 * Assumes that the following set of headers appear in the first 9238 * mblk: 9239 * ip6i_t (if present) CAN also appear as a separate mblk. 9240 * ip6_t 9241 * Any extension headers 9242 * TCP/UDP/SCTP header (if present) 9243 * The routine can handle an ICMPv6 header that is not in the first mblk. 9244 * 9245 * The order to determine the outgoing interface is as follows: 9246 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9247 * 2. If conn_nofailover_ill is set then use that ill. 9248 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9249 * 4. If q is an ill queue and (link local or multicast destination) then 9250 * use that ill. 9251 * 5. If IPV6_BOUND_IF has been set use that ill. 9252 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9253 * look for the best IRE match for the unspecified group to determine 9254 * the ill. 9255 * 7. For unicast: Just do an IRE lookup for the best match. 9256 * 9257 * arg2 is always a queue_t *. 9258 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9259 * the zoneid. 9260 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9261 */ 9262 void 9263 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9264 { 9265 conn_t *connp = NULL; 9266 queue_t *q = (queue_t *)arg2; 9267 ire_t *ire = NULL; 9268 ire_t *sctp_ire = NULL; 9269 ip6_t *ip6h; 9270 in6_addr_t *v6dstp; 9271 ill_t *ill = NULL; 9272 ipif_t *ipif; 9273 ip6i_t *ip6i; 9274 int cksum_request; /* -1 => normal. */ 9275 /* 1 => Skip TCP/UDP/SCTP checksum */ 9276 /* Otherwise contains insert offset for checksum */ 9277 int unspec_src; 9278 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9279 mib2_ipIfStatsEntry_t *mibptr; 9280 int match_flags = MATCH_IRE_ILL_GROUP; 9281 boolean_t attach_if = B_FALSE; 9282 mblk_t *first_mp; 9283 boolean_t mctl_present; 9284 ipsec_out_t *io; 9285 boolean_t drop_if_delayed = B_FALSE; 9286 boolean_t multirt_need_resolve = B_FALSE; 9287 mblk_t *copy_mp = NULL; 9288 int err; 9289 int ip6i_flags = 0; 9290 zoneid_t zoneid; 9291 ill_t *saved_ill = NULL; 9292 boolean_t conn_lock_held; 9293 boolean_t need_decref = B_FALSE; 9294 ip_stack_t *ipst; 9295 9296 if (q->q_next != NULL) { 9297 ill = (ill_t *)q->q_ptr; 9298 ipst = ill->ill_ipst; 9299 } else { 9300 connp = (conn_t *)arg; 9301 ASSERT(connp != NULL); 9302 ipst = connp->conn_netstack->netstack_ip; 9303 } 9304 9305 /* 9306 * Highest bit in version field is Reachability Confirmation bit 9307 * used by NUD in ip_xmit_v6(). 9308 */ 9309 #ifdef _BIG_ENDIAN 9310 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9311 #else 9312 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9313 #endif 9314 9315 /* 9316 * M_CTL comes from 6 places 9317 * 9318 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9319 * both V4 and V6 datagrams. 9320 * 9321 * 2) AH/ESP sends down M_CTL after doing their job with both 9322 * V4 and V6 datagrams. 9323 * 9324 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9325 * attached. 9326 * 9327 * 4) Notifications from an external resolver (for XRESOLV ifs) 9328 * 9329 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9330 * IPsec hardware acceleration support. 9331 * 9332 * 6) TUN_HELLO. 9333 * 9334 * We need to handle (1)'s IPv6 case and (3) here. For the 9335 * IPv4 case in (1), and (2), IPSEC processing has already 9336 * started. The code in ip_wput() already knows how to handle 9337 * continuing IPSEC processing (for IPv4 and IPv6). All other 9338 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9339 * for handling. 
9340 */ 9341 first_mp = mp; 9342 mctl_present = B_FALSE; 9343 io = NULL; 9344 9345 /* Multidata transmit? */ 9346 if (DB_TYPE(mp) == M_MULTIDATA) { 9347 /* 9348 * We should never get here, since all Multidata messages 9349 * originating from tcp should have been directed over to 9350 * tcp_multisend() in the first place. 9351 */ 9352 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9353 freemsg(mp); 9354 return; 9355 } else if (DB_TYPE(mp) == M_CTL) { 9356 uint32_t mctltype = 0; 9357 uint32_t mlen = MBLKL(first_mp); 9358 9359 mp = mp->b_cont; 9360 mctl_present = B_TRUE; 9361 io = (ipsec_out_t *)first_mp->b_rptr; 9362 9363 /* 9364 * Validate this M_CTL message. The only three types of 9365 * M_CTL messages we expect to see in this code path are 9366 * ipsec_out_t or ipsec_in_t structures (allocated as 9367 * ipsec_info_t unions), or ipsec_ctl_t structures. 9368 * The ipsec_out_type and ipsec_in_type overlap in the two 9369 * data structures, and they are either set to IPSEC_OUT 9370 * or IPSEC_IN depending on which data structure it is. 9371 * ipsec_ctl_t is an IPSEC_CTL. 9372 * 9373 * All other M_CTL messages are sent to ip_wput_nondata() 9374 * for handling. 9375 */ 9376 if (mlen >= sizeof (io->ipsec_out_type)) 9377 mctltype = io->ipsec_out_type; 9378 9379 if ((mlen == sizeof (ipsec_ctl_t)) && 9380 (mctltype == IPSEC_CTL)) { 9381 ip_output(arg, first_mp, arg2, caller); 9382 return; 9383 } 9384 9385 if ((mlen < sizeof (ipsec_info_t)) || 9386 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9387 mp == NULL) { 9388 ip_wput_nondata(NULL, q, first_mp, NULL); 9389 return; 9390 } 9391 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9392 if (q->q_next == NULL) { 9393 ip6h = (ip6_t *)mp->b_rptr; 9394 /* 9395 * For a freshly-generated TCP dgram that needs IPV6 9396 * processing, don't call ip_wput immediately. We can 9397 * tell this by the ipsec_out_proc_begin. 
In-progress 9398 * IPSEC_OUT messages have proc_begin set to TRUE, 9399 * and we want to send all IPSEC_IN messages to 9400 * ip_wput() for IPsec processing or finishing. 9401 */ 9402 if (mctltype == IPSEC_IN || 9403 IPVER(ip6h) != IPV6_VERSION || 9404 io->ipsec_out_proc_begin) { 9405 mibptr = &ipst->ips_ip6_mib; 9406 goto notv6; 9407 } 9408 } 9409 } else if (DB_TYPE(mp) != M_DATA) { 9410 ip_wput_nondata(NULL, q, mp, NULL); 9411 return; 9412 } 9413 9414 ip6h = (ip6_t *)mp->b_rptr; 9415 9416 if (IPVER(ip6h) != IPV6_VERSION) { 9417 mibptr = &ipst->ips_ip6_mib; 9418 goto notv6; 9419 } 9420 9421 if (q->q_next != NULL) { 9422 /* 9423 * We don't know if this ill will be used for IPv6 9424 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9425 * ipif_set_values() sets the ill_isv6 flag to true if 9426 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9427 * just drop the packet. 9428 */ 9429 if (!ill->ill_isv6) { 9430 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9431 "ILLF_IPV6 was set\n")); 9432 freemsg(first_mp); 9433 return; 9434 } 9435 /* For uniformity do a refhold */ 9436 mutex_enter(&ill->ill_lock); 9437 if (!ILL_CAN_LOOKUP(ill)) { 9438 mutex_exit(&ill->ill_lock); 9439 freemsg(first_mp); 9440 return; 9441 } 9442 ill_refhold_locked(ill); 9443 mutex_exit(&ill->ill_lock); 9444 mibptr = ill->ill_ip_mib; 9445 9446 ASSERT(mibptr != NULL); 9447 unspec_src = 0; 9448 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9449 do_outrequests = B_FALSE; 9450 zoneid = (zoneid_t)(uintptr_t)arg; 9451 } else { 9452 connp = (conn_t *)arg; 9453 ASSERT(connp != NULL); 9454 zoneid = connp->conn_zoneid; 9455 9456 /* is queue flow controlled? */ 9457 if ((q->q_first || connp->conn_draining) && 9458 (caller == IP_WPUT)) { 9459 /* 9460 * 1) TCP sends down M_CTL for detached connections. 9461 * 2) AH/ESP sends down M_CTL. 9462 * 9463 * We don't flow control either of the above. Only 9464 * UDP and others are flow controlled for which we 9465 * can't have a M_CTL. 
9466 */ 9467 ASSERT(first_mp == mp); 9468 (void) putq(q, mp); 9469 return; 9470 } 9471 mibptr = &ipst->ips_ip6_mib; 9472 unspec_src = connp->conn_unspec_src; 9473 do_outrequests = B_TRUE; 9474 if (mp->b_flag & MSGHASREF) { 9475 mp->b_flag &= ~MSGHASREF; 9476 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9477 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9478 need_decref = B_TRUE; 9479 } 9480 9481 /* 9482 * If there is a policy, try to attach an ipsec_out in 9483 * the front. At the end, first_mp either points to a 9484 * M_DATA message or IPSEC_OUT message linked to a 9485 * M_DATA message. We have to do it now as we might 9486 * lose the "conn" if we go through ip_newroute. 9487 */ 9488 if (!mctl_present && 9489 (connp->conn_out_enforce_policy || 9490 connp->conn_latch != NULL)) { 9491 ASSERT(first_mp == mp); 9492 /* XXX Any better way to get the protocol fast ? */ 9493 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9494 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9495 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9496 if (need_decref) 9497 CONN_DEC_REF(connp); 9498 return; 9499 } else { 9500 ASSERT(mp->b_datap->db_type == M_CTL); 9501 first_mp = mp; 9502 mp = mp->b_cont; 9503 mctl_present = B_TRUE; 9504 io = (ipsec_out_t *)first_mp->b_rptr; 9505 } 9506 } 9507 } 9508 9509 /* check for alignment and full IPv6 header */ 9510 if (!OK_32PTR((uchar_t *)ip6h) || 9511 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9512 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9513 if (do_outrequests) 9514 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9515 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9516 freemsg(first_mp); 9517 if (ill != NULL) 9518 ill_refrele(ill); 9519 if (need_decref) 9520 CONN_DEC_REF(connp); 9521 return; 9522 } 9523 v6dstp = &ip6h->ip6_dst; 9524 cksum_request = -1; 9525 ip6i = NULL; 9526 9527 /* 9528 * Once neighbor discovery has completed, ndp_process() will provide 9529 * locally generated packets for which processing can be reattempted. 
9530 * In these cases, connp is NULL and the original zone is part of a 9531 * prepended ipsec_out_t. 9532 */ 9533 if (io != NULL) { 9534 /* 9535 * When coming from icmp_input_v6, the zoneid might not match 9536 * for the loopback case, because inside icmp_input_v6 the 9537 * queue_t is a conn queue from the sending side. 9538 */ 9539 zoneid = io->ipsec_out_zoneid; 9540 ASSERT(zoneid != ALL_ZONES); 9541 } 9542 9543 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9544 /* 9545 * This is an ip6i_t header followed by an ip6_hdr. 9546 * Check which fields are set. 9547 * 9548 * When the packet comes from a transport we should have 9549 * all needed headers in the first mblk. However, when 9550 * going through ip_newroute*_v6 the ip6i might be in 9551 * a separate mblk when we return here. In that case 9552 * we pullup everything to ensure that extension and transport 9553 * headers "stay" in the first mblk. 9554 */ 9555 ip6i = (ip6i_t *)ip6h; 9556 ip6i_flags = ip6i->ip6i_flags; 9557 9558 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9559 ((mp->b_wptr - (uchar_t *)ip6i) >= 9560 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9561 9562 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9563 if (!pullupmsg(mp, -1)) { 9564 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9565 if (do_outrequests) { 9566 BUMP_MIB(mibptr, 9567 ipIfStatsHCOutRequests); 9568 } 9569 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9570 freemsg(first_mp); 9571 if (ill != NULL) 9572 ill_refrele(ill); 9573 if (need_decref) 9574 CONN_DEC_REF(connp); 9575 return; 9576 } 9577 ip6h = (ip6_t *)mp->b_rptr; 9578 v6dstp = &ip6h->ip6_dst; 9579 ip6i = (ip6i_t *)ip6h; 9580 } 9581 ip6h = (ip6_t *)&ip6i[1]; 9582 9583 /* 9584 * Advance rptr past the ip6i_t to get ready for 9585 * transmitting the packet. However, if the packet gets 9586 * passed to ip_newroute*_v6 then rptr is moved back so 9587 * that the ip6i_t header can be inspected when the 9588 * packet comes back here after passing through 9589 * ire_add_then_send. 
9590 */ 9591 mp->b_rptr = (uchar_t *)ip6h; 9592 9593 /* 9594 * IP6I_ATTACH_IF is set in this function when we had a 9595 * conn and it was either bound to the IPFF_NOFAILOVER address 9596 * or IPV6_BOUND_PIF was set. These options override other 9597 * options that set the ifindex. We come here with 9598 * IP6I_ATTACH_IF set when we can't find the ire and 9599 * ip_newroute_v6 is feeding the packet for second time. 9600 */ 9601 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9602 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9603 ASSERT(ip6i->ip6i_ifindex != 0); 9604 if (ill != NULL) 9605 ill_refrele(ill); 9606 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9607 NULL, NULL, NULL, NULL, ipst); 9608 if (ill == NULL) { 9609 if (do_outrequests) { 9610 BUMP_MIB(mibptr, 9611 ipIfStatsHCOutRequests); 9612 } 9613 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9614 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9615 ip6i->ip6i_ifindex)); 9616 if (need_decref) 9617 CONN_DEC_REF(connp); 9618 freemsg(first_mp); 9619 return; 9620 } 9621 mibptr = ill->ill_ip_mib; 9622 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9623 /* 9624 * Preserve the index so that when we return 9625 * from IPSEC processing, we know where to 9626 * send the packet. 9627 */ 9628 if (mctl_present) { 9629 ASSERT(io != NULL); 9630 io->ipsec_out_ill_index = 9631 ip6i->ip6i_ifindex; 9632 } 9633 } 9634 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9635 /* 9636 * This is a multipathing probe packet that has 9637 * been delayed in ND resolution. 
Drop the 9638 * packet for the reasons mentioned in 9639 * nce_queue_mp() 9640 */ 9641 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9642 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9643 freemsg(first_mp); 9644 ill_refrele(ill); 9645 if (need_decref) 9646 CONN_DEC_REF(connp); 9647 return; 9648 } 9649 } 9650 } 9651 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9652 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9653 9654 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9655 if (secpolicy_net_rawaccess(cr) != 0) { 9656 /* 9657 * Use IPCL_ZONEID to honor SO_ALLZONES. 9658 */ 9659 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9660 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9661 NULL, connp != NULL ? 9662 IPCL_ZONEID(connp) : zoneid, NULL, 9663 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9664 if (ire == NULL) { 9665 if (do_outrequests) 9666 BUMP_MIB(mibptr, 9667 ipIfStatsHCOutRequests); 9668 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9669 ip1dbg(("ip_wput_v6: bad source " 9670 "addr\n")); 9671 freemsg(first_mp); 9672 if (ill != NULL) 9673 ill_refrele(ill); 9674 if (need_decref) 9675 CONN_DEC_REF(connp); 9676 return; 9677 } 9678 ire_refrele(ire); 9679 } 9680 /* No need to verify again when using ip_newroute */ 9681 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9682 } 9683 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9684 /* 9685 * Make sure they match since ip_newroute*_v6 etc might 9686 * (unknown to them) inspect ip6i_nexthop when 9687 * they think they access ip6_dst. 9688 */ 9689 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9690 } 9691 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9692 cksum_request = 1; 9693 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9694 cksum_request = ip6i->ip6i_checksum_off; 9695 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9696 unspec_src = 1; 9697 9698 if (do_outrequests && ill != NULL) { 9699 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9700 do_outrequests = B_FALSE; 9701 } 9702 /* 9703 * Store ip6i_t info that we need after we come back 9704 * from IPSEC processing. 
9705 */ 9706 if (mctl_present) { 9707 ASSERT(io != NULL); 9708 io->ipsec_out_unspec_src = unspec_src; 9709 } 9710 } 9711 if (connp != NULL && connp->conn_dontroute) 9712 ip6h->ip6_hops = 1; 9713 9714 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9715 goto ipv6multicast; 9716 9717 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9718 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9719 ill_t *conn_outgoing_pill; 9720 9721 conn_outgoing_pill = conn_get_held_ill(connp, 9722 &connp->conn_outgoing_pill, &err); 9723 if (err == ILL_LOOKUP_FAILED) { 9724 if (ill != NULL) 9725 ill_refrele(ill); 9726 if (need_decref) 9727 CONN_DEC_REF(connp); 9728 freemsg(first_mp); 9729 return; 9730 } 9731 if (conn_outgoing_pill != NULL) { 9732 if (ill != NULL) 9733 ill_refrele(ill); 9734 ill = conn_outgoing_pill; 9735 attach_if = B_TRUE; 9736 match_flags = MATCH_IRE_ILL; 9737 mibptr = ill->ill_ip_mib; 9738 9739 /* 9740 * Check if we need an ire that will not be 9741 * looked up by anybody else i.e. HIDDEN. 9742 */ 9743 if (ill_is_probeonly(ill)) 9744 match_flags |= MATCH_IRE_MARK_HIDDEN; 9745 goto send_from_ill; 9746 } 9747 } 9748 9749 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9750 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9751 ill_t *conn_nofailover_ill; 9752 9753 conn_nofailover_ill = conn_get_held_ill(connp, 9754 &connp->conn_nofailover_ill, &err); 9755 if (err == ILL_LOOKUP_FAILED) { 9756 if (ill != NULL) 9757 ill_refrele(ill); 9758 if (need_decref) 9759 CONN_DEC_REF(connp); 9760 freemsg(first_mp); 9761 return; 9762 } 9763 if (conn_nofailover_ill != NULL) { 9764 if (ill != NULL) 9765 ill_refrele(ill); 9766 ill = conn_nofailover_ill; 9767 attach_if = B_TRUE; 9768 /* 9769 * Assumes that ipc_nofailover_ill is used only for 9770 * multipathing probe packets. These packets are better 9771 * dropped, if they are delayed in ND resolution, for 9772 * the reasons described in nce_queue_mp(). 
9773 * IP6I_DROP_IFDELAYED will be set later on in this 9774 * function for this packet. 9775 */ 9776 drop_if_delayed = B_TRUE; 9777 match_flags = MATCH_IRE_ILL; 9778 mibptr = ill->ill_ip_mib; 9779 9780 /* 9781 * Check if we need an ire that will not be 9782 * looked up by anybody else i.e. HIDDEN. 9783 */ 9784 if (ill_is_probeonly(ill)) 9785 match_flags |= MATCH_IRE_MARK_HIDDEN; 9786 goto send_from_ill; 9787 } 9788 } 9789 9790 /* 9791 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9792 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9793 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9794 */ 9795 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9796 ASSERT(ip6i->ip6i_ifindex != 0); 9797 attach_if = B_TRUE; 9798 ASSERT(ill != NULL); 9799 match_flags = MATCH_IRE_ILL; 9800 9801 /* 9802 * Check if we need an ire that will not be 9803 * looked up by anybody else i.e. HIDDEN. 9804 */ 9805 if (ill_is_probeonly(ill)) 9806 match_flags |= MATCH_IRE_MARK_HIDDEN; 9807 goto send_from_ill; 9808 } 9809 9810 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9811 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9812 ASSERT(ill != NULL); 9813 goto send_from_ill; 9814 } 9815 9816 /* 9817 * 4. If q is an ill queue and (link local or multicast destination) 9818 * then use that ill. 9819 */ 9820 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9821 goto send_from_ill; 9822 } 9823 9824 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9825 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9826 ill_t *conn_outgoing_ill; 9827 9828 conn_outgoing_ill = conn_get_held_ill(connp, 9829 &connp->conn_outgoing_ill, &err); 9830 if (err == ILL_LOOKUP_FAILED) { 9831 if (ill != NULL) 9832 ill_refrele(ill); 9833 if (need_decref) 9834 CONN_DEC_REF(connp); 9835 freemsg(first_mp); 9836 return; 9837 } 9838 if (ill != NULL) 9839 ill_refrele(ill); 9840 ill = conn_outgoing_ill; 9841 mibptr = ill->ill_ip_mib; 9842 goto send_from_ill; 9843 } 9844 9845 /* 9846 * 6. For unicast: Just do an IRE lookup for the best match. 9847 * If we get here for a link-local address it is rather random 9848 * what interface we pick on a multihomed host. 9849 * *If* there is an IRE_CACHE (and the link-local address 9850 * isn't duplicated on multi links) this will find the IRE_CACHE. 9851 * Otherwise it will use one of the matching IRE_INTERFACE routes 9852 * for the link-local prefix. Hence, applications 9853 * *should* be encouraged to specify an outgoing interface when sending 9854 * to a link local address. 9855 */ 9856 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9857 !connp->conn_fully_bound)) { 9858 /* 9859 * We cache IRE_CACHEs to avoid lookups. We don't do 9860 * this for the tcp global queue and listen end point 9861 * as it does not really have a real destination to 9862 * talk to. 9863 */ 9864 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9865 ipst); 9866 } else { 9867 /* 9868 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9869 * grab a lock here to check for CONDEMNED as it is okay 9870 * to send a packet or two with the IRE_CACHE that is going 9871 * away. 9872 */ 9873 mutex_enter(&connp->conn_lock); 9874 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9875 if (ire != NULL && 9876 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9877 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9878 9879 IRE_REFHOLD(ire); 9880 mutex_exit(&connp->conn_lock); 9881 9882 } else { 9883 boolean_t cached = B_FALSE; 9884 9885 connp->conn_ire_cache = NULL; 9886 mutex_exit(&connp->conn_lock); 9887 /* Release the old ire */ 9888 if (ire != NULL && sctp_ire == NULL) 9889 IRE_REFRELE_NOTR(ire); 9890 9891 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9892 MBLK_GETLABEL(mp), ipst); 9893 if (ire != NULL) { 9894 IRE_REFHOLD_NOTR(ire); 9895 9896 mutex_enter(&connp->conn_lock); 9897 if (CONN_CACHE_IRE(connp) && 9898 (connp->conn_ire_cache == NULL)) { 9899 rw_enter(&ire->ire_bucket->irb_lock, 9900 RW_READER); 9901 if (!(ire->ire_marks & 9902 IRE_MARK_CONDEMNED)) { 9903 connp->conn_ire_cache = ire; 9904 cached = B_TRUE; 9905 } 9906 rw_exit(&ire->ire_bucket->irb_lock); 9907 } 9908 mutex_exit(&connp->conn_lock); 9909 9910 /* 9911 * We can continue to use the ire but since it 9912 * was not cached, we should drop the extra 9913 * reference. 9914 */ 9915 if (!cached) 9916 IRE_REFRELE_NOTR(ire); 9917 } 9918 } 9919 } 9920 9921 if (ire != NULL) { 9922 if (do_outrequests) { 9923 /* Handle IRE_LOCAL's that might appear here */ 9924 if (ire->ire_type == IRE_CACHE) { 9925 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9926 ill_ip_mib; 9927 } else { 9928 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9929 } 9930 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9931 } 9932 ASSERT(!attach_if); 9933 9934 /* 9935 * Check if the ire has the RTF_MULTIRT flag, inherited 9936 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9937 */ 9938 if (ire->ire_flags & RTF_MULTIRT) { 9939 /* 9940 * Force hop limit of multirouted packets if required. 9941 * The hop limit of such packets is bounded by the 9942 * ip_multirt_ttl ndd variable. 9943 * NDP packets must have a hop limit of 255; don't 9944 * change the hop limit in that case. 
9945 */ 9946 if ((ipst->ips_ip_multirt_ttl > 0) && 9947 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9948 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9949 if (ip_debug > 3) { 9950 ip2dbg(("ip_wput_v6: forcing multirt " 9951 "hop limit to %d (was %d) ", 9952 ipst->ips_ip_multirt_ttl, 9953 ip6h->ip6_hops)); 9954 pr_addr_dbg("v6dst %s\n", AF_INET6, 9955 &ire->ire_addr_v6); 9956 } 9957 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9958 } 9959 9960 /* 9961 * We look at this point if there are pending 9962 * unresolved routes. ire_multirt_need_resolve_v6() 9963 * checks in O(n) that all IRE_OFFSUBNET ire 9964 * entries for the packet's destination and 9965 * flagged RTF_MULTIRT are currently resolved. 9966 * If some remain unresolved, we do a copy 9967 * of the current message. It will be used 9968 * to initiate additional route resolutions. 9969 */ 9970 multirt_need_resolve = 9971 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9972 MBLK_GETLABEL(first_mp), ipst); 9973 ip2dbg(("ip_wput_v6: ire %p, " 9974 "multirt_need_resolve %d, first_mp %p\n", 9975 (void *)ire, multirt_need_resolve, 9976 (void *)first_mp)); 9977 if (multirt_need_resolve) { 9978 copy_mp = copymsg(first_mp); 9979 if (copy_mp != NULL) { 9980 MULTIRT_DEBUG_TAG(copy_mp); 9981 } 9982 } 9983 } 9984 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9985 connp, caller, 0, ip6i_flags, zoneid); 9986 if (need_decref) { 9987 CONN_DEC_REF(connp); 9988 connp = NULL; 9989 } 9990 IRE_REFRELE(ire); 9991 9992 /* 9993 * Try to resolve another multiroute if 9994 * ire_multirt_need_resolve_v6() deemed it necessary. 9995 * copy_mp will be consumed (sent or freed) by 9996 * ip_newroute_v6(). 
9997 */ 9998 if (copy_mp != NULL) { 9999 if (mctl_present) { 10000 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10001 } else { 10002 ip6h = (ip6_t *)copy_mp->b_rptr; 10003 } 10004 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10005 &ip6h->ip6_src, NULL, zoneid, ipst); 10006 } 10007 if (ill != NULL) 10008 ill_refrele(ill); 10009 return; 10010 } 10011 10012 /* 10013 * No full IRE for this destination. Send it to 10014 * ip_newroute_v6 to see if anything else matches. 10015 * Mark this packet as having originated on this 10016 * machine. 10017 * Update rptr if there was an ip6i_t header. 10018 */ 10019 mp->b_prev = NULL; 10020 mp->b_next = NULL; 10021 if (ip6i != NULL) 10022 mp->b_rptr -= sizeof (ip6i_t); 10023 10024 if (unspec_src) { 10025 if (ip6i == NULL) { 10026 /* 10027 * Add ip6i_t header to carry unspec_src 10028 * until the packet comes back in ip_wput_v6. 10029 */ 10030 mp = ip_add_info_v6(mp, NULL, v6dstp); 10031 if (mp == NULL) { 10032 if (do_outrequests) 10033 BUMP_MIB(mibptr, 10034 ipIfStatsHCOutRequests); 10035 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10036 if (mctl_present) 10037 freeb(first_mp); 10038 if (ill != NULL) 10039 ill_refrele(ill); 10040 if (need_decref) 10041 CONN_DEC_REF(connp); 10042 return; 10043 } 10044 ip6i = (ip6i_t *)mp->b_rptr; 10045 10046 if (mctl_present) { 10047 ASSERT(first_mp != mp); 10048 first_mp->b_cont = mp; 10049 } else { 10050 first_mp = mp; 10051 } 10052 10053 if ((mp->b_wptr - (uchar_t *)ip6i) == 10054 sizeof (ip6i_t)) { 10055 /* 10056 * ndp_resolver called from ip_newroute_v6 10057 * expects pulled up message. 
10058 */ 10059 if (!pullupmsg(mp, -1)) { 10060 ip1dbg(("ip_wput_v6: pullupmsg" 10061 " failed\n")); 10062 if (do_outrequests) { 10063 BUMP_MIB(mibptr, 10064 ipIfStatsHCOutRequests); 10065 } 10066 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10067 freemsg(first_mp); 10068 if (ill != NULL) 10069 ill_refrele(ill); 10070 if (need_decref) 10071 CONN_DEC_REF(connp); 10072 return; 10073 } 10074 ip6i = (ip6i_t *)mp->b_rptr; 10075 } 10076 ip6h = (ip6_t *)&ip6i[1]; 10077 v6dstp = &ip6h->ip6_dst; 10078 } 10079 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10080 if (mctl_present) { 10081 ASSERT(io != NULL); 10082 io->ipsec_out_unspec_src = unspec_src; 10083 } 10084 } 10085 if (do_outrequests) 10086 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10087 if (need_decref) 10088 CONN_DEC_REF(connp); 10089 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10090 if (ill != NULL) 10091 ill_refrele(ill); 10092 return; 10093 10094 10095 /* 10096 * Handle multicast packets with or without an conn. 10097 * Assumes that the transports set ip6_hops taking 10098 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10099 * into account. 10100 */ 10101 ipv6multicast: 10102 ip2dbg(("ip_wput_v6: multicast\n")); 10103 10104 /* 10105 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10106 * 2. If conn_nofailover_ill is set then use that ill. 10107 * 10108 * Hold the conn_lock till we refhold the ill of interest that is 10109 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10110 * while holding any locks, postpone the refrele until after the 10111 * conn_lock is dropped. 
10112 */ 10113 if (connp != NULL) { 10114 mutex_enter(&connp->conn_lock); 10115 conn_lock_held = B_TRUE; 10116 } else { 10117 conn_lock_held = B_FALSE; 10118 } 10119 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10120 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10121 if (err == ILL_LOOKUP_FAILED) { 10122 ip1dbg(("ip_output_v6: multicast" 10123 " conn_outgoing_pill no ipif\n")); 10124 multicast_discard: 10125 ASSERT(saved_ill == NULL); 10126 if (conn_lock_held) 10127 mutex_exit(&connp->conn_lock); 10128 if (ill != NULL) 10129 ill_refrele(ill); 10130 freemsg(first_mp); 10131 if (do_outrequests) 10132 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10133 if (need_decref) 10134 CONN_DEC_REF(connp); 10135 return; 10136 } 10137 saved_ill = ill; 10138 ill = connp->conn_outgoing_pill; 10139 attach_if = B_TRUE; 10140 match_flags = MATCH_IRE_ILL; 10141 mibptr = ill->ill_ip_mib; 10142 10143 /* 10144 * Check if we need an ire that will not be 10145 * looked up by anybody else i.e. HIDDEN. 10146 */ 10147 if (ill_is_probeonly(ill)) 10148 match_flags |= MATCH_IRE_MARK_HIDDEN; 10149 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10150 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10151 if (err == ILL_LOOKUP_FAILED) { 10152 ip1dbg(("ip_output_v6: multicast" 10153 " conn_nofailover_ill no ipif\n")); 10154 goto multicast_discard; 10155 } 10156 saved_ill = ill; 10157 ill = connp->conn_nofailover_ill; 10158 attach_if = B_TRUE; 10159 match_flags = MATCH_IRE_ILL; 10160 10161 /* 10162 * Check if we need an ire that will not be 10163 * looked up by anybody else i.e. HIDDEN. 10164 */ 10165 if (ill_is_probeonly(ill)) 10166 match_flags |= MATCH_IRE_MARK_HIDDEN; 10167 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10168 /* 10169 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10170 * we should have an ip6i_t with IP6I_ATTACH_IF if 10171 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10172 * used on this endpoint. 10173 */ 10174 ASSERT(ip6i->ip6i_ifindex != 0); 10175 attach_if = B_TRUE; 10176 ASSERT(ill != NULL); 10177 match_flags = MATCH_IRE_ILL; 10178 10179 /* 10180 * Check if we need an ire that will not be 10181 * looked up by anybody else i.e. HIDDEN. 10182 */ 10183 if (ill_is_probeonly(ill)) 10184 match_flags |= MATCH_IRE_MARK_HIDDEN; 10185 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10186 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10187 10188 ASSERT(ill != NULL); 10189 } else if (ill != NULL) { 10190 /* 10191 * 4. If q is an ill queue and (link local or multicast 10192 * destination) then use that ill. 10193 * We don't need the ipif initialization here. 10194 * This useless assert below is just to prevent lint from 10195 * reporting a null body if statement. 10196 */ 10197 ASSERT(ill != NULL); 10198 } else if (connp != NULL) { 10199 /* 10200 * 5. If IPV6_BOUND_IF has been set use that ill. 10201 * 10202 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10203 * Otherwise look for the best IRE match for the unspecified 10204 * group to determine the ill. 10205 * 10206 * conn_multicast_ill is used for only IPv6 packets. 10207 * conn_multicast_ipif is used for only IPv4 packets. 10208 * Thus a PF_INET6 socket send both IPv4 and IPv6 10209 * multicast packets using different IP*_MULTICAST_IF 10210 * interfaces. 
10211 */ 10212 if (connp->conn_outgoing_ill != NULL) { 10213 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10214 if (err == ILL_LOOKUP_FAILED) { 10215 ip1dbg(("ip_output_v6: multicast" 10216 " conn_outgoing_ill no ipif\n")); 10217 goto multicast_discard; 10218 } 10219 ill = connp->conn_outgoing_ill; 10220 } else if (connp->conn_multicast_ill != NULL) { 10221 err = ill_check_and_refhold(connp->conn_multicast_ill); 10222 if (err == ILL_LOOKUP_FAILED) { 10223 ip1dbg(("ip_output_v6: multicast" 10224 " conn_multicast_ill no ipif\n")); 10225 goto multicast_discard; 10226 } 10227 ill = connp->conn_multicast_ill; 10228 } else { 10229 mutex_exit(&connp->conn_lock); 10230 conn_lock_held = B_FALSE; 10231 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10232 if (ipif == NULL) { 10233 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10234 goto multicast_discard; 10235 } 10236 /* 10237 * We have a ref to this ipif, so we can safely 10238 * access ipif_ill. 10239 */ 10240 ill = ipif->ipif_ill; 10241 mutex_enter(&ill->ill_lock); 10242 if (!ILL_CAN_LOOKUP(ill)) { 10243 mutex_exit(&ill->ill_lock); 10244 ipif_refrele(ipif); 10245 ill = NULL; 10246 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10247 goto multicast_discard; 10248 } 10249 ill_refhold_locked(ill); 10250 mutex_exit(&ill->ill_lock); 10251 ipif_refrele(ipif); 10252 /* 10253 * Save binding until IPV6_MULTICAST_IF 10254 * changes it 10255 */ 10256 mutex_enter(&connp->conn_lock); 10257 connp->conn_multicast_ill = ill; 10258 connp->conn_orig_multicast_ifindex = 10259 ill->ill_phyint->phyint_ifindex; 10260 mutex_exit(&connp->conn_lock); 10261 } 10262 } 10263 if (conn_lock_held) 10264 mutex_exit(&connp->conn_lock); 10265 10266 if (saved_ill != NULL) 10267 ill_refrele(saved_ill); 10268 10269 ASSERT(ill != NULL); 10270 /* 10271 * For multicast loopback interfaces replace the multicast address 10272 * with a unicast address for the ire lookup. 
10273 */ 10274 if (IS_LOOPBACK(ill)) 10275 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10276 10277 mibptr = ill->ill_ip_mib; 10278 if (do_outrequests) { 10279 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10280 do_outrequests = B_FALSE; 10281 } 10282 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10283 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10284 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10285 10286 /* 10287 * As we may lose the conn by the time we reach ip_wput_ire_v6 10288 * we copy conn_multicast_loop and conn_dontroute on to an 10289 * ipsec_out. In case if this datagram goes out secure, 10290 * we need the ill_index also. Copy that also into the 10291 * ipsec_out. 10292 */ 10293 if (mctl_present) { 10294 io = (ipsec_out_t *)first_mp->b_rptr; 10295 ASSERT(first_mp->b_datap->db_type == M_CTL); 10296 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10297 } else { 10298 ASSERT(mp == first_mp); 10299 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10300 NULL) { 10301 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10302 freemsg(mp); 10303 if (ill != NULL) 10304 ill_refrele(ill); 10305 if (need_decref) 10306 CONN_DEC_REF(connp); 10307 return; 10308 } 10309 io = (ipsec_out_t *)first_mp->b_rptr; 10310 /* This is not a secure packet */ 10311 io->ipsec_out_secure = B_FALSE; 10312 io->ipsec_out_use_global_policy = B_TRUE; 10313 io->ipsec_out_zoneid = 10314 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10315 first_mp->b_cont = mp; 10316 mctl_present = B_TRUE; 10317 } 10318 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10319 io->ipsec_out_unspec_src = unspec_src; 10320 if (connp != NULL) 10321 io->ipsec_out_dontroute = connp->conn_dontroute; 10322 10323 send_from_ill: 10324 ASSERT(ill != NULL); 10325 ASSERT(mibptr == ill->ill_ip_mib); 10326 if (do_outrequests) { 10327 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10328 do_outrequests = B_FALSE; 10329 } 10330 10331 if (io != NULL) 10332 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10333 10334 /* 10335 * When a specific ill is specified (using IPV6_PKTINFO, 10336 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10337 * on routing entries (ftable and ctable) that have a matching 10338 * ire->ire_ipif->ipif_ill. Thus this can only be used 10339 * for destinations that are on-link for the specific ill 10340 * and that can appear on multiple links. Thus it is useful 10341 * for multicast destinations, link-local destinations, and 10342 * at some point perhaps for site-local destinations (if the 10343 * node sits at a site boundary). 10344 * We create the cache entries in the regular ctable since 10345 * it can not "confuse" things for other destinations. 10346 * table. 10347 * 10348 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10349 * It is used only when ire_cache_lookup is used above. 10350 */ 10351 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10352 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10353 if (ire != NULL) { 10354 /* 10355 * Check if the ire has the RTF_MULTIRT flag, inherited 10356 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10357 */ 10358 if (ire->ire_flags & RTF_MULTIRT) { 10359 /* 10360 * Force hop limit of multirouted packets if required. 10361 * The hop limit of such packets is bounded by the 10362 * ip_multirt_ttl ndd variable. 
10363 * NDP packets must have a hop limit of 255; don't 10364 * change the hop limit in that case. 10365 */ 10366 if ((ipst->ips_ip_multirt_ttl > 0) && 10367 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10368 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10369 if (ip_debug > 3) { 10370 ip2dbg(("ip_wput_v6: forcing multirt " 10371 "hop limit to %d (was %d) ", 10372 ipst->ips_ip_multirt_ttl, 10373 ip6h->ip6_hops)); 10374 pr_addr_dbg("v6dst %s\n", AF_INET6, 10375 &ire->ire_addr_v6); 10376 } 10377 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10378 } 10379 10380 /* 10381 * We look at this point if there are pending 10382 * unresolved routes. ire_multirt_need_resolve_v6() 10383 * checks in O(n) that all IRE_OFFSUBNET ire 10384 * entries for the packet's destination and 10385 * flagged RTF_MULTIRT are currently resolved. 10386 * If some remain unresolved, we make a copy 10387 * of the current message. It will be used 10388 * to initiate additional route resolutions. 10389 */ 10390 multirt_need_resolve = 10391 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10392 MBLK_GETLABEL(first_mp), ipst); 10393 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10394 "multirt_need_resolve %d, first_mp %p\n", 10395 (void *)ire, multirt_need_resolve, 10396 (void *)first_mp)); 10397 if (multirt_need_resolve) { 10398 copy_mp = copymsg(first_mp); 10399 if (copy_mp != NULL) { 10400 MULTIRT_DEBUG_TAG(copy_mp); 10401 } 10402 } 10403 } 10404 10405 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10406 ill->ill_name, (void *)ire, 10407 ill->ill_phyint->phyint_ifindex)); 10408 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10409 connp, caller, 10410 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10411 ip6i_flags, zoneid); 10412 ire_refrele(ire); 10413 if (need_decref) { 10414 CONN_DEC_REF(connp); 10415 connp = NULL; 10416 } 10417 10418 /* 10419 * Try to resolve another multiroute if 10420 * ire_multirt_need_resolve_v6() deemed it necessary. 
10421 * copy_mp will be consumed (sent or freed) by 10422 * ip_newroute_[ipif_]v6(). 10423 */ 10424 if (copy_mp != NULL) { 10425 if (mctl_present) { 10426 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10427 } else { 10428 ip6h = (ip6_t *)copy_mp->b_rptr; 10429 } 10430 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10431 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10432 zoneid, ipst); 10433 if (ipif == NULL) { 10434 ip1dbg(("ip_wput_v6: No ipif for " 10435 "multicast\n")); 10436 MULTIRT_DEBUG_UNTAG(copy_mp); 10437 freemsg(copy_mp); 10438 return; 10439 } 10440 ip_newroute_ipif_v6(q, copy_mp, ipif, 10441 ip6h->ip6_dst, unspec_src, zoneid); 10442 ipif_refrele(ipif); 10443 } else { 10444 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10445 &ip6h->ip6_src, ill, zoneid, ipst); 10446 } 10447 } 10448 ill_refrele(ill); 10449 return; 10450 } 10451 if (need_decref) { 10452 CONN_DEC_REF(connp); 10453 connp = NULL; 10454 } 10455 10456 /* Update rptr if there was an ip6i_t header. */ 10457 if (ip6i != NULL) 10458 mp->b_rptr -= sizeof (ip6i_t); 10459 if (unspec_src || attach_if) { 10460 if (ip6i == NULL) { 10461 /* 10462 * Add ip6i_t header to carry unspec_src 10463 * or attach_if until the packet comes back in 10464 * ip_wput_v6. 10465 */ 10466 if (mctl_present) { 10467 first_mp->b_cont = 10468 ip_add_info_v6(mp, NULL, v6dstp); 10469 mp = first_mp->b_cont; 10470 if (mp == NULL) 10471 freeb(first_mp); 10472 } else { 10473 first_mp = mp = ip_add_info_v6(mp, NULL, 10474 v6dstp); 10475 } 10476 if (mp == NULL) { 10477 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10478 ill_refrele(ill); 10479 return; 10480 } 10481 ip6i = (ip6i_t *)mp->b_rptr; 10482 if ((mp->b_wptr - (uchar_t *)ip6i) == 10483 sizeof (ip6i_t)) { 10484 /* 10485 * ndp_resolver called from ip_newroute_v6 10486 * expects a pulled up message. 
10487 */ 10488 if (!pullupmsg(mp, -1)) { 10489 ip1dbg(("ip_wput_v6: pullupmsg" 10490 " failed\n")); 10491 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10492 freemsg(first_mp); 10493 return; 10494 } 10495 ip6i = (ip6i_t *)mp->b_rptr; 10496 } 10497 ip6h = (ip6_t *)&ip6i[1]; 10498 v6dstp = &ip6h->ip6_dst; 10499 } 10500 if (unspec_src) 10501 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10502 if (attach_if) { 10503 /* 10504 * Bind to nofailover/BOUND_PIF overrides ifindex. 10505 */ 10506 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10507 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10508 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10509 if (drop_if_delayed) { 10510 /* This is a multipathing probe packet */ 10511 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10512 } 10513 } 10514 if (mctl_present) { 10515 ASSERT(io != NULL); 10516 io->ipsec_out_unspec_src = unspec_src; 10517 } 10518 } 10519 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10520 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10521 unspec_src, zoneid); 10522 } else { 10523 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10524 zoneid, ipst); 10525 } 10526 ill_refrele(ill); 10527 return; 10528 10529 notv6: 10530 /* 10531 * XXX implement a IPv4 and IPv6 packet counter per conn and 10532 * switch when ratio exceeds e.g. 10:1 10533 */ 10534 if (q->q_next == NULL) { 10535 connp = Q_TO_CONN(q); 10536 10537 if (IPCL_IS_TCP(connp)) { 10538 /* change conn_send for the tcp_v4_connections */ 10539 connp->conn_send = ip_output; 10540 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10541 /* The 'q' is the default SCTP queue */ 10542 connp = (conn_t *)arg; 10543 } else { 10544 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 10545 } 10546 } 10547 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10548 (void) ip_output(arg, first_mp, arg2, caller); 10549 if (ill != NULL) 10550 ill_refrele(ill); 10551 } 10552 10553 /* 10554 * If this is a conn_t queue, then we pass in the conn. This includes the 10555 * zoneid. 
10556 * Otherwise, this is a message for an ill_t queue, 10557 * in which case we use the global zoneid since those are all part of 10558 * the global zone. 10559 */ 10560 static void 10561 ip_wput_v6(queue_t *q, mblk_t *mp) 10562 { 10563 if (CONN_Q(q)) 10564 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10565 else 10566 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10567 } 10568 10569 static void 10570 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10571 { 10572 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10573 io->ipsec_out_attach_if = B_TRUE; 10574 io->ipsec_out_ill_index = attach_index; 10575 } 10576 10577 /* 10578 * NULL send-to queue - packet is to be delivered locally. 10579 */ 10580 void 10581 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10582 ire_t *ire, int fanout_flags) 10583 { 10584 uint32_t ports; 10585 mblk_t *mp = first_mp, *first_mp1; 10586 boolean_t mctl_present; 10587 uint8_t nexthdr; 10588 uint16_t hdr_length; 10589 ipsec_out_t *io; 10590 mib2_ipIfStatsEntry_t *mibptr; 10591 ilm_t *ilm; 10592 uint_t nexthdr_offset; 10593 ip_stack_t *ipst = ill->ill_ipst; 10594 10595 if (DB_TYPE(mp) == M_CTL) { 10596 io = (ipsec_out_t *)mp->b_rptr; 10597 if (!io->ipsec_out_secure) { 10598 mp = mp->b_cont; 10599 freeb(first_mp); 10600 first_mp = mp; 10601 mctl_present = B_FALSE; 10602 } else { 10603 mctl_present = B_TRUE; 10604 mp = first_mp->b_cont; 10605 ipsec_out_to_in(first_mp); 10606 } 10607 } else { 10608 mctl_present = B_FALSE; 10609 } 10610 10611 /* 10612 * Remove reachability confirmation bit from version field 10613 * before passing the packet on to any firewall hooks or 10614 * looping back the packet. 
10615 */ 10616 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10617 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10618 10619 DTRACE_PROBE4(ip6__loopback__in__start, 10620 ill_t *, ill, ill_t *, NULL, 10621 ip6_t *, ip6h, mblk_t *, first_mp); 10622 10623 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10624 ipst->ips_ipv6firewall_loopback_in, 10625 ill, NULL, ip6h, first_mp, mp, ipst); 10626 10627 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10628 10629 if (first_mp == NULL) 10630 return; 10631 10632 nexthdr = ip6h->ip6_nxt; 10633 mibptr = ill->ill_ip_mib; 10634 10635 /* Fastpath */ 10636 switch (nexthdr) { 10637 case IPPROTO_TCP: 10638 case IPPROTO_UDP: 10639 case IPPROTO_ICMPV6: 10640 case IPPROTO_SCTP: 10641 hdr_length = IPV6_HDR_LEN; 10642 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10643 (uchar_t *)ip6h); 10644 break; 10645 default: { 10646 uint8_t *nexthdrp; 10647 10648 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10649 &hdr_length, &nexthdrp)) { 10650 /* Malformed packet */ 10651 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10652 freemsg(first_mp); 10653 return; 10654 } 10655 nexthdr = *nexthdrp; 10656 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10657 break; 10658 } 10659 } 10660 10661 UPDATE_OB_PKT_COUNT(ire); 10662 ire->ire_last_used_time = lbolt; 10663 10664 switch (nexthdr) { 10665 case IPPROTO_TCP: 10666 if (DB_TYPE(mp) == M_DATA) { 10667 /* 10668 * M_DATA mblk, so init mblk (chain) for 10669 * no struio(). 
10670 */ 10671 mblk_t *mp1 = mp; 10672 10673 do { 10674 mp1->b_datap->db_struioflag = 0; 10675 } while ((mp1 = mp1->b_cont) != NULL); 10676 } 10677 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10678 TCP_PORTS_OFFSET); 10679 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10680 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10681 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10682 hdr_length, mctl_present, ire->ire_zoneid); 10683 return; 10684 10685 case IPPROTO_UDP: 10686 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10687 UDP_PORTS_OFFSET); 10688 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10689 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10690 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10691 return; 10692 10693 case IPPROTO_SCTP: 10694 { 10695 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10696 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10697 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10698 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10699 return; 10700 } 10701 case IPPROTO_ICMPV6: { 10702 icmp6_t *icmp6; 10703 10704 /* check for full IPv6+ICMPv6 header */ 10705 if ((mp->b_wptr - mp->b_rptr) < 10706 (hdr_length + ICMP6_MINLEN)) { 10707 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10708 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10709 " failed\n")); 10710 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10711 freemsg(first_mp); 10712 return; 10713 } 10714 ip6h = (ip6_t *)mp->b_rptr; 10715 } 10716 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10717 10718 /* Update output mib stats */ 10719 icmp_update_out_mib_v6(ill, icmp6); 10720 10721 /* Check variable for testing applications */ 10722 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10723 freemsg(first_mp); 10724 return; 10725 } 10726 /* 10727 * Assume that there is always at least one conn for 10728 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10729 * where there is no conn. 
10730 */ 10731 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10732 !IS_LOOPBACK(ill)) { 10733 /* 10734 * In the multicast case, applications may have 10735 * joined the group from different zones, so we 10736 * need to deliver the packet to each of them. 10737 * Loop through the multicast memberships 10738 * structures (ilm) on the receive ill and send 10739 * a copy of the packet up each matching one. 10740 * However, we don't do this for multicasts sent 10741 * on the loopback interface (PHYI_LOOPBACK flag 10742 * set) as they must stay in the sender's zone. 10743 */ 10744 ILM_WALKER_HOLD(ill); 10745 for (ilm = ill->ill_ilm; ilm != NULL; 10746 ilm = ilm->ilm_next) { 10747 if (ilm->ilm_flags & ILM_DELETED) 10748 continue; 10749 if (!IN6_ARE_ADDR_EQUAL( 10750 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10751 continue; 10752 if ((fanout_flags & 10753 IP_FF_NO_MCAST_LOOP) && 10754 ilm->ilm_zoneid == ire->ire_zoneid) 10755 continue; 10756 if (!ipif_lookup_zoneid(ill, 10757 ilm->ilm_zoneid, IPIF_UP, NULL)) 10758 continue; 10759 10760 first_mp1 = ip_copymsg(first_mp); 10761 if (first_mp1 == NULL) 10762 continue; 10763 icmp_inbound_v6(q, first_mp1, ill, 10764 hdr_length, mctl_present, 10765 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10766 NULL); 10767 } 10768 ILM_WALKER_RELE(ill); 10769 } else { 10770 first_mp1 = ip_copymsg(first_mp); 10771 if (first_mp1 != NULL) 10772 icmp_inbound_v6(q, first_mp1, ill, 10773 hdr_length, mctl_present, 10774 IP6_NO_IPPOLICY, ire->ire_zoneid, 10775 NULL); 10776 } 10777 } 10778 /* FALLTHRU */ 10779 default: { 10780 /* 10781 * Handle protocols with which IPv6 is less intimate. 10782 */ 10783 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10784 10785 /* 10786 * Enable sending ICMP for "Unknown" nexthdr 10787 * case. i.e. where we did not FALLTHRU from 10788 * IPPROTO_ICMPV6 processing case above. 
10789 */ 10790 if (nexthdr != IPPROTO_ICMPV6) 10791 fanout_flags |= IP_FF_SEND_ICMP; 10792 /* 10793 * Note: There can be more than one stream bound 10794 * to a particular protocol. When this is the case, 10795 * each one gets a copy of any incoming packets. 10796 */ 10797 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10798 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10799 mctl_present, ire->ire_zoneid); 10800 return; 10801 } 10802 } 10803 } 10804 10805 /* 10806 * Send packet using IRE. 10807 * Checksumming is controlled by cksum_request: 10808 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10809 * 1 => Skip TCP/UDP/SCTP checksum 10810 * Otherwise => checksum_request contains insert offset for checksum 10811 * 10812 * Assumes that the following set of headers appear in the first 10813 * mblk: 10814 * ip6_t 10815 * Any extension headers 10816 * TCP/UDP/SCTP header (if present) 10817 * The routine can handle an ICMPv6 header that is not in the first mblk. 10818 * 10819 * NOTE : This function does not ire_refrele the ire passed in as the 10820 * argument unlike ip_wput_ire where the REFRELE is done. 10821 * Refer to ip_wput_ire for more on this. 10822 */ 10823 static void 10824 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10825 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10826 zoneid_t zoneid) 10827 { 10828 ip6_t *ip6h; 10829 uint8_t nexthdr; 10830 uint16_t hdr_length; 10831 uint_t reachable = 0x0; 10832 ill_t *ill; 10833 mib2_ipIfStatsEntry_t *mibptr; 10834 mblk_t *first_mp; 10835 boolean_t mctl_present; 10836 ipsec_out_t *io; 10837 boolean_t conn_dontroute; /* conn value for multicast */ 10838 boolean_t conn_multicast_loop; /* conn value for multicast */ 10839 boolean_t multicast_forward; /* Should we forward ? 
*/ 10840 int max_frag; 10841 ip_stack_t *ipst = ire->ire_ipst; 10842 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10843 10844 ill = ire_to_ill(ire); 10845 first_mp = mp; 10846 multicast_forward = B_FALSE; 10847 10848 if (mp->b_datap->db_type != M_CTL) { 10849 ip6h = (ip6_t *)first_mp->b_rptr; 10850 } else { 10851 io = (ipsec_out_t *)first_mp->b_rptr; 10852 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10853 /* 10854 * Grab the zone id now because the M_CTL can be discarded by 10855 * ip_wput_ire_parse_ipsec_out() below. 10856 */ 10857 ASSERT(zoneid == io->ipsec_out_zoneid); 10858 ASSERT(zoneid != ALL_ZONES); 10859 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10860 /* 10861 * For the multicast case, ipsec_out carries conn_dontroute and 10862 * conn_multicast_loop as conn may not be available here. We 10863 * need this for multicast loopback and forwarding which is done 10864 * later in the code. 10865 */ 10866 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10867 conn_dontroute = io->ipsec_out_dontroute; 10868 conn_multicast_loop = io->ipsec_out_multicast_loop; 10869 /* 10870 * If conn_dontroute is not set or conn_multicast_loop 10871 * is set, we need to do forwarding/loopback. For 10872 * datagrams from ip_wput_multicast, conn_dontroute is 10873 * set to B_TRUE and conn_multicast_loop is set to 10874 * B_FALSE so that we neither do forwarding nor 10875 * loopback. 10876 */ 10877 if (!conn_dontroute || conn_multicast_loop) 10878 multicast_forward = B_TRUE; 10879 } 10880 } 10881 10882 /* 10883 * If the sender didn't supply the hop limit and there is a default 10884 * unicast hop limit associated with the output interface, we use 10885 * that if the packet is unicast. Interface specific unicast hop 10886 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10887 */ 10888 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10889 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10890 ip6h->ip6_hops = ill->ill_max_hops; 10891 } 10892 10893 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10894 ire->ire_zoneid != ALL_ZONES) { 10895 /* 10896 * When a zone sends a packet to another zone, we try to deliver 10897 * the packet under the same conditions as if the destination 10898 * was a real node on the network. To do so, we look for a 10899 * matching route in the forwarding table. 10900 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10901 * ip_newroute_v6() does. 10902 * Note that IRE_LOCAL are special, since they are used 10903 * when the zoneid doesn't match in some cases. This means that 10904 * we need to handle ipha_src differently since ire_src_addr 10905 * belongs to the receiving zone instead of the sending zone. 10906 * When ip_restrict_interzone_loopback is set, then 10907 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10908 * for loopback between zones when the logical "Ethernet" would 10909 * have looped them back. 
10910 */ 10911 ire_t *src_ire; 10912 10913 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10914 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10915 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10916 if (src_ire != NULL && 10917 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10918 (!ipst->ips_ip_restrict_interzone_loopback || 10919 ire_local_same_ill_group(ire, src_ire))) { 10920 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10921 !unspec_src) { 10922 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10923 } 10924 ire_refrele(src_ire); 10925 } else { 10926 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10927 if (src_ire != NULL) { 10928 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10929 ire_refrele(src_ire); 10930 freemsg(first_mp); 10931 return; 10932 } 10933 ire_refrele(src_ire); 10934 } 10935 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10936 /* Failed */ 10937 freemsg(first_mp); 10938 return; 10939 } 10940 icmp_unreachable_v6(q, first_mp, 10941 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10942 zoneid, ipst); 10943 return; 10944 } 10945 } 10946 10947 if (mp->b_datap->db_type == M_CTL || 10948 ipss->ipsec_outbound_v6_policy_present) { 10949 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10950 connp, unspec_src, zoneid); 10951 if (mp == NULL) { 10952 return; 10953 } 10954 } 10955 10956 first_mp = mp; 10957 if (mp->b_datap->db_type == M_CTL) { 10958 io = (ipsec_out_t *)mp->b_rptr; 10959 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10960 mp = mp->b_cont; 10961 mctl_present = B_TRUE; 10962 } else { 10963 mctl_present = B_FALSE; 10964 } 10965 10966 ip6h = (ip6_t *)mp->b_rptr; 10967 nexthdr = ip6h->ip6_nxt; 10968 mibptr = ill->ill_ip_mib; 10969 10970 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10971 ipif_t *ipif; 10972 10973 /* 10974 * Select the source address using ipif_select_source_v6. 
10975 */ 10976 if (attach_index != 0) { 10977 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10978 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10979 } else { 10980 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10981 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10982 } 10983 if (ipif == NULL) { 10984 if (ip_debug > 2) { 10985 /* ip1dbg */ 10986 pr_addr_dbg("ip_wput_ire_v6: no src for " 10987 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10988 printf("ip_wput_ire_v6: interface name %s\n", 10989 ill->ill_name); 10990 } 10991 freemsg(first_mp); 10992 return; 10993 } 10994 ip6h->ip6_src = ipif->ipif_v6src_addr; 10995 ipif_refrele(ipif); 10996 } 10997 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10998 if ((connp != NULL && connp->conn_multicast_loop) || 10999 !IS_LOOPBACK(ill)) { 11000 ilm_t *ilm; 11001 11002 ILM_WALKER_HOLD(ill); 11003 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 11004 ILM_WALKER_RELE(ill); 11005 if (ilm != NULL) { 11006 mblk_t *nmp; 11007 int fanout_flags = 0; 11008 11009 if (connp != NULL && 11010 !connp->conn_multicast_loop) { 11011 fanout_flags |= IP_FF_NO_MCAST_LOOP; 11012 } 11013 ip1dbg(("ip_wput_ire_v6: " 11014 "Loopback multicast\n")); 11015 nmp = ip_copymsg(first_mp); 11016 if (nmp != NULL) { 11017 ip6_t *nip6h; 11018 mblk_t *mp_ip6h; 11019 11020 if (mctl_present) { 11021 nip6h = (ip6_t *) 11022 nmp->b_cont->b_rptr; 11023 mp_ip6h = nmp->b_cont; 11024 } else { 11025 nip6h = (ip6_t *)nmp->b_rptr; 11026 mp_ip6h = nmp; 11027 } 11028 11029 DTRACE_PROBE4( 11030 ip6__loopback__out__start, 11031 ill_t *, NULL, 11032 ill_t *, ill, 11033 ip6_t *, nip6h, 11034 mblk_t *, nmp); 11035 11036 FW_HOOKS6( 11037 ipst->ips_ip6_loopback_out_event, 11038 ipst->ips_ipv6firewall_loopback_out, 11039 NULL, ill, nip6h, nmp, mp_ip6h, 11040 ipst); 11041 11042 DTRACE_PROBE1( 11043 ip6__loopback__out__end, 11044 mblk_t *, nmp); 11045 11046 if (nmp != NULL) { 11047 /* 11048 * Deliver locally and to 11049 * every local zone, except 11050 * the 
sending zone when 11051 * IPV6_MULTICAST_LOOP is 11052 * disabled. 11053 */ 11054 ip_wput_local_v6(RD(q), ill, 11055 nip6h, nmp, 11056 ire, fanout_flags); 11057 } 11058 } else { 11059 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11060 ip1dbg(("ip_wput_ire_v6: " 11061 "copymsg failed\n")); 11062 } 11063 } 11064 } 11065 if (ip6h->ip6_hops == 0 || 11066 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11067 IS_LOOPBACK(ill)) { 11068 /* 11069 * Local multicast or just loopback on loopback 11070 * interface. 11071 */ 11072 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11073 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11074 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11075 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11076 freemsg(first_mp); 11077 return; 11078 } 11079 } 11080 11081 if (ire->ire_stq != NULL) { 11082 uint32_t sum; 11083 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11084 ill_phyint->phyint_ifindex; 11085 queue_t *dev_q = ire->ire_stq->q_next; 11086 11087 /* 11088 * non-NULL send-to queue - packet is to be sent 11089 * out an interface. 11090 */ 11091 11092 /* Driver is flow-controlling? */ 11093 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11094 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11095 /* 11096 * Queue packet if we have an conn to give back 11097 * pressure. We can't queue packets intended for 11098 * hardware acceleration since we've tossed that 11099 * state already. If the packet is being fed back 11100 * from ire_send_v6, we don't know the position in 11101 * the queue to enqueue the packet and we discard 11102 * the packet. 11103 */ 11104 if (ipst->ips_ip_output_queue && connp != NULL && 11105 !mctl_present && caller != IRE_SEND) { 11106 if (caller == IP_WSRV) { 11107 connp->conn_did_putbq = 1; 11108 (void) putbq(connp->conn_wq, mp); 11109 conn_drain_insert(connp); 11110 /* 11111 * caller == IP_WSRV implies we are 11112 * the service thread, and the 11113 * queue is already noenabled. 
11114 * The check for canput and 11115 * the putbq is not atomic. 11116 * So we need to check again. 11117 */ 11118 if (canput(dev_q)) 11119 connp->conn_did_putbq = 0; 11120 } else { 11121 (void) putq(connp->conn_wq, mp); 11122 } 11123 return; 11124 } 11125 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11126 freemsg(first_mp); 11127 return; 11128 } 11129 11130 /* 11131 * Look for reachability confirmations from the transport. 11132 */ 11133 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11134 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11135 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11136 if (mctl_present) 11137 io->ipsec_out_reachable = B_TRUE; 11138 } 11139 /* Fastpath */ 11140 switch (nexthdr) { 11141 case IPPROTO_TCP: 11142 case IPPROTO_UDP: 11143 case IPPROTO_ICMPV6: 11144 case IPPROTO_SCTP: 11145 hdr_length = IPV6_HDR_LEN; 11146 break; 11147 default: { 11148 uint8_t *nexthdrp; 11149 11150 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11151 &hdr_length, &nexthdrp)) { 11152 /* Malformed packet */ 11153 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11154 freemsg(first_mp); 11155 return; 11156 } 11157 nexthdr = *nexthdrp; 11158 break; 11159 } 11160 } 11161 11162 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11163 uint16_t *up; 11164 uint16_t *insp; 11165 11166 /* 11167 * The packet header is processed once for all, even 11168 * in the multirouting case. We disable hardware 11169 * checksum if the packet is multirouted, as it will be 11170 * replicated via several interfaces, and not all of 11171 * them may have this capability. 11172 */ 11173 if (cksum_request == 1 && 11174 !(ire->ire_flags & RTF_MULTIRT)) { 11175 /* Skip the transport checksum */ 11176 goto cksum_done; 11177 } 11178 /* 11179 * Do user-configured raw checksum. 
11180 * Compute checksum and insert at offset "cksum_request" 11181 */ 11182 11183 /* check for enough headers for checksum */ 11184 cksum_request += hdr_length; /* offset from rptr */ 11185 if ((mp->b_wptr - mp->b_rptr) < 11186 (cksum_request + sizeof (int16_t))) { 11187 if (!pullupmsg(mp, 11188 cksum_request + sizeof (int16_t))) { 11189 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11190 " failed\n")); 11191 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11192 freemsg(first_mp); 11193 return; 11194 } 11195 ip6h = (ip6_t *)mp->b_rptr; 11196 } 11197 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11198 ASSERT(((uintptr_t)insp & 0x1) == 0); 11199 up = (uint16_t *)&ip6h->ip6_src; 11200 /* 11201 * icmp has placed length and routing 11202 * header adjustment in *insp. 11203 */ 11204 sum = htons(nexthdr) + 11205 up[0] + up[1] + up[2] + up[3] + 11206 up[4] + up[5] + up[6] + up[7] + 11207 up[8] + up[9] + up[10] + up[11] + 11208 up[12] + up[13] + up[14] + up[15]; 11209 sum = (sum & 0xffff) + (sum >> 16); 11210 *insp = IP_CSUM(mp, hdr_length, sum); 11211 } else if (nexthdr == IPPROTO_TCP) { 11212 uint16_t *up; 11213 11214 /* 11215 * Check for full IPv6 header + enough TCP header 11216 * to get at the checksum field. 11217 */ 11218 if ((mp->b_wptr - mp->b_rptr) < 11219 (hdr_length + TCP_CHECKSUM_OFFSET + 11220 TCP_CHECKSUM_SIZE)) { 11221 if (!pullupmsg(mp, hdr_length + 11222 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11223 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11224 " failed\n")); 11225 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11226 freemsg(first_mp); 11227 return; 11228 } 11229 ip6h = (ip6_t *)mp->b_rptr; 11230 } 11231 11232 up = (uint16_t *)&ip6h->ip6_src; 11233 /* 11234 * Note: The TCP module has stored the length value 11235 * into the tcp checksum field, so we don't 11236 * need to explicitly sum it in here. 
11237 */ 11238 sum = up[0] + up[1] + up[2] + up[3] + 11239 up[4] + up[5] + up[6] + up[7] + 11240 up[8] + up[9] + up[10] + up[11] + 11241 up[12] + up[13] + up[14] + up[15]; 11242 11243 /* Fold the initial sum */ 11244 sum = (sum & 0xffff) + (sum >> 16); 11245 11246 up = (uint16_t *)(((uchar_t *)ip6h) + 11247 hdr_length + TCP_CHECKSUM_OFFSET); 11248 11249 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11250 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11251 ire->ire_max_frag, mctl_present, sum); 11252 11253 /* Software checksum? */ 11254 if (DB_CKSUMFLAGS(mp) == 0) { 11255 IP6_STAT(ipst, ip6_out_sw_cksum); 11256 IP6_STAT_UPDATE(ipst, 11257 ip6_tcp_out_sw_cksum_bytes, 11258 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11259 hdr_length); 11260 } 11261 } else if (nexthdr == IPPROTO_UDP) { 11262 uint16_t *up; 11263 11264 /* 11265 * check for full IPv6 header + enough UDP header 11266 * to get at the UDP checksum field 11267 */ 11268 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11269 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11270 if (!pullupmsg(mp, hdr_length + 11271 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11272 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11273 " failed\n")); 11274 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11275 freemsg(first_mp); 11276 return; 11277 } 11278 ip6h = (ip6_t *)mp->b_rptr; 11279 } 11280 up = (uint16_t *)&ip6h->ip6_src; 11281 /* 11282 * Note: The UDP module has stored the length value 11283 * into the udp checksum field, so we don't 11284 * need to explicitly sum it in here. 
11285 */ 11286 sum = up[0] + up[1] + up[2] + up[3] + 11287 up[4] + up[5] + up[6] + up[7] + 11288 up[8] + up[9] + up[10] + up[11] + 11289 up[12] + up[13] + up[14] + up[15]; 11290 11291 /* Fold the initial sum */ 11292 sum = (sum & 0xffff) + (sum >> 16); 11293 11294 up = (uint16_t *)(((uchar_t *)ip6h) + 11295 hdr_length + UDP_CHECKSUM_OFFSET); 11296 11297 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11298 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11299 ire->ire_max_frag, mctl_present, sum); 11300 11301 /* Software checksum? */ 11302 if (DB_CKSUMFLAGS(mp) == 0) { 11303 IP6_STAT(ipst, ip6_out_sw_cksum); 11304 IP6_STAT_UPDATE(ipst, 11305 ip6_udp_out_sw_cksum_bytes, 11306 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11307 hdr_length); 11308 } 11309 } else if (nexthdr == IPPROTO_ICMPV6) { 11310 uint16_t *up; 11311 icmp6_t *icmp6; 11312 11313 /* check for full IPv6+ICMPv6 header */ 11314 if ((mp->b_wptr - mp->b_rptr) < 11315 (hdr_length + ICMP6_MINLEN)) { 11316 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11317 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11318 " failed\n")); 11319 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11320 freemsg(first_mp); 11321 return; 11322 } 11323 ip6h = (ip6_t *)mp->b_rptr; 11324 } 11325 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11326 up = (uint16_t *)&ip6h->ip6_src; 11327 /* 11328 * icmp has placed length and routing 11329 * header adjustment in icmp6_cksum. 
11330 */ 11331 sum = htons(IPPROTO_ICMPV6) + 11332 up[0] + up[1] + up[2] + up[3] + 11333 up[4] + up[5] + up[6] + up[7] + 11334 up[8] + up[9] + up[10] + up[11] + 11335 up[12] + up[13] + up[14] + up[15]; 11336 sum = (sum & 0xffff) + (sum >> 16); 11337 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11338 11339 /* Update output mib stats */ 11340 icmp_update_out_mib_v6(ill, icmp6); 11341 } else if (nexthdr == IPPROTO_SCTP) { 11342 sctp_hdr_t *sctph; 11343 11344 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11345 if (!pullupmsg(mp, hdr_length + 11346 sizeof (*sctph))) { 11347 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11348 " failed\n")); 11349 BUMP_MIB(ill->ill_ip_mib, 11350 ipIfStatsOutDiscards); 11351 freemsg(mp); 11352 return; 11353 } 11354 ip6h = (ip6_t *)mp->b_rptr; 11355 } 11356 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11357 sctph->sh_chksum = 0; 11358 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11359 } 11360 11361 cksum_done: 11362 /* 11363 * We force the insertion of a fragment header using the 11364 * IPH_FRAG_HDR flag in two cases: 11365 * - after reception of an ICMPv6 "packet too big" message 11366 * with a MTU < 1280 (cf. RFC 2460 section 5) 11367 * - for multirouted IPv6 packets, so that the receiver can 11368 * discard duplicates according to their fragment identifier 11369 * 11370 * Two flags modifed from the API can modify this behavior. 11371 * The first is IPV6_USE_MIN_MTU. With this API the user 11372 * can specify how to manage PMTUD for unicast and multicast. 11373 * 11374 * IPV6_DONTFRAG disallows fragmentation. 
11375 */ 11376 max_frag = ire->ire_max_frag; 11377 switch (IP6I_USE_MIN_MTU_API(flags)) { 11378 case IPV6_USE_MIN_MTU_DEFAULT: 11379 case IPV6_USE_MIN_MTU_UNICAST: 11380 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11381 max_frag = IPV6_MIN_MTU; 11382 } 11383 break; 11384 11385 case IPV6_USE_MIN_MTU_NEVER: 11386 max_frag = IPV6_MIN_MTU; 11387 break; 11388 } 11389 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11390 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11391 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11392 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11393 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11394 return; 11395 } 11396 11397 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11398 (mp->b_cont ? msgdsize(mp) : 11399 mp->b_wptr - (uchar_t *)ip6h)) { 11400 ip0dbg(("Packet length mismatch: %d, %ld\n", 11401 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11402 msgdsize(mp))); 11403 freemsg(first_mp); 11404 return; 11405 } 11406 /* Do IPSEC processing first */ 11407 if (mctl_present) { 11408 if (attach_index != 0) 11409 ipsec_out_attach_if(io, attach_index); 11410 ipsec_out_process(q, first_mp, ire, ill_index); 11411 return; 11412 } 11413 ASSERT(mp->b_prev == NULL); 11414 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11415 ntohs(ip6h->ip6_plen) + 11416 IPV6_HDR_LEN, max_frag)); 11417 ASSERT(mp == first_mp); 11418 /* Initiate IPPF processing */ 11419 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11420 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11421 if (mp == NULL) { 11422 return; 11423 } 11424 } 11425 ip_wput_frag_v6(mp, ire, reachable, connp, 11426 caller, max_frag); 11427 return; 11428 } 11429 /* Do IPSEC processing first */ 11430 if (mctl_present) { 11431 int extra_len = ipsec_out_extra_length(first_mp); 11432 11433 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11434 max_frag) { 11435 /* 11436 * IPsec headers will push the packet over the 11437 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11438 * message for this packet if the upper-layer 11439 * that issued this packet will be able to 11440 * react to the icmp_pkt2big_v6() that we'll 11441 * generate. 11442 */ 11443 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11444 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11445 return; 11446 } 11447 if (attach_index != 0) 11448 ipsec_out_attach_if(io, attach_index); 11449 ipsec_out_process(q, first_mp, ire, ill_index); 11450 return; 11451 } 11452 /* 11453 * XXX multicast: add ip_mforward_v6() here. 11454 * Check conn_dontroute 11455 */ 11456 #ifdef lint 11457 /* 11458 * XXX The only purpose of this statement is to avoid lint 11459 * errors. See the above "XXX multicast". When that gets 11460 * fixed, remove this whole #ifdef lint section. 11461 */ 11462 ip3dbg(("multicast forward is %s.\n", 11463 (multicast_forward ? "TRUE" : "FALSE"))); 11464 #endif 11465 11466 UPDATE_OB_PKT_COUNT(ire); 11467 ire->ire_last_used_time = lbolt; 11468 ASSERT(mp == first_mp); 11469 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11470 } else { 11471 DTRACE_PROBE4(ip6__loopback__out__start, 11472 ill_t *, NULL, ill_t *, ill, 11473 ip6_t *, ip6h, mblk_t *, first_mp); 11474 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11475 ipst->ips_ipv6firewall_loopback_out, 11476 NULL, ill, ip6h, first_mp, mp, ipst); 11477 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11478 if (first_mp != NULL) 11479 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11480 } 11481 } 11482 11483 /* 11484 * Outbound IPv6 fragmentation routine using MDT. 
11485 */ 11486 static void 11487 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11488 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11489 { 11490 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11491 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11492 mblk_t *hdr_mp, *md_mp = NULL; 11493 int i1; 11494 multidata_t *mmd; 11495 unsigned char *hdr_ptr, *pld_ptr; 11496 ip_pdescinfo_t pdi; 11497 uint32_t ident; 11498 size_t len; 11499 uint16_t offset; 11500 queue_t *stq = ire->ire_stq; 11501 ill_t *ill = (ill_t *)stq->q_ptr; 11502 ip_stack_t *ipst = ill->ill_ipst; 11503 11504 ASSERT(DB_TYPE(mp) == M_DATA); 11505 ASSERT(MBLKL(mp) > unfragmentable_len); 11506 11507 /* 11508 * Move read ptr past unfragmentable portion, we don't want this part 11509 * of the data in our fragments. 11510 */ 11511 mp->b_rptr += unfragmentable_len; 11512 11513 /* Calculate how many packets we will send out */ 11514 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11515 pkts = (i1 + max_chunk - 1) / max_chunk; 11516 ASSERT(pkts > 1); 11517 11518 /* Allocate a message block which will hold all the IP Headers. */ 11519 wroff = ipst->ips_ip_wroff_extra; 11520 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11521 11522 i1 = pkts * hdr_chunk_len; 11523 /* 11524 * Create the header buffer, Multidata and destination address 11525 * and SAP attribute that should be associated with it. 
11526 */ 11527 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11528 ((hdr_mp->b_wptr += i1), 11529 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11530 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11531 freemsg(mp); 11532 if (md_mp == NULL) { 11533 freemsg(hdr_mp); 11534 } else { 11535 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11536 freemsg(md_mp); 11537 } 11538 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11540 return; 11541 } 11542 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11543 11544 /* 11545 * Add a payload buffer to the Multidata; this operation must not 11546 * fail, or otherwise our logic in this routine is broken. There 11547 * is no memory allocation done by the routine, so any returned 11548 * failure simply tells us that we've done something wrong. 11549 * 11550 * A failure tells us that either we're adding the same payload 11551 * buffer more than once, or we're trying to add more buffers than 11552 * allowed. None of the above cases should happen, and we panic 11553 * because either there's horrible heap corruption, and/or 11554 * programming mistake. 11555 */ 11556 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11557 goto pbuf_panic; 11558 } 11559 11560 hdr_ptr = hdr_mp->b_rptr; 11561 pld_ptr = mp->b_rptr; 11562 11563 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11564 11565 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11566 11567 /* 11568 * len is the total length of the fragmentable data in this 11569 * datagram. For each fragment sent, we will decrement len 11570 * by the amount of fragmentable data sent in that fragment 11571 * until len reaches zero. 
11572 */ 11573 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11574 11575 offset = 0; 11576 prev_nexthdr_offset += wroff; 11577 11578 while (len != 0) { 11579 size_t mlen; 11580 ip6_t *fip6h; 11581 ip6_frag_t *fraghdr; 11582 int error; 11583 11584 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11585 mlen = MIN(len, max_chunk); 11586 len -= mlen; 11587 11588 fip6h = (ip6_t *)(hdr_ptr + wroff); 11589 ASSERT(OK_32PTR(fip6h)); 11590 bcopy(ip6h, fip6h, unfragmentable_len); 11591 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11592 11593 fip6h->ip6_plen = htons((uint16_t)(mlen + 11594 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11595 11596 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11597 unfragmentable_len); 11598 fraghdr->ip6f_nxt = nexthdr; 11599 fraghdr->ip6f_reserved = 0; 11600 fraghdr->ip6f_offlg = htons(offset) | 11601 ((len != 0) ? IP6F_MORE_FRAG : 0); 11602 fraghdr->ip6f_ident = ident; 11603 11604 /* 11605 * Record offset and size of header and data of the next packet 11606 * in the multidata message. 11607 */ 11608 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11609 unfragmentable_len + sizeof (ip6_frag_t), 0); 11610 PDESC_PLD_INIT(&pdi); 11611 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11612 ASSERT(i1 > 0); 11613 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11614 if (i1 == mlen) { 11615 pld_ptr += mlen; 11616 } else { 11617 i1 = mlen - i1; 11618 mp = mp->b_cont; 11619 ASSERT(mp != NULL); 11620 ASSERT(MBLKL(mp) >= i1); 11621 /* 11622 * Attach the next payload message block to the 11623 * multidata message. 
11624 */ 11625 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11626 goto pbuf_panic; 11627 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11628 pld_ptr = mp->b_rptr + i1; 11629 } 11630 11631 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11632 KM_NOSLEEP)) == NULL) { 11633 /* 11634 * Any failure other than ENOMEM indicates that we 11635 * have passed in invalid pdesc info or parameters 11636 * to mmd_addpdesc, which must not happen. 11637 * 11638 * EINVAL is a result of failure on boundary checks 11639 * against the pdesc info contents. It should not 11640 * happen, and we panic because either there's 11641 * horrible heap corruption, and/or programming 11642 * mistake. 11643 */ 11644 if (error != ENOMEM) { 11645 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11646 "pdesc logic error detected for " 11647 "mmd %p pinfo %p (%d)\n", 11648 (void *)mmd, (void *)&pdi, error); 11649 /* NOTREACHED */ 11650 } 11651 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11652 /* Free unattached payload message blocks as well */ 11653 md_mp->b_cont = mp->b_cont; 11654 goto free_mmd; 11655 } 11656 11657 /* Advance fragment offset. */ 11658 offset += mlen; 11659 11660 /* Advance to location for next header in the buffer. */ 11661 hdr_ptr += hdr_chunk_len; 11662 11663 /* Did we reach the next payload message block? */ 11664 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11665 mp = mp->b_cont; 11666 /* 11667 * Attach the next message block with payload 11668 * data to the multidata message. 
11669 */ 11670 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11671 goto pbuf_panic; 11672 pld_ptr = mp->b_rptr; 11673 } 11674 } 11675 11676 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11677 ASSERT(mp->b_wptr == pld_ptr); 11678 11679 /* Update IP statistics */ 11680 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11681 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11682 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11683 /* 11684 * The ipv6 header len is accounted for in unfragmentable_len so 11685 * when calculating the fragmentation overhead just add the frag 11686 * header len. 11687 */ 11688 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11689 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11690 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11691 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11692 11693 ire->ire_ob_pkt_count += pkts; 11694 if (ire->ire_ipif != NULL) 11695 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11696 11697 ire->ire_last_used_time = lbolt; 11698 /* Send it down */ 11699 putnext(stq, md_mp); 11700 return; 11701 11702 pbuf_panic: 11703 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11704 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11705 pbuf_idx); 11706 /* NOTREACHED */ 11707 } 11708 11709 /* 11710 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11711 * We have not optimized this in terms of number of mblks 11712 * allocated. For instance, for each fragment sent we always allocate a 11713 * mblk to hold the IPv6 header and fragment header. 11714 * 11715 * Assumes that all the extension headers are contained in the first mblk. 11716 * 11717 * The fragment header is inserted after an hop-by-hop options header 11718 * and after [an optional destinations header followed by] a routing header. 11719 * 11720 * NOTE : This function does not ire_refrele the ire passed in as 11721 * the argument. 
11722 */ 11723 void 11724 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11725 int caller, int max_frag) 11726 { 11727 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11728 ip6_t *fip6h; 11729 mblk_t *hmp; 11730 mblk_t *hmp0; 11731 mblk_t *dmp; 11732 ip6_frag_t *fraghdr; 11733 size_t unfragmentable_len; 11734 size_t len; 11735 size_t mlen; 11736 size_t max_chunk; 11737 uint32_t ident; 11738 uint16_t off_flags; 11739 uint16_t offset = 0; 11740 ill_t *ill; 11741 uint8_t nexthdr; 11742 uint_t prev_nexthdr_offset; 11743 uint8_t *ptr; 11744 ip_stack_t *ipst = ire->ire_ipst; 11745 11746 ASSERT(ire->ire_type == IRE_CACHE); 11747 ill = (ill_t *)ire->ire_stq->q_ptr; 11748 11749 if (max_frag <= 0) { 11750 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11751 freemsg(mp); 11752 return; 11753 } 11754 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11755 11756 /* 11757 * Determine the length of the unfragmentable portion of this 11758 * datagram. This consists of the IPv6 header, a potential 11759 * hop-by-hop options header, a potential pre-routing-header 11760 * destination options header, and a potential routing header. 
11761 */ 11762 nexthdr = ip6h->ip6_nxt; 11763 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11764 ptr = (uint8_t *)&ip6h[1]; 11765 11766 if (nexthdr == IPPROTO_HOPOPTS) { 11767 ip6_hbh_t *hbh_hdr; 11768 uint_t hdr_len; 11769 11770 hbh_hdr = (ip6_hbh_t *)ptr; 11771 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11772 nexthdr = hbh_hdr->ip6h_nxt; 11773 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11774 - (uint8_t *)ip6h; 11775 ptr += hdr_len; 11776 } 11777 if (nexthdr == IPPROTO_DSTOPTS) { 11778 ip6_dest_t *dest_hdr; 11779 uint_t hdr_len; 11780 11781 dest_hdr = (ip6_dest_t *)ptr; 11782 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11783 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11784 nexthdr = dest_hdr->ip6d_nxt; 11785 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11786 - (uint8_t *)ip6h; 11787 ptr += hdr_len; 11788 } 11789 } 11790 if (nexthdr == IPPROTO_ROUTING) { 11791 ip6_rthdr_t *rthdr; 11792 uint_t hdr_len; 11793 11794 rthdr = (ip6_rthdr_t *)ptr; 11795 nexthdr = rthdr->ip6r_nxt; 11796 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11797 - (uint8_t *)ip6h; 11798 hdr_len = 8 * (rthdr->ip6r_len + 1); 11799 ptr += hdr_len; 11800 } 11801 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11802 11803 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11804 sizeof (ip6_frag_t)) & ~7; 11805 11806 /* Check if we can use MDT to send out the frags. */ 11807 ASSERT(!IRE_IS_LOCAL(ire)); 11808 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11809 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11810 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11811 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11812 nexthdr, prev_nexthdr_offset); 11813 return; 11814 } 11815 11816 /* 11817 * Allocate an mblk with enough room for the link-layer 11818 * header, the unfragmentable part of the datagram, and the 11819 * fragment header. 
This (or a copy) will be used as the 11820 * first mblk for each fragment we send. 11821 */ 11822 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11823 ipst->ips_ip_wroff_extra, BPRI_HI); 11824 if (hmp == NULL) { 11825 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11826 freemsg(mp); 11827 return; 11828 } 11829 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11830 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11831 11832 fip6h = (ip6_t *)hmp->b_rptr; 11833 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11834 11835 bcopy(ip6h, fip6h, unfragmentable_len); 11836 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11837 11838 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11839 11840 fraghdr->ip6f_nxt = nexthdr; 11841 fraghdr->ip6f_reserved = 0; 11842 fraghdr->ip6f_offlg = 0; 11843 fraghdr->ip6f_ident = htonl(ident); 11844 11845 /* 11846 * len is the total length of the fragmentable data in this 11847 * datagram. For each fragment sent, we will decrement len 11848 * by the amount of fragmentable data sent in that fragment 11849 * until len reaches zero. 11850 */ 11851 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11852 11853 /* 11854 * Move read ptr past unfragmentable portion, we don't want this part 11855 * of the data in our fragments. 
11856 */ 11857 mp->b_rptr += unfragmentable_len; 11858 11859 while (len != 0) { 11860 mlen = MIN(len, max_chunk); 11861 len -= mlen; 11862 if (len != 0) { 11863 /* Not last */ 11864 hmp0 = copyb(hmp); 11865 if (hmp0 == NULL) { 11866 freeb(hmp); 11867 freemsg(mp); 11868 BUMP_MIB(ill->ill_ip_mib, 11869 ipIfStatsOutFragFails); 11870 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11871 return; 11872 } 11873 off_flags = IP6F_MORE_FRAG; 11874 } else { 11875 /* Last fragment */ 11876 hmp0 = hmp; 11877 hmp = NULL; 11878 off_flags = 0; 11879 } 11880 fip6h = (ip6_t *)(hmp0->b_rptr); 11881 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11882 11883 fip6h->ip6_plen = htons((uint16_t)(mlen + 11884 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11885 /* 11886 * Note: Optimization alert. 11887 * In IPv6 (and IPv4) protocol header, Fragment Offset 11888 * ("offset") is 13 bits wide and in 8-octet units. 11889 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11890 * it occupies the most significant 13 bits. 11891 * (least significant 13 bits in IPv4). 11892 * We do not do any shifts here. Not shifting is same effect 11893 * as taking offset value in octet units, dividing by 8 and 11894 * then shifting 3 bits left to line it up in place in proper 11895 * place protocol header. 
11896 */ 11897 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11898 11899 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11900 /* mp has already been freed by ip_carve_mp() */ 11901 if (hmp != NULL) 11902 freeb(hmp); 11903 freeb(hmp0); 11904 ip1dbg(("ip_carve_mp: failed\n")); 11905 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11906 return; 11907 } 11908 hmp0->b_cont = dmp; 11909 /* Get the priority marking, if any */ 11910 hmp0->b_band = dmp->b_band; 11911 UPDATE_OB_PKT_COUNT(ire); 11912 ire->ire_last_used_time = lbolt; 11913 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11914 caller, NULL); 11915 reachable = 0; /* No need to redo state machine in loop */ 11916 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11917 offset += mlen; 11918 } 11919 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11920 } 11921 11922 /* 11923 * Determine if the ill and multicast aspects of that packets 11924 * "matches" the conn. 11925 */ 11926 boolean_t 11927 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11928 zoneid_t zoneid) 11929 { 11930 ill_t *in_ill; 11931 boolean_t wantpacket = B_TRUE; 11932 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11933 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11934 11935 /* 11936 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11937 * unicast and multicast reception to conn_incoming_ill. 11938 * conn_wantpacket_v6 is called both for unicast and 11939 * multicast. 11940 * 11941 * 1) The unicast copy of the packet can come anywhere in 11942 * the ill group if it is part of the group. Thus, we 11943 * need to check to see whether the ill group matches 11944 * if in_ill is part of a group. 11945 * 11946 * 2) ip_rput does not suppress duplicate multicast packets. 
11947 * If there are two interfaces in a ill group and we have 11948 * 2 applications (conns) joined a multicast group G on 11949 * both the interfaces, ilm_lookup_ill filter in ip_rput 11950 * will give us two packets because we join G on both the 11951 * interfaces rather than nominating just one interface 11952 * for receiving multicast like broadcast above. So, 11953 * we have to call ilg_lookup_ill to filter out duplicate 11954 * copies, if ill is part of a group, to supress duplicates. 11955 */ 11956 in_ill = connp->conn_incoming_ill; 11957 if (in_ill != NULL) { 11958 mutex_enter(&connp->conn_lock); 11959 in_ill = connp->conn_incoming_ill; 11960 mutex_enter(&ill->ill_lock); 11961 /* 11962 * No IPMP, and the packet did not arrive on conn_incoming_ill 11963 * OR, IPMP in use and the packet arrived on an IPMP group 11964 * different from the conn_incoming_ill's IPMP group. 11965 * Reject the packet. 11966 */ 11967 if ((in_ill->ill_group == NULL && in_ill != ill) || 11968 (in_ill->ill_group != NULL && 11969 in_ill->ill_group != ill->ill_group)) { 11970 wantpacket = B_FALSE; 11971 } 11972 mutex_exit(&ill->ill_lock); 11973 mutex_exit(&connp->conn_lock); 11974 if (!wantpacket) 11975 return (B_FALSE); 11976 } 11977 11978 if (connp->conn_multi_router) 11979 return (B_TRUE); 11980 11981 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11982 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11983 /* 11984 * Unicast case: we match the conn only if it's in the specified 11985 * zone. 11986 */ 11987 return (IPCL_ZONE_MATCH(connp, zoneid)); 11988 } 11989 11990 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11991 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11992 /* 11993 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11994 * disabled, therefore we don't dispatch the multicast packet to 11995 * the sending zone. 
11996 */ 11997 return (B_FALSE); 11998 } 11999 12000 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 12001 zoneid != ALL_ZONES) { 12002 /* 12003 * Multicast packet on the loopback interface: we only match 12004 * conns who joined the group in the specified zone. 12005 */ 12006 return (B_FALSE); 12007 } 12008 12009 mutex_enter(&connp->conn_lock); 12010 wantpacket = 12011 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12012 mutex_exit(&connp->conn_lock); 12013 12014 return (wantpacket); 12015 } 12016 12017 12018 /* 12019 * Transmit a packet and update any NUD state based on the flags 12020 * XXX need to "recover" any ip6i_t when doing putq! 12021 * 12022 * NOTE : This function does not ire_refrele the ire passed in as the 12023 * argument. 12024 */ 12025 void 12026 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12027 int caller, ipsec_out_t *io) 12028 { 12029 mblk_t *mp1; 12030 nce_t *nce = ire->ire_nce; 12031 ill_t *ill; 12032 ill_t *out_ill; 12033 uint64_t delta; 12034 ip6_t *ip6h; 12035 queue_t *stq = ire->ire_stq; 12036 ire_t *ire1 = NULL; 12037 ire_t *save_ire = ire; 12038 boolean_t multirt_send = B_FALSE; 12039 mblk_t *next_mp = NULL; 12040 ip_stack_t *ipst = ire->ire_ipst; 12041 12042 ip6h = (ip6_t *)mp->b_rptr; 12043 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12044 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12045 ASSERT(nce != NULL); 12046 ASSERT(mp->b_datap->db_type == M_DATA); 12047 ASSERT(stq != NULL); 12048 12049 ill = ire_to_ill(ire); 12050 if (!ill) { 12051 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12052 freemsg(mp); 12053 return; 12054 } 12055 12056 /* 12057 * If a packet is to be sent out an interface that is a 6to4 12058 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12059 * destination, must be checked to have a 6to4 prefix 12060 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12061 * address configured on the sending interface. 
Otherwise, 12062 * the packet was delivered to this interface in error and the 12063 * packet must be dropped. 12064 */ 12065 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12066 ipif_t *ipif = ill->ill_ipif; 12067 12068 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12069 &ip6h->ip6_dst)) { 12070 if (ip_debug > 2) { 12071 /* ip1dbg */ 12072 pr_addr_dbg("ip_xmit_v6: attempting to " 12073 "send 6to4 addressed IPv6 " 12074 "destination (%s) out the wrong " 12075 "interface.\n", AF_INET6, 12076 &ip6h->ip6_dst); 12077 } 12078 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12079 freemsg(mp); 12080 return; 12081 } 12082 } 12083 12084 /* Flow-control check has been done in ip_wput_ire_v6 */ 12085 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12086 caller == IP_WSRV || canput(stq->q_next)) { 12087 uint32_t ill_index; 12088 12089 /* 12090 * In most cases, the emission loop below is entered only 12091 * once. Only in the case where the ire holds the 12092 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12093 * flagged ires in the bucket, and send the packet 12094 * through all crossed RTF_MULTIRT routes. 12095 */ 12096 if (ire->ire_flags & RTF_MULTIRT) { 12097 /* 12098 * Multirouting case. The bucket where ire is stored 12099 * probably holds other RTF_MULTIRT flagged ires 12100 * to the destination. In this call to ip_xmit_v6, 12101 * we attempt to send the packet through all 12102 * those ires. Thus, we first ensure that ire is the 12103 * first RTF_MULTIRT ire in the bucket, 12104 * before walking the ire list. 12105 */ 12106 ire_t *first_ire; 12107 irb_t *irb = ire->ire_bucket; 12108 ASSERT(irb != NULL); 12109 multirt_send = B_TRUE; 12110 12111 /* Make sure we do not omit any multiroute ire. 
*/ 12112 IRB_REFHOLD(irb); 12113 for (first_ire = irb->irb_ire; 12114 first_ire != NULL; 12115 first_ire = first_ire->ire_next) { 12116 if ((first_ire->ire_flags & RTF_MULTIRT) && 12117 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12118 &ire->ire_addr_v6)) && 12119 !(first_ire->ire_marks & 12120 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12121 break; 12122 } 12123 12124 if ((first_ire != NULL) && (first_ire != ire)) { 12125 IRE_REFHOLD(first_ire); 12126 /* ire will be released by the caller */ 12127 ire = first_ire; 12128 nce = ire->ire_nce; 12129 stq = ire->ire_stq; 12130 ill = ire_to_ill(ire); 12131 } 12132 IRB_REFRELE(irb); 12133 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12134 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12135 ILL_MDT_USABLE(ill)) { 12136 /* 12137 * This tcp connection was marked as MDT-capable, but 12138 * it has been turned off due changes in the interface. 12139 * Now that the interface support is back, turn it on 12140 * by notifying tcp. We don't directly modify tcp_mdt, 12141 * since we leave all the details to the tcp code that 12142 * knows better. 12143 */ 12144 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12145 12146 if (mdimp == NULL) { 12147 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12148 "connp %p (ENOMEM)\n", (void *)connp)); 12149 } else { 12150 CONN_INC_REF(connp); 12151 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12152 connp, SQTAG_TCP_INPUT_MCTL); 12153 } 12154 } 12155 12156 do { 12157 mblk_t *mp_ip6h; 12158 12159 if (multirt_send) { 12160 irb_t *irb; 12161 /* 12162 * We are in a multiple send case, need to get 12163 * the next ire and make a duplicate of the 12164 * packet. ire1 holds here the next ire to 12165 * process in the bucket. If multirouting is 12166 * expected, any non-RTF_MULTIRT ire that has 12167 * the right destination address is ignored. 
12168 */ 12169 irb = ire->ire_bucket; 12170 ASSERT(irb != NULL); 12171 12172 IRB_REFHOLD(irb); 12173 for (ire1 = ire->ire_next; 12174 ire1 != NULL; 12175 ire1 = ire1->ire_next) { 12176 if (!(ire1->ire_flags & RTF_MULTIRT)) 12177 continue; 12178 if (!IN6_ARE_ADDR_EQUAL( 12179 &ire1->ire_addr_v6, 12180 &ire->ire_addr_v6)) 12181 continue; 12182 if (ire1->ire_marks & 12183 (IRE_MARK_CONDEMNED| 12184 IRE_MARK_HIDDEN)) 12185 continue; 12186 12187 /* Got one */ 12188 if (ire1 != save_ire) { 12189 IRE_REFHOLD(ire1); 12190 } 12191 break; 12192 } 12193 IRB_REFRELE(irb); 12194 12195 if (ire1 != NULL) { 12196 next_mp = copyb(mp); 12197 if ((next_mp == NULL) || 12198 ((mp->b_cont != NULL) && 12199 ((next_mp->b_cont = 12200 dupmsg(mp->b_cont)) == NULL))) { 12201 freemsg(next_mp); 12202 next_mp = NULL; 12203 ire_refrele(ire1); 12204 ire1 = NULL; 12205 } 12206 } 12207 12208 /* Last multiroute ire; don't loop anymore. */ 12209 if (ire1 == NULL) { 12210 multirt_send = B_FALSE; 12211 } 12212 } 12213 12214 ill_index = 12215 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12216 12217 /* Initiate IPPF processing */ 12218 if (IP6_OUT_IPP(flags, ipst)) { 12219 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12220 if (mp == NULL) { 12221 BUMP_MIB(ill->ill_ip_mib, 12222 ipIfStatsOutDiscards); 12223 if (next_mp != NULL) 12224 freemsg(next_mp); 12225 if (ire != save_ire) { 12226 ire_refrele(ire); 12227 } 12228 return; 12229 } 12230 ip6h = (ip6_t *)mp->b_rptr; 12231 } 12232 mp_ip6h = mp; 12233 12234 /* 12235 * Check for fastpath, we need to hold nce_lock to 12236 * prevent fastpath update from chaining nce_fp_mp. 
12237 */ 12238 12239 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12240 mutex_enter(&nce->nce_lock); 12241 if ((mp1 = nce->nce_fp_mp) != NULL) { 12242 uint32_t hlen; 12243 uchar_t *rptr; 12244 12245 hlen = MBLKL(mp1); 12246 rptr = mp->b_rptr - hlen; 12247 /* 12248 * make sure there is room for the fastpath 12249 * datalink header 12250 */ 12251 if (rptr < mp->b_datap->db_base) { 12252 mp1 = copyb(mp1); 12253 mutex_exit(&nce->nce_lock); 12254 if (mp1 == NULL) { 12255 BUMP_MIB(ill->ill_ip_mib, 12256 ipIfStatsOutDiscards); 12257 freemsg(mp); 12258 if (next_mp != NULL) 12259 freemsg(next_mp); 12260 if (ire != save_ire) { 12261 ire_refrele(ire); 12262 } 12263 return; 12264 } 12265 mp1->b_cont = mp; 12266 12267 /* Get the priority marking, if any */ 12268 mp1->b_band = mp->b_band; 12269 mp = mp1; 12270 } else { 12271 mp->b_rptr = rptr; 12272 /* 12273 * fastpath - pre-pend datalink 12274 * header 12275 */ 12276 bcopy(mp1->b_rptr, rptr, hlen); 12277 mutex_exit(&nce->nce_lock); 12278 } 12279 } else { 12280 /* 12281 * Get the DL_UNITDATA_REQ. 12282 */ 12283 mp1 = nce->nce_res_mp; 12284 if (mp1 == NULL) { 12285 mutex_exit(&nce->nce_lock); 12286 ip1dbg(("ip_xmit_v6: No resolution " 12287 "block ire = %p\n", (void *)ire)); 12288 freemsg(mp); 12289 if (next_mp != NULL) 12290 freemsg(next_mp); 12291 if (ire != save_ire) { 12292 ire_refrele(ire); 12293 } 12294 return; 12295 } 12296 /* 12297 * Prepend the DL_UNITDATA_REQ. 
12298 */ 12299 mp1 = copyb(mp1); 12300 mutex_exit(&nce->nce_lock); 12301 if (mp1 == NULL) { 12302 BUMP_MIB(ill->ill_ip_mib, 12303 ipIfStatsOutDiscards); 12304 freemsg(mp); 12305 if (next_mp != NULL) 12306 freemsg(next_mp); 12307 if (ire != save_ire) { 12308 ire_refrele(ire); 12309 } 12310 return; 12311 } 12312 mp1->b_cont = mp; 12313 12314 /* Get the priority marking, if any */ 12315 mp1->b_band = mp->b_band; 12316 mp = mp1; 12317 } 12318 12319 out_ill = (ill_t *)stq->q_ptr; 12320 12321 DTRACE_PROBE4(ip6__physical__out__start, 12322 ill_t *, NULL, ill_t *, out_ill, 12323 ip6_t *, ip6h, mblk_t *, mp); 12324 12325 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12326 ipst->ips_ipv6firewall_physical_out, 12327 NULL, out_ill, ip6h, mp, mp_ip6h, ipst); 12328 12329 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12330 12331 if (mp == NULL) { 12332 if (multirt_send) { 12333 ASSERT(ire1 != NULL); 12334 if (ire != save_ire) { 12335 ire_refrele(ire); 12336 } 12337 /* 12338 * Proceed with the next RTF_MULTIRT 12339 * ire, also set up the send-to queue 12340 * accordingly. 12341 */ 12342 ire = ire1; 12343 ire1 = NULL; 12344 stq = ire->ire_stq; 12345 nce = ire->ire_nce; 12346 ill = ire_to_ill(ire); 12347 mp = next_mp; 12348 next_mp = NULL; 12349 continue; 12350 } else { 12351 ASSERT(next_mp == NULL); 12352 ASSERT(ire1 == NULL); 12353 break; 12354 } 12355 } 12356 12357 /* 12358 * Update ire and MIB counters; for save_ire, this has 12359 * been done by the caller. 12360 */ 12361 if (ire != save_ire) { 12362 UPDATE_OB_PKT_COUNT(ire); 12363 ire->ire_last_used_time = lbolt; 12364 12365 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12366 BUMP_MIB(ill->ill_ip_mib, 12367 ipIfStatsHCOutMcastPkts); 12368 UPDATE_MIB(ill->ill_ip_mib, 12369 ipIfStatsHCOutMcastOctets, 12370 ntohs(ip6h->ip6_plen) + 12371 IPV6_HDR_LEN); 12372 } 12373 } 12374 12375 /* 12376 * Send it down. XXX Do we want to flow control AH/ESP 12377 * packets that carry TCP payloads? 
We don't flow 12378 * control TCP packets, but we should also not 12379 * flow-control TCP packets that have been protected. 12380 * We don't have an easy way to find out if an AH/ESP 12381 * packet was originally TCP or not currently. 12382 */ 12383 if (io == NULL) { 12384 BUMP_MIB(ill->ill_ip_mib, 12385 ipIfStatsHCOutTransmits); 12386 UPDATE_MIB(ill->ill_ip_mib, 12387 ipIfStatsHCOutOctets, 12388 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12389 putnext(stq, mp); 12390 } else { 12391 /* 12392 * Safety Pup says: make sure this is 12393 * going to the right interface! 12394 */ 12395 if (io->ipsec_out_capab_ill_index != 12396 ill_index) { 12397 /* IPsec kstats: bump lose counter */ 12398 freemsg(mp1); 12399 } else { 12400 BUMP_MIB(ill->ill_ip_mib, 12401 ipIfStatsHCOutTransmits); 12402 UPDATE_MIB(ill->ill_ip_mib, 12403 ipIfStatsHCOutOctets, 12404 ntohs(ip6h->ip6_plen) + 12405 IPV6_HDR_LEN); 12406 ipsec_hw_putnext(stq, mp); 12407 } 12408 } 12409 12410 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12411 if (ire != save_ire) { 12412 ire_refrele(ire); 12413 } 12414 if (multirt_send) { 12415 ASSERT(ire1 != NULL); 12416 /* 12417 * Proceed with the next RTF_MULTIRT 12418 * ire, also set up the send-to queue 12419 * accordingly. 12420 */ 12421 ire = ire1; 12422 ire1 = NULL; 12423 stq = ire->ire_stq; 12424 nce = ire->ire_nce; 12425 ill = ire_to_ill(ire); 12426 mp = next_mp; 12427 next_mp = NULL; 12428 continue; 12429 } 12430 ASSERT(next_mp == NULL); 12431 ASSERT(ire1 == NULL); 12432 return; 12433 } 12434 12435 ASSERT(nce->nce_state != ND_INCOMPLETE); 12436 12437 /* 12438 * Check for upper layer advice 12439 */ 12440 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12441 /* 12442 * It should be o.k. to check the state without 12443 * a lock here, at most we lose an advice. 
12444 */ 12445 nce->nce_last = TICK_TO_MSEC(lbolt64); 12446 if (nce->nce_state != ND_REACHABLE) { 12447 12448 mutex_enter(&nce->nce_lock); 12449 nce->nce_state = ND_REACHABLE; 12450 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12451 mutex_exit(&nce->nce_lock); 12452 (void) untimeout(nce->nce_timeout_id); 12453 if (ip_debug > 2) { 12454 /* ip1dbg */ 12455 pr_addr_dbg("ip_xmit_v6: state" 12456 " for %s changed to" 12457 " REACHABLE\n", AF_INET6, 12458 &ire->ire_addr_v6); 12459 } 12460 } 12461 if (ire != save_ire) { 12462 ire_refrele(ire); 12463 } 12464 if (multirt_send) { 12465 ASSERT(ire1 != NULL); 12466 /* 12467 * Proceed with the next RTF_MULTIRT 12468 * ire, also set up the send-to queue 12469 * accordingly. 12470 */ 12471 ire = ire1; 12472 ire1 = NULL; 12473 stq = ire->ire_stq; 12474 nce = ire->ire_nce; 12475 ill = ire_to_ill(ire); 12476 mp = next_mp; 12477 next_mp = NULL; 12478 continue; 12479 } 12480 ASSERT(next_mp == NULL); 12481 ASSERT(ire1 == NULL); 12482 return; 12483 } 12484 12485 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12486 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12487 " ill_reachable_time = %d \n", delta, 12488 ill->ill_reachable_time)); 12489 if (delta > (uint64_t)ill->ill_reachable_time) { 12490 nce = ire->ire_nce; 12491 mutex_enter(&nce->nce_lock); 12492 switch (nce->nce_state) { 12493 case ND_REACHABLE: 12494 case ND_STALE: 12495 /* 12496 * ND_REACHABLE is identical to 12497 * ND_STALE in this specific case. If 12498 * reachable time has expired for this 12499 * neighbor (delta is greater than 12500 * reachable time), conceptually, the 12501 * neighbor cache is no longer in 12502 * REACHABLE state, but already in 12503 * STALE state. So the correct 12504 * transition here is to ND_DELAY. 
12505 */ 12506 nce->nce_state = ND_DELAY; 12507 mutex_exit(&nce->nce_lock); 12508 NDP_RESTART_TIMER(nce, 12509 ipst->ips_delay_first_probe_time); 12510 if (ip_debug > 3) { 12511 /* ip2dbg */ 12512 pr_addr_dbg("ip_xmit_v6: state" 12513 " for %s changed to" 12514 " DELAY\n", AF_INET6, 12515 &ire->ire_addr_v6); 12516 } 12517 break; 12518 case ND_DELAY: 12519 case ND_PROBE: 12520 mutex_exit(&nce->nce_lock); 12521 /* Timers have already started */ 12522 break; 12523 case ND_UNREACHABLE: 12524 /* 12525 * ndp timer has detected that this nce 12526 * is unreachable and initiated deleting 12527 * this nce and all its associated IREs. 12528 * This is a race where we found the 12529 * ire before it was deleted and have 12530 * just sent out a packet using this 12531 * unreachable nce. 12532 */ 12533 mutex_exit(&nce->nce_lock); 12534 break; 12535 default: 12536 ASSERT(0); 12537 } 12538 } 12539 12540 if (multirt_send) { 12541 ASSERT(ire1 != NULL); 12542 /* 12543 * Proceed with the next RTF_MULTIRT ire, 12544 * Also set up the send-to queue accordingly. 12545 */ 12546 if (ire != save_ire) { 12547 ire_refrele(ire); 12548 } 12549 ire = ire1; 12550 ire1 = NULL; 12551 stq = ire->ire_stq; 12552 nce = ire->ire_nce; 12553 ill = ire_to_ill(ire); 12554 mp = next_mp; 12555 next_mp = NULL; 12556 } 12557 } while (multirt_send); 12558 /* 12559 * In the multirouting case, release the last ire used for 12560 * emission. save_ire will be released by the caller. 12561 */ 12562 if (ire != save_ire) { 12563 ire_refrele(ire); 12564 } 12565 } else { 12566 /* 12567 * Queue packet if we have an conn to give back pressure. 12568 * We can't queue packets intended for hardware acceleration 12569 * since we've tossed that state already. If the packet is 12570 * being fed back from ire_send_v6, we don't know the 12571 * position in the queue to enqueue the packet and we discard 12572 * the packet. 
12573 */ 12574 if (ipst->ips_ip_output_queue && (connp != NULL) && 12575 (io == NULL) && (caller != IRE_SEND)) { 12576 if (caller == IP_WSRV) { 12577 connp->conn_did_putbq = 1; 12578 (void) putbq(connp->conn_wq, mp); 12579 conn_drain_insert(connp); 12580 /* 12581 * caller == IP_WSRV implies we are 12582 * the service thread, and the 12583 * queue is already noenabled. 12584 * The check for canput and 12585 * the putbq is not atomic. 12586 * So we need to check again. 12587 */ 12588 if (canput(stq->q_next)) 12589 connp->conn_did_putbq = 0; 12590 } else { 12591 (void) putq(connp->conn_wq, mp); 12592 } 12593 return; 12594 } 12595 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12596 freemsg(mp); 12597 return; 12598 } 12599 } 12600 12601 /* 12602 * pr_addr_dbg function provides the needed buffer space to call 12603 * inet_ntop() function's 3rd argument. This function should be 12604 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12605 * stack buffer space in it's own stack frame. This function uses 12606 * a buffer from it's own stack and prints the information. 12607 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12608 * 12609 * Note: This function can call inet_ntop() once. 12610 */ 12611 void 12612 pr_addr_dbg(char *fmt1, int af, const void *addr) 12613 { 12614 char buf[INET6_ADDRSTRLEN]; 12615 12616 if (fmt1 == NULL) { 12617 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12618 return; 12619 } 12620 12621 /* 12622 * This does not compare debug level and just prints 12623 * out. Thus it is the responsibility of the caller 12624 * to check the appropriate debug-level before calling 12625 * this function. 
12626 */ 12627 if (ip_debug > 0) { 12628 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12629 } 12630 12631 12632 } 12633 12634 12635 /* 12636 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12637 * if needed and extension headers) that will be needed based on the 12638 * ip6_pkt_t structure passed by the caller. 12639 * 12640 * The returned length does not include the length of the upper level 12641 * protocol (ULP) header. 12642 */ 12643 int 12644 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12645 { 12646 int len; 12647 12648 len = IPV6_HDR_LEN; 12649 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12650 len += sizeof (ip6i_t); 12651 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12652 ASSERT(ipp->ipp_hopoptslen != 0); 12653 len += ipp->ipp_hopoptslen; 12654 } 12655 if (ipp->ipp_fields & IPPF_RTHDR) { 12656 ASSERT(ipp->ipp_rthdrlen != 0); 12657 len += ipp->ipp_rthdrlen; 12658 } 12659 /* 12660 * En-route destination options 12661 * Only do them if there's a routing header as well 12662 */ 12663 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12664 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12665 ASSERT(ipp->ipp_rtdstoptslen != 0); 12666 len += ipp->ipp_rtdstoptslen; 12667 } 12668 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12669 ASSERT(ipp->ipp_dstoptslen != 0); 12670 len += ipp->ipp_dstoptslen; 12671 } 12672 return (len); 12673 } 12674 12675 /* 12676 * All-purpose routine to build a header chain of an IPv6 header 12677 * followed by any required extension headers and a proto header, 12678 * preceeded (where necessary) by an ip6i_t private header. 12679 * 12680 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12681 * will be filled in appropriately. 12682 * Thus the caller must fill in the rest of the IPv6 header, such as 12683 * traffic class/flowid, source address (if not set here), hoplimit (if not 12684 * set here) and destination address. 12685 * 12686 * The extension headers and ip6i_t header will all be fully filled in. 
12687 */ 12688 void 12689 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12690 ip6_pkt_t *ipp, uint8_t protocol) 12691 { 12692 uint8_t *nxthdr_ptr; 12693 uint8_t *cp; 12694 ip6i_t *ip6i; 12695 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12696 12697 /* 12698 * If sending private ip6i_t header down (checksum info, nexthop, 12699 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12700 * then fill it in. (The checksum info will be filled in by icmp). 12701 */ 12702 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12703 ip6i = (ip6i_t *)ip6h; 12704 ip6h = (ip6_t *)&ip6i[1]; 12705 12706 ip6i->ip6i_flags = 0; 12707 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12708 if (ipp->ipp_fields & IPPF_IFINDEX || 12709 ipp->ipp_fields & IPPF_SCOPE_ID) { 12710 ASSERT(ipp->ipp_ifindex != 0); 12711 ip6i->ip6i_flags |= IP6I_IFINDEX; 12712 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12713 } 12714 if (ipp->ipp_fields & IPPF_ADDR) { 12715 /* 12716 * Enable per-packet source address verification if 12717 * IPV6_PKTINFO specified the source address. 12718 * ip6_src is set in the transport's _wput function. 12719 */ 12720 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12721 &ipp->ipp_addr)); 12722 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12723 } 12724 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12725 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12726 /* 12727 * We need to set this flag so that IP doesn't 12728 * rewrite the IPv6 header's hoplimit with the 12729 * current default value. 
12730 */ 12731 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12732 } 12733 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12734 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12735 &ipp->ipp_nexthop)); 12736 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12737 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12738 } 12739 /* 12740 * tell IP this is an ip6i_t private header 12741 */ 12742 ip6i->ip6i_nxt = IPPROTO_RAW; 12743 } 12744 /* Initialize IPv6 header */ 12745 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12746 if (ipp->ipp_fields & IPPF_TCLASS) { 12747 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12748 (ipp->ipp_tclass << 20); 12749 } 12750 if (ipp->ipp_fields & IPPF_ADDR) 12751 ip6h->ip6_src = ipp->ipp_addr; 12752 12753 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12754 cp = (uint8_t *)&ip6h[1]; 12755 /* 12756 * Here's where we have to start stringing together 12757 * any extension headers in the right order: 12758 * Hop-by-hop, destination, routing, and final destination opts. 12759 */ 12760 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12761 /* Hop-by-hop options */ 12762 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12763 12764 *nxthdr_ptr = IPPROTO_HOPOPTS; 12765 nxthdr_ptr = &hbh->ip6h_nxt; 12766 12767 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12768 cp += ipp->ipp_hopoptslen; 12769 } 12770 /* 12771 * En-route destination options 12772 * Only do them if there's a routing header as well 12773 */ 12774 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12775 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12776 ip6_dest_t *dst = (ip6_dest_t *)cp; 12777 12778 *nxthdr_ptr = IPPROTO_DSTOPTS; 12779 nxthdr_ptr = &dst->ip6d_nxt; 12780 12781 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12782 cp += ipp->ipp_rtdstoptslen; 12783 } 12784 /* 12785 * Routing header next 12786 */ 12787 if (ipp->ipp_fields & IPPF_RTHDR) { 12788 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12789 12790 *nxthdr_ptr = IPPROTO_ROUTING; 12791 nxthdr_ptr = &rt->ip6r_nxt; 12792 12793 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12794 cp += ipp->ipp_rthdrlen; 12795 } 
12796 /* 12797 * Do ultimate destination options 12798 */ 12799 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12800 ip6_dest_t *dest = (ip6_dest_t *)cp; 12801 12802 *nxthdr_ptr = IPPROTO_DSTOPTS; 12803 nxthdr_ptr = &dest->ip6d_nxt; 12804 12805 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12806 cp += ipp->ipp_dstoptslen; 12807 } 12808 /* 12809 * Now set the last header pointer to the proto passed in 12810 */ 12811 *nxthdr_ptr = protocol; 12812 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12813 } 12814 12815 /* 12816 * Return a pointer to the routing header extension header 12817 * in the IPv6 header(s) chain passed in. 12818 * If none found, return NULL 12819 * Assumes that all extension headers are in same mblk as the v6 header 12820 */ 12821 ip6_rthdr_t * 12822 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12823 { 12824 ip6_dest_t *desthdr; 12825 ip6_frag_t *fraghdr; 12826 uint_t hdrlen; 12827 uint8_t nexthdr; 12828 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12829 12830 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12831 return ((ip6_rthdr_t *)ptr); 12832 12833 /* 12834 * The routing header will precede all extension headers 12835 * other than the hop-by-hop and destination options 12836 * extension headers, so if we see anything other than those, 12837 * we're done and didn't find it. 12838 * We could see a destination options header alone but no 12839 * routing header, in which case we'll return NULL as soon as 12840 * we see anything after that. 12841 * Hop-by-hop and destination option headers are identical, 12842 * so we can use either one we want as a template. 12843 */ 12844 nexthdr = ip6h->ip6_nxt; 12845 while (ptr < endptr) { 12846 /* Is there enough left for len + nexthdr? 
*/ 12847 if (ptr + MIN_EHDR_LEN > endptr) 12848 return (NULL); 12849 12850 switch (nexthdr) { 12851 case IPPROTO_HOPOPTS: 12852 case IPPROTO_DSTOPTS: 12853 /* Assumes the headers are identical for hbh and dst */ 12854 desthdr = (ip6_dest_t *)ptr; 12855 hdrlen = 8 * (desthdr->ip6d_len + 1); 12856 nexthdr = desthdr->ip6d_nxt; 12857 break; 12858 12859 case IPPROTO_ROUTING: 12860 return ((ip6_rthdr_t *)ptr); 12861 12862 case IPPROTO_FRAGMENT: 12863 fraghdr = (ip6_frag_t *)ptr; 12864 hdrlen = sizeof (ip6_frag_t); 12865 nexthdr = fraghdr->ip6f_nxt; 12866 break; 12867 12868 default: 12869 return (NULL); 12870 } 12871 ptr += hdrlen; 12872 } 12873 return (NULL); 12874 } 12875 12876 /* 12877 * Called for source-routed packets originating on this node. 12878 * Manipulates the original routing header by moving every entry up 12879 * one slot, placing the first entry in the v6 header's v6_dst field, 12880 * and placing the ultimate destination in the routing header's last 12881 * slot. 12882 * 12883 * Returns the checksum diference between the ultimate destination 12884 * (last hop in the routing header when the packet is sent) and 12885 * the first hop (ip6_dst when the packet is sent) 12886 */ 12887 /* ARGSUSED2 */ 12888 uint32_t 12889 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12890 { 12891 uint_t numaddr; 12892 uint_t i; 12893 in6_addr_t *addrptr; 12894 in6_addr_t tmp; 12895 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12896 uint32_t cksm; 12897 uint32_t addrsum = 0; 12898 uint16_t *ptr; 12899 12900 /* 12901 * Perform any processing needed for source routing. 12902 * We know that all extension headers will be in the same mblk 12903 * as the IPv6 header. 12904 */ 12905 12906 /* 12907 * If no segments left in header, or the header length field is zero, 12908 * don't move hop addresses around; 12909 * Checksum difference is zero. 
12910 */ 12911 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12912 return (0); 12913 12914 ptr = (uint16_t *)&ip6h->ip6_dst; 12915 cksm = 0; 12916 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12917 cksm += ptr[i]; 12918 } 12919 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12920 12921 /* 12922 * Here's where the fun begins - we have to 12923 * move all addresses up one spot, take the 12924 * first hop and make it our first ip6_dst, 12925 * and place the ultimate destination in the 12926 * newly-opened last slot. 12927 */ 12928 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12929 numaddr = rthdr->ip6r0_len / 2; 12930 tmp = *addrptr; 12931 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12932 *addrptr = addrptr[1]; 12933 } 12934 *addrptr = ip6h->ip6_dst; 12935 ip6h->ip6_dst = tmp; 12936 12937 /* 12938 * From the checksummed ultimate destination subtract the checksummed 12939 * current ip6_dst (the first hop address). Return that number. 12940 * (In the v4 case, the second part of this is done in each routine 12941 * that calls ip_massage_options(). We do it all in this one place 12942 * for v6). 12943 */ 12944 ptr = (uint16_t *)&ip6h->ip6_dst; 12945 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12946 addrsum += ptr[i]; 12947 } 12948 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12949 if ((int)cksm < 0) 12950 cksm--; 12951 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12952 12953 return (cksm); 12954 } 12955 12956 /* 12957 * Propagate a multicast group membership operation (join/leave) (*fn) on 12958 * all interfaces crossed by the related multirt routes. 12959 * The call is considered successful if the operation succeeds 12960 * on at least one interface. 12961 * The function is called if the destination address in the packet to send 12962 * is multirouted. 
12963 */ 12964 int 12965 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12966 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12967 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12968 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12969 { 12970 ire_t *ire_gw; 12971 irb_t *irb; 12972 int index, error = 0; 12973 opt_restart_t *or; 12974 ip_stack_t *ipst = ire->ire_ipst; 12975 12976 irb = ire->ire_bucket; 12977 ASSERT(irb != NULL); 12978 12979 ASSERT(DB_TYPE(first_mp) == M_CTL); 12980 or = (opt_restart_t *)first_mp->b_rptr; 12981 12982 IRB_REFHOLD(irb); 12983 for (; ire != NULL; ire = ire->ire_next) { 12984 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12985 continue; 12986 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12987 continue; 12988 12989 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12990 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12991 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12992 /* No resolver exists for the gateway; skip this ire. */ 12993 if (ire_gw == NULL) 12994 continue; 12995 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12996 /* 12997 * A resolver exists: we can get the interface on which we have 12998 * to apply the operation. 12999 */ 13000 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13001 first_mp); 13002 if (error == 0) 13003 or->or_private = CGTP_MCAST_SUCCESS; 13004 13005 if (ip_debug > 0) { 13006 ulong_t off; 13007 char *ksym; 13008 13009 ksym = kobj_getsymname((uintptr_t)fn, &off); 13010 ip2dbg(("ip_multirt_apply_membership_v6: " 13011 "called %s, multirt group 0x%08x via itf 0x%08x, " 13012 "error %d [success %u]\n", 13013 ksym ? 
ksym : "?", 13014 ntohl(V4_PART_OF_V6((*v6grp))), 13015 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13016 error, or->or_private)); 13017 } 13018 13019 ire_refrele(ire_gw); 13020 if (error == EINPROGRESS) { 13021 IRB_REFRELE(irb); 13022 return (error); 13023 } 13024 } 13025 IRB_REFRELE(irb); 13026 /* 13027 * Consider the call as successful if we succeeded on at least 13028 * one interface. Otherwise, return the last encountered error. 13029 */ 13030 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13031 } 13032 13033 void 13034 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13035 { 13036 kstat_t *ksp; 13037 13038 ip6_stat_t template = { 13039 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13040 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13041 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13042 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13043 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13044 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13045 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13046 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13047 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13048 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13049 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13050 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13051 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13052 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13053 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13054 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13055 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13056 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13057 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13058 }; 13059 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13060 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13061 KSTAT_FLAG_VIRTUAL, stackid); 13062 13063 if (ksp == NULL) 13064 return (NULL); 13065 13066 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13067 ksp->ks_data = (void *)ip6_statisticsp; 13068 ksp->ks_private = (void *)(uintptr_t)stackid; 13069 13070 kstat_install(ksp); 13071 return (ksp); 13072 } 13073 13074 void 13075 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13076 { 13077 if (ksp != NULL) { 13078 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13079 kstat_delete_netstack(ksp, stackid); 13080 } 13081 } 13082 13083 /* 13084 * The following two functions set and get the value for the 13085 * IPV6_SRC_PREFERENCES socket option. 13086 */ 13087 int 13088 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13089 { 13090 /* 13091 * We only support preferences that are covered by 13092 * IPV6_PREFER_SRC_MASK. 13093 */ 13094 if (prefs & ~IPV6_PREFER_SRC_MASK) 13095 return (EINVAL); 13096 13097 /* 13098 * Look for conflicting preferences or default preferences. If 13099 * both bits of a related pair are clear, the application wants the 13100 * system's default value for that pair. Both bits in a pair can't 13101 * be set. 
13102 */ 13103 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13104 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13105 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13106 IPV6_PREFER_SRC_MIPMASK) { 13107 return (EINVAL); 13108 } 13109 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13110 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13111 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13112 IPV6_PREFER_SRC_TMPMASK) { 13113 return (EINVAL); 13114 } 13115 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13116 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13117 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13118 IPV6_PREFER_SRC_CGAMASK) { 13119 return (EINVAL); 13120 } 13121 13122 connp->conn_src_preferences = prefs; 13123 return (0); 13124 } 13125 13126 size_t 13127 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13128 { 13129 *val = connp->conn_src_preferences; 13130 return (sizeof (connp->conn_src_preferences)); 13131 } 13132 13133 int 13134 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13135 { 13136 ill_t *ill; 13137 ire_t *ire; 13138 int error; 13139 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13140 13141 /* 13142 * Verify the source address and ifindex. Privileged users can use 13143 * any source address. For ancillary data the source address is 13144 * checked in ip_wput_v6. 13145 */ 13146 if (pkti->ipi6_ifindex != 0) { 13147 ASSERT(connp != NULL); 13148 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13149 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13150 if (ill == NULL) { 13151 /* 13152 * We just want to know if the interface exists, we 13153 * don't really care about the ill pointer itself. 
13154 */ 13155 if (error != EINPROGRESS) 13156 return (error); 13157 error = 0; /* Ensure we don't use it below */ 13158 } else { 13159 ill_refrele(ill); 13160 } 13161 } 13162 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13163 secpolicy_net_rawaccess(cr) != 0) { 13164 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13165 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13166 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13167 if (ire != NULL) 13168 ire_refrele(ire); 13169 else 13170 return (ENXIO); 13171 } 13172 return (0); 13173 } 13174 13175 /* 13176 * Get the size of the IP options (including the IP headers size) 13177 * without including the AH header's size. If till_ah is B_FALSE, 13178 * and if AH header is present, dest options beyond AH header will 13179 * also be included in the returned size. 13180 */ 13181 int 13182 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13183 { 13184 ip6_t *ip6h; 13185 uint8_t nexthdr; 13186 uint8_t *whereptr; 13187 ip6_hbh_t *hbhhdr; 13188 ip6_dest_t *dsthdr; 13189 ip6_rthdr_t *rthdr; 13190 int ehdrlen; 13191 int size; 13192 ah_t *ah; 13193 13194 ip6h = (ip6_t *)mp->b_rptr; 13195 size = IPV6_HDR_LEN; 13196 nexthdr = ip6h->ip6_nxt; 13197 whereptr = (uint8_t *)&ip6h[1]; 13198 for (;;) { 13199 /* Assume IP has already stripped it */ 13200 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13201 switch (nexthdr) { 13202 case IPPROTO_HOPOPTS: 13203 hbhhdr = (ip6_hbh_t *)whereptr; 13204 nexthdr = hbhhdr->ip6h_nxt; 13205 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13206 break; 13207 case IPPROTO_DSTOPTS: 13208 dsthdr = (ip6_dest_t *)whereptr; 13209 nexthdr = dsthdr->ip6d_nxt; 13210 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13211 break; 13212 case IPPROTO_ROUTING: 13213 rthdr = (ip6_rthdr_t *)whereptr; 13214 nexthdr = rthdr->ip6r_nxt; 13215 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13216 break; 13217 default : 13218 if (till_ah) { 13219 ASSERT(nexthdr == IPPROTO_AH); 13220 return (size); 13221 } 13222 /* 13223 * If we don't have a 
AH header to traverse, 13224 * return now. This happens normally for 13225 * outbound datagrams where we have not inserted 13226 * the AH header. 13227 */ 13228 if (nexthdr != IPPROTO_AH) { 13229 return (size); 13230 } 13231 13232 /* 13233 * We don't include the AH header's size 13234 * to be symmetrical with other cases where 13235 * we either don't have a AH header (outbound) 13236 * or peek into the AH header yet (inbound and 13237 * not pulled up yet). 13238 */ 13239 ah = (ah_t *)whereptr; 13240 nexthdr = ah->ah_nexthdr; 13241 ehdrlen = (ah->ah_length << 2) + 8; 13242 13243 if (nexthdr == IPPROTO_DSTOPTS) { 13244 if (whereptr + ehdrlen >= mp->b_wptr) { 13245 /* 13246 * The destination options header 13247 * is not part of the first mblk. 13248 */ 13249 whereptr = mp->b_cont->b_rptr; 13250 } else { 13251 whereptr += ehdrlen; 13252 } 13253 13254 dsthdr = (ip6_dest_t *)whereptr; 13255 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13256 size += ehdrlen; 13257 } 13258 return (size); 13259 } 13260 whereptr += ehdrlen; 13261 size += ehdrlen; 13262 } 13263 } 13264