1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 funcions will end with _v6 in the ip module. 118 * IPv6 funcions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 185 boolean_t, zoneid_t); 186 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 187 const 
in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 188 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 189 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 190 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 191 boolean_t, boolean_t, boolean_t, boolean_t); 192 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 193 iulp_t *, ip_stack_t *); 194 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 195 uint16_t, boolean_t, boolean_t, boolean_t); 196 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 197 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 198 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 199 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 200 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 201 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 202 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 203 uint8_t *, uint_t, uint8_t, ip_stack_t *); 204 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 205 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 206 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 207 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 208 conn_t *, int, int, int, zoneid_t); 209 210 void ip_rput_v6(queue_t *, mblk_t *); 211 static void ip_wput_v6(queue_t *, mblk_t *); 212 213 /* 214 * A template for an IPv6 AR_ENTRY_QUERY 215 */ 216 static areq_t ipv6_areq_template = { 217 AR_ENTRY_QUERY, /* cmd */ 218 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 219 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 220 IP6_DL_SAP, /* protocol, from arps perspective */ 221 sizeof (areq_t), /* target addr offset */ 222 IPV6_ADDR_LEN, /* target addr_length */ 223 0, /* flags */ 224 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 225 IPV6_ADDR_LEN, /* sender addr length */ 226 6, /* xmit_count */ 227 1000, /* 
(re)xmit_interval in milliseconds */ 228 4 /* max # of requests to buffer */ 229 /* anything else filled in by the code */ 230 }; 231 232 struct qinit rinit_ipv6 = { 233 (pfi_t)ip_rput_v6, 234 NULL, 235 ip_open, 236 ip_close, 237 NULL, 238 &ip_mod_info 239 }; 240 241 struct qinit winit_ipv6 = { 242 (pfi_t)ip_wput_v6, 243 (pfi_t)ip_wsrv, 244 ip_open, 245 ip_close, 246 NULL, 247 &ip_mod_info 248 }; 249 250 /* 251 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 252 * The message has already been checksummed and if needed, 253 * a copy has been made to be sent any interested ICMP client (conn) 254 * Note that this is different than icmp_inbound() which does the fanout 255 * to conn's as well as local processing of the ICMP packets. 256 * 257 * All error messages are passed to the matching transport stream. 258 * 259 * Zones notes: 260 * The packet is only processed in the context of the specified zone: typically 261 * only this zone will reply to an echo request. This means that the caller must 262 * call icmp_inbound_v6() for each relevant zone. 
263 */ 264 static void 265 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 266 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 267 { 268 icmp6_t *icmp6; 269 ip6_t *ip6h; 270 boolean_t interested; 271 ip6i_t *ip6i; 272 in6_addr_t origsrc; 273 ire_t *ire; 274 mblk_t *first_mp; 275 ipsec_in_t *ii; 276 ip_stack_t *ipst = ill->ill_ipst; 277 278 ASSERT(ill != NULL); 279 first_mp = mp; 280 if (mctl_present) { 281 mp = first_mp->b_cont; 282 ASSERT(mp != NULL); 283 284 ii = (ipsec_in_t *)first_mp->b_rptr; 285 ASSERT(ii->ipsec_in_type == IPSEC_IN); 286 } 287 288 ip6h = (ip6_t *)mp->b_rptr; 289 290 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 291 292 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 293 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 294 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 295 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 296 freemsg(first_mp); 297 return; 298 } 299 ip6h = (ip6_t *)mp->b_rptr; 300 } 301 if (ipst->ips_icmp_accept_clear_messages == 0) { 302 first_mp = ipsec_check_global_policy(first_mp, NULL, 303 NULL, ip6h, mctl_present, ipst->ips_netstack); 304 if (first_mp == NULL) 305 return; 306 } 307 308 /* 309 * On a labeled system, we have to check whether the zone itself is 310 * permitted to receive raw traffic. 
311 */ 312 if (is_system_labeled()) { 313 if (zoneid == ALL_ZONES) 314 zoneid = tsol_packet_to_zoneid(mp); 315 if (!tsol_can_accept_raw(mp, B_FALSE)) { 316 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 317 zoneid)); 318 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 319 freemsg(first_mp); 320 return; 321 } 322 } 323 324 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 325 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 326 icmp6->icmp6_code)); 327 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 328 329 /* Initiate IPPF processing here */ 330 if (IP6_IN_IPP(flags, ipst)) { 331 332 /* 333 * If the ifindex changes due to SIOCSLIFINDEX 334 * packet may return to IP on the wrong ill. 335 */ 336 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 337 if (mp == NULL) { 338 if (mctl_present) { 339 freeb(first_mp); 340 } 341 return; 342 } 343 } 344 345 switch (icmp6->icmp6_type) { 346 case ICMP6_DST_UNREACH: 347 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 348 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 350 break; 351 352 case ICMP6_TIME_EXCEEDED: 353 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 354 break; 355 356 case ICMP6_PARAM_PROB: 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 358 break; 359 360 case ICMP6_PACKET_TOO_BIG: 361 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 362 zoneid); 363 return; 364 case ICMP6_ECHO_REQUEST: 365 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 366 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 367 !ipst->ips_ipv6_resp_echo_mcast) 368 break; 369 370 /* 371 * We must have exclusive use of the mblk to convert it to 372 * a response. 373 * If not, we copy it. 
374 */ 375 if (mp->b_datap->db_ref > 1) { 376 mblk_t *mp1; 377 378 mp1 = copymsg(mp); 379 freemsg(mp); 380 if (mp1 == NULL) { 381 BUMP_MIB(ill->ill_icmp6_mib, 382 ipv6IfIcmpInErrors); 383 if (mctl_present) 384 freeb(first_mp); 385 return; 386 } 387 mp = mp1; 388 ip6h = (ip6_t *)mp->b_rptr; 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 if (mctl_present) 391 first_mp->b_cont = mp; 392 else 393 first_mp = mp; 394 } 395 396 /* 397 * Turn the echo into an echo reply. 398 * Remove any extension headers (do not reverse a source route) 399 * and clear the flow id (keep traffic class for now). 400 */ 401 if (hdr_length != IPV6_HDR_LEN) { 402 int i; 403 404 for (i = 0; i < IPV6_HDR_LEN; i++) 405 mp->b_rptr[hdr_length - i - 1] = 406 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 407 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 408 ip6h = (ip6_t *)mp->b_rptr; 409 ip6h->ip6_nxt = IPPROTO_ICMPV6; 410 hdr_length = IPV6_HDR_LEN; 411 } 412 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 413 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 414 415 ip6h->ip6_plen = 416 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 417 origsrc = ip6h->ip6_src; 418 /* 419 * Reverse the source and destination addresses. 420 * If the return address is a multicast, zero out the source 421 * (ip_wput_v6 will set an address). 422 */ 423 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 424 ip6h->ip6_src = ipv6_all_zeros; 425 ip6h->ip6_dst = origsrc; 426 } else { 427 ip6h->ip6_src = ip6h->ip6_dst; 428 ip6h->ip6_dst = origsrc; 429 } 430 431 /* set the hop limit */ 432 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 433 434 /* 435 * Prepare for checksum by putting icmp length in the icmp 436 * checksum field. The checksum is calculated in ip_wput_v6. 437 */ 438 icmp6->icmp6_cksum = ip6h->ip6_plen; 439 /* 440 * ICMP echo replies should go out on the same interface 441 * the request came on as probes used by in.mpathd for 442 * detecting NIC failures are ECHO packets. 
We turn-off load 443 * spreading by allocating a ip6i and setting ip6i_attach_if 444 * to B_TRUE which is handled both by ip_wput_v6 and 445 * ip_newroute_v6. If we don't turnoff load spreading, 446 * the packets might get dropped if there are no 447 * non-FAILED/INACTIVE interfaces for it to go out on and 448 * in.mpathd would wrongly detect a failure or mis-detect 449 * a NIC failure as a link failure. As load spreading can 450 * happen only if ill_group is not NULL, we do only for 451 * that case and this does not affect the normal case. 452 * 453 * We force this only on echo packets that came from on-link 454 * hosts. We restrict this to link-local addresses which 455 * is used by in.mpathd for probing. In the IPv6 case, 456 * default routes typically have an ire_ipif pointer and 457 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 458 * might work. As a default route out of this interface 459 * may not be present, enforcing this packet to go out in 460 * this case may not work. 461 */ 462 if (ill->ill_group != NULL && 463 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 464 /* 465 * If we are sending replies to ourselves, don't 466 * set ATTACH_IF as we may not be able to find 467 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 468 * causes ip_wput_v6 to look for an IRE_LOCAL on 469 * "ill" which it may not find and will try to 470 * create an IRE_CACHE for our local address. Once 471 * we do this, we will try to forward all packets 472 * meant to our LOCAL address. 
473 */ 474 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 475 NULL, ipst); 476 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 477 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 478 if (mp == NULL) { 479 BUMP_MIB(ill->ill_icmp6_mib, 480 ipv6IfIcmpInErrors); 481 if (ire != NULL) 482 ire_refrele(ire); 483 if (mctl_present) 484 freeb(first_mp); 485 return; 486 } else if (mctl_present) { 487 first_mp->b_cont = mp; 488 } else { 489 first_mp = mp; 490 } 491 ip6i = (ip6i_t *)mp->b_rptr; 492 ip6i->ip6i_flags = IP6I_ATTACH_IF; 493 ip6i->ip6i_ifindex = 494 ill->ill_phyint->phyint_ifindex; 495 } 496 if (ire != NULL) 497 ire_refrele(ire); 498 } 499 500 if (!mctl_present) { 501 /* 502 * This packet should go out the same way as it 503 * came in i.e in clear. To make sure that global 504 * policy will not be applied to this in ip_wput, 505 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 506 */ 507 ASSERT(first_mp == mp); 508 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 509 if (first_mp == NULL) { 510 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 511 freemsg(mp); 512 return; 513 } 514 ii = (ipsec_in_t *)first_mp->b_rptr; 515 516 /* This is not a secure packet */ 517 ii->ipsec_in_secure = B_FALSE; 518 first_mp->b_cont = mp; 519 } 520 ii->ipsec_in_zoneid = zoneid; 521 ASSERT(zoneid != ALL_ZONES); 522 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 523 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 524 return; 525 } 526 put(WR(q), first_mp); 527 return; 528 529 case ICMP6_ECHO_REPLY: 530 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 531 break; 532 533 case ND_ROUTER_SOLICIT: 534 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 535 break; 536 537 case ND_ROUTER_ADVERT: 538 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 539 break; 540 541 case ND_NEIGHBOR_SOLICIT: 542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 543 if (mctl_present) 544 freeb(first_mp); 545 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 546 ndp_input(ill, mp, dl_mp); 547 return; 548 549 case ND_NEIGHBOR_ADVERT: 550 BUMP_MIB(ill->ill_icmp6_mib, 551 ipv6IfIcmpInNeighborAdvertisements); 552 if (mctl_present) 553 freeb(first_mp); 554 /* XXX may wish to pass first_mp up to ndp_input someday. */ 555 ndp_input(ill, mp, dl_mp); 556 return; 557 558 case ND_REDIRECT: { 559 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 560 561 if (ipst->ips_ipv6_ignore_redirect) 562 break; 563 564 /* 565 * As there is no upper client to deliver, we don't 566 * need the first_mp any more. 567 */ 568 if (mctl_present) 569 freeb(first_mp); 570 if (!pullupmsg(mp, -1)) { 571 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 572 break; 573 } 574 icmp_redirect_v6(q, mp, ill); 575 return; 576 } 577 578 /* 579 * The next three icmp messages will be handled by MLD. 580 * Pass all valid MLD packets up to any process(es) 581 * listening on a raw ICMP socket. MLD messages are 582 * freed by mld_input function. 583 */ 584 case MLD_LISTENER_QUERY: 585 case MLD_LISTENER_REPORT: 586 case MLD_LISTENER_REDUCTION: 587 if (mctl_present) 588 freeb(first_mp); 589 mld_input(q, mp, ill); 590 return; 591 default: 592 break; 593 } 594 if (interested) { 595 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 596 mctl_present, zoneid); 597 } else { 598 freemsg(first_mp); 599 } 600 } 601 602 /* 603 * Process received IPv6 ICMP Packet too big. 604 * After updating any IRE it does the fanout to any matching transport streams. 605 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
606 */ 607 /* ARGSUSED */ 608 static void 609 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 610 boolean_t mctl_present, zoneid_t zoneid) 611 { 612 ip6_t *ip6h; 613 ip6_t *inner_ip6h; 614 icmp6_t *icmp6; 615 uint16_t hdr_length; 616 uint32_t mtu; 617 ire_t *ire, *first_ire; 618 mblk_t *first_mp; 619 ip_stack_t *ipst = ill->ill_ipst; 620 621 first_mp = mp; 622 if (mctl_present) 623 mp = first_mp->b_cont; 624 /* 625 * We must have exclusive use of the mblk to update the MTU 626 * in the packet. 627 * If not, we copy it. 628 * 629 * If there's an M_CTL present, we know that allocated first_mp 630 * earlier in this function, so we know first_mp has refcnt of one. 631 */ 632 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 633 if (mp->b_datap->db_ref > 1) { 634 mblk_t *mp1; 635 636 mp1 = copymsg(mp); 637 freemsg(mp); 638 if (mp1 == NULL) { 639 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 640 if (mctl_present) 641 freeb(first_mp); 642 return; 643 } 644 mp = mp1; 645 if (mctl_present) 646 first_mp->b_cont = mp; 647 else 648 first_mp = mp; 649 } 650 ip6h = (ip6_t *)mp->b_rptr; 651 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 652 hdr_length = ip_hdr_length_v6(mp, ip6h); 653 else 654 hdr_length = IPV6_HDR_LEN; 655 656 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 657 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 658 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 659 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 660 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 661 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 662 freemsg(first_mp); 663 return; 664 } 665 ip6h = (ip6_t *)mp->b_rptr; 666 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 667 inner_ip6h = (ip6_t *)&icmp6[1]; 668 } 669 670 /* 671 * For link local destinations matching simply on IRE type is not 672 * sufficient. Same link local addresses for different ILL's is 673 * possible. 
674 */ 675 676 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 677 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 678 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 679 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 680 681 if (first_ire == NULL) { 682 if (ip_debug > 2) { 683 /* ip1dbg */ 684 pr_addr_dbg("icmp_inbound_too_big_v6:" 685 "no ire for dst %s\n", AF_INET6, 686 &inner_ip6h->ip6_dst); 687 } 688 freemsg(first_mp); 689 return; 690 } 691 692 mtu = ntohl(icmp6->icmp6_mtu); 693 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 694 for (ire = first_ire; ire != NULL && 695 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 696 ire = ire->ire_next) { 697 mutex_enter(&ire->ire_lock); 698 if (mtu < IPV6_MIN_MTU) { 699 ip1dbg(("Received mtu less than IPv6 " 700 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 701 mtu = IPV6_MIN_MTU; 702 /* 703 * If an mtu less than IPv6 min mtu is received, 704 * we must include a fragment header in 705 * subsequent packets. 706 */ 707 ire->ire_frag_flag |= IPH_FRAG_HDR; 708 } 709 ip1dbg(("Received mtu from router: %d\n", mtu)); 710 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 711 /* Record the new max frag size for the ULP. */ 712 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 713 /* 714 * If we need a fragment header in every packet 715 * (above case or multirouting), make sure the 716 * ULP takes it into account when computing the 717 * payload size. 
718 */ 719 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 720 sizeof (ip6_frag_t)); 721 } else { 722 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 723 } 724 mutex_exit(&ire->ire_lock); 725 } 726 rw_exit(&first_ire->ire_bucket->irb_lock); 727 ire_refrele(first_ire); 728 } else { 729 irb_t *irb = NULL; 730 /* 731 * for non-link local destinations we match only on the IRE type 732 */ 733 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 734 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 735 ipst); 736 if (ire == NULL) { 737 if (ip_debug > 2) { 738 /* ip1dbg */ 739 pr_addr_dbg("icmp_inbound_too_big_v6:" 740 "no ire for dst %s\n", 741 AF_INET6, &inner_ip6h->ip6_dst); 742 } 743 freemsg(first_mp); 744 return; 745 } 746 irb = ire->ire_bucket; 747 ire_refrele(ire); 748 rw_enter(&irb->irb_lock, RW_READER); 749 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 750 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 751 &inner_ip6h->ip6_dst)) { 752 mtu = ntohl(icmp6->icmp6_mtu); 753 mutex_enter(&ire->ire_lock); 754 if (mtu < IPV6_MIN_MTU) { 755 ip1dbg(("Received mtu less than IPv6" 756 "min mtu %d: %d\n", 757 IPV6_MIN_MTU, mtu)); 758 mtu = IPV6_MIN_MTU; 759 /* 760 * If an mtu less than IPv6 min mtu is 761 * received, we must include a fragment 762 * header in subsequent packets. 763 */ 764 ire->ire_frag_flag |= IPH_FRAG_HDR; 765 } 766 767 ip1dbg(("Received mtu from router: %d\n", mtu)); 768 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 769 /* Record the new max frag size for the ULP. */ 770 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 771 /* 772 * If we need a fragment header in 773 * every packet (above case or 774 * multirouting), make sure the ULP 775 * takes it into account when computing 776 * the payload size. 
777 */ 778 icmp6->icmp6_mtu = 779 htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = 783 htonl(ire->ire_max_frag); 784 } 785 mutex_exit(&ire->ire_lock); 786 } 787 } 788 rw_exit(&irb->irb_lock); 789 } 790 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 791 mctl_present, zoneid); 792 } 793 794 static void 795 pkt_too_big(conn_t *connp, void *arg) 796 { 797 mblk_t *mp; 798 799 if (!connp->conn_ipv6_recvpathmtu) 800 return; 801 802 /* create message and drop it on this connections read queue */ 803 if ((mp = dupb((mblk_t *)arg)) == NULL) { 804 return; 805 } 806 mp->b_datap->db_type = M_CTL; 807 808 putnext(connp->conn_rq, mp); 809 } 810 811 /* 812 * Fanout received ICMPv6 error packets to the transports. 813 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 814 */ 815 void 816 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 817 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 818 { 819 uint16_t *up; /* Pointer to ports in ULP header */ 820 uint32_t ports; /* reversed ports for fanout */ 821 ip6_t rip6h; /* With reversed addresses */ 822 uint16_t hdr_length; 823 uint8_t *nexthdrp; 824 uint8_t nexthdr; 825 mblk_t *first_mp; 826 ipsec_in_t *ii; 827 tcpha_t *tcpha; 828 conn_t *connp; 829 ip_stack_t *ipst = ill->ill_ipst; 830 831 first_mp = mp; 832 if (mctl_present) { 833 mp = first_mp->b_cont; 834 ASSERT(mp != NULL); 835 836 ii = (ipsec_in_t *)first_mp->b_rptr; 837 ASSERT(ii->ipsec_in_type == IPSEC_IN); 838 } else { 839 ii = NULL; 840 } 841 842 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 843 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 844 845 /* 846 * Need to pullup everything in order to use 847 * ip_hdr_length_nexthdr_v6() 848 */ 849 if (mp->b_cont != NULL) { 850 if (!pullupmsg(mp, -1)) { 851 ip1dbg(("icmp_inbound_error_fanout_v6: " 852 "pullupmsg failed\n")); 853 goto drop_pkt; 854 } 855 ip6h = (ip6_t *)mp->b_rptr; 856 
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 857 } 858 859 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 860 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 861 goto drop_pkt; 862 863 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 864 goto drop_pkt; 865 nexthdr = *nexthdrp; 866 867 /* Set message type, must be done after pullups */ 868 mp->b_datap->db_type = M_CTL; 869 870 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 871 /* 872 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 873 * sockets. 874 * 875 * Note I don't like walking every connection to deliver 876 * this information to a set of listeners. A separate 877 * list could be kept to keep the cost of this down. 878 */ 879 ipcl_walk(pkt_too_big, (void *)mp, ipst); 880 } 881 882 /* Try to pass the ICMP message to clients who need it */ 883 switch (nexthdr) { 884 case IPPROTO_UDP: { 885 /* 886 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 887 * UDP header to get the port information. 888 */ 889 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 890 mp->b_wptr) { 891 break; 892 } 893 /* 894 * Attempt to find a client stream based on port. 895 * Note that we do a reverse lookup since the header is 896 * in the form we sent it out. 897 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 898 * and we only set the src and dst addresses and nexthdr. 899 */ 900 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 901 rip6h.ip6_src = ip6h->ip6_dst; 902 rip6h.ip6_dst = ip6h->ip6_src; 903 rip6h.ip6_nxt = nexthdr; 904 ((uint16_t *)&ports)[0] = up[1]; 905 ((uint16_t *)&ports)[1] = up[0]; 906 907 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 908 IP6_NO_IPPOLICY, mctl_present, zoneid); 909 return; 910 } 911 case IPPROTO_TCP: { 912 /* 913 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 914 * the TCP header to get the port information. 
915 */ 916 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 917 mp->b_wptr) { 918 break; 919 } 920 921 /* 922 * Attempt to find a client stream based on port. 923 * Note that we do a reverse lookup since the header is 924 * in the form we sent it out. 925 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 926 * we only set the src and dst addresses and nexthdr. 927 */ 928 929 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 930 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 931 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 932 if (connp == NULL) { 933 goto drop_pkt; 934 } 935 936 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 937 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 938 return; 939 940 } 941 case IPPROTO_SCTP: 942 /* 943 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 944 * the SCTP header to get the port information. 945 */ 946 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 947 mp->b_wptr) { 948 break; 949 } 950 951 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 952 ((uint16_t *)&ports)[0] = up[1]; 953 ((uint16_t *)&ports)[1] = up[0]; 954 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 955 IP6_NO_IPPOLICY, zoneid); 956 return; 957 case IPPROTO_ESP: 958 case IPPROTO_AH: { 959 int ipsec_rc; 960 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 961 962 /* 963 * We need a IPSEC_IN in the front to fanout to AH/ESP. 964 * We will re-use the IPSEC_IN if it is already present as 965 * AH/ESP will not affect any fields in the IPSEC_IN for 966 * ICMP errors. If there is no IPSEC_IN, allocate a new 967 * one and attach it in the front. 968 */ 969 if (ii != NULL) { 970 /* 971 * ip_fanout_proto_again converts the ICMP errors 972 * that come back from AH/ESP to M_DATA so that 973 * if it is non-AH/ESP and we do a pullupmsg in 974 * this function, it would work. Convert it back 975 * to M_CTL before we send up as this is a ICMP 976 * error. This could have been generated locally or 977 * by some router. 
Validate the inner IPSEC 978 * headers. 979 * 980 * NOTE : ill_index is used by ip_fanout_proto_again 981 * to locate the ill. 982 */ 983 ASSERT(ill != NULL); 984 ii->ipsec_in_ill_index = 985 ill->ill_phyint->phyint_ifindex; 986 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 987 first_mp->b_cont->b_datap->db_type = M_CTL; 988 } else { 989 /* 990 * IPSEC_IN is not present. We attach a ipsec_in 991 * message and send up to IPSEC for validating 992 * and removing the IPSEC headers. Clear 993 * ipsec_in_secure so that when we return 994 * from IPSEC, we don't mistakenly think that this 995 * is a secure packet came from the network. 996 * 997 * NOTE : ill_index is used by ip_fanout_proto_again 998 * to locate the ill. 999 */ 1000 ASSERT(first_mp == mp); 1001 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1002 ASSERT(ill != NULL); 1003 if (first_mp == NULL) { 1004 freemsg(mp); 1005 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1006 return; 1007 } 1008 ii = (ipsec_in_t *)first_mp->b_rptr; 1009 1010 /* This is not a secure packet */ 1011 ii->ipsec_in_secure = B_FALSE; 1012 first_mp->b_cont = mp; 1013 mp->b_datap->db_type = M_CTL; 1014 ii->ipsec_in_ill_index = 1015 ill->ill_phyint->phyint_ifindex; 1016 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1017 } 1018 1019 if (!ipsec_loaded(ipss)) { 1020 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 1021 return; 1022 } 1023 1024 if (nexthdr == IPPROTO_ESP) 1025 ipsec_rc = ipsecesp_icmp_error(first_mp); 1026 else 1027 ipsec_rc = ipsecah_icmp_error(first_mp); 1028 if (ipsec_rc == IPSEC_STATUS_FAILED) 1029 return; 1030 1031 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1032 return; 1033 } 1034 case IPPROTO_ENCAP: 1035 case IPPROTO_IPV6: 1036 if ((uint8_t *)ip6h + hdr_length + 1037 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1038 sizeof (ip6_t)) > mp->b_wptr) { 1039 goto drop_pkt; 1040 } 1041 1042 if (nexthdr == IPPROTO_ENCAP || 1043 !IN6_ARE_ADDR_EQUAL( 1044 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1045 &ip6h->ip6_src) || 1046 !IN6_ARE_ADDR_EQUAL( 1047 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1048 &ip6h->ip6_dst)) { 1049 /* 1050 * For tunnels that have used IPsec protection, 1051 * we need to adjust the MTU to take into account 1052 * the IPsec overhead. 1053 */ 1054 if (ii != NULL) 1055 icmp6->icmp6_mtu = htonl( 1056 ntohl(icmp6->icmp6_mtu) - 1057 ipsec_in_extra_length(first_mp)); 1058 } else { 1059 /* 1060 * Self-encapsulated case. As in the ipv4 case, 1061 * we need to strip the 2nd IP header. Since mp 1062 * is already pulled-up, we can simply bcopy 1063 * the 3rd header + data over the 2nd header. 1064 */ 1065 uint16_t unused_len; 1066 ip6_t *inner_ip6h = (ip6_t *) 1067 ((uchar_t *)ip6h + hdr_length); 1068 1069 /* 1070 * Make sure we don't do recursion more than once. 1071 */ 1072 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1073 &unused_len, &nexthdrp) || 1074 *nexthdrp == IPPROTO_IPV6) { 1075 goto drop_pkt; 1076 } 1077 1078 /* 1079 * We are about to modify the packet. Make a copy if 1080 * someone else has a reference to it. 1081 */ 1082 if (DB_REF(mp) > 1) { 1083 mblk_t *mp1; 1084 uint16_t icmp6_offset; 1085 1086 mp1 = copymsg(mp); 1087 if (mp1 == NULL) { 1088 goto drop_pkt; 1089 } 1090 icmp6_offset = (uint16_t) 1091 ((uchar_t *)icmp6 - mp->b_rptr); 1092 freemsg(mp); 1093 mp = mp1; 1094 1095 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1096 ip6h = (ip6_t *)&icmp6[1]; 1097 inner_ip6h = (ip6_t *) 1098 ((uchar_t *)ip6h + hdr_length); 1099 1100 if (mctl_present) 1101 first_mp->b_cont = mp; 1102 else 1103 first_mp = mp; 1104 } 1105 1106 /* 1107 * Need to set db_type back to M_DATA before 1108 * refeeding mp into this function. 
1109 */ 1110 DB_TYPE(mp) = M_DATA; 1111 1112 /* 1113 * Copy the 3rd header + remaining data on top 1114 * of the 2nd header. 1115 */ 1116 bcopy(inner_ip6h, ip6h, 1117 mp->b_wptr - (uchar_t *)inner_ip6h); 1118 1119 /* 1120 * Subtract length of the 2nd header. 1121 */ 1122 mp->b_wptr -= hdr_length; 1123 1124 /* 1125 * Now recurse, and see what I _really_ should be 1126 * doing here. 1127 */ 1128 icmp_inbound_error_fanout_v6(q, first_mp, 1129 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1130 zoneid); 1131 return; 1132 } 1133 /* FALLTHRU */ 1134 default: 1135 /* 1136 * The rip6h header is only used for the lookup and we 1137 * only set the src and dst addresses and nexthdr. 1138 */ 1139 rip6h.ip6_src = ip6h->ip6_dst; 1140 rip6h.ip6_dst = ip6h->ip6_src; 1141 rip6h.ip6_nxt = nexthdr; 1142 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1143 IP6_NO_IPPOLICY, mctl_present, zoneid); 1144 return; 1145 } 1146 /* NOTREACHED */ 1147 drop_pkt: 1148 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1149 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1150 freemsg(first_mp); 1151 } 1152 1153 /* 1154 * Process received IPv6 ICMP Redirect messages. 
1155 */ 1156 /* ARGSUSED */ 1157 static void 1158 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1159 { 1160 ip6_t *ip6h; 1161 uint16_t hdr_length; 1162 nd_redirect_t *rd; 1163 ire_t *ire; 1164 ire_t *prev_ire; 1165 ire_t *redir_ire; 1166 in6_addr_t *src, *dst, *gateway; 1167 nd_opt_hdr_t *opt; 1168 nce_t *nce; 1169 int nce_flags = 0; 1170 int err = 0; 1171 boolean_t redirect_to_router = B_FALSE; 1172 int len; 1173 int optlen; 1174 iulp_t ulp_info = { 0 }; 1175 ill_t *prev_ire_ill; 1176 ipif_t *ipif; 1177 ip_stack_t *ipst = ill->ill_ipst; 1178 1179 ip6h = (ip6_t *)mp->b_rptr; 1180 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1181 hdr_length = ip_hdr_length_v6(mp, ip6h); 1182 else 1183 hdr_length = IPV6_HDR_LEN; 1184 1185 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1186 len = mp->b_wptr - mp->b_rptr - hdr_length; 1187 src = &ip6h->ip6_src; 1188 dst = &rd->nd_rd_dst; 1189 gateway = &rd->nd_rd_target; 1190 1191 /* Verify if it is a valid redirect */ 1192 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1193 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1194 (rd->nd_rd_code != 0) || 1195 (len < sizeof (nd_redirect_t)) || 1196 (IN6_IS_ADDR_V4MAPPED(dst)) || 1197 (IN6_IS_ADDR_MULTICAST(dst))) { 1198 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1199 freemsg(mp); 1200 return; 1201 } 1202 1203 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1204 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1205 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1206 freemsg(mp); 1207 return; 1208 } 1209 1210 if (len > sizeof (nd_redirect_t)) { 1211 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1212 len - sizeof (nd_redirect_t))) { 1213 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1214 freemsg(mp); 1215 return; 1216 } 1217 } 1218 1219 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1220 redirect_to_router = B_TRUE; 1221 nce_flags |= NCE_F_ISROUTER; 1222 } 1223 1224 /* ipif will be refreleased afterwards */ 1225 ipif = ipif_get_next_ipif(NULL, ill); 1226 if (ipif == NULL) { 1227 freemsg(mp); 1228 return; 1229 
} 1230 1231 /* 1232 * Verify that the IP source address of the redirect is 1233 * the same as the current first-hop router for the specified 1234 * ICMP destination address. 1235 * Also, Make sure we had a route for the dest in question and 1236 * that route was pointing to the old gateway (the source of the 1237 * redirect packet.) 1238 */ 1239 1240 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1241 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1242 MATCH_IRE_DEFAULT, ipst); 1243 1244 /* 1245 * Check that 1246 * the redirect was not from ourselves 1247 * old gateway is still directly reachable 1248 */ 1249 if (prev_ire == NULL || 1250 prev_ire->ire_type == IRE_LOCAL) { 1251 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1252 ipif_refrele(ipif); 1253 goto fail_redirect; 1254 } 1255 prev_ire_ill = ire_to_ill(prev_ire); 1256 ASSERT(prev_ire_ill != NULL); 1257 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1258 nce_flags |= NCE_F_NONUD; 1259 1260 /* 1261 * Should we use the old ULP info to create the new gateway? From 1262 * a user's perspective, we should inherit the info so that it 1263 * is a "smooth" transition. If we do not do that, then new 1264 * connections going thru the new gateway will have no route metrics, 1265 * which is counter-intuitive to user. From a network point of 1266 * view, this may or may not make sense even though the new gateway 1267 * is still directly connected to us so the route metrics should not 1268 * change much. 1269 * 1270 * But if the old ire_uinfo is not initialized, we do another 1271 * recursive lookup on the dest using the new gateway. There may 1272 * be a route to that. If so, use it to initialize the redirect 1273 * route. 1274 */ 1275 if (prev_ire->ire_uinfo.iulp_set) { 1276 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1277 } else if (redirect_to_router) { 1278 /* 1279 * Only do the following if the redirection is really to 1280 * a router. 
1281 */ 1282 ire_t *tmp_ire; 1283 ire_t *sire; 1284 1285 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1286 ALL_ZONES, 0, NULL, 1287 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1288 ipst); 1289 if (sire != NULL) { 1290 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1291 ASSERT(tmp_ire != NULL); 1292 ire_refrele(tmp_ire); 1293 ire_refrele(sire); 1294 } else if (tmp_ire != NULL) { 1295 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1296 sizeof (iulp_t)); 1297 ire_refrele(tmp_ire); 1298 } 1299 } 1300 1301 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1302 opt = (nd_opt_hdr_t *)&rd[1]; 1303 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1304 if (opt != NULL) { 1305 err = ndp_lookup_then_add_v6(ill, 1306 (uchar_t *)&opt[1], /* Link layer address */ 1307 gateway, 1308 &ipv6_all_ones, /* prefix mask */ 1309 &ipv6_all_zeros, /* Mapping mask */ 1310 0, 1311 nce_flags, 1312 ND_STALE, 1313 &nce); 1314 switch (err) { 1315 case 0: 1316 NCE_REFRELE(nce); 1317 break; 1318 case EEXIST: 1319 /* 1320 * Check to see if link layer address has changed and 1321 * process the nce_state accordingly. 1322 */ 1323 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1324 NCE_REFRELE(nce); 1325 break; 1326 default: 1327 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1328 err)); 1329 ipif_refrele(ipif); 1330 goto fail_redirect; 1331 } 1332 } 1333 if (redirect_to_router) { 1334 /* icmp_redirect_ok_v6() must have already verified this */ 1335 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1336 1337 /* 1338 * Create a Route Association. This will allow us to remember 1339 * a router told us to use the particular gateway. 
1340 */ 1341 ire = ire_create_v6( 1342 dst, 1343 &ipv6_all_ones, /* mask */ 1344 &prev_ire->ire_src_addr_v6, /* source addr */ 1345 gateway, /* gateway addr */ 1346 &prev_ire->ire_max_frag, /* max frag */ 1347 NULL, /* no src nce */ 1348 NULL, /* no rfq */ 1349 NULL, /* no stq */ 1350 IRE_HOST, 1351 prev_ire->ire_ipif, 1352 NULL, 1353 0, 1354 0, 1355 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1356 &ulp_info, 1357 NULL, 1358 NULL, 1359 ipst); 1360 } else { 1361 queue_t *stq; 1362 1363 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1364 ? ipif->ipif_rq : ipif->ipif_wq; 1365 1366 /* 1367 * Just create an on link entry, i.e. interface route. 1368 */ 1369 ire = ire_create_v6( 1370 dst, /* gateway == dst */ 1371 &ipv6_all_ones, /* mask */ 1372 &prev_ire->ire_src_addr_v6, /* source addr */ 1373 &ipv6_all_zeros, /* gateway addr */ 1374 &prev_ire->ire_max_frag, /* max frag */ 1375 NULL, /* no src nce */ 1376 NULL, /* ire rfq */ 1377 stq, /* ire stq */ 1378 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1379 prev_ire->ire_ipif, 1380 &ipv6_all_ones, 1381 0, 1382 0, 1383 (RTF_DYNAMIC | RTF_HOST), 1384 &ulp_info, 1385 NULL, 1386 NULL, 1387 ipst); 1388 } 1389 1390 /* Release reference from earlier ipif_get_next_ipif() */ 1391 ipif_refrele(ipif); 1392 1393 if (ire == NULL) 1394 goto fail_redirect; 1395 1396 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1397 1398 /* tell routing sockets that we received a redirect */ 1399 ip_rts_change_v6(RTM_REDIRECT, 1400 &rd->nd_rd_dst, 1401 &rd->nd_rd_target, 1402 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1403 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1404 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1405 1406 /* 1407 * Delete any existing IRE_HOST type ires for this destination. 1408 * This together with the added IRE has the effect of 1409 * modifying an existing redirect. 
1410 */ 1411 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1412 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1413 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1414 ipst); 1415 1416 ire_refrele(ire); /* Held in ire_add_v6 */ 1417 1418 if (redir_ire != NULL) { 1419 if (redir_ire->ire_flags & RTF_DYNAMIC) 1420 ire_delete(redir_ire); 1421 ire_refrele(redir_ire); 1422 } 1423 } 1424 1425 if (prev_ire->ire_type == IRE_CACHE) 1426 ire_delete(prev_ire); 1427 ire_refrele(prev_ire); 1428 prev_ire = NULL; 1429 1430 fail_redirect: 1431 if (prev_ire != NULL) 1432 ire_refrele(prev_ire); 1433 freemsg(mp); 1434 } 1435 1436 static ill_t * 1437 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1438 { 1439 ill_t *ill; 1440 1441 ASSERT(WR(q) == q); 1442 1443 if (q->q_next != NULL) { 1444 ill = (ill_t *)q->q_ptr; 1445 if (ILL_CAN_LOOKUP(ill)) 1446 ill_refhold(ill); 1447 else 1448 ill = NULL; 1449 } else { 1450 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1451 NULL, NULL, NULL, NULL, NULL, ipst); 1452 } 1453 if (ill == NULL) 1454 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1455 return (ill); 1456 } 1457 1458 /* 1459 * Assigns an appropriate source address to the packet. 1460 * If origdst is one of our IP addresses that use it as the source. 1461 * If the queue is an ill queue then select a source from that ill. 1462 * Otherwise pick a source based on a route lookup back to the origsrc. 1463 * 1464 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1465 */ 1466 static in6_addr_t * 1467 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1468 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1469 { 1470 ill_t *ill; 1471 ire_t *ire; 1472 ipif_t *ipif; 1473 1474 ASSERT(!(wq->q_flag & QREADR)); 1475 if (wq->q_next != NULL) { 1476 ill = (ill_t *)wq->q_ptr; 1477 } else { 1478 ill = NULL; 1479 } 1480 1481 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1482 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1483 ipst); 1484 if (ire != NULL) { 1485 /* Destined to one of our addresses */ 1486 *src = *origdst; 1487 ire_refrele(ire); 1488 return (src); 1489 } 1490 if (ire != NULL) { 1491 ire_refrele(ire); 1492 ire = NULL; 1493 } 1494 if (ill == NULL) { 1495 /* What is the route back to the original source? */ 1496 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1497 NULL, NULL, zoneid, NULL, 1498 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1499 if (ire == NULL) { 1500 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1501 return (NULL); 1502 } 1503 /* 1504 * Does not matter whether we use ire_stq or ire_ipif here. 1505 * Just pick an ill for ICMP replies. 1506 */ 1507 ASSERT(ire->ire_ipif != NULL); 1508 ill = ire->ire_ipif->ipif_ill; 1509 ire_refrele(ire); 1510 } 1511 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1512 IPV6_PREFER_SRC_DEFAULT, zoneid); 1513 if (ipif != NULL) { 1514 *src = ipif->ipif_v6src_addr; 1515 ipif_refrele(ipif); 1516 return (src); 1517 } 1518 /* 1519 * Unusual case - can't find a usable source address to reach the 1520 * original source. Use what in the route to the source. 
1521 */ 1522 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1523 NULL, NULL, zoneid, NULL, 1524 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1525 if (ire == NULL) { 1526 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1527 return (NULL); 1528 } 1529 ASSERT(ire != NULL); 1530 *src = ire->ire_src_addr_v6; 1531 ire_refrele(ire); 1532 return (src); 1533 } 1534 1535 /* 1536 * Build and ship an IPv6 ICMP message using the packet data in mp, 1537 * and the ICMP header pointed to by "stuff". (May be called as 1538 * writer.) 1539 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1540 * verify that an icmp error packet can be sent. 1541 * 1542 * If q is an ill write side queue (which is the case when packets 1543 * arrive from ip_rput) then ip_wput code will ensure that packets to 1544 * link-local destinations are sent out that ill. 1545 * 1546 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1547 * source address (see above function). 1548 */ 1549 static void 1550 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1551 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1552 ip_stack_t *ipst) 1553 { 1554 ip6_t *ip6h; 1555 in6_addr_t v6dst; 1556 size_t len_needed; 1557 size_t msg_len; 1558 mblk_t *mp1; 1559 icmp6_t *icmp6; 1560 ill_t *ill; 1561 in6_addr_t v6src; 1562 mblk_t *ipsec_mp; 1563 ipsec_out_t *io; 1564 1565 ill = ip_queue_to_ill_v6(q, ipst); 1566 if (ill == NULL) { 1567 freemsg(mp); 1568 return; 1569 } 1570 1571 if (mctl_present) { 1572 /* 1573 * If it is : 1574 * 1575 * 1) a IPSEC_OUT, then this is caused by outbound 1576 * datagram originating on this host. IPSEC processing 1577 * may or may not have been done. Refer to comments above 1578 * icmp_inbound_error_fanout for details. 1579 * 1580 * 2) a IPSEC_IN if we are generating a icmp_message 1581 * for an incoming datagram destined for us i.e called 1582 * from ip_fanout_send_icmp. 
1583 */ 1584 ipsec_info_t *in; 1585 1586 ipsec_mp = mp; 1587 mp = ipsec_mp->b_cont; 1588 1589 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1590 ip6h = (ip6_t *)mp->b_rptr; 1591 1592 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1593 in->ipsec_info_type == IPSEC_IN); 1594 1595 if (in->ipsec_info_type == IPSEC_IN) { 1596 /* 1597 * Convert the IPSEC_IN to IPSEC_OUT. 1598 */ 1599 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1600 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1601 ill_refrele(ill); 1602 return; 1603 } 1604 } else { 1605 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1606 io = (ipsec_out_t *)in; 1607 /* 1608 * Clear out ipsec_out_proc_begin, so we do a fresh 1609 * ire lookup. 1610 */ 1611 io->ipsec_out_proc_begin = B_FALSE; 1612 } 1613 } else { 1614 /* 1615 * This is in clear. The icmp message we are building 1616 * here should go out in clear. 1617 */ 1618 ipsec_in_t *ii; 1619 ASSERT(mp->b_datap->db_type == M_DATA); 1620 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1621 if (ipsec_mp == NULL) { 1622 freemsg(mp); 1623 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1624 ill_refrele(ill); 1625 return; 1626 } 1627 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1628 1629 /* This is not a secure packet */ 1630 ii->ipsec_in_secure = B_FALSE; 1631 /* 1632 * For trusted extensions using a shared IP address we can 1633 * send using any zoneid. 1634 */ 1635 if (zoneid == ALL_ZONES) 1636 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1637 else 1638 ii->ipsec_in_zoneid = zoneid; 1639 ipsec_mp->b_cont = mp; 1640 ip6h = (ip6_t *)mp->b_rptr; 1641 /* 1642 * Convert the IPSEC_IN to IPSEC_OUT. 
1643 */ 1644 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1646 ill_refrele(ill); 1647 return; 1648 } 1649 } 1650 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1651 1652 if (v6src_ptr != NULL) { 1653 v6src = *v6src_ptr; 1654 } else { 1655 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1656 &v6src, zoneid, ipst) == NULL) { 1657 freemsg(ipsec_mp); 1658 ill_refrele(ill); 1659 return; 1660 } 1661 } 1662 v6dst = ip6h->ip6_src; 1663 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1664 msg_len = msgdsize(mp); 1665 if (msg_len > len_needed) { 1666 if (!adjmsg(mp, len_needed - msg_len)) { 1667 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1668 freemsg(ipsec_mp); 1669 ill_refrele(ill); 1670 return; 1671 } 1672 msg_len = len_needed; 1673 } 1674 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1675 if (mp1 == NULL) { 1676 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1677 freemsg(ipsec_mp); 1678 ill_refrele(ill); 1679 return; 1680 } 1681 ill_refrele(ill); 1682 mp1->b_cont = mp; 1683 mp = mp1; 1684 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1685 io->ipsec_out_type == IPSEC_OUT); 1686 ipsec_mp->b_cont = mp; 1687 1688 /* 1689 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1690 * node generates be accepted in peace by all on-host destinations. 1691 * If we do NOT assume that all on-host destinations trust 1692 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1693 * (Look for ipsec_out_icmp_loopback). 
1694 */ 1695 io->ipsec_out_icmp_loopback = B_TRUE; 1696 1697 ip6h = (ip6_t *)mp->b_rptr; 1698 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1699 1700 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1701 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1702 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1703 ip6h->ip6_dst = v6dst; 1704 ip6h->ip6_src = v6src; 1705 msg_len += IPV6_HDR_LEN + len; 1706 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1707 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1708 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1709 } 1710 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1711 icmp6 = (icmp6_t *)&ip6h[1]; 1712 bcopy(stuff, (char *)icmp6, len); 1713 /* 1714 * Prepare for checksum by putting icmp length in the icmp 1715 * checksum field. The checksum is calculated in ip_wput_v6. 1716 */ 1717 icmp6->icmp6_cksum = ip6h->ip6_plen; 1718 if (icmp6->icmp6_type == ND_REDIRECT) { 1719 ip6h->ip6_hops = IPV6_MAX_HOPS; 1720 } 1721 /* Send to V6 writeside put routine */ 1722 put(q, ipsec_mp); 1723 } 1724 1725 /* 1726 * Update the output mib when ICMPv6 packets are sent. 
1727 */ 1728 static void 1729 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1730 { 1731 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1732 1733 switch (icmp6->icmp6_type) { 1734 case ICMP6_DST_UNREACH: 1735 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1736 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1737 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1738 break; 1739 1740 case ICMP6_TIME_EXCEEDED: 1741 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1742 break; 1743 1744 case ICMP6_PARAM_PROB: 1745 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1746 break; 1747 1748 case ICMP6_PACKET_TOO_BIG: 1749 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1750 break; 1751 1752 case ICMP6_ECHO_REQUEST: 1753 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1754 break; 1755 1756 case ICMP6_ECHO_REPLY: 1757 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1758 break; 1759 1760 case ND_ROUTER_SOLICIT: 1761 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1762 break; 1763 1764 case ND_ROUTER_ADVERT: 1765 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1766 break; 1767 1768 case ND_NEIGHBOR_SOLICIT: 1769 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1770 break; 1771 1772 case ND_NEIGHBOR_ADVERT: 1773 BUMP_MIB(ill->ill_icmp6_mib, 1774 ipv6IfIcmpOutNeighborAdvertisements); 1775 break; 1776 1777 case ND_REDIRECT: 1778 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1779 break; 1780 1781 case MLD_LISTENER_QUERY: 1782 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1783 break; 1784 1785 case MLD_LISTENER_REPORT: 1786 case MLD_V2_LISTENER_REPORT: 1787 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1788 break; 1789 1790 case MLD_LISTENER_REDUCTION: 1791 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1792 break; 1793 } 1794 } 1795 1796 /* 1797 * Check if it is ok to send an ICMPv6 error packet in 1798 * response to the IP packet in 
mp. 1799 * Free the message and return null if no 1800 * ICMP error packet should be sent. 1801 */ 1802 static mblk_t * 1803 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1804 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1805 { 1806 ip6_t *ip6h; 1807 1808 if (!mp) 1809 return (NULL); 1810 1811 ip6h = (ip6_t *)mp->b_rptr; 1812 1813 /* Check if source address uniquely identifies the host */ 1814 1815 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1816 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1817 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1818 freemsg(mp); 1819 return (NULL); 1820 } 1821 1822 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1823 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1824 icmp6_t *icmp6; 1825 1826 if (mp->b_wptr - mp->b_rptr < len_needed) { 1827 if (!pullupmsg(mp, len_needed)) { 1828 ill_t *ill; 1829 1830 ill = ip_queue_to_ill_v6(q, ipst); 1831 if (ill == NULL) { 1832 BUMP_MIB(&ipst->ips_icmp6_mib, 1833 ipv6IfIcmpInErrors); 1834 } else { 1835 BUMP_MIB(ill->ill_icmp6_mib, 1836 ipv6IfIcmpInErrors); 1837 ill_refrele(ill); 1838 } 1839 freemsg(mp); 1840 return (NULL); 1841 } 1842 ip6h = (ip6_t *)mp->b_rptr; 1843 } 1844 icmp6 = (icmp6_t *)&ip6h[1]; 1845 /* Explicitly do not generate errors in response to redirects */ 1846 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1847 icmp6->icmp6_type == ND_REDIRECT) { 1848 freemsg(mp); 1849 return (NULL); 1850 } 1851 } 1852 /* 1853 * Check that the destination is not multicast and that the packet 1854 * was not sent on link layer broadcast or multicast. (Exception 1855 * is Packet too big message as per the draft - when mcast_ok is set.) 1856 */ 1857 if (!mcast_ok && 1858 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1859 freemsg(mp); 1860 return (NULL); 1861 } 1862 if (icmp_err_rate_limit(ipst)) { 1863 /* 1864 * Only send ICMP error packets every so often. 1865 * This should be done on a per port/source basis, 1866 * but for now this will suffice. 
1867 */ 1868 freemsg(mp); 1869 return (NULL); 1870 } 1871 return (mp); 1872 } 1873 1874 /* 1875 * Generate an ICMPv6 redirect message. 1876 * Include target link layer address option if it exits. 1877 * Always include redirect header. 1878 */ 1879 static void 1880 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1881 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1882 { 1883 nd_redirect_t *rd; 1884 nd_opt_rd_hdr_t *rdh; 1885 uchar_t *buf; 1886 nce_t *nce = NULL; 1887 nd_opt_hdr_t *opt; 1888 int len; 1889 int ll_opt_len = 0; 1890 int max_redir_hdr_data_len; 1891 int pkt_len; 1892 in6_addr_t *srcp; 1893 ip_stack_t *ipst = ill->ill_ipst; 1894 1895 /* 1896 * We are called from ip_rput where we could 1897 * not have attached an IPSEC_IN. 1898 */ 1899 ASSERT(mp->b_datap->db_type == M_DATA); 1900 1901 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1902 if (mp == NULL) 1903 return; 1904 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1905 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1906 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1907 ill->ill_phys_addr_length + 7)/8 * 8; 1908 } 1909 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1910 ASSERT(len % 4 == 0); 1911 buf = kmem_alloc(len, KM_NOSLEEP); 1912 if (buf == NULL) { 1913 if (nce != NULL) 1914 NCE_REFRELE(nce); 1915 freemsg(mp); 1916 return; 1917 } 1918 1919 rd = (nd_redirect_t *)buf; 1920 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1921 rd->nd_rd_code = 0; 1922 rd->nd_rd_reserved = 0; 1923 rd->nd_rd_target = *targetp; 1924 rd->nd_rd_dst = *dest; 1925 1926 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1927 if (nce != NULL && ll_opt_len != 0) { 1928 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1929 opt->nd_opt_len = ll_opt_len/8; 1930 bcopy((char *)nce->nce_res_mp->b_rptr + 1931 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1932 ill->ill_phys_addr_length); 1933 } 1934 if (nce != NULL) 1935 NCE_REFRELE(nce); 1936 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1937 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1938 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1939 max_redir_hdr_data_len = 1940 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1941 pkt_len = msgdsize(mp); 1942 /* Make sure mp is 8 byte aligned */ 1943 if (pkt_len > max_redir_hdr_data_len) { 1944 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1945 sizeof (nd_opt_rd_hdr_t))/8; 1946 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1947 } else { 1948 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1949 (void) adjmsg(mp, -(pkt_len % 8)); 1950 } 1951 rdh->nd_opt_rh_reserved1 = 0; 1952 rdh->nd_opt_rh_reserved2 = 0; 1953 /* ipif_v6src_addr contains the link-local source address */ 1954 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1955 if (ill->ill_group != NULL) { 1956 /* 1957 * The receiver of the redirect will verify whether it 1958 * had a route through us (srcp that we will use in 1959 * the redirect) or not. As we load spread even link-locals, 1960 * we don't know which source address the receiver of 1961 * redirect has in its route for communicating with us. 1962 * Thus we randomly choose a source here and finally we 1963 * should get to the right one and it will eventually 1964 * accept the redirect from us. We can't call 1965 * ip_lookup_scope_v6 because we don't have the right 1966 * link-local address here. Thus we randomly choose one. 
1967 */ 1968 int cnt = ill->ill_group->illgrp_ill_count; 1969 1970 ill = ill->ill_group->illgrp_ill; 1971 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1972 while (cnt--) 1973 ill = ill->ill_group_next; 1974 srcp = &ill->ill_ipif->ipif_v6src_addr; 1975 } else { 1976 srcp = &ill->ill_ipif->ipif_v6src_addr; 1977 } 1978 rw_exit(&ipst->ips_ill_g_lock); 1979 /* Redirects sent by router, and router is global zone */ 1980 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1981 kmem_free(buf, len); 1982 } 1983 1984 1985 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1986 void 1987 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1988 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1989 ip_stack_t *ipst) 1990 { 1991 icmp6_t icmp6; 1992 boolean_t mctl_present; 1993 mblk_t *first_mp; 1994 1995 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1996 1997 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1998 if (mp == NULL) { 1999 if (mctl_present) 2000 freeb(first_mp); 2001 return; 2002 } 2003 bzero(&icmp6, sizeof (icmp6_t)); 2004 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2005 icmp6.icmp6_code = code; 2006 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2007 zoneid, ipst); 2008 } 2009 2010 /* 2011 * Generate an ICMP unreachable message. 
2012 */ 2013 void 2014 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2015 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2016 ip_stack_t *ipst) 2017 { 2018 icmp6_t icmp6; 2019 boolean_t mctl_present; 2020 mblk_t *first_mp; 2021 2022 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2023 2024 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2025 if (mp == NULL) { 2026 if (mctl_present) 2027 freeb(first_mp); 2028 return; 2029 } 2030 bzero(&icmp6, sizeof (icmp6_t)); 2031 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2032 icmp6.icmp6_code = code; 2033 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2034 zoneid, ipst); 2035 } 2036 2037 /* 2038 * Generate an ICMP pkt too big message. 2039 */ 2040 static void 2041 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2042 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 2043 { 2044 icmp6_t icmp6; 2045 mblk_t *first_mp; 2046 boolean_t mctl_present; 2047 2048 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2049 2050 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2051 if (mp == NULL) { 2052 if (mctl_present) 2053 freeb(first_mp); 2054 return; 2055 } 2056 bzero(&icmp6, sizeof (icmp6_t)); 2057 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2058 icmp6.icmp6_code = 0; 2059 icmp6.icmp6_mtu = htonl(mtu); 2060 2061 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2062 zoneid, ipst); 2063 } 2064 2065 /* 2066 * Generate an ICMP parameter problem message. (May be called as writer.) 2067 * 'offset' is the offset from the beginning of the packet in error. 
2068 */ 2069 static void 2070 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2071 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2072 ip_stack_t *ipst) 2073 { 2074 icmp6_t icmp6; 2075 boolean_t mctl_present; 2076 mblk_t *first_mp; 2077 2078 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2079 2080 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2081 if (mp == NULL) { 2082 if (mctl_present) 2083 freeb(first_mp); 2084 return; 2085 } 2086 bzero((char *)&icmp6, sizeof (icmp6_t)); 2087 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2088 icmp6.icmp6_code = code; 2089 icmp6.icmp6_pptr = htonl(offset); 2090 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2091 zoneid, ipst); 2092 } 2093 2094 /* 2095 * This code will need to take into account the possibility of binding 2096 * to a link local address on a multi-homed host, in which case the 2097 * outgoing interface (from the conn) will need to be used when getting 2098 * an ire for the dst. Going through proper outgoing interface and 2099 * choosing the source address corresponding to the outgoing interface 2100 * is necessary when the destination address is a link-local address and 2101 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2102 * This can happen when active connection is setup; thus ipp pointer 2103 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2104 * pointer is passed as ipp pointer. 
2105 */ 2106 mblk_t * 2107 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2108 { 2109 ssize_t len; 2110 int protocol; 2111 struct T_bind_req *tbr; 2112 sin6_t *sin6; 2113 ipa6_conn_t *ac6; 2114 in6_addr_t *v6srcp; 2115 in6_addr_t *v6dstp; 2116 uint16_t lport; 2117 uint16_t fport; 2118 uchar_t *ucp; 2119 mblk_t *mp1; 2120 boolean_t ire_requested; 2121 boolean_t ipsec_policy_set; 2122 int error = 0; 2123 boolean_t local_bind; 2124 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2125 ipa6_conn_x_t *acx6; 2126 boolean_t verify_dst; 2127 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2128 2129 ASSERT(connp->conn_af_isv6); 2130 len = mp->b_wptr - mp->b_rptr; 2131 if (len < (sizeof (*tbr) + 1)) { 2132 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2133 "ip_bind_v6: bogus msg, len %ld", len); 2134 goto bad_addr; 2135 } 2136 /* Back up and extract the protocol identifier. */ 2137 mp->b_wptr--; 2138 tbr = (struct T_bind_req *)mp->b_rptr; 2139 /* Reset the message type in preparation for shipping it back. */ 2140 mp->b_datap->db_type = M_PCPROTO; 2141 2142 protocol = *mp->b_wptr & 0xFF; 2143 connp->conn_ulp = (uint8_t)protocol; 2144 2145 /* 2146 * Check for a zero length address. This is from a protocol that 2147 * wants to register to receive all packets of its type. 2148 */ 2149 if (tbr->ADDR_length == 0) { 2150 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2151 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2152 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2153 NULL) { 2154 /* 2155 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2156 * Do not allow others to bind to these. 2157 */ 2158 goto bad_addr; 2159 } 2160 2161 /* 2162 * 2163 * The udp module never sends down a zero-length address, 2164 * and allowing this on a labeled system will break MLP 2165 * functionality. 
2166 */ 2167 if (is_system_labeled() && protocol == IPPROTO_UDP) 2168 goto bad_addr; 2169 2170 /* Allow ipsec plumbing */ 2171 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2172 protocol != IPPROTO_ESP) 2173 goto bad_addr; 2174 2175 connp->conn_srcv6 = ipv6_all_zeros; 2176 ipcl_proto_insert_v6(connp, protocol); 2177 2178 tbr->PRIM_type = T_BIND_ACK; 2179 return (mp); 2180 } 2181 2182 /* Extract the address pointer from the message. */ 2183 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2184 tbr->ADDR_length); 2185 if (ucp == NULL) { 2186 ip1dbg(("ip_bind_v6: no address\n")); 2187 goto bad_addr; 2188 } 2189 if (!OK_32PTR(ucp)) { 2190 ip1dbg(("ip_bind_v6: unaligned address\n")); 2191 goto bad_addr; 2192 } 2193 mp1 = mp->b_cont; /* trailing mp if any */ 2194 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2195 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2196 2197 switch (tbr->ADDR_length) { 2198 default: 2199 ip1dbg(("ip_bind_v6: bad address length %d\n", 2200 (int)tbr->ADDR_length)); 2201 goto bad_addr; 2202 2203 case IPV6_ADDR_LEN: 2204 /* Verification of local address only */ 2205 v6srcp = (in6_addr_t *)ucp; 2206 lport = 0; 2207 local_bind = B_TRUE; 2208 break; 2209 2210 case sizeof (sin6_t): 2211 sin6 = (sin6_t *)ucp; 2212 v6srcp = &sin6->sin6_addr; 2213 lport = sin6->sin6_port; 2214 local_bind = B_TRUE; 2215 break; 2216 2217 case sizeof (ipa6_conn_t): 2218 /* 2219 * Verify that both the source and destination addresses 2220 * are valid. 2221 * Note that we allow connect to broadcast and multicast 2222 * addresses when ire_requested is set. Thus the ULP 2223 * has to check for IRE_BROADCAST and multicast. 2224 */ 2225 ac6 = (ipa6_conn_t *)ucp; 2226 v6srcp = &ac6->ac6_laddr; 2227 v6dstp = &ac6->ac6_faddr; 2228 fport = ac6->ac6_fport; 2229 /* For raw socket, the local port is not set. */ 2230 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2231 connp->conn_lport; 2232 local_bind = B_FALSE; 2233 /* Always verify destination reachability. */ 2234 verify_dst = B_TRUE; 2235 break; 2236 2237 case sizeof (ipa6_conn_x_t): 2238 /* 2239 * Verify that the source address is valid. 2240 * Note that we allow connect to broadcast and multicast 2241 * addresses when ire_requested is set. Thus the ULP 2242 * has to check for IRE_BROADCAST and multicast. 2243 */ 2244 acx6 = (ipa6_conn_x_t *)ucp; 2245 ac6 = &acx6->ac6x_conn; 2246 v6srcp = &ac6->ac6_laddr; 2247 v6dstp = &ac6->ac6_faddr; 2248 fport = ac6->ac6_fport; 2249 lport = ac6->ac6_lport; 2250 local_bind = B_FALSE; 2251 /* 2252 * Client that passed ipa6_conn_x_t to us specifies whether to 2253 * verify destination reachability. 2254 */ 2255 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2256 break; 2257 } 2258 if (local_bind) { 2259 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2260 /* Bind to IPv4 address */ 2261 ipaddr_t v4src; 2262 2263 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2264 2265 error = ip_bind_laddr(connp, mp, v4src, lport, 2266 ire_requested, ipsec_policy_set, 2267 tbr->ADDR_length != IPV6_ADDR_LEN); 2268 if (error != 0) 2269 goto bad_addr; 2270 connp->conn_pkt_isv6 = B_FALSE; 2271 } else { 2272 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2273 error = 0; 2274 goto bad_addr; 2275 } 2276 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2277 ire_requested, ipsec_policy_set, 2278 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2279 if (error != 0) 2280 goto bad_addr; 2281 connp->conn_pkt_isv6 = B_TRUE; 2282 } 2283 } else { 2284 /* 2285 * Bind to local and remote address. Local might be 2286 * unspecified in which case it will be extracted from 2287 * ire_src_addr_v6 2288 */ 2289 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2290 /* Connect to IPv4 address */ 2291 ipaddr_t v4src; 2292 ipaddr_t v4dst; 2293 2294 /* Is the source unspecified or mapped? 
*/ 2295 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2296 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2297 ip1dbg(("ip_bind_v6: " 2298 "dst is mapped, but not the src\n")); 2299 goto bad_addr; 2300 } 2301 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2302 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2303 2304 /* 2305 * XXX Fix needed. Need to pass ipsec_policy_set 2306 * instead of B_FALSE. 2307 */ 2308 2309 /* Always verify destination reachability. */ 2310 error = ip_bind_connected(connp, mp, &v4src, lport, 2311 v4dst, fport, ire_requested, ipsec_policy_set, 2312 B_TRUE, B_TRUE); 2313 if (error != 0) 2314 goto bad_addr; 2315 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2316 connp->conn_pkt_isv6 = B_FALSE; 2317 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2318 ip1dbg(("ip_bind_v6: " 2319 "src is mapped, but not the dst\n")); 2320 goto bad_addr; 2321 } else { 2322 error = ip_bind_connected_v6(connp, mp, v6srcp, 2323 lport, v6dstp, ipp, fport, ire_requested, 2324 ipsec_policy_set, B_TRUE, verify_dst); 2325 if (error != 0) 2326 goto bad_addr; 2327 connp->conn_pkt_isv6 = B_TRUE; 2328 } 2329 } 2330 /* Update qinfo if v4/v6 changed */ 2331 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2332 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2333 if (connp->conn_pkt_isv6) 2334 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE, ipst); 2335 else 2336 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 2337 } 2338 2339 /* 2340 * Pass the IPSEC headers size in ire_ipsec_overhead. 2341 * We can't do this in ip_bind_insert_ire because the policy 2342 * may not have been inherited at that point in time and hence 2343 * conn_out_enforce_policy may not be set. 2344 */ 2345 mp1 = mp->b_cont; 2346 if (ire_requested && connp->conn_out_enforce_policy && 2347 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2348 ire_t *ire = (ire_t *)mp1->b_rptr; 2349 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2350 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2351 } 2352 2353 /* Send it home. 
*/ 2354 mp->b_datap->db_type = M_PCPROTO; 2355 tbr->PRIM_type = T_BIND_ACK; 2356 return (mp); 2357 2358 bad_addr: 2359 if (error == EINPROGRESS) 2360 return (NULL); 2361 if (error > 0) 2362 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2363 else 2364 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2365 return (mp); 2366 } 2367 2368 /* 2369 * Here address is verified to be a valid local address. 2370 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2371 * address is also considered a valid local address. 2372 * In the case of a multicast address, however, the 2373 * upper protocol is expected to reset the src address 2374 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2375 * no packets are emitted with multicast address as 2376 * source address. 2377 * The addresses valid for bind are: 2378 * (1) - in6addr_any 2379 * (2) - IP address of an UP interface 2380 * (3) - IP address of a DOWN interface 2381 * (4) - a multicast address. In this case 2382 * the conn will only receive packets destined to 2383 * the specified multicast address. Note: the 2384 * application still has to issue an 2385 * IPV6_JOIN_GROUP socket option. 2386 * 2387 * In all the above cases, the bound address must be valid in the current zone. 2388 * When the address is loopback or multicast, there might be many matching IREs 2389 * so bind has to look up based on the zone. 2390 */ 2391 static int 2392 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2393 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2394 boolean_t fanout_insert) 2395 { 2396 int error = 0; 2397 ire_t *src_ire = NULL; 2398 ipif_t *ipif = NULL; 2399 mblk_t *policy_mp; 2400 zoneid_t zoneid; 2401 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2402 2403 if (ipsec_policy_set) 2404 policy_mp = mp->b_cont; 2405 2406 /* 2407 * If it was previously connected, conn_fully_bound would have 2408 * been set. 
2409 */ 2410 connp->conn_fully_bound = B_FALSE; 2411 2412 zoneid = connp->conn_zoneid; 2413 2414 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2415 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2416 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2417 /* 2418 * If an address other than in6addr_any is requested, 2419 * we verify that it is a valid address for bind 2420 * Note: Following code is in if-else-if form for 2421 * readability compared to a condition check. 2422 */ 2423 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2424 if (IRE_IS_LOCAL(src_ire)) { 2425 /* 2426 * (2) Bind to address of local UP interface 2427 */ 2428 ipif = src_ire->ire_ipif; 2429 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2430 ipif_t *multi_ipif = NULL; 2431 ire_t *save_ire; 2432 /* 2433 * (4) bind to multicast address. 2434 * Fake out the IRE returned to upper 2435 * layer to be a broadcast IRE in 2436 * ip_bind_insert_ire_v6(). 2437 * Pass other information that matches 2438 * the ipif (e.g. the source address). 
2439 * conn_multicast_ill is only used for 2440 * IPv6 packets 2441 */ 2442 mutex_enter(&connp->conn_lock); 2443 if (connp->conn_multicast_ill != NULL) { 2444 (void) ipif_lookup_zoneid( 2445 connp->conn_multicast_ill, zoneid, 0, 2446 &multi_ipif); 2447 } else { 2448 /* 2449 * Look for default like 2450 * ip_wput_v6 2451 */ 2452 multi_ipif = ipif_lookup_group_v6( 2453 &ipv6_unspecified_group, zoneid, ipst); 2454 } 2455 mutex_exit(&connp->conn_lock); 2456 save_ire = src_ire; 2457 src_ire = NULL; 2458 if (multi_ipif == NULL || !ire_requested || 2459 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2460 src_ire = save_ire; 2461 error = EADDRNOTAVAIL; 2462 } else { 2463 ASSERT(src_ire != NULL); 2464 if (save_ire != NULL) 2465 ire_refrele(save_ire); 2466 } 2467 if (multi_ipif != NULL) 2468 ipif_refrele(multi_ipif); 2469 } else { 2470 *mp->b_wptr++ = (char)connp->conn_ulp; 2471 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2472 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2473 ipst); 2474 if (ipif == NULL) { 2475 if (error == EINPROGRESS) { 2476 if (src_ire != NULL) 2477 ire_refrele(src_ire); 2478 return (error); 2479 } 2480 /* 2481 * Not a valid address for bind 2482 */ 2483 error = EADDRNOTAVAIL; 2484 } else { 2485 ipif_refrele(ipif); 2486 } 2487 /* 2488 * Just to keep it consistent with the processing in 2489 * ip_bind_v6(). 2490 */ 2491 mp->b_wptr--; 2492 } 2493 2494 if (error != 0) { 2495 /* Red Alert! Attempting to be a bogon! */ 2496 if (ip_debug > 2) { 2497 /* ip1dbg */ 2498 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2499 " address %s\n", AF_INET6, v6src); 2500 } 2501 goto bad_addr; 2502 } 2503 } 2504 2505 /* 2506 * Allow setting new policies. For example, disconnects come 2507 * down as ipa_t bind. As we would have set conn_policy_cached 2508 * to B_TRUE before, we should set it to B_FALSE, so that policy 2509 * can change after the disconnect. 
2510 */ 2511 connp->conn_policy_cached = B_FALSE; 2512 2513 /* If not fanout_insert this was just an address verification */ 2514 if (fanout_insert) { 2515 /* 2516 * The addresses have been verified. Time to insert in 2517 * the correct fanout list. 2518 */ 2519 connp->conn_srcv6 = *v6src; 2520 connp->conn_remv6 = ipv6_all_zeros; 2521 connp->conn_lport = lport; 2522 connp->conn_fport = 0; 2523 2524 /* 2525 * We need to make sure that the conn_recv is set to a non-null 2526 * value before we insert the conn_t into the classifier table. 2527 * This is to avoid a race with an incoming packet which does 2528 * an ipcl_classify(). 2529 */ 2530 if (*mp->b_wptr == IPPROTO_TCP) 2531 connp->conn_recv = tcp_conn_request; 2532 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2533 } 2534 if (error == 0) { 2535 if (ire_requested) { 2536 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2537 ipst)) { 2538 error = -1; 2539 goto bad_addr; 2540 } 2541 } else if (ipsec_policy_set) { 2542 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2543 error = -1; 2544 goto bad_addr; 2545 } 2546 } 2547 } else if (connp->conn_ulp == IPPROTO_TCP) { 2548 connp->conn_recv = tcp_input; 2549 } 2550 bad_addr: 2551 if (error != 0) { 2552 if (connp->conn_anon_port) { 2553 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2554 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2555 B_FALSE); 2556 } 2557 connp->conn_mlp_type = mlptSingle; 2558 } 2559 2560 if (src_ire != NULL) 2561 ire_refrele(src_ire); 2562 2563 if (ipsec_policy_set) { 2564 ASSERT(policy_mp != NULL); 2565 freeb(policy_mp); 2566 /* 2567 * As of now assume that nothing else accompanies 2568 * IPSEC_POLICY_SET. 
2569 */ 2570 mp->b_cont = NULL; 2571 } 2572 return (error); 2573 } 2574 2575 /* ARGSUSED */ 2576 static void 2577 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2578 void *dummy_arg) 2579 { 2580 conn_t *connp = NULL; 2581 t_scalar_t prim; 2582 2583 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2584 2585 if (CONN_Q(q)) 2586 connp = Q_TO_CONN(q); 2587 ASSERT(connp != NULL); 2588 2589 prim = ((union T_primitives *)mp->b_rptr)->type; 2590 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2591 2592 if (IPCL_IS_TCP(connp)) { 2593 /* Pass sticky_ipp for scope_id and pktinfo */ 2594 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2595 } else { 2596 /* For UDP and ICMP */ 2597 mp = ip_bind_v6(q, mp, connp, NULL); 2598 } 2599 if (mp != NULL) { 2600 if (IPCL_IS_TCP(connp)) { 2601 CONN_INC_REF(connp); 2602 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2603 connp, SQTAG_TCP_RPUTOTHER); 2604 } else if (IPCL_IS_UDP(connp)) { 2605 udp_resume_bind(connp, mp); 2606 } else { 2607 qreply(q, mp); 2608 CONN_OPER_PENDING_DONE(connp); 2609 } 2610 } 2611 } 2612 2613 /* 2614 * Verify that both the source and destination addresses 2615 * are valid. If verify_dst, then destination address must also be reachable, 2616 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2617 * It takes ip6_pkt_t * as one of the arguments to determine correct 2618 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2619 * destination address. Note that parameter ipp is only useful for TCP connect 2620 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2621 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2622 * 2623 */ 2624 static int 2625 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2626 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2627 boolean_t ire_requested, boolean_t ipsec_policy_set, 2628 boolean_t fanout_insert, boolean_t verify_dst) 2629 { 2630 ire_t *src_ire; 2631 ire_t *dst_ire; 2632 int error = 0; 2633 int protocol; 2634 mblk_t *policy_mp; 2635 ire_t *sire = NULL; 2636 ire_t *md_dst_ire = NULL; 2637 ill_t *md_ill = NULL; 2638 ill_t *dst_ill = NULL; 2639 ipif_t *src_ipif = NULL; 2640 zoneid_t zoneid; 2641 boolean_t ill_held = B_FALSE; 2642 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2643 2644 src_ire = dst_ire = NULL; 2645 /* 2646 * NOTE: The protocol is beyond the wptr because that's how 2647 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2648 */ 2649 protocol = *mp->b_wptr & 0xFF; 2650 2651 /* 2652 * If we never got a disconnect before, clear it now. 2653 */ 2654 connp->conn_fully_bound = B_FALSE; 2655 2656 if (ipsec_policy_set) { 2657 policy_mp = mp->b_cont; 2658 } 2659 2660 zoneid = connp->conn_zoneid; 2661 2662 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2663 ipif_t *ipif; 2664 2665 /* 2666 * Use an "emulated" IRE_BROADCAST to tell the transport it 2667 * is a multicast. 2668 * Pass other information that matches 2669 * the ipif (e.g. the source address). 
2670 * 2671 * conn_multicast_ill is only used for IPv6 packets 2672 */ 2673 mutex_enter(&connp->conn_lock); 2674 if (connp->conn_multicast_ill != NULL) { 2675 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2676 zoneid, 0, &ipif); 2677 } else { 2678 /* Look for default like ip_wput_v6 */ 2679 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2680 } 2681 mutex_exit(&connp->conn_lock); 2682 if (ipif == NULL || !ire_requested || 2683 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2684 if (ipif != NULL) 2685 ipif_refrele(ipif); 2686 if (ip_debug > 2) { 2687 /* ip1dbg */ 2688 pr_addr_dbg("ip_bind_connected_v6: bad " 2689 "connected multicast %s\n", AF_INET6, 2690 v6dst); 2691 } 2692 error = ENETUNREACH; 2693 goto bad_addr; 2694 } 2695 if (ipif != NULL) 2696 ipif_refrele(ipif); 2697 } else { 2698 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2699 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2700 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2701 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2702 ipst); 2703 /* 2704 * We also prevent ire's with src address INADDR_ANY to 2705 * be used, which are created temporarily for 2706 * sending out packets from endpoints that have 2707 * conn_unspec_src set. 2708 */ 2709 if (dst_ire == NULL || 2710 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2711 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2712 /* 2713 * When verifying destination reachability, we always 2714 * complain. 2715 * 2716 * When not verifying destination reachability but we 2717 * found an IRE, i.e. the destination is reachable, 2718 * then the other tests still apply and we complain. 
2719 */ 2720 if (verify_dst || (dst_ire != NULL)) { 2721 if (ip_debug > 2) { 2722 /* ip1dbg */ 2723 pr_addr_dbg("ip_bind_connected_v6: bad" 2724 " connected dst %s\n", AF_INET6, 2725 v6dst); 2726 } 2727 if (dst_ire == NULL || 2728 !(dst_ire->ire_type & IRE_HOST)) { 2729 error = ENETUNREACH; 2730 } else { 2731 error = EHOSTUNREACH; 2732 } 2733 goto bad_addr; 2734 } 2735 } 2736 } 2737 2738 /* 2739 * We now know that routing will allow us to reach the destination. 2740 * Check whether Trusted Solaris policy allows communication with this 2741 * host, and pretend that the destination is unreachable if not. 2742 * 2743 * This is never a problem for TCP, since that transport is known to 2744 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2745 * handling. If the remote is unreachable, it will be detected at that 2746 * point, so there's no reason to check it here. 2747 * 2748 * Note that for sendto (and other datagram-oriented friends), this 2749 * check is done as part of the data path label computation instead. 2750 * The check here is just to make non-TCP connect() report the right 2751 * error. 2752 */ 2753 if (dst_ire != NULL && is_system_labeled() && 2754 !IPCL_IS_TCP(connp) && 2755 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2756 connp->conn_mac_exempt, ipst) != 0) { 2757 error = EHOSTUNREACH; 2758 if (ip_debug > 2) { 2759 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2760 AF_INET6, v6dst); 2761 } 2762 goto bad_addr; 2763 } 2764 2765 /* 2766 * If the app does a connect(), it means that it will most likely 2767 * send more than 1 packet to the destination. It makes sense 2768 * to clear the temporary flag. 
2769 */ 2770 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2771 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2772 irb_t *irb = dst_ire->ire_bucket; 2773 2774 rw_enter(&irb->irb_lock, RW_WRITER); 2775 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2776 irb->irb_tmp_ire_cnt--; 2777 rw_exit(&irb->irb_lock); 2778 } 2779 2780 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2781 2782 /* 2783 * See if we should notify ULP about MDT; we do this whether or not 2784 * ire_requested is TRUE, in order to handle active connects; MDT 2785 * eligibility tests for passive connects are handled separately 2786 * through tcp_adapt_ire(). We do this before the source address 2787 * selection, because dst_ire may change after a call to 2788 * ipif_select_source_v6(). This is a best-effort check, as the 2789 * packet for this connection may not actually go through 2790 * dst_ire->ire_stq, and the exact IRE can only be known after 2791 * calling ip_newroute_v6(). This is why we further check on the 2792 * IRE during Multidata packet transmission in tcp_multisend(). 
2793 */ 2794 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2795 dst_ire != NULL && 2796 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2797 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2798 ILL_MDT_CAPABLE(md_ill)) { 2799 md_dst_ire = dst_ire; 2800 IRE_REFHOLD(md_dst_ire); 2801 } 2802 2803 if (dst_ire != NULL && 2804 dst_ire->ire_type == IRE_LOCAL && 2805 dst_ire->ire_zoneid != zoneid && 2806 dst_ire->ire_zoneid != ALL_ZONES) { 2807 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2808 zoneid, 0, NULL, 2809 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2810 MATCH_IRE_RJ_BHOLE, ipst); 2811 if (src_ire == NULL) { 2812 error = EHOSTUNREACH; 2813 goto bad_addr; 2814 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2815 if (!(src_ire->ire_type & IRE_HOST)) 2816 error = ENETUNREACH; 2817 else 2818 error = EHOSTUNREACH; 2819 goto bad_addr; 2820 } 2821 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2822 src_ipif = src_ire->ire_ipif; 2823 ipif_refhold(src_ipif); 2824 *v6src = src_ipif->ipif_v6lcl_addr; 2825 } 2826 ire_refrele(src_ire); 2827 src_ire = NULL; 2828 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2829 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2830 *v6src = sire->ire_src_addr_v6; 2831 ire_refrele(dst_ire); 2832 dst_ire = sire; 2833 sire = NULL; 2834 } else if (dst_ire->ire_type == IRE_CACHE && 2835 (dst_ire->ire_flags & RTF_SETSRC)) { 2836 ASSERT(dst_ire->ire_zoneid == zoneid || 2837 dst_ire->ire_zoneid == ALL_ZONES); 2838 *v6src = dst_ire->ire_src_addr_v6; 2839 } else { 2840 /* 2841 * Pick a source address so that a proper inbound load 2842 * spreading would happen. Use dst_ill specified by the 2843 * app. when socket option or scopeid is set. 
2844 */ 2845 int err; 2846 2847 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2848 uint_t if_index; 2849 2850 /* 2851 * Scope id or IPV6_PKTINFO 2852 */ 2853 2854 if_index = ipp->ipp_ifindex; 2855 dst_ill = ill_lookup_on_ifindex( 2856 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2857 ipst); 2858 if (dst_ill == NULL) { 2859 ip1dbg(("ip_bind_connected_v6:" 2860 " bad ifindex %d\n", if_index)); 2861 error = EADDRNOTAVAIL; 2862 goto bad_addr; 2863 } 2864 ill_held = B_TRUE; 2865 } else if (connp->conn_outgoing_ill != NULL) { 2866 /* 2867 * For IPV6_BOUND_IF socket option, 2868 * conn_outgoing_ill should be set 2869 * already in TCP or UDP/ICMP. 2870 */ 2871 dst_ill = conn_get_held_ill(connp, 2872 &connp->conn_outgoing_ill, &err); 2873 if (err == ILL_LOOKUP_FAILED) { 2874 ip1dbg(("ip_bind_connected_v6:" 2875 "no ill for bound_if\n")); 2876 error = EADDRNOTAVAIL; 2877 goto bad_addr; 2878 } 2879 ill_held = B_TRUE; 2880 } else if (dst_ire->ire_stq != NULL) { 2881 /* No need to hold ill here */ 2882 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2883 } else { 2884 /* No need to hold ill here */ 2885 dst_ill = dst_ire->ire_ipif->ipif_ill; 2886 } 2887 if (!ip6_asp_can_lookup(ipst)) { 2888 *mp->b_wptr++ = (char)protocol; 2889 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2890 ip_bind_connected_resume_v6); 2891 error = EINPROGRESS; 2892 goto refrele_and_quit; 2893 } 2894 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2895 RESTRICT_TO_NONE, connp->conn_src_preferences, 2896 zoneid); 2897 ip6_asp_table_refrele(ipst); 2898 if (src_ipif == NULL) { 2899 pr_addr_dbg("ip_bind_connected_v6: " 2900 "no usable source address for " 2901 "connection to %s\n", AF_INET6, v6dst); 2902 error = EADDRNOTAVAIL; 2903 goto bad_addr; 2904 } 2905 *v6src = src_ipif->ipif_v6lcl_addr; 2906 } 2907 } 2908 2909 /* 2910 * We do ire_route_lookup_v6() here (and not an interface lookup) 2911 * as we assert that v6src should only come from an 2912 * UP interface for hard binding. 
2913 */ 2914 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2915 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2916 2917 /* src_ire must be a local|loopback */ 2918 if (!IRE_IS_LOCAL(src_ire)) { 2919 if (ip_debug > 2) { 2920 /* ip1dbg */ 2921 pr_addr_dbg("ip_bind_connected_v6: bad " 2922 "connected src %s\n", AF_INET6, v6src); 2923 } 2924 error = EADDRNOTAVAIL; 2925 goto bad_addr; 2926 } 2927 2928 /* 2929 * If the source address is a loopback address, the 2930 * destination had best be local or multicast. 2931 * The transports that can't handle multicast will reject 2932 * those addresses. 2933 */ 2934 if (src_ire->ire_type == IRE_LOOPBACK && 2935 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2936 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2937 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2938 error = -1; 2939 goto bad_addr; 2940 } 2941 /* 2942 * Allow setting new policies. For example, disconnects come 2943 * down as ipa_t bind. As we would have set conn_policy_cached 2944 * to B_TRUE before, we should set it to B_FALSE, so that policy 2945 * can change after the disconnect. 2946 */ 2947 connp->conn_policy_cached = B_FALSE; 2948 2949 /* 2950 * The addresses have been verified. Initialize the conn 2951 * before calling the policy as they expect the conns 2952 * initialized. 2953 */ 2954 connp->conn_srcv6 = *v6src; 2955 connp->conn_remv6 = *v6dst; 2956 connp->conn_lport = lport; 2957 connp->conn_fport = fport; 2958 2959 ASSERT(!(ipsec_policy_set && ire_requested)); 2960 if (ire_requested) { 2961 iulp_t *ulp_info = NULL; 2962 2963 /* 2964 * Note that sire will not be NULL if this is an off-link 2965 * connection and there is not cache for that dest yet. 2966 * 2967 * XXX Because of an existing bug, if there are multiple 2968 * default routes, the IRE returned now may not be the actual 2969 * default route used (default routes are chosen in a 2970 * round robin fashion). 
So if the metrics for different 2971 * default routes are different, we may return the wrong 2972 * metrics. This will not be a problem if the existing 2973 * bug is fixed. 2974 */ 2975 if (sire != NULL) 2976 ulp_info = &(sire->ire_uinfo); 2977 2978 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2979 ipst)) { 2980 error = -1; 2981 goto bad_addr; 2982 } 2983 } else if (ipsec_policy_set) { 2984 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2985 error = -1; 2986 goto bad_addr; 2987 } 2988 } 2989 2990 /* 2991 * Cache IPsec policy in this conn. If we have per-socket policy, 2992 * we'll cache that. If we don't, we'll inherit global policy. 2993 * 2994 * We can't insert until the conn reflects the policy. Note that 2995 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2996 * connections where we don't have a policy. This is to prevent 2997 * global policy lookups in the inbound path. 2998 * 2999 * If we insert before we set conn_policy_cached, 3000 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3001 * because global policy cound be non-empty. We normally call 3002 * ipsec_check_policy() for conn_policy_cached connections only if 3003 * conn_in_enforce_policy is set. But in this case, 3004 * conn_policy_cached can get set anytime since we made the 3005 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3006 * is called, which will make the above assumption false. Thus, we 3007 * need to insert after we set conn_policy_cached. 3008 */ 3009 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3010 goto bad_addr; 3011 3012 /* If not fanout_insert this was just an address verification */ 3013 if (fanout_insert) { 3014 /* 3015 * The addresses have been verified. Time to insert in 3016 * the correct fanout list. 3017 * We need to make sure that the conn_recv is set to a non-null 3018 * value before we insert the conn_t into the classifier table. 
3019 * This is to avoid a race with an incoming packet which does 3020 * an ipcl_classify(). 3021 */ 3022 if (protocol == IPPROTO_TCP) 3023 connp->conn_recv = tcp_input; 3024 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3025 connp->conn_ports, 3026 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 3027 } 3028 if (error == 0) { 3029 connp->conn_fully_bound = B_TRUE; 3030 /* 3031 * Our initial checks for MDT have passed; the IRE is not 3032 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3033 * be supporting MDT. Pass the IRE, IPC and ILL into 3034 * ip_mdinfo_return(), which performs further checks 3035 * against them and upon success, returns the MDT info 3036 * mblk which we will attach to the bind acknowledgment. 3037 */ 3038 if (md_dst_ire != NULL) { 3039 mblk_t *mdinfo_mp; 3040 3041 ASSERT(md_ill != NULL); 3042 ASSERT(md_ill->ill_mdt_capab != NULL); 3043 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3044 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3045 linkb(mp, mdinfo_mp); 3046 } 3047 } 3048 bad_addr: 3049 if (ipsec_policy_set) { 3050 ASSERT(policy_mp != NULL); 3051 freeb(policy_mp); 3052 /* 3053 * As of now assume that nothing else accompanies 3054 * IPSEC_POLICY_SET. 3055 */ 3056 mp->b_cont = NULL; 3057 } 3058 refrele_and_quit: 3059 if (src_ire != NULL) 3060 IRE_REFRELE(src_ire); 3061 if (dst_ire != NULL) 3062 IRE_REFRELE(dst_ire); 3063 if (sire != NULL) 3064 IRE_REFRELE(sire); 3065 if (src_ipif != NULL) 3066 ipif_refrele(src_ipif); 3067 if (md_dst_ire != NULL) 3068 IRE_REFRELE(md_dst_ire); 3069 if (ill_held && dst_ill != NULL) 3070 ill_refrele(dst_ill); 3071 return (error); 3072 } 3073 3074 /* 3075 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3076 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
3077 */ 3078 /* ARGSUSED4 */ 3079 static boolean_t 3080 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3081 iulp_t *ulp_info, ip_stack_t *ipst) 3082 { 3083 mblk_t *mp1; 3084 ire_t *ret_ire; 3085 3086 mp1 = mp->b_cont; 3087 ASSERT(mp1 != NULL); 3088 3089 if (ire != NULL) { 3090 /* 3091 * mp1 initialized above to IRE_DB_REQ_TYPE 3092 * appended mblk. Its <upper protocol>'s 3093 * job to make sure there is room. 3094 */ 3095 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3096 return (B_FALSE); 3097 3098 mp1->b_datap->db_type = IRE_DB_TYPE; 3099 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3100 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3101 ret_ire = (ire_t *)mp1->b_rptr; 3102 if (IN6_IS_ADDR_MULTICAST(dst) || 3103 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3104 ret_ire->ire_type = IRE_BROADCAST; 3105 ret_ire->ire_addr_v6 = *dst; 3106 } 3107 if (ulp_info != NULL) { 3108 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3109 sizeof (iulp_t)); 3110 } 3111 ret_ire->ire_mp = mp1; 3112 } else { 3113 /* 3114 * No IRE was found. Remove IRE mblk. 3115 */ 3116 mp->b_cont = mp1->b_cont; 3117 freeb(mp1); 3118 } 3119 return (B_TRUE); 3120 } 3121 3122 /* 3123 * Add an ip6i_t header to the front of the mblk. 3124 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3125 * Returns NULL if allocation fails (and frees original message). 3126 * Used in outgoing path when going through ip_newroute_*v6(). 3127 * Used in incoming path to pass ifindex to transports. 
3128 */ 3129 mblk_t * 3130 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3131 { 3132 mblk_t *mp1; 3133 ip6i_t *ip6i; 3134 ip6_t *ip6h; 3135 3136 ip6h = (ip6_t *)mp->b_rptr; 3137 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3138 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3139 mp->b_datap->db_ref > 1) { 3140 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3141 if (mp1 == NULL) { 3142 freemsg(mp); 3143 return (NULL); 3144 } 3145 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3146 mp1->b_cont = mp; 3147 mp = mp1; 3148 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3149 } 3150 mp->b_rptr = (uchar_t *)ip6i; 3151 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3152 ip6i->ip6i_nxt = IPPROTO_RAW; 3153 if (ill != NULL) { 3154 ip6i->ip6i_flags = IP6I_IFINDEX; 3155 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3156 } else { 3157 ip6i->ip6i_flags = 0; 3158 } 3159 ip6i->ip6i_nexthop = *dst; 3160 return (mp); 3161 } 3162 3163 /* 3164 * Handle protocols with which IP is less intimate. There 3165 * can be more than one stream bound to a particular 3166 * protocol. When this is the case, normally each one gets a copy 3167 * of any incoming packets. 3168 * However, if the packet was tunneled and not multicast we only send to it 3169 * the first match. 3170 * 3171 * Zones notes: 3172 * Packets will be distributed to streams in all zones. This is really only 3173 * useful for ICMPv6 as only applications in the global zone can create raw 3174 * sockets for other protocols. 
3175 */ 3176 static void 3177 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3178 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3179 boolean_t mctl_present, zoneid_t zoneid) 3180 { 3181 queue_t *rq; 3182 mblk_t *mp1, *first_mp1; 3183 in6_addr_t dst = ip6h->ip6_dst; 3184 in6_addr_t src = ip6h->ip6_src; 3185 boolean_t one_only; 3186 mblk_t *first_mp = mp; 3187 boolean_t secure, shared_addr; 3188 conn_t *connp, *first_connp, *next_connp; 3189 connf_t *connfp; 3190 ip_stack_t *ipst = inill->ill_ipst; 3191 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3192 3193 if (mctl_present) { 3194 mp = first_mp->b_cont; 3195 secure = ipsec_in_is_secure(first_mp); 3196 ASSERT(mp != NULL); 3197 } else { 3198 secure = B_FALSE; 3199 } 3200 3201 /* 3202 * If the packet was tunneled and not multicast we only send to it 3203 * the first match. 3204 */ 3205 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3206 !IN6_IS_ADDR_MULTICAST(&dst)); 3207 3208 shared_addr = (zoneid == ALL_ZONES); 3209 if (shared_addr) { 3210 /* 3211 * We don't allow multilevel ports for raw IP, so no need to 3212 * check for that here. 3213 */ 3214 zoneid = tsol_packet_to_zoneid(mp); 3215 } 3216 3217 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3218 mutex_enter(&connfp->connf_lock); 3219 connp = connfp->connf_head; 3220 for (connp = connfp->connf_head; connp != NULL; 3221 connp = connp->conn_next) { 3222 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3223 zoneid) && 3224 (!is_system_labeled() || 3225 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3226 connp))) 3227 break; 3228 } 3229 3230 if (connp == NULL || connp->conn_upq == NULL) { 3231 /* 3232 * No one bound to this port. Is 3233 * there a client that wants all 3234 * unclaimed datagrams? 
3235 */ 3236 mutex_exit(&connfp->connf_lock); 3237 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3238 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3239 nexthdr_offset, mctl_present, zoneid, ipst)) { 3240 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3241 } 3242 3243 return; 3244 } 3245 3246 CONN_INC_REF(connp); 3247 first_connp = connp; 3248 3249 /* 3250 * XXX: Fix the multiple protocol listeners case. We should not 3251 * be walking the conn->next list here. 3252 */ 3253 if (one_only) { 3254 /* 3255 * Only send message to one tunnel driver by immediately 3256 * terminating the loop. 3257 */ 3258 connp = NULL; 3259 } else { 3260 connp = connp->conn_next; 3261 3262 } 3263 for (;;) { 3264 while (connp != NULL) { 3265 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3266 flags, zoneid) && 3267 (!is_system_labeled() || 3268 tsol_receive_local(mp, &dst, IPV6_VERSION, 3269 shared_addr, connp))) 3270 break; 3271 connp = connp->conn_next; 3272 } 3273 3274 /* 3275 * Just copy the data part alone. The mctl part is 3276 * needed just for verifying policy and it is never 3277 * sent up. 3278 */ 3279 if (connp == NULL || connp->conn_upq == NULL || 3280 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3281 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3282 /* 3283 * No more intested clients or memory 3284 * allocation failed 3285 */ 3286 connp = first_connp; 3287 break; 3288 } 3289 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3290 CONN_INC_REF(connp); 3291 mutex_exit(&connfp->connf_lock); 3292 rq = connp->conn_rq; 3293 /* 3294 * For link-local always add ifindex so that transport can set 3295 * sin6_scope_id. Avoid it for ICMP error fanout. 
3296 */ 3297 if ((connp->conn_ip_recvpktinfo || 3298 IN6_IS_ADDR_LINKLOCAL(&src)) && 3299 (flags & IP_FF_IPINFO)) { 3300 /* Add header */ 3301 mp1 = ip_add_info_v6(mp1, inill, &dst); 3302 } 3303 if (mp1 == NULL) { 3304 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3305 } else if (!canputnext(rq)) { 3306 if (flags & IP_FF_RAWIP) { 3307 BUMP_MIB(ill->ill_ip_mib, 3308 rawipIfStatsInOverflows); 3309 } else { 3310 BUMP_MIB(ill->ill_icmp6_mib, 3311 ipv6IfIcmpInOverflows); 3312 } 3313 3314 freemsg(mp1); 3315 } else { 3316 /* 3317 * Don't enforce here if we're a tunnel - let "tun" do 3318 * it instead. 3319 */ 3320 if (!IPCL_IS_IPTUN(connp) && 3321 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3322 secure)) { 3323 first_mp1 = ipsec_check_inbound_policy 3324 (first_mp1, connp, NULL, ip6h, 3325 mctl_present); 3326 } 3327 if (first_mp1 != NULL) { 3328 if (mctl_present) 3329 freeb(first_mp1); 3330 BUMP_MIB(ill->ill_ip_mib, 3331 ipIfStatsHCInDelivers); 3332 putnext(rq, mp1); 3333 } 3334 } 3335 mutex_enter(&connfp->connf_lock); 3336 /* Follow the next pointer before releasing the conn. */ 3337 next_connp = connp->conn_next; 3338 CONN_DEC_REF(connp); 3339 connp = next_connp; 3340 } 3341 3342 /* Last one. Send it upstream. */ 3343 mutex_exit(&connfp->connf_lock); 3344 3345 /* Initiate IPPF processing */ 3346 if (IP6_IN_IPP(flags, ipst)) { 3347 uint_t ifindex; 3348 3349 mutex_enter(&ill->ill_lock); 3350 ifindex = ill->ill_phyint->phyint_ifindex; 3351 mutex_exit(&ill->ill_lock); 3352 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3353 if (mp == NULL) { 3354 CONN_DEC_REF(connp); 3355 if (mctl_present) 3356 freeb(first_mp); 3357 return; 3358 } 3359 } 3360 3361 /* 3362 * For link-local always add ifindex so that transport can set 3363 * sin6_scope_id. Avoid it for ICMP error fanout. 
3364 */ 3365 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3366 (flags & IP_FF_IPINFO)) { 3367 /* Add header */ 3368 mp = ip_add_info_v6(mp, inill, &dst); 3369 if (mp == NULL) { 3370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3371 CONN_DEC_REF(connp); 3372 if (mctl_present) 3373 freeb(first_mp); 3374 return; 3375 } else if (mctl_present) { 3376 first_mp->b_cont = mp; 3377 } else { 3378 first_mp = mp; 3379 } 3380 } 3381 3382 rq = connp->conn_rq; 3383 if (!canputnext(rq)) { 3384 if (flags & IP_FF_RAWIP) { 3385 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3386 } else { 3387 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3388 } 3389 3390 freemsg(first_mp); 3391 } else { 3392 if (IPCL_IS_IPTUN(connp)) { 3393 /* 3394 * Tunneled packet. We enforce policy in the tunnel 3395 * module itself. 3396 * 3397 * Send the WHOLE packet up (incl. IPSEC_IN) without 3398 * a policy check. 3399 */ 3400 putnext(rq, first_mp); 3401 CONN_DEC_REF(connp); 3402 return; 3403 } 3404 /* 3405 * Don't enforce here if we're a tunnel - let "tun" do 3406 * it instead. 3407 */ 3408 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3409 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3410 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3411 NULL, ip6h, mctl_present); 3412 if (first_mp == NULL) { 3413 CONN_DEC_REF(connp); 3414 return; 3415 } 3416 } 3417 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3418 putnext(rq, mp); 3419 if (mctl_present) 3420 freeb(first_mp); 3421 } 3422 CONN_DEC_REF(connp); 3423 } 3424 3425 /* 3426 * Send an ICMP error after patching up the packet appropriately. Returns 3427 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3428 */ 3429 int 3430 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3431 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3432 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3433 { 3434 ip6_t *ip6h; 3435 mblk_t *first_mp; 3436 boolean_t secure; 3437 unsigned char db_type; 3438 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3439 3440 first_mp = mp; 3441 if (mctl_present) { 3442 mp = mp->b_cont; 3443 secure = ipsec_in_is_secure(first_mp); 3444 ASSERT(mp != NULL); 3445 } else { 3446 /* 3447 * If this is an ICMP error being reported - which goes 3448 * up as M_CTLs, we need to convert them to M_DATA till 3449 * we finish checking with global policy because 3450 * ipsec_check_global_policy() assumes M_DATA as clear 3451 * and M_CTL as secure. 3452 */ 3453 db_type = mp->b_datap->db_type; 3454 mp->b_datap->db_type = M_DATA; 3455 secure = B_FALSE; 3456 } 3457 /* 3458 * We are generating an icmp error for some inbound packet. 3459 * Called from all ip_fanout_(udp, tcp, proto) functions. 3460 * Before we generate an error, check with global policy 3461 * to see whether this is allowed to enter the system. As 3462 * there is no "conn", we are checking with global policy. 
3463 */ 3464 ip6h = (ip6_t *)mp->b_rptr; 3465 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3466 first_mp = ipsec_check_global_policy(first_mp, NULL, 3467 NULL, ip6h, mctl_present, ipst->ips_netstack); 3468 if (first_mp == NULL) 3469 return (0); 3470 } 3471 3472 if (!mctl_present) 3473 mp->b_datap->db_type = db_type; 3474 3475 if (flags & IP_FF_SEND_ICMP) { 3476 if (flags & IP_FF_HDR_COMPLETE) { 3477 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3478 freemsg(first_mp); 3479 return (1); 3480 } 3481 } 3482 switch (icmp_type) { 3483 case ICMP6_DST_UNREACH: 3484 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3485 B_FALSE, B_FALSE, zoneid, ipst); 3486 break; 3487 case ICMP6_PARAM_PROB: 3488 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3489 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3490 break; 3491 default: 3492 #ifdef DEBUG 3493 panic("ip_fanout_send_icmp_v6: wrong type"); 3494 /*NOTREACHED*/ 3495 #else 3496 freemsg(first_mp); 3497 break; 3498 #endif 3499 } 3500 } else { 3501 freemsg(first_mp); 3502 return (0); 3503 } 3504 3505 return (1); 3506 } 3507 3508 3509 /* 3510 * Fanout for TCP packets 3511 * The caller puts <fport, lport> in the ports parameter. 
3512 */ 3513 static void 3514 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3515 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3516 { 3517 mblk_t *first_mp; 3518 boolean_t secure; 3519 conn_t *connp; 3520 tcph_t *tcph; 3521 boolean_t syn_present = B_FALSE; 3522 ip_stack_t *ipst = inill->ill_ipst; 3523 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3524 3525 first_mp = mp; 3526 if (mctl_present) { 3527 mp = first_mp->b_cont; 3528 secure = ipsec_in_is_secure(first_mp); 3529 ASSERT(mp != NULL); 3530 } else { 3531 secure = B_FALSE; 3532 } 3533 3534 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3535 3536 if (connp == NULL || 3537 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3538 /* 3539 * No hard-bound match. Send Reset. 3540 */ 3541 dblk_t *dp = mp->b_datap; 3542 uint32_t ill_index; 3543 3544 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3545 3546 /* Initiate IPPf processing, if needed. */ 3547 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3548 (flags & IP6_NO_IPPOLICY)) { 3549 ill_index = ill->ill_phyint->phyint_ifindex; 3550 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3551 if (first_mp == NULL) { 3552 if (connp != NULL) 3553 CONN_DEC_REF(connp); 3554 return; 3555 } 3556 } 3557 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3558 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3559 ipst->ips_netstack->netstack_tcp, connp); 3560 if (connp != NULL) 3561 CONN_DEC_REF(connp); 3562 return; 3563 } 3564 3565 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3566 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3567 if (connp->conn_flags & IPCL_TCP) { 3568 squeue_t *sqp; 3569 3570 /* 3571 * For fused tcp loopback, assign the eager's 3572 * squeue to be that of the active connect's. 
3573 */ 3574 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3575 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3576 !secure && 3577 !IP6_IN_IPP(flags, ipst)) { 3578 ASSERT(Q_TO_CONN(q) != NULL); 3579 sqp = Q_TO_CONN(q)->conn_sqp; 3580 } else { 3581 sqp = IP_SQUEUE_GET(lbolt); 3582 } 3583 3584 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3585 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3586 3587 /* 3588 * db_cksumstuff is unused in the incoming 3589 * path; Thus store the ifindex here. It will 3590 * be cleared in tcp_conn_create_v6(). 3591 */ 3592 DB_CKSUMSTUFF(mp) = 3593 (intptr_t)ill->ill_phyint->phyint_ifindex; 3594 syn_present = B_TRUE; 3595 } 3596 } 3597 3598 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3599 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3600 if ((flags & TH_RST) || (flags & TH_URG)) { 3601 CONN_DEC_REF(connp); 3602 freemsg(first_mp); 3603 return; 3604 } 3605 if (flags & TH_ACK) { 3606 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3607 ipst->ips_netstack->netstack_tcp, connp); 3608 CONN_DEC_REF(connp); 3609 return; 3610 } 3611 3612 CONN_DEC_REF(connp); 3613 freemsg(first_mp); 3614 return; 3615 } 3616 3617 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3618 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3619 NULL, ip6h, mctl_present); 3620 if (first_mp == NULL) { 3621 CONN_DEC_REF(connp); 3622 return; 3623 } 3624 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3625 ASSERT(syn_present); 3626 if (mctl_present) { 3627 ASSERT(first_mp != mp); 3628 first_mp->b_datap->db_struioflag |= 3629 STRUIO_POLICY; 3630 } else { 3631 ASSERT(first_mp == mp); 3632 mp->b_datap->db_struioflag &= 3633 ~STRUIO_EAGER; 3634 mp->b_datap->db_struioflag |= 3635 STRUIO_POLICY; 3636 } 3637 } else { 3638 /* 3639 * Discard first_mp early since we're dealing with a 3640 * fully-connected conn_t and tcp doesn't do policy in 3641 * this case. 
Also, if someone is bound to IPPROTO_TCP 3642 * over raw IP, they don't expect to see a M_CTL. 3643 */ 3644 if (mctl_present) { 3645 freeb(first_mp); 3646 mctl_present = B_FALSE; 3647 } 3648 first_mp = mp; 3649 } 3650 } 3651 3652 /* Initiate IPPF processing */ 3653 if (IP6_IN_IPP(flags, ipst)) { 3654 uint_t ifindex; 3655 3656 mutex_enter(&ill->ill_lock); 3657 ifindex = ill->ill_phyint->phyint_ifindex; 3658 mutex_exit(&ill->ill_lock); 3659 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3660 if (mp == NULL) { 3661 CONN_DEC_REF(connp); 3662 if (mctl_present) { 3663 freeb(first_mp); 3664 } 3665 return; 3666 } else if (mctl_present) { 3667 /* 3668 * ip_add_info_v6 might return a new mp. 3669 */ 3670 ASSERT(first_mp != mp); 3671 first_mp->b_cont = mp; 3672 } else { 3673 first_mp = mp; 3674 } 3675 } 3676 3677 /* 3678 * For link-local always add ifindex so that TCP can bind to that 3679 * interface. Avoid it for ICMP error fanout. 3680 */ 3681 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3682 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3683 (flags & IP_FF_IPINFO))) { 3684 /* Add header */ 3685 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3686 if (mp == NULL) { 3687 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3688 CONN_DEC_REF(connp); 3689 if (mctl_present) 3690 freeb(first_mp); 3691 return; 3692 } else if (mctl_present) { 3693 ASSERT(first_mp != mp); 3694 first_mp->b_cont = mp; 3695 } else { 3696 first_mp = mp; 3697 } 3698 } 3699 3700 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3701 if (IPCL_IS_TCP(connp)) { 3702 (*ip_input_proc)(connp->conn_sqp, first_mp, 3703 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3704 } else { 3705 putnext(connp->conn_rq, first_mp); 3706 CONN_DEC_REF(connp); 3707 } 3708 } 3709 3710 /* 3711 * Fanout for UDP packets. 3712 * The caller puts <fport, lport> in the ports parameter. 3713 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3714 * 3715 * If SO_REUSEADDR is set all multicast and broadcast packets 3716 * will be delivered to all streams bound to the same port. 3717 * 3718 * Zones notes: 3719 * Multicast packets will be distributed to streams in all zones. 3720 */ 3721 static void 3722 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3723 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3724 zoneid_t zoneid) 3725 { 3726 uint32_t dstport, srcport; 3727 in6_addr_t dst; 3728 mblk_t *first_mp; 3729 boolean_t secure; 3730 conn_t *connp; 3731 connf_t *connfp; 3732 conn_t *first_conn; 3733 conn_t *next_conn; 3734 mblk_t *mp1, *first_mp1; 3735 in6_addr_t src; 3736 boolean_t shared_addr; 3737 ip_stack_t *ipst = inill->ill_ipst; 3738 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3739 3740 first_mp = mp; 3741 if (mctl_present) { 3742 mp = first_mp->b_cont; 3743 secure = ipsec_in_is_secure(first_mp); 3744 ASSERT(mp != NULL); 3745 } else { 3746 secure = B_FALSE; 3747 } 3748 3749 /* Extract ports in net byte order */ 3750 dstport = htons(ntohl(ports) & 0xFFFF); 3751 srcport = htons(ntohl(ports) >> 16); 3752 dst = ip6h->ip6_dst; 3753 src = ip6h->ip6_src; 3754 3755 shared_addr = (zoneid == ALL_ZONES); 3756 if (shared_addr) { 3757 /* 3758 * No need to handle exclusive-stack zones since ALL_ZONES 3759 * only applies to the shared stack. 3760 */ 3761 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3762 /* 3763 * If no shared MLP is found, tsol_mlp_findzone returns 3764 * ALL_ZONES. In that case, we assume it's SLP, and 3765 * search for the zone based on the packet label. 3766 * That will also return ALL_ZONES on failure, but 3767 * we never allow conn_zoneid to be set to ALL_ZONES. 3768 */ 3769 if (zoneid == ALL_ZONES) 3770 zoneid = tsol_packet_to_zoneid(mp); 3771 } 3772 3773 /* Attempt to find a client stream based on destination port. 
*/ 3774 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3775 mutex_enter(&connfp->connf_lock); 3776 connp = connfp->connf_head; 3777 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3778 /* 3779 * Not multicast. Send to the one (first) client we find. 3780 */ 3781 while (connp != NULL) { 3782 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3783 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3784 conn_wantpacket_v6(connp, ill, ip6h, 3785 flags, zoneid)) { 3786 break; 3787 } 3788 connp = connp->conn_next; 3789 } 3790 if (connp == NULL || connp->conn_upq == NULL) 3791 goto notfound; 3792 3793 if (is_system_labeled() && 3794 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3795 connp)) 3796 goto notfound; 3797 3798 /* Found a client */ 3799 CONN_INC_REF(connp); 3800 mutex_exit(&connfp->connf_lock); 3801 3802 if (CONN_UDP_FLOWCTLD(connp)) { 3803 freemsg(first_mp); 3804 CONN_DEC_REF(connp); 3805 return; 3806 } 3807 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3808 first_mp = ipsec_check_inbound_policy(first_mp, 3809 connp, NULL, ip6h, mctl_present); 3810 if (first_mp == NULL) { 3811 CONN_DEC_REF(connp); 3812 return; 3813 } 3814 } 3815 /* Initiate IPPF processing */ 3816 if (IP6_IN_IPP(flags, ipst)) { 3817 uint_t ifindex; 3818 3819 mutex_enter(&ill->ill_lock); 3820 ifindex = ill->ill_phyint->phyint_ifindex; 3821 mutex_exit(&ill->ill_lock); 3822 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3823 if (mp == NULL) { 3824 CONN_DEC_REF(connp); 3825 if (mctl_present) 3826 freeb(first_mp); 3827 return; 3828 } 3829 } 3830 /* 3831 * For link-local always add ifindex so that 3832 * transport can set sin6_scope_id. Avoid it for 3833 * ICMP error fanout. 
3834 */ 3835 if ((connp->conn_ip_recvpktinfo || 3836 IN6_IS_ADDR_LINKLOCAL(&src)) && 3837 (flags & IP_FF_IPINFO)) { 3838 /* Add header */ 3839 mp = ip_add_info_v6(mp, inill, &dst); 3840 if (mp == NULL) { 3841 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3842 CONN_DEC_REF(connp); 3843 if (mctl_present) 3844 freeb(first_mp); 3845 return; 3846 } else if (mctl_present) { 3847 first_mp->b_cont = mp; 3848 } else { 3849 first_mp = mp; 3850 } 3851 } 3852 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3853 3854 /* Send it upstream */ 3855 CONN_UDP_RECV(connp, mp); 3856 3857 IP6_STAT(ipst, ip6_udp_fannorm); 3858 CONN_DEC_REF(connp); 3859 if (mctl_present) 3860 freeb(first_mp); 3861 return; 3862 } 3863 3864 while (connp != NULL) { 3865 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3866 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3867 (!is_system_labeled() || 3868 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3869 connp))) 3870 break; 3871 connp = connp->conn_next; 3872 } 3873 3874 if (connp == NULL || connp->conn_upq == NULL) 3875 goto notfound; 3876 3877 first_conn = connp; 3878 3879 CONN_INC_REF(connp); 3880 connp = connp->conn_next; 3881 for (;;) { 3882 while (connp != NULL) { 3883 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3884 src) && conn_wantpacket_v6(connp, ill, ip6h, 3885 flags, zoneid) && 3886 (!is_system_labeled() || 3887 tsol_receive_local(mp, &dst, IPV6_VERSION, 3888 shared_addr, connp))) 3889 break; 3890 connp = connp->conn_next; 3891 } 3892 /* 3893 * Just copy the data part alone. The mctl part is 3894 * needed just for verifying policy and it is never 3895 * sent up. 3896 */ 3897 if (connp == NULL || 3898 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3899 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3900 /* 3901 * No more interested clients or memory 3902 * allocation failed 3903 */ 3904 connp = first_conn; 3905 break; 3906 } 3907 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3908 CONN_INC_REF(connp); 3909 mutex_exit(&connfp->connf_lock); 3910 /* 3911 * For link-local always add ifindex so that transport 3912 * can set sin6_scope_id. Avoid it for ICMP error 3913 * fanout. 3914 */ 3915 if ((connp->conn_ip_recvpktinfo || 3916 IN6_IS_ADDR_LINKLOCAL(&src)) && 3917 (flags & IP_FF_IPINFO)) { 3918 /* Add header */ 3919 mp1 = ip_add_info_v6(mp1, inill, &dst); 3920 } 3921 /* mp1 could have changed */ 3922 if (mctl_present) 3923 first_mp1->b_cont = mp1; 3924 else 3925 first_mp1 = mp1; 3926 if (mp1 == NULL) { 3927 if (mctl_present) 3928 freeb(first_mp1); 3929 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3930 goto next_one; 3931 } 3932 if (CONN_UDP_FLOWCTLD(connp)) { 3933 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3934 freemsg(first_mp1); 3935 goto next_one; 3936 } 3937 3938 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3939 first_mp1 = ipsec_check_inbound_policy 3940 (first_mp1, connp, NULL, ip6h, 3941 mctl_present); 3942 } 3943 if (first_mp1 != NULL) { 3944 if (mctl_present) 3945 freeb(first_mp1); 3946 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3947 3948 /* Send it upstream */ 3949 CONN_UDP_RECV(connp, mp1); 3950 } 3951 next_one: 3952 mutex_enter(&connfp->connf_lock); 3953 /* Follow the next pointer before releasing the conn. */ 3954 next_conn = connp->conn_next; 3955 IP6_STAT(ipst, ip6_udp_fanmb); 3956 CONN_DEC_REF(connp); 3957 connp = next_conn; 3958 } 3959 3960 /* Last one. Send it upstream. 
*/ 3961 mutex_exit(&connfp->connf_lock); 3962 3963 /* Initiate IPPF processing */ 3964 if (IP6_IN_IPP(flags, ipst)) { 3965 uint_t ifindex; 3966 3967 mutex_enter(&ill->ill_lock); 3968 ifindex = ill->ill_phyint->phyint_ifindex; 3969 mutex_exit(&ill->ill_lock); 3970 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3971 if (mp == NULL) { 3972 CONN_DEC_REF(connp); 3973 if (mctl_present) { 3974 freeb(first_mp); 3975 } 3976 return; 3977 } 3978 } 3979 3980 /* 3981 * For link-local always add ifindex so that transport can set 3982 * sin6_scope_id. Avoid it for ICMP error fanout. 3983 */ 3984 if ((connp->conn_ip_recvpktinfo || 3985 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3986 /* Add header */ 3987 mp = ip_add_info_v6(mp, inill, &dst); 3988 if (mp == NULL) { 3989 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3990 CONN_DEC_REF(connp); 3991 if (mctl_present) 3992 freeb(first_mp); 3993 return; 3994 } else if (mctl_present) { 3995 first_mp->b_cont = mp; 3996 } else { 3997 first_mp = mp; 3998 } 3999 } 4000 if (CONN_UDP_FLOWCTLD(connp)) { 4001 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 4002 freemsg(mp); 4003 } else { 4004 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 4005 first_mp = ipsec_check_inbound_policy(first_mp, 4006 connp, NULL, ip6h, mctl_present); 4007 if (first_mp == NULL) { 4008 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 4009 CONN_DEC_REF(connp); 4010 return; 4011 } 4012 } 4013 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 4014 4015 /* Send it upstream */ 4016 CONN_UDP_RECV(connp, mp); 4017 } 4018 IP6_STAT(ipst, ip6_udp_fanmb); 4019 CONN_DEC_REF(connp); 4020 if (mctl_present) 4021 freeb(first_mp); 4022 return; 4023 4024 notfound: 4025 mutex_exit(&connfp->connf_lock); 4026 /* 4027 * No one bound to this port. Is 4028 * there a client that wants all 4029 * unclaimed datagrams? 
4030 */ 4031 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4032 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4033 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 4034 zoneid); 4035 } else { 4036 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4037 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4038 mctl_present, zoneid, ipst)) { 4039 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 4040 } 4041 } 4042 } 4043 4044 /* 4045 * int ip_find_hdr_v6() 4046 * 4047 * This routine is used by the upper layer protocols and the IP tunnel 4048 * module to: 4049 * - Set extension header pointers to appropriate locations 4050 * - Determine IPv6 header length and return it 4051 * - Return a pointer to the last nexthdr value 4052 * 4053 * The caller must initialize ipp_fields. 4054 * 4055 * NOTE: If multiple extension headers of the same type are present, 4056 * ip_find_hdr_v6() will set the respective extension header pointers 4057 * to the first one that it encounters in the IPv6 header. It also 4058 * skips fragment headers. This routine deals with malformed packets 4059 * of various sorts in which case the returned length is up to the 4060 * malformed part. 4061 */ 4062 int 4063 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4064 { 4065 uint_t length, ehdrlen; 4066 uint8_t nexthdr; 4067 uint8_t *whereptr, *endptr; 4068 ip6_dest_t *tmpdstopts; 4069 ip6_rthdr_t *tmprthdr; 4070 ip6_hbh_t *tmphopopts; 4071 ip6_frag_t *tmpfraghdr; 4072 4073 length = IPV6_HDR_LEN; 4074 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4075 endptr = mp->b_wptr; 4076 4077 nexthdr = ip6h->ip6_nxt; 4078 while (whereptr < endptr) { 4079 /* Is there enough left for len + nexthdr? 
*/ 4080 if (whereptr + MIN_EHDR_LEN > endptr) 4081 goto done; 4082 4083 switch (nexthdr) { 4084 case IPPROTO_HOPOPTS: 4085 tmphopopts = (ip6_hbh_t *)whereptr; 4086 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4087 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4088 goto done; 4089 nexthdr = tmphopopts->ip6h_nxt; 4090 /* return only 1st hbh */ 4091 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4092 ipp->ipp_fields |= IPPF_HOPOPTS; 4093 ipp->ipp_hopopts = tmphopopts; 4094 ipp->ipp_hopoptslen = ehdrlen; 4095 } 4096 break; 4097 case IPPROTO_DSTOPTS: 4098 tmpdstopts = (ip6_dest_t *)whereptr; 4099 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4100 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4101 goto done; 4102 nexthdr = tmpdstopts->ip6d_nxt; 4103 /* 4104 * ipp_dstopts is set to the destination header after a 4105 * routing header. 4106 * Assume it is a post-rthdr destination header 4107 * and adjust when we find an rthdr. 4108 */ 4109 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4110 ipp->ipp_fields |= IPPF_DSTOPTS; 4111 ipp->ipp_dstopts = tmpdstopts; 4112 ipp->ipp_dstoptslen = ehdrlen; 4113 } 4114 break; 4115 case IPPROTO_ROUTING: 4116 tmprthdr = (ip6_rthdr_t *)whereptr; 4117 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4118 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4119 goto done; 4120 nexthdr = tmprthdr->ip6r_nxt; 4121 /* return only 1st rthdr */ 4122 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4123 ipp->ipp_fields |= IPPF_RTHDR; 4124 ipp->ipp_rthdr = tmprthdr; 4125 ipp->ipp_rthdrlen = ehdrlen; 4126 } 4127 /* 4128 * Make any destination header we've seen be a 4129 * pre-rthdr destination header. 
4130 */ 4131 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4132 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4133 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4134 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4135 ipp->ipp_dstopts = NULL; 4136 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4137 ipp->ipp_dstoptslen = 0; 4138 } 4139 break; 4140 case IPPROTO_FRAGMENT: 4141 tmpfraghdr = (ip6_frag_t *)whereptr; 4142 ehdrlen = sizeof (ip6_frag_t); 4143 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4144 goto done; 4145 nexthdr = tmpfraghdr->ip6f_nxt; 4146 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4147 ipp->ipp_fields |= IPPF_FRAGHDR; 4148 ipp->ipp_fraghdr = tmpfraghdr; 4149 ipp->ipp_fraghdrlen = ehdrlen; 4150 } 4151 break; 4152 case IPPROTO_NONE: 4153 default: 4154 goto done; 4155 } 4156 length += ehdrlen; 4157 whereptr += ehdrlen; 4158 } 4159 done: 4160 if (nexthdrp != NULL) 4161 *nexthdrp = nexthdr; 4162 return (length); 4163 } 4164 4165 int 4166 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4167 { 4168 ire_t *ire; 4169 4170 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4171 ire = ire_lookup_local_v6(zoneid, ipst); 4172 if (ire == NULL) { 4173 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4174 return (1); 4175 } 4176 ip6h->ip6_src = ire->ire_addr_v6; 4177 ire_refrele(ire); 4178 } 4179 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4180 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4181 return (0); 4182 } 4183 4184 /* 4185 * Try to determine where and what are the IPv6 header length and 4186 * pointer to nexthdr value for the upper layer protocol (or an 4187 * unknown next hdr). 4188 * 4189 * Parameters returns a pointer to the nexthdr value; 4190 * Must handle malformed packets of various sorts. 4191 * Function returns failure for malformed cases. 
4192 */ 4193 boolean_t 4194 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4195 uint8_t **nexthdrpp) 4196 { 4197 uint16_t length; 4198 uint_t ehdrlen; 4199 uint8_t *nexthdrp; 4200 uint8_t *whereptr; 4201 uint8_t *endptr; 4202 ip6_dest_t *desthdr; 4203 ip6_rthdr_t *rthdr; 4204 ip6_frag_t *fraghdr; 4205 4206 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4207 length = IPV6_HDR_LEN; 4208 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4209 endptr = mp->b_wptr; 4210 4211 nexthdrp = &ip6h->ip6_nxt; 4212 while (whereptr < endptr) { 4213 /* Is there enough left for len + nexthdr? */ 4214 if (whereptr + MIN_EHDR_LEN > endptr) 4215 break; 4216 4217 switch (*nexthdrp) { 4218 case IPPROTO_HOPOPTS: 4219 case IPPROTO_DSTOPTS: 4220 /* Assumes the headers are identical for hbh and dst */ 4221 desthdr = (ip6_dest_t *)whereptr; 4222 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4223 if ((uchar_t *)desthdr + ehdrlen > endptr) 4224 return (B_FALSE); 4225 nexthdrp = &desthdr->ip6d_nxt; 4226 break; 4227 case IPPROTO_ROUTING: 4228 rthdr = (ip6_rthdr_t *)whereptr; 4229 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4230 if ((uchar_t *)rthdr + ehdrlen > endptr) 4231 return (B_FALSE); 4232 nexthdrp = &rthdr->ip6r_nxt; 4233 break; 4234 case IPPROTO_FRAGMENT: 4235 fraghdr = (ip6_frag_t *)whereptr; 4236 ehdrlen = sizeof (ip6_frag_t); 4237 if ((uchar_t *)&fraghdr[1] > endptr) 4238 return (B_FALSE); 4239 nexthdrp = &fraghdr->ip6f_nxt; 4240 break; 4241 case IPPROTO_NONE: 4242 /* No next header means we're finished */ 4243 default: 4244 *hdr_length_ptr = length; 4245 *nexthdrpp = nexthdrp; 4246 return (B_TRUE); 4247 } 4248 length += ehdrlen; 4249 whereptr += ehdrlen; 4250 *hdr_length_ptr = length; 4251 *nexthdrpp = nexthdrp; 4252 } 4253 switch (*nexthdrp) { 4254 case IPPROTO_HOPOPTS: 4255 case IPPROTO_DSTOPTS: 4256 case IPPROTO_ROUTING: 4257 case IPPROTO_FRAGMENT: 4258 /* 4259 * If any know extension headers are still to be processed, 4260 * the 
packet's malformed (or at least all the IP header(s) are 4261 * not in the same mblk - and that should never happen. 4262 */ 4263 return (B_FALSE); 4264 4265 default: 4266 /* 4267 * If we get here, we know that all of the IP headers were in 4268 * the same mblk, even if the ULP header is in the next mblk. 4269 */ 4270 *hdr_length_ptr = length; 4271 *nexthdrpp = nexthdrp; 4272 return (B_TRUE); 4273 } 4274 } 4275 4276 /* 4277 * Return the length of the IPv6 related headers (including extension headers) 4278 * Returns a length even if the packet is malformed. 4279 */ 4280 int 4281 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4282 { 4283 uint16_t hdr_len; 4284 uint8_t *nexthdrp; 4285 4286 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4287 return (hdr_len); 4288 } 4289 4290 /* 4291 * Select an ill for the packet by considering load spreading across 4292 * a different ill in the group if dst_ill is part of some group. 4293 */ 4294 static ill_t * 4295 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4296 { 4297 ill_t *ill; 4298 4299 /* 4300 * We schedule irrespective of whether the source address is 4301 * INADDR_UNSPECIED or not. 4302 */ 4303 ill = illgrp_scheduler(dst_ill); 4304 if (ill == NULL) 4305 return (NULL); 4306 4307 /* 4308 * For groups with names ip_sioctl_groupname ensures that all 4309 * ills are of same type. For groups without names, ifgrp_insert 4310 * ensures this. 4311 */ 4312 ASSERT(dst_ill->ill_type == ill->ill_type); 4313 4314 return (ill); 4315 } 4316 4317 /* 4318 * IPv6 - 4319 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4320 * to send out a packet to a destination address for which we do not have 4321 * specific routing information. 4322 * 4323 * Handle non-multicast packets. If ill is non-NULL the match is done 4324 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4363 */ 4364 /* ARGSUSED */ 4365 void 4366 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4367 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4368 { 4369 in6_addr_t v6gw; 4370 in6_addr_t dst; 4371 ire_t *ire = NULL; 4372 ipif_t *src_ipif = NULL; 4373 ill_t *dst_ill = NULL; 4374 ire_t *sire = NULL; 4375 ire_t *save_ire; 4376 ip6_t *ip6h; 4377 int err = 0; 4378 mblk_t *first_mp; 4379 ipsec_out_t *io; 4380 ill_t *attach_ill = NULL; 4381 ushort_t ire_marks = 0; 4382 int match_flags; 4383 boolean_t ip6i_present; 4384 ire_t *first_sire = NULL; 4385 mblk_t *copy_mp = NULL; 4386 mblk_t *xmit_mp = NULL; 4387 in6_addr_t save_dst; 4388 uint32_t multirt_flags = 4389 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4390 boolean_t multirt_is_resolvable; 4391 boolean_t multirt_resolve_next; 4392 boolean_t need_rele = B_FALSE; 4393 boolean_t do_attach_ill = B_FALSE; 4394 boolean_t ip6_asp_table_held = B_FALSE; 4395 tsol_ire_gw_secattr_t *attrp = NULL; 4396 tsol_gcgrp_t *gcgrp = NULL; 4397 tsol_gcgrp_addr_t ga; 4398 4399 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4400 4401 first_mp = mp; 4402 if (mp->b_datap->db_type == M_CTL) { 4403 mp = mp->b_cont; 4404 io = (ipsec_out_t *)first_mp->b_rptr; 4405 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4406 } else { 4407 io = NULL; 4408 } 4409 4410 /* 4411 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4412 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4413 * could be NULL. 4414 * 4415 * This information can appear either in an ip6i_t or an IPSEC_OUT 4416 * message. 4417 */ 4418 ip6h = (ip6_t *)mp->b_rptr; 4419 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4420 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4421 if (!ip6i_present || 4422 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4423 attach_ill = ip_grab_attach_ill(ill, first_mp, 4424 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4425 io->ipsec_out_ill_index), B_TRUE, ipst); 4426 /* Failure case frees things for us. */ 4427 if (attach_ill == NULL) 4428 return; 4429 4430 /* 4431 * Check if we need an ire that will not be 4432 * looked up by anybody else i.e. HIDDEN. 4433 */ 4434 if (ill_is_probeonly(attach_ill)) 4435 ire_marks = IRE_MARK_HIDDEN; 4436 } 4437 } 4438 4439 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4440 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4441 goto icmp_err_ret; 4442 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4443 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4444 goto icmp_err_ret; 4445 } 4446 4447 /* 4448 * If this IRE is created for forwarding or it is not for 4449 * TCP traffic, mark it as temporary. 4450 * 4451 * Is it sufficient just to check the next header?? 4452 */ 4453 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4454 ire_marks |= IRE_MARK_TEMPORARY; 4455 4456 /* 4457 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4458 * chain until it gets the most specific information available. 4459 * For example, we know that there is no IRE_CACHE for this dest, 4460 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4461 * ire_ftable_lookup_v6 will look up the gateway, etc. 4462 */ 4463 4464 if (ill == NULL) { 4465 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4466 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4467 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4468 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4469 match_flags, ipst); 4470 /* 4471 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4472 * in a NULL ill, but the packet could be a neighbor 4473 * solicitation/advertisment and could have a valid attach_ill. 4474 */ 4475 if (attach_ill != NULL) 4476 ill_refrele(attach_ill); 4477 } else { 4478 if (attach_ill != NULL) { 4479 /* 4480 * attach_ill is set only for communicating with 4481 * on-link hosts. 
So, don't look for DEFAULT. 4482 * ip_wput_v6 passes the right ill in this case and 4483 * hence we can assert. 4484 */ 4485 ASSERT(ill == attach_ill); 4486 ill_refrele(attach_ill); 4487 do_attach_ill = B_TRUE; 4488 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4489 } else { 4490 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4491 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4492 } 4493 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4494 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4495 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4496 } 4497 4498 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4499 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4500 4501 if (zoneid == ALL_ZONES && ire != NULL) { 4502 /* 4503 * In the forwarding case, we can use a route from any zone 4504 * since we won't change the source address. We can easily 4505 * assert that the source address is already set when there's no 4506 * ip6_info header - otherwise we'd have to call pullupmsg(). 4507 */ 4508 ASSERT(ip6i_present || 4509 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4510 zoneid = ire->ire_zoneid; 4511 } 4512 4513 /* 4514 * We enter a loop that will be run only once in most cases. 4515 * The loop is re-entered in the case where the destination 4516 * can be reached through multiple RTF_MULTIRT-flagged routes. 4517 * The intention is to compute multiple routes to a single 4518 * destination in a single ip_newroute_v6 call. 4519 * The information is contained in sire->ire_flags. 
4520 */ 4521 do { 4522 multirt_resolve_next = B_FALSE; 4523 4524 if (dst_ill != NULL) { 4525 ill_refrele(dst_ill); 4526 dst_ill = NULL; 4527 } 4528 if (src_ipif != NULL) { 4529 ipif_refrele(src_ipif); 4530 src_ipif = NULL; 4531 } 4532 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4533 ip3dbg(("ip_newroute_v6: starting new resolution " 4534 "with first_mp %p, tag %d\n", 4535 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4536 4537 /* 4538 * We check if there are trailing unresolved routes for 4539 * the destination contained in sire. 4540 */ 4541 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4542 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4543 4544 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4545 "ire %p, sire %p\n", 4546 multirt_is_resolvable, (void *)ire, (void *)sire)); 4547 4548 if (!multirt_is_resolvable) { 4549 /* 4550 * No more multirt routes to resolve; give up 4551 * (all routes resolved or no more resolvable 4552 * routes). 4553 */ 4554 if (ire != NULL) { 4555 ire_refrele(ire); 4556 ire = NULL; 4557 } 4558 } else { 4559 ASSERT(sire != NULL); 4560 ASSERT(ire != NULL); 4561 /* 4562 * We simply use first_sire as a flag that 4563 * indicates if a resolvable multirt route has 4564 * already been found during the preceding 4565 * loops. If it is not the case, we may have 4566 * to send an ICMP error to report that the 4567 * destination is unreachable. We do not 4568 * IRE_REFHOLD first_sire. 4569 */ 4570 if (first_sire == NULL) { 4571 first_sire = sire; 4572 } 4573 } 4574 } 4575 if ((ire == NULL) || (ire == sire)) { 4576 /* 4577 * either ire == NULL (the destination cannot be 4578 * resolved) or ire == sire (the gateway cannot be 4579 * resolved). At this point, there are no more routes 4580 * to resolve for the destination, thus we exit. 
4581 */ 4582 if (ip_debug > 3) { 4583 /* ip2dbg */ 4584 pr_addr_dbg("ip_newroute_v6: " 4585 "can't resolve %s\n", AF_INET6, v6dstp); 4586 } 4587 ip3dbg(("ip_newroute_v6: " 4588 "ire %p, sire %p, first_sire %p\n", 4589 (void *)ire, (void *)sire, (void *)first_sire)); 4590 4591 if (sire != NULL) { 4592 ire_refrele(sire); 4593 sire = NULL; 4594 } 4595 4596 if (first_sire != NULL) { 4597 /* 4598 * At least one multirt route has been found 4599 * in the same ip_newroute() call; there is no 4600 * need to report an ICMP error. 4601 * first_sire was not IRE_REFHOLDed. 4602 */ 4603 MULTIRT_DEBUG_UNTAG(first_mp); 4604 freemsg(first_mp); 4605 return; 4606 } 4607 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4608 RTA_DST, ipst); 4609 goto icmp_err_ret; 4610 } 4611 4612 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4613 4614 /* 4615 * Verify that the returned IRE does not have either the 4616 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4617 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4618 */ 4619 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4620 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4621 goto icmp_err_ret; 4622 4623 /* 4624 * Increment the ire_ob_pkt_count field for ire if it is an 4625 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4626 * increment the same for the parent IRE, sire, if it is some 4627 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4628 */ 4629 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4630 UPDATE_OB_PKT_COUNT(ire); 4631 ire->ire_last_used_time = lbolt; 4632 } 4633 4634 if (sire != NULL) { 4635 mutex_enter(&sire->ire_lock); 4636 v6gw = sire->ire_gateway_addr_v6; 4637 mutex_exit(&sire->ire_lock); 4638 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4639 IRE_INTERFACE)) == 0); 4640 UPDATE_OB_PKT_COUNT(sire); 4641 sire->ire_last_used_time = lbolt; 4642 } else { 4643 v6gw = ipv6_all_zeros; 4644 } 4645 4646 /* 4647 * We have a route to reach the destination. 
4648 * 4649 * 1) If the interface is part of ill group, try to get a new 4650 * ill taking load spreading into account. 4651 * 4652 * 2) After selecting the ill, get a source address that might 4653 * create good inbound load spreading and that matches the 4654 * right scope. ipif_select_source_v6 does this for us. 4655 * 4656 * If the application specified the ill (ifindex), we still 4657 * load spread. Only if the packets needs to go out specifically 4658 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4659 * IPV6_BOUND_PIF we don't try to use a different ill for load 4660 * spreading. 4661 */ 4662 if (!do_attach_ill) { 4663 /* 4664 * If the interface belongs to an interface group, 4665 * make sure the next possible interface in the group 4666 * is used. This encourages load spreading among 4667 * peers in an interface group. However, in the case 4668 * of multirouting, load spreading is not used, as we 4669 * actually want to replicate outgoing packets through 4670 * particular interfaces. 4671 * 4672 * Note: While we pick a dst_ill we are really only 4673 * interested in the ill for load spreading. 4674 * The source ipif is determined by source address 4675 * selection below. 4676 */ 4677 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4678 dst_ill = ire->ire_ipif->ipif_ill; 4679 /* For uniformity do a refhold */ 4680 ill_refhold(dst_ill); 4681 } else { 4682 /* 4683 * If we are here trying to create an IRE_CACHE 4684 * for an offlink destination and have the 4685 * IRE_CACHE for the next hop and the latter is 4686 * using virtual IP source address selection i.e 4687 * it's ire->ire_ipif is pointing to a virtual 4688 * network interface (vni) then 4689 * ip_newroute_get_dst_ll() will return the vni 4690 * interface as the dst_ill. 
Since the vni is 4691 * virtual i.e not associated with any physical 4692 * interface, it cannot be the dst_ill, hence 4693 * in such a case call ip_newroute_get_dst_ll() 4694 * with the stq_ill instead of the ire_ipif ILL. 4695 * The function returns a refheld ill. 4696 */ 4697 if ((ire->ire_type == IRE_CACHE) && 4698 IS_VNI(ire->ire_ipif->ipif_ill)) 4699 dst_ill = ip_newroute_get_dst_ill_v6( 4700 ire->ire_stq->q_ptr); 4701 else 4702 dst_ill = ip_newroute_get_dst_ill_v6( 4703 ire->ire_ipif->ipif_ill); 4704 } 4705 if (dst_ill == NULL) { 4706 if (ip_debug > 2) { 4707 pr_addr_dbg("ip_newroute_v6 : no dst " 4708 "ill for dst %s\n", 4709 AF_INET6, v6dstp); 4710 } 4711 goto icmp_err_ret; 4712 } else if (dst_ill->ill_group == NULL && ill != NULL && 4713 dst_ill != ill) { 4714 /* 4715 * If "ill" is not part of any group, we should 4716 * have found a route matching "ill" as we 4717 * called ire_ftable_lookup_v6 with 4718 * MATCH_IRE_ILL_GROUP. 4719 * Rather than asserting when there is a 4720 * mismatch, we just drop the packet. 4721 */ 4722 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4723 "dst_ill %s ill %s\n", 4724 dst_ill->ill_name, 4725 ill->ill_name)); 4726 goto icmp_err_ret; 4727 } 4728 } else { 4729 dst_ill = ire->ire_ipif->ipif_ill; 4730 /* For uniformity do refhold */ 4731 ill_refhold(dst_ill); 4732 /* 4733 * We should have found a route matching ill as we 4734 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4735 * Rather than asserting, while there is a mismatch, 4736 * we just drop the packet. 4737 */ 4738 if (dst_ill != ill) { 4739 ip0dbg(("ip_newroute_v6: Packet dropped as " 4740 "IP6I_ATTACH_IF ill is %s, " 4741 "ire->ire_ipif->ipif_ill is %s\n", 4742 ill->ill_name, 4743 dst_ill->ill_name)); 4744 goto icmp_err_ret; 4745 } 4746 } 4747 /* 4748 * Pick a source address which matches the scope of the 4749 * destination address. 4750 * For RTF_SETSRC routes, the source address is imposed by the 4751 * parent ire (sire). 
4752 */ 4753 ASSERT(src_ipif == NULL); 4754 if (ire->ire_type == IRE_IF_RESOLVER && 4755 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4756 ip6_asp_can_lookup(ipst)) { 4757 /* 4758 * The ire cache entry we're adding is for the 4759 * gateway itself. The source address in this case 4760 * is relative to the gateway's address. 4761 */ 4762 ip6_asp_table_held = B_TRUE; 4763 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4764 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4765 if (src_ipif != NULL) 4766 ire_marks |= IRE_MARK_USESRC_CHECK; 4767 } else { 4768 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4769 /* 4770 * Check that the ipif matching the requested 4771 * source address still exists. 4772 */ 4773 src_ipif = ipif_lookup_addr_v6( 4774 &sire->ire_src_addr_v6, NULL, zoneid, 4775 NULL, NULL, NULL, NULL, ipst); 4776 } 4777 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4778 uint_t restrict_ill = RESTRICT_TO_NONE; 4779 4780 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4781 & IP6I_ATTACH_IF) 4782 restrict_ill = RESTRICT_TO_ILL; 4783 ip6_asp_table_held = B_TRUE; 4784 src_ipif = ipif_select_source_v6(dst_ill, 4785 v6dstp, restrict_ill, 4786 IPV6_PREFER_SRC_DEFAULT, zoneid); 4787 if (src_ipif != NULL) 4788 ire_marks |= IRE_MARK_USESRC_CHECK; 4789 } 4790 } 4791 4792 if (src_ipif == NULL) { 4793 if (ip_debug > 2) { 4794 /* ip1dbg */ 4795 pr_addr_dbg("ip_newroute_v6: no src for " 4796 "dst %s\n, ", AF_INET6, v6dstp); 4797 printf("ip_newroute_v6: interface name %s\n", 4798 dst_ill->ill_name); 4799 } 4800 goto icmp_err_ret; 4801 } 4802 4803 if (ip_debug > 3) { 4804 /* ip2dbg */ 4805 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4806 AF_INET6, &v6gw); 4807 } 4808 ip2dbg(("\tire type %s (%d)\n", 4809 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4810 4811 /* 4812 * At this point in ip_newroute_v6(), ire is either the 4813 * IRE_CACHE of the next-hop gateway for an off-subnet 4814 * destination or an IRE_INTERFACE type that should be used 4815 * to 
resolve an on-subnet destination or an on-subnet 4816 * next-hop gateway. 4817 * 4818 * In the IRE_CACHE case, we have the following : 4819 * 4820 * 1) src_ipif - used for getting a source address. 4821 * 4822 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4823 * means packets using this IRE_CACHE will go out on dst_ill. 4824 * 4825 * 3) The IRE sire will point to the prefix that is the longest 4826 * matching route for the destination. These prefix types 4827 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4828 * 4829 * The newly created IRE_CACHE entry for the off-subnet 4830 * destination is tied to both the prefix route and the 4831 * interface route used to resolve the next-hop gateway 4832 * via the ire_phandle and ire_ihandle fields, respectively. 4833 * 4834 * In the IRE_INTERFACE case, we have the following : 4835 * 4836 * 1) src_ipif - used for getting a source address. 4837 * 4838 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4839 * means packets using the IRE_CACHE that we will build 4840 * here will go out on dst_ill. 4841 * 4842 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4843 * to be created will only be tied to the IRE_INTERFACE that 4844 * was derived from the ire_ihandle field. 4845 * 4846 * If sire is non-NULL, it means the destination is off-link 4847 * and we will first create the IRE_CACHE for the gateway. 4848 * Next time through ip_newroute_v6, we will create the 4849 * IRE_CACHE for the final destination as described above. 4850 */ 4851 save_ire = ire; 4852 switch (ire->ire_type) { 4853 case IRE_CACHE: { 4854 ire_t *ipif_ire; 4855 4856 ASSERT(sire != NULL); 4857 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4858 mutex_enter(&ire->ire_lock); 4859 v6gw = ire->ire_gateway_addr_v6; 4860 mutex_exit(&ire->ire_lock); 4861 } 4862 /* 4863 * We need 3 ire's to create a new cache ire for an 4864 * off-link destination from the cache ire of the 4865 * gateway. 4866 * 4867 * 1. The prefix ire 'sire' 4868 * 2. 
The cache ire of the gateway 'ire' 4869 * 3. The interface ire 'ipif_ire' 4870 * 4871 * We have (1) and (2). We lookup (3) below. 4872 * 4873 * If there is no interface route to the gateway, 4874 * it is a race condition, where we found the cache 4875 * but the inteface route has been deleted. 4876 */ 4877 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4878 if (ipif_ire == NULL) { 4879 ip1dbg(("ip_newroute_v6:" 4880 "ire_ihandle_lookup_offlink_v6 failed\n")); 4881 goto icmp_err_ret; 4882 } 4883 /* 4884 * Assume DL_UNITDATA_REQ is same for all physical 4885 * interfaces in the ifgrp. If it isn't, this code will 4886 * have to be seriously rewhacked to allow the 4887 * fastpath probing (such that I cache the link 4888 * header in the IRE_CACHE) to work over ifgrps. 4889 * We have what we need to build an IRE_CACHE. 4890 */ 4891 /* 4892 * Note: the new ire inherits RTF_SETSRC 4893 * and RTF_MULTIRT to propagate these flags from prefix 4894 * to cache. 4895 */ 4896 4897 /* 4898 * Check cached gateway IRE for any security 4899 * attributes; if found, associate the gateway 4900 * credentials group to the destination IRE. 
4901 */ 4902 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4903 mutex_enter(&attrp->igsa_lock); 4904 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4905 GCGRP_REFHOLD(gcgrp); 4906 mutex_exit(&attrp->igsa_lock); 4907 } 4908 4909 ire = ire_create_v6( 4910 v6dstp, /* dest address */ 4911 &ipv6_all_ones, /* mask */ 4912 &src_ipif->ipif_v6src_addr, /* source address */ 4913 &v6gw, /* gateway address */ 4914 &save_ire->ire_max_frag, 4915 NULL, /* src nce */ 4916 dst_ill->ill_rq, /* recv-from queue */ 4917 dst_ill->ill_wq, /* send-to queue */ 4918 IRE_CACHE, 4919 src_ipif, 4920 &sire->ire_mask_v6, /* Parent mask */ 4921 sire->ire_phandle, /* Parent handle */ 4922 ipif_ire->ire_ihandle, /* Interface handle */ 4923 sire->ire_flags & /* flags if any */ 4924 (RTF_SETSRC | RTF_MULTIRT), 4925 &(sire->ire_uinfo), 4926 NULL, 4927 gcgrp, 4928 ipst); 4929 4930 if (ire == NULL) { 4931 if (gcgrp != NULL) { 4932 GCGRP_REFRELE(gcgrp); 4933 gcgrp = NULL; 4934 } 4935 ire_refrele(save_ire); 4936 ire_refrele(ipif_ire); 4937 break; 4938 } 4939 4940 /* reference now held by IRE */ 4941 gcgrp = NULL; 4942 4943 ire->ire_marks |= ire_marks; 4944 4945 /* 4946 * Prevent sire and ipif_ire from getting deleted. The 4947 * newly created ire is tied to both of them via the 4948 * phandle and ihandle respectively. 4949 */ 4950 IRB_REFHOLD(sire->ire_bucket); 4951 /* Has it been removed already ? */ 4952 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4953 IRB_REFRELE(sire->ire_bucket); 4954 ire_refrele(ipif_ire); 4955 ire_refrele(save_ire); 4956 break; 4957 } 4958 4959 IRB_REFHOLD(ipif_ire->ire_bucket); 4960 /* Has it been removed already ? 
*/ 4961 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4962 IRB_REFRELE(ipif_ire->ire_bucket); 4963 IRB_REFRELE(sire->ire_bucket); 4964 ire_refrele(ipif_ire); 4965 ire_refrele(save_ire); 4966 break; 4967 } 4968 4969 xmit_mp = first_mp; 4970 if (ire->ire_flags & RTF_MULTIRT) { 4971 copy_mp = copymsg(first_mp); 4972 if (copy_mp != NULL) { 4973 xmit_mp = copy_mp; 4974 MULTIRT_DEBUG_TAG(first_mp); 4975 } 4976 } 4977 ire_add_then_send(q, ire, xmit_mp); 4978 if (ip6_asp_table_held) { 4979 ip6_asp_table_refrele(ipst); 4980 ip6_asp_table_held = B_FALSE; 4981 } 4982 ire_refrele(save_ire); 4983 4984 /* Assert that sire is not deleted yet. */ 4985 ASSERT(sire->ire_ptpn != NULL); 4986 IRB_REFRELE(sire->ire_bucket); 4987 4988 /* Assert that ipif_ire is not deleted yet. */ 4989 ASSERT(ipif_ire->ire_ptpn != NULL); 4990 IRB_REFRELE(ipif_ire->ire_bucket); 4991 ire_refrele(ipif_ire); 4992 4993 if (copy_mp != NULL) { 4994 /* 4995 * Search for the next unresolved 4996 * multirt route. 4997 */ 4998 copy_mp = NULL; 4999 ipif_ire = NULL; 5000 ire = NULL; 5001 /* re-enter the loop */ 5002 multirt_resolve_next = B_TRUE; 5003 continue; 5004 } 5005 ire_refrele(sire); 5006 ill_refrele(dst_ill); 5007 ipif_refrele(src_ipif); 5008 return; 5009 } 5010 case IRE_IF_NORESOLVER: 5011 /* 5012 * We have what we need to build an IRE_CACHE. 5013 * 5014 * handle the Gated case, where we create 5015 * a NORESOLVER route for loopback. 5016 */ 5017 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5018 break; 5019 /* 5020 * TSol note: We are creating the ire cache for the 5021 * destination 'dst'. If 'dst' is offlink, going 5022 * through the first hop 'gw', the security attributes 5023 * of 'dst' must be set to point to the gateway 5024 * credentials of gateway 'gw'. If 'dst' is onlink, it 5025 * is possible that 'dst' is a potential gateway that is 5026 * referenced by some route that has some security 5027 * attributes. 
Thus in the former case, we need to do a 5028 * gcgrp_lookup of 'gw' while in the latter case we 5029 * need to do gcgrp_lookup of 'dst' itself. 5030 */ 5031 ga.ga_af = AF_INET6; 5032 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5033 ga.ga_addr = v6gw; 5034 else 5035 ga.ga_addr = *v6dstp; 5036 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5037 5038 /* 5039 * Note: the new ire inherits sire flags RTF_SETSRC 5040 * and RTF_MULTIRT to propagate those rules from prefix 5041 * to cache. 5042 */ 5043 ire = ire_create_v6( 5044 v6dstp, /* dest address */ 5045 &ipv6_all_ones, /* mask */ 5046 &src_ipif->ipif_v6src_addr, /* source address */ 5047 &v6gw, /* gateway address */ 5048 &save_ire->ire_max_frag, 5049 NULL, /* no src nce */ 5050 dst_ill->ill_rq, /* recv-from queue */ 5051 dst_ill->ill_wq, /* send-to queue */ 5052 IRE_CACHE, 5053 src_ipif, 5054 &save_ire->ire_mask_v6, /* Parent mask */ 5055 (sire != NULL) ? /* Parent handle */ 5056 sire->ire_phandle : 0, 5057 save_ire->ire_ihandle, /* Interface handle */ 5058 (sire != NULL) ? /* flags if any */ 5059 sire->ire_flags & 5060 (RTF_SETSRC | RTF_MULTIRT) : 0, 5061 &(save_ire->ire_uinfo), 5062 NULL, 5063 gcgrp, 5064 ipst); 5065 5066 if (ire == NULL) { 5067 if (gcgrp != NULL) { 5068 GCGRP_REFRELE(gcgrp); 5069 gcgrp = NULL; 5070 } 5071 ire_refrele(save_ire); 5072 break; 5073 } 5074 5075 /* reference now held by IRE */ 5076 gcgrp = NULL; 5077 5078 ire->ire_marks |= ire_marks; 5079 5080 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5081 dst = v6gw; 5082 else 5083 dst = *v6dstp; 5084 err = ndp_noresolver(dst_ill, &dst); 5085 if (err != 0) { 5086 ire_refrele(save_ire); 5087 break; 5088 } 5089 5090 /* Prevent save_ire from getting deleted */ 5091 IRB_REFHOLD(save_ire->ire_bucket); 5092 /* Has it been removed already ? 
*/ 5093 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5094 IRB_REFRELE(save_ire->ire_bucket); 5095 ire_refrele(save_ire); 5096 break; 5097 } 5098 5099 xmit_mp = first_mp; 5100 /* 5101 * In case of MULTIRT, a copy of the current packet 5102 * to send is made to further re-enter the 5103 * loop and attempt another route resolution 5104 */ 5105 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5106 copy_mp = copymsg(first_mp); 5107 if (copy_mp != NULL) { 5108 xmit_mp = copy_mp; 5109 MULTIRT_DEBUG_TAG(first_mp); 5110 } 5111 } 5112 ire_add_then_send(q, ire, xmit_mp); 5113 if (ip6_asp_table_held) { 5114 ip6_asp_table_refrele(ipst); 5115 ip6_asp_table_held = B_FALSE; 5116 } 5117 5118 /* Assert that it is not deleted yet. */ 5119 ASSERT(save_ire->ire_ptpn != NULL); 5120 IRB_REFRELE(save_ire->ire_bucket); 5121 ire_refrele(save_ire); 5122 5123 if (copy_mp != NULL) { 5124 /* 5125 * If we found a (no)resolver, we ignore any 5126 * trailing top priority IRE_CACHE in 5127 * further loops. This ensures that we do not 5128 * omit any (no)resolver despite the priority 5129 * in this call. 5130 * IRE_CACHE, if any, will be processed 5131 * by another thread entering ip_newroute(), 5132 * (on resolver response, for example). 5133 * We use this to force multiple parallel 5134 * resolution as soon as a packet needs to be 5135 * sent. The result is, after one packet 5136 * emission all reachable routes are generally 5137 * resolved. 5138 * Otherwise, complete resolution of MULTIRT 5139 * routes would require several emissions as 5140 * side effect. 5141 */ 5142 multirt_flags &= ~MULTIRT_CACHEGW; 5143 5144 /* 5145 * Search for the next unresolved multirt 5146 * route. 
5147 */ 5148 copy_mp = NULL; 5149 save_ire = NULL; 5150 ire = NULL; 5151 /* re-enter the loop */ 5152 multirt_resolve_next = B_TRUE; 5153 continue; 5154 } 5155 5156 /* Don't need sire anymore */ 5157 if (sire != NULL) 5158 ire_refrele(sire); 5159 ill_refrele(dst_ill); 5160 ipif_refrele(src_ipif); 5161 return; 5162 5163 case IRE_IF_RESOLVER: 5164 /* 5165 * We can't build an IRE_CACHE yet, but at least we 5166 * found a resolver that can help. 5167 */ 5168 dst = *v6dstp; 5169 5170 /* 5171 * To be at this point in the code with a non-zero gw 5172 * means that dst is reachable through a gateway that 5173 * we have never resolved. By changing dst to the gw 5174 * addr we resolve the gateway first. When 5175 * ire_add_then_send() tries to put the IP dg to dst, 5176 * it will reenter ip_newroute() at which time we will 5177 * find the IRE_CACHE for the gw and create another 5178 * IRE_CACHE above (for dst itself). 5179 */ 5180 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5181 save_dst = dst; 5182 dst = v6gw; 5183 v6gw = ipv6_all_zeros; 5184 } 5185 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5186 /* 5187 * Ask the external resolver to do its thing. 
5188 * Make an mblk chain in the following form: 5189 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5190 */ 5191 mblk_t *ire_mp; 5192 mblk_t *areq_mp; 5193 areq_t *areq; 5194 in6_addr_t *addrp; 5195 5196 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5197 if (ip6_asp_table_held) { 5198 ip6_asp_table_refrele(ipst); 5199 ip6_asp_table_held = B_FALSE; 5200 } 5201 ire = ire_create_mp_v6( 5202 &dst, /* dest address */ 5203 &ipv6_all_ones, /* mask */ 5204 &src_ipif->ipif_v6src_addr, 5205 /* source address */ 5206 &v6gw, /* gateway address */ 5207 NULL, /* no src nce */ 5208 dst_ill->ill_rq, /* recv-from queue */ 5209 dst_ill->ill_wq, /* send-to queue */ 5210 IRE_CACHE, 5211 src_ipif, 5212 &save_ire->ire_mask_v6, /* Parent mask */ 5213 0, 5214 save_ire->ire_ihandle, 5215 /* Interface handle */ 5216 0, /* flags if any */ 5217 &(save_ire->ire_uinfo), 5218 NULL, 5219 NULL, 5220 ipst); 5221 5222 ire_refrele(save_ire); 5223 if (ire == NULL) { 5224 ip1dbg(("ip_newroute_v6:" 5225 "ire is NULL\n")); 5226 break; 5227 } 5228 5229 if ((sire != NULL) && 5230 (sire->ire_flags & RTF_MULTIRT)) { 5231 /* 5232 * processing a copy of the packet to 5233 * send for further resolution loops 5234 */ 5235 copy_mp = copymsg(first_mp); 5236 if (copy_mp != NULL) 5237 MULTIRT_DEBUG_TAG(copy_mp); 5238 } 5239 ire->ire_marks |= ire_marks; 5240 ire_mp = ire->ire_mp; 5241 /* 5242 * Now create or find an nce for this interface. 5243 * The hw addr will need to to be set from 5244 * the reply to the AR_ENTRY_QUERY that 5245 * we're about to send. This will be done in 5246 * ire_add_v6(). 5247 */ 5248 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5249 switch (err) { 5250 case 0: 5251 /* 5252 * New cache entry created. 5253 * Break, then ask the external 5254 * resolver. 5255 */ 5256 break; 5257 case EINPROGRESS: 5258 /* 5259 * Resolution in progress; 5260 * packet has been queued by 5261 * ndp_resolver(). 5262 */ 5263 ire_delete(ire); 5264 ire = NULL; 5265 /* 5266 * Check if another multirt 5267 * route must be resolved. 
5268 */ 5269 if (copy_mp != NULL) { 5270 /* 5271 * If we found a resolver, we 5272 * ignore any trailing top 5273 * priority IRE_CACHE in 5274 * further loops. The reason is 5275 * the same as for noresolver. 5276 */ 5277 multirt_flags &= 5278 ~MULTIRT_CACHEGW; 5279 /* 5280 * Search for the next 5281 * unresolved multirt route. 5282 */ 5283 first_mp = copy_mp; 5284 copy_mp = NULL; 5285 mp = first_mp; 5286 if (mp->b_datap->db_type == 5287 M_CTL) { 5288 mp = mp->b_cont; 5289 } 5290 ASSERT(sire != NULL); 5291 dst = save_dst; 5292 /* 5293 * re-enter the loop 5294 */ 5295 multirt_resolve_next = 5296 B_TRUE; 5297 continue; 5298 } 5299 5300 if (sire != NULL) 5301 ire_refrele(sire); 5302 ill_refrele(dst_ill); 5303 ipif_refrele(src_ipif); 5304 return; 5305 default: 5306 /* 5307 * Transient error; packet will be 5308 * freed. 5309 */ 5310 ire_delete(ire); 5311 ire = NULL; 5312 break; 5313 } 5314 if (err != 0) 5315 break; 5316 /* 5317 * Now set up the AR_ENTRY_QUERY and send it. 5318 */ 5319 areq_mp = ill_arp_alloc(dst_ill, 5320 (uchar_t *)&ipv6_areq_template, 5321 (caddr_t)&dst); 5322 if (areq_mp == NULL) { 5323 ip1dbg(("ip_newroute_v6:" 5324 "areq_mp is NULL\n")); 5325 freemsg(ire_mp); 5326 break; 5327 } 5328 areq = (areq_t *)areq_mp->b_rptr; 5329 addrp = (in6_addr_t *)((char *)areq + 5330 areq->areq_target_addr_offset); 5331 *addrp = dst; 5332 addrp = (in6_addr_t *)((char *)areq + 5333 areq->areq_sender_addr_offset); 5334 *addrp = src_ipif->ipif_v6src_addr; 5335 /* 5336 * link the chain, then send up to the resolver. 5337 */ 5338 linkb(areq_mp, ire_mp); 5339 linkb(areq_mp, mp); 5340 ip1dbg(("ip_newroute_v6:" 5341 "putnext to resolver\n")); 5342 putnext(dst_ill->ill_rq, areq_mp); 5343 /* 5344 * Check if another multirt route 5345 * must be resolved. 5346 */ 5347 ire = NULL; 5348 if (copy_mp != NULL) { 5349 /* 5350 * If we find a resolver, we ignore any 5351 * trailing top priority IRE_CACHE in 5352 * further loops. The reason is the 5353 * same as for noresolver. 
5354 */ 5355 multirt_flags &= ~MULTIRT_CACHEGW; 5356 /* 5357 * Search for the next unresolved 5358 * multirt route. 5359 */ 5360 first_mp = copy_mp; 5361 copy_mp = NULL; 5362 mp = first_mp; 5363 if (mp->b_datap->db_type == M_CTL) { 5364 mp = mp->b_cont; 5365 } 5366 ASSERT(sire != NULL); 5367 dst = save_dst; 5368 /* 5369 * re-enter the loop 5370 */ 5371 multirt_resolve_next = B_TRUE; 5372 continue; 5373 } 5374 5375 if (sire != NULL) 5376 ire_refrele(sire); 5377 ill_refrele(dst_ill); 5378 ipif_refrele(src_ipif); 5379 return; 5380 } 5381 /* 5382 * Non-external resolver case. 5383 * 5384 * TSol note: Please see the note above the 5385 * IRE_IF_NORESOLVER case. 5386 */ 5387 ga.ga_af = AF_INET6; 5388 ga.ga_addr = dst; 5389 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5390 5391 ire = ire_create_v6( 5392 &dst, /* dest address */ 5393 &ipv6_all_ones, /* mask */ 5394 &src_ipif->ipif_v6src_addr, /* source address */ 5395 &v6gw, /* gateway address */ 5396 &save_ire->ire_max_frag, 5397 NULL, /* no src nce */ 5398 dst_ill->ill_rq, /* recv-from queue */ 5399 dst_ill->ill_wq, /* send-to queue */ 5400 IRE_CACHE, 5401 src_ipif, 5402 &save_ire->ire_mask_v6, /* Parent mask */ 5403 0, 5404 save_ire->ire_ihandle, /* Interface handle */ 5405 0, /* flags if any */ 5406 &(save_ire->ire_uinfo), 5407 NULL, 5408 gcgrp, 5409 ipst); 5410 5411 if (ire == NULL) { 5412 if (gcgrp != NULL) { 5413 GCGRP_REFRELE(gcgrp); 5414 gcgrp = NULL; 5415 } 5416 ire_refrele(save_ire); 5417 break; 5418 } 5419 5420 /* reference now held by IRE */ 5421 gcgrp = NULL; 5422 5423 if ((sire != NULL) && 5424 (sire->ire_flags & RTF_MULTIRT)) { 5425 copy_mp = copymsg(first_mp); 5426 if (copy_mp != NULL) 5427 MULTIRT_DEBUG_TAG(copy_mp); 5428 } 5429 5430 ire->ire_marks |= ire_marks; 5431 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5432 switch (err) { 5433 case 0: 5434 /* Prevent save_ire from getting deleted */ 5435 IRB_REFHOLD(save_ire->ire_bucket); 5436 /* Has it been removed already ? 
*/ 5437 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5438 IRB_REFRELE(save_ire->ire_bucket); 5439 ire_refrele(save_ire); 5440 break; 5441 } 5442 5443 /* 5444 * We have a resolved cache entry, 5445 * add in the IRE. 5446 */ 5447 ire_add_then_send(q, ire, first_mp); 5448 if (ip6_asp_table_held) { 5449 ip6_asp_table_refrele(ipst); 5450 ip6_asp_table_held = B_FALSE; 5451 } 5452 5453 /* Assert that it is not deleted yet. */ 5454 ASSERT(save_ire->ire_ptpn != NULL); 5455 IRB_REFRELE(save_ire->ire_bucket); 5456 ire_refrele(save_ire); 5457 /* 5458 * Check if another multirt route 5459 * must be resolved. 5460 */ 5461 ire = NULL; 5462 if (copy_mp != NULL) { 5463 /* 5464 * If we find a resolver, we ignore any 5465 * trailing top priority IRE_CACHE in 5466 * further loops. The reason is the 5467 * same as for noresolver. 5468 */ 5469 multirt_flags &= ~MULTIRT_CACHEGW; 5470 /* 5471 * Search for the next unresolved 5472 * multirt route. 5473 */ 5474 first_mp = copy_mp; 5475 copy_mp = NULL; 5476 mp = first_mp; 5477 if (mp->b_datap->db_type == M_CTL) { 5478 mp = mp->b_cont; 5479 } 5480 ASSERT(sire != NULL); 5481 dst = save_dst; 5482 /* 5483 * re-enter the loop 5484 */ 5485 multirt_resolve_next = B_TRUE; 5486 continue; 5487 } 5488 5489 if (sire != NULL) 5490 ire_refrele(sire); 5491 ill_refrele(dst_ill); 5492 ipif_refrele(src_ipif); 5493 return; 5494 5495 case EINPROGRESS: 5496 /* 5497 * mp was consumed - presumably queued. 5498 * No need for ire, presumably resolution is 5499 * in progress, and ire will be added when the 5500 * address is resolved. 5501 */ 5502 if (ip6_asp_table_held) { 5503 ip6_asp_table_refrele(ipst); 5504 ip6_asp_table_held = B_FALSE; 5505 } 5506 ASSERT(ire->ire_nce == NULL); 5507 ire_delete(ire); 5508 ire_refrele(save_ire); 5509 /* 5510 * Check if another multirt route 5511 * must be resolved. 
5512 */ 5513 ire = NULL; 5514 if (copy_mp != NULL) { 5515 /* 5516 * If we find a resolver, we ignore any 5517 * trailing top priority IRE_CACHE in 5518 * further loops. The reason is the 5519 * same as for noresolver. 5520 */ 5521 multirt_flags &= ~MULTIRT_CACHEGW; 5522 /* 5523 * Search for the next unresolved 5524 * multirt route. 5525 */ 5526 first_mp = copy_mp; 5527 copy_mp = NULL; 5528 mp = first_mp; 5529 if (mp->b_datap->db_type == M_CTL) { 5530 mp = mp->b_cont; 5531 } 5532 ASSERT(sire != NULL); 5533 dst = save_dst; 5534 /* 5535 * re-enter the loop 5536 */ 5537 multirt_resolve_next = B_TRUE; 5538 continue; 5539 } 5540 if (sire != NULL) 5541 ire_refrele(sire); 5542 ill_refrele(dst_ill); 5543 ipif_refrele(src_ipif); 5544 return; 5545 default: 5546 /* Some transient error */ 5547 ASSERT(ire->ire_nce == NULL); 5548 ire_refrele(save_ire); 5549 break; 5550 } 5551 break; 5552 default: 5553 break; 5554 } 5555 if (ip6_asp_table_held) { 5556 ip6_asp_table_refrele(ipst); 5557 ip6_asp_table_held = B_FALSE; 5558 } 5559 } while (multirt_resolve_next); 5560 5561 err_ret: 5562 ip1dbg(("ip_newroute_v6: dropped\n")); 5563 if (src_ipif != NULL) 5564 ipif_refrele(src_ipif); 5565 if (dst_ill != NULL) { 5566 need_rele = B_TRUE; 5567 ill = dst_ill; 5568 } 5569 if (ill != NULL) { 5570 if (mp->b_prev != NULL) { 5571 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5572 } else { 5573 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5574 } 5575 5576 if (need_rele) 5577 ill_refrele(ill); 5578 } else { 5579 if (mp->b_prev != NULL) { 5580 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5581 } else { 5582 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5583 } 5584 } 5585 /* Did this packet originate externally? 
*/ 5586 if (mp->b_prev) { 5587 mp->b_next = NULL; 5588 mp->b_prev = NULL; 5589 } 5590 if (copy_mp != NULL) { 5591 MULTIRT_DEBUG_UNTAG(copy_mp); 5592 freemsg(copy_mp); 5593 } 5594 MULTIRT_DEBUG_UNTAG(first_mp); 5595 freemsg(first_mp); 5596 if (ire != NULL) 5597 ire_refrele(ire); 5598 if (sire != NULL) 5599 ire_refrele(sire); 5600 return; 5601 5602 icmp_err_ret: 5603 if (ip6_asp_table_held) 5604 ip6_asp_table_refrele(ipst); 5605 if (src_ipif != NULL) 5606 ipif_refrele(src_ipif); 5607 if (dst_ill != NULL) { 5608 need_rele = B_TRUE; 5609 ill = dst_ill; 5610 } 5611 ip1dbg(("ip_newroute_v6: no route\n")); 5612 if (sire != NULL) 5613 ire_refrele(sire); 5614 /* 5615 * We need to set sire to NULL to avoid double freeing if we 5616 * ever goto err_ret from below. 5617 */ 5618 sire = NULL; 5619 ip6h = (ip6_t *)mp->b_rptr; 5620 /* Skip ip6i_t header if present */ 5621 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5622 /* Make sure the IPv6 header is present */ 5623 if ((mp->b_wptr - (uchar_t *)ip6h) < 5624 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5625 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5626 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5627 goto err_ret; 5628 } 5629 } 5630 mp->b_rptr += sizeof (ip6i_t); 5631 ip6h = (ip6_t *)mp->b_rptr; 5632 } 5633 /* Did this packet originate externally? 
*/ 5634 if (mp->b_prev) { 5635 if (ill != NULL) { 5636 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5637 } else { 5638 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5639 } 5640 mp->b_next = NULL; 5641 mp->b_prev = NULL; 5642 q = WR(q); 5643 } else { 5644 if (ill != NULL) { 5645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5646 } else { 5647 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5648 } 5649 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5650 /* Failed */ 5651 if (copy_mp != NULL) { 5652 MULTIRT_DEBUG_UNTAG(copy_mp); 5653 freemsg(copy_mp); 5654 } 5655 MULTIRT_DEBUG_UNTAG(first_mp); 5656 freemsg(first_mp); 5657 if (ire != NULL) 5658 ire_refrele(ire); 5659 if (need_rele) 5660 ill_refrele(ill); 5661 return; 5662 } 5663 } 5664 5665 if (need_rele) 5666 ill_refrele(ill); 5667 5668 /* 5669 * At this point we will have ire only if RTF_BLACKHOLE 5670 * or RTF_REJECT flags are set on the IRE. It will not 5671 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5672 */ 5673 if (ire != NULL) { 5674 if (ire->ire_flags & RTF_BLACKHOLE) { 5675 ire_refrele(ire); 5676 if (copy_mp != NULL) { 5677 MULTIRT_DEBUG_UNTAG(copy_mp); 5678 freemsg(copy_mp); 5679 } 5680 MULTIRT_DEBUG_UNTAG(first_mp); 5681 freemsg(first_mp); 5682 return; 5683 } 5684 ire_refrele(ire); 5685 } 5686 if (ip_debug > 3) { 5687 /* ip2dbg */ 5688 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5689 AF_INET6, v6dstp); 5690 } 5691 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5692 B_FALSE, B_FALSE, zoneid, ipst); 5693 } 5694 5695 /* 5696 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5697 * we need to send out a packet to a destination address for which we do not 5698 * have specific routing information. It is only used for multicast packets. 5699 * 5700 * If unspec_src we allow creating an IRE with source address zero. 5701 * ire_send_v6() will delete it after the packet is sent. 
5702 */ 5703 void 5704 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5705 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5706 { 5707 ire_t *ire = NULL; 5708 ipif_t *src_ipif = NULL; 5709 int err = 0; 5710 ill_t *dst_ill = NULL; 5711 ire_t *save_ire; 5712 ushort_t ire_marks = 0; 5713 ipsec_out_t *io; 5714 ill_t *attach_ill = NULL; 5715 ill_t *ill; 5716 ip6_t *ip6h; 5717 mblk_t *first_mp; 5718 boolean_t ip6i_present; 5719 ire_t *fire = NULL; 5720 mblk_t *copy_mp = NULL; 5721 boolean_t multirt_resolve_next; 5722 in6_addr_t *v6dstp = &v6dst; 5723 boolean_t ipif_held = B_FALSE; 5724 boolean_t ill_held = B_FALSE; 5725 boolean_t ip6_asp_table_held = B_FALSE; 5726 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5727 5728 /* 5729 * This loop is run only once in most cases. 5730 * We loop to resolve further routes only when the destination 5731 * can be reached through multiple RTF_MULTIRT-flagged ires. 5732 */ 5733 do { 5734 multirt_resolve_next = B_FALSE; 5735 if (dst_ill != NULL) { 5736 ill_refrele(dst_ill); 5737 dst_ill = NULL; 5738 } 5739 5740 if (src_ipif != NULL) { 5741 ipif_refrele(src_ipif); 5742 src_ipif = NULL; 5743 } 5744 ASSERT(ipif != NULL); 5745 ill = ipif->ipif_ill; 5746 5747 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5748 if (ip_debug > 2) { 5749 /* ip1dbg */ 5750 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5751 AF_INET6, v6dstp); 5752 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5753 ill->ill_name, ipif->ipif_isv6); 5754 } 5755 5756 first_mp = mp; 5757 if (mp->b_datap->db_type == M_CTL) { 5758 mp = mp->b_cont; 5759 io = (ipsec_out_t *)first_mp->b_rptr; 5760 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5761 } else { 5762 io = NULL; 5763 } 5764 5765 /* 5766 * If the interface is a pt-pt interface we look for an 5767 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5768 * local_address and the pt-pt destination address. 5769 * Otherwise we just match the local address. 
5770 */ 5771 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5772 goto err_ret; 5773 } 5774 /* 5775 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5776 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5777 * as it could be NULL. 5778 * 5779 * This information can appear either in an ip6i_t or an 5780 * IPSEC_OUT message. 5781 */ 5782 ip6h = (ip6_t *)mp->b_rptr; 5783 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5784 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5785 if (!ip6i_present || 5786 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5787 attach_ill = ip_grab_attach_ill(ill, first_mp, 5788 (ip6i_present ? 5789 ((ip6i_t *)ip6h)->ip6i_ifindex : 5790 io->ipsec_out_ill_index), B_TRUE, ipst); 5791 /* Failure case frees things for us. */ 5792 if (attach_ill == NULL) 5793 return; 5794 5795 /* 5796 * Check if we need an ire that will not be 5797 * looked up by anybody else i.e. HIDDEN. 5798 */ 5799 if (ill_is_probeonly(attach_ill)) 5800 ire_marks = IRE_MARK_HIDDEN; 5801 } 5802 } 5803 5804 /* 5805 * We check if an IRE_OFFSUBNET for the addr that goes through 5806 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5807 * RTF_MULTIRT flags must be honored. 5808 */ 5809 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5810 ip2dbg(("ip_newroute_ipif_v6: " 5811 "ipif_lookup_multi_ire_v6(" 5812 "ipif %p, dst %08x) = fire %p\n", 5813 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5814 (void *)fire)); 5815 5816 /* 5817 * If the application specified the ill (ifindex), we still 5818 * load spread. Only if the packets needs to go out specifically 5819 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5820 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5821 * multirouting, then we don't try to use a different ill for 5822 * load spreading. 
5823 */ 5824 if (attach_ill == NULL) { 5825 /* 5826 * If the interface belongs to an interface group, 5827 * make sure the next possible interface in the group 5828 * is used. This encourages load spreading among peers 5829 * in an interface group. 5830 * 5831 * Note: While we pick a dst_ill we are really only 5832 * interested in the ill for load spreading. The source 5833 * ipif is determined by source address selection below. 5834 */ 5835 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5836 dst_ill = ipif->ipif_ill; 5837 /* For uniformity do a refhold */ 5838 ill_refhold(dst_ill); 5839 } else { 5840 /* refheld by ip_newroute_get_dst_ill_v6 */ 5841 dst_ill = 5842 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5843 } 5844 if (dst_ill == NULL) { 5845 if (ip_debug > 2) { 5846 pr_addr_dbg("ip_newroute_ipif_v6: " 5847 "no dst ill for dst %s\n", 5848 AF_INET6, v6dstp); 5849 } 5850 goto err_ret; 5851 } 5852 } else { 5853 dst_ill = ipif->ipif_ill; 5854 /* 5855 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5856 * and IPV6_BOUND_PIF case. 5857 */ 5858 ASSERT(dst_ill == attach_ill); 5859 /* attach_ill is already refheld */ 5860 } 5861 /* 5862 * Pick a source address which matches the scope of the 5863 * destination address. 5864 * For RTF_SETSRC routes, the source address is imposed by the 5865 * parent ire (fire). 5866 */ 5867 ASSERT(src_ipif == NULL); 5868 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5869 /* 5870 * Check that the ipif matching the requested source 5871 * address still exists. 
5872 */ 5873 src_ipif = 5874 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5875 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5876 } 5877 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5878 ip6_asp_table_held = B_TRUE; 5879 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5880 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5881 } 5882 5883 if (src_ipif == NULL) { 5884 if (!unspec_src) { 5885 if (ip_debug > 2) { 5886 /* ip1dbg */ 5887 pr_addr_dbg("ip_newroute_ipif_v6: " 5888 "no src for dst %s\n,", 5889 AF_INET6, v6dstp); 5890 printf(" through interface %s\n", 5891 dst_ill->ill_name); 5892 } 5893 goto err_ret; 5894 } 5895 src_ipif = ipif; 5896 ipif_refhold(src_ipif); 5897 } 5898 ire = ipif_to_ire_v6(ipif); 5899 if (ire == NULL) { 5900 if (ip_debug > 2) { 5901 /* ip1dbg */ 5902 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5903 AF_INET6, &ipif->ipif_v6lcl_addr); 5904 printf("ip_newroute_ipif_v6: " 5905 "if %s\n", dst_ill->ill_name); 5906 } 5907 goto err_ret; 5908 } 5909 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5910 goto err_ret; 5911 5912 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5913 5914 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5915 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5916 if (ip_debug > 2) { 5917 /* ip1dbg */ 5918 pr_addr_dbg(" address %s\n", 5919 AF_INET6, &ire->ire_src_addr_v6); 5920 } 5921 save_ire = ire; 5922 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5923 (void *)ire, (void *)ipif)); 5924 5925 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5926 /* 5927 * an IRE_OFFSUBET was looked up 5928 * on that interface. 5929 * this ire has RTF_MULTIRT flag, 5930 * so the resolution loop 5931 * will be re-entered to resolve 5932 * additional routes on other 5933 * interfaces. For that purpose, 5934 * a copy of the packet is 5935 * made at this point. 
5936 */ 5937 fire->ire_last_used_time = lbolt; 5938 copy_mp = copymsg(first_mp); 5939 if (copy_mp) { 5940 MULTIRT_DEBUG_TAG(copy_mp); 5941 } 5942 } 5943 5944 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5945 switch (ire->ire_type) { 5946 case IRE_IF_NORESOLVER: { 5947 /* 5948 * We have what we need to build an IRE_CACHE. 5949 * 5950 * handle the Gated case, where we create 5951 * a NORESOLVER route for loopback. 5952 */ 5953 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5954 break; 5955 /* 5956 * The newly created ire will inherit the flags of the 5957 * parent ire, if any. 5958 */ 5959 ire = ire_create_v6( 5960 v6dstp, /* dest address */ 5961 &ipv6_all_ones, /* mask */ 5962 &src_ipif->ipif_v6src_addr, /* source address */ 5963 NULL, /* gateway address */ 5964 &save_ire->ire_max_frag, 5965 NULL, /* no src nce */ 5966 dst_ill->ill_rq, /* recv-from queue */ 5967 dst_ill->ill_wq, /* send-to queue */ 5968 IRE_CACHE, 5969 src_ipif, 5970 NULL, 5971 (fire != NULL) ? /* Parent handle */ 5972 fire->ire_phandle : 0, 5973 save_ire->ire_ihandle, /* Interface handle */ 5974 (fire != NULL) ? 5975 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5976 0, 5977 &ire_uinfo_null, 5978 NULL, 5979 NULL, 5980 ipst); 5981 5982 if (ire == NULL) { 5983 ire_refrele(save_ire); 5984 break; 5985 } 5986 5987 ire->ire_marks |= ire_marks; 5988 5989 err = ndp_noresolver(dst_ill, v6dstp); 5990 if (err != 0) { 5991 ire_refrele(save_ire); 5992 break; 5993 } 5994 5995 /* Prevent save_ire from getting deleted */ 5996 IRB_REFHOLD(save_ire->ire_bucket); 5997 /* Has it been removed already ? */ 5998 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5999 IRB_REFRELE(save_ire->ire_bucket); 6000 ire_refrele(save_ire); 6001 break; 6002 } 6003 6004 ire_add_then_send(q, ire, first_mp); 6005 if (ip6_asp_table_held) { 6006 ip6_asp_table_refrele(ipst); 6007 ip6_asp_table_held = B_FALSE; 6008 } 6009 6010 /* Assert that it is not deleted yet. 
*/ 6011 ASSERT(save_ire->ire_ptpn != NULL); 6012 IRB_REFRELE(save_ire->ire_bucket); 6013 ire_refrele(save_ire); 6014 if (fire != NULL) { 6015 ire_refrele(fire); 6016 fire = NULL; 6017 } 6018 6019 /* 6020 * The resolution loop is re-entered if we 6021 * actually are in a multirouting case. 6022 */ 6023 if (copy_mp != NULL) { 6024 boolean_t need_resolve = 6025 ire_multirt_need_resolve_v6(v6dstp, 6026 MBLK_GETLABEL(copy_mp), ipst); 6027 if (!need_resolve) { 6028 MULTIRT_DEBUG_UNTAG(copy_mp); 6029 freemsg(copy_mp); 6030 copy_mp = NULL; 6031 } else { 6032 /* 6033 * ipif_lookup_group_v6() calls 6034 * ire_lookup_multi_v6() that uses 6035 * ire_ftable_lookup_v6() to find 6036 * an IRE_INTERFACE for the group. 6037 * In the multirt case, 6038 * ire_lookup_multi_v6() then invokes 6039 * ire_multirt_lookup_v6() to find 6040 * the next resolvable ire. 6041 * As a result, we obtain a new 6042 * interface, derived from the 6043 * next ire. 6044 */ 6045 if (ipif_held) { 6046 ipif_refrele(ipif); 6047 ipif_held = B_FALSE; 6048 } 6049 ipif = ipif_lookup_group_v6(v6dstp, 6050 zoneid, ipst); 6051 ip2dbg(("ip_newroute_ipif: " 6052 "multirt dst %08x, ipif %p\n", 6053 ntohl(V4_PART_OF_V6((*v6dstp))), 6054 (void *)ipif)); 6055 if (ipif != NULL) { 6056 ipif_held = B_TRUE; 6057 mp = copy_mp; 6058 copy_mp = NULL; 6059 multirt_resolve_next = 6060 B_TRUE; 6061 continue; 6062 } else { 6063 freemsg(copy_mp); 6064 } 6065 } 6066 } 6067 ill_refrele(dst_ill); 6068 if (ipif_held) { 6069 ipif_refrele(ipif); 6070 ipif_held = B_FALSE; 6071 } 6072 if (src_ipif != NULL) 6073 ipif_refrele(src_ipif); 6074 return; 6075 } 6076 case IRE_IF_RESOLVER: { 6077 6078 ASSERT(dst_ill->ill_isv6); 6079 6080 /* 6081 * We obtain a partial IRE_CACHE which we will pass 6082 * along with the resolver query. When the response 6083 * comes back it will be there ready for us to add. 6084 */ 6085 /* 6086 * the newly created ire will inherit the flags of the 6087 * parent ire, if any. 
6088 */ 6089 ire = ire_create_v6( 6090 v6dstp, /* dest address */ 6091 &ipv6_all_ones, /* mask */ 6092 &src_ipif->ipif_v6src_addr, /* source address */ 6093 NULL, /* gateway address */ 6094 &save_ire->ire_max_frag, 6095 NULL, /* src nce */ 6096 dst_ill->ill_rq, /* recv-from queue */ 6097 dst_ill->ill_wq, /* send-to queue */ 6098 IRE_CACHE, 6099 src_ipif, 6100 NULL, 6101 (fire != NULL) ? /* Parent handle */ 6102 fire->ire_phandle : 0, 6103 save_ire->ire_ihandle, /* Interface handle */ 6104 (fire != NULL) ? 6105 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6106 0, 6107 &ire_uinfo_null, 6108 NULL, 6109 NULL, 6110 ipst); 6111 6112 if (ire == NULL) { 6113 ire_refrele(save_ire); 6114 break; 6115 } 6116 6117 ire->ire_marks |= ire_marks; 6118 6119 /* Resolve and add ire to the ctable */ 6120 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6121 switch (err) { 6122 case 0: 6123 /* Prevent save_ire from getting deleted */ 6124 IRB_REFHOLD(save_ire->ire_bucket); 6125 /* Has it been removed already ? */ 6126 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6127 IRB_REFRELE(save_ire->ire_bucket); 6128 ire_refrele(save_ire); 6129 break; 6130 } 6131 /* 6132 * We have a resolved cache entry, 6133 * add in the IRE. 6134 */ 6135 ire_add_then_send(q, ire, first_mp); 6136 if (ip6_asp_table_held) { 6137 ip6_asp_table_refrele(ipst); 6138 ip6_asp_table_held = B_FALSE; 6139 } 6140 6141 /* Assert that it is not deleted yet. */ 6142 ASSERT(save_ire->ire_ptpn != NULL); 6143 IRB_REFRELE(save_ire->ire_bucket); 6144 ire_refrele(save_ire); 6145 if (fire != NULL) { 6146 ire_refrele(fire); 6147 fire = NULL; 6148 } 6149 6150 /* 6151 * The resolution loop is re-entered if we 6152 * actually are in a multirouting case. 
6153 */ 6154 if (copy_mp != NULL) { 6155 boolean_t need_resolve = 6156 ire_multirt_need_resolve_v6(v6dstp, 6157 MBLK_GETLABEL(copy_mp), ipst); 6158 if (!need_resolve) { 6159 MULTIRT_DEBUG_UNTAG(copy_mp); 6160 freemsg(copy_mp); 6161 copy_mp = NULL; 6162 } else { 6163 /* 6164 * ipif_lookup_group_v6() calls 6165 * ire_lookup_multi_v6() that 6166 * uses ire_ftable_lookup_v6() 6167 * to find an IRE_INTERFACE for 6168 * the group. In the multirt 6169 * case, ire_lookup_multi_v6() 6170 * then invokes 6171 * ire_multirt_lookup_v6() to 6172 * find the next resolvable ire. 6173 * As a result, we obtain a new 6174 * interface, derived from the 6175 * next ire. 6176 */ 6177 if (ipif_held) { 6178 ipif_refrele(ipif); 6179 ipif_held = B_FALSE; 6180 } 6181 ipif = ipif_lookup_group_v6( 6182 v6dstp, zoneid, ipst); 6183 ip2dbg(("ip_newroute_ipif: " 6184 "multirt dst %08x, " 6185 "ipif %p\n", 6186 ntohl(V4_PART_OF_V6( 6187 (*v6dstp))), 6188 (void *)ipif)); 6189 if (ipif != NULL) { 6190 ipif_held = B_TRUE; 6191 mp = copy_mp; 6192 copy_mp = NULL; 6193 multirt_resolve_next = 6194 B_TRUE; 6195 continue; 6196 } else { 6197 freemsg(copy_mp); 6198 } 6199 } 6200 } 6201 ill_refrele(dst_ill); 6202 if (ipif_held) { 6203 ipif_refrele(ipif); 6204 ipif_held = B_FALSE; 6205 } 6206 if (src_ipif != NULL) 6207 ipif_refrele(src_ipif); 6208 return; 6209 6210 case EINPROGRESS: 6211 /* 6212 * mp was consumed - presumably queued. 6213 * No need for ire, presumably resolution is 6214 * in progress, and ire will be added when the 6215 * address is resolved. 6216 */ 6217 if (ip6_asp_table_held) { 6218 ip6_asp_table_refrele(ipst); 6219 ip6_asp_table_held = B_FALSE; 6220 } 6221 ire_delete(ire); 6222 ire_refrele(save_ire); 6223 if (fire != NULL) { 6224 ire_refrele(fire); 6225 fire = NULL; 6226 } 6227 6228 /* 6229 * The resolution loop is re-entered if we 6230 * actually are in a multirouting case. 
6231 */ 6232 if (copy_mp != NULL) { 6233 boolean_t need_resolve = 6234 ire_multirt_need_resolve_v6(v6dstp, 6235 MBLK_GETLABEL(copy_mp), ipst); 6236 if (!need_resolve) { 6237 MULTIRT_DEBUG_UNTAG(copy_mp); 6238 freemsg(copy_mp); 6239 copy_mp = NULL; 6240 } else { 6241 /* 6242 * ipif_lookup_group_v6() calls 6243 * ire_lookup_multi_v6() that 6244 * uses ire_ftable_lookup_v6() 6245 * to find an IRE_INTERFACE for 6246 * the group. In the multirt 6247 * case, ire_lookup_multi_v6() 6248 * then invokes 6249 * ire_multirt_lookup_v6() to 6250 * find the next resolvable ire. 6251 * As a result, we obtain a new 6252 * interface, derived from the 6253 * next ire. 6254 */ 6255 if (ipif_held) { 6256 ipif_refrele(ipif); 6257 ipif_held = B_FALSE; 6258 } 6259 ipif = ipif_lookup_group_v6( 6260 v6dstp, zoneid, ipst); 6261 ip2dbg(("ip_newroute_ipif: " 6262 "multirt dst %08x, " 6263 "ipif %p\n", 6264 ntohl(V4_PART_OF_V6( 6265 (*v6dstp))), 6266 (void *)ipif)); 6267 if (ipif != NULL) { 6268 ipif_held = B_TRUE; 6269 mp = copy_mp; 6270 copy_mp = NULL; 6271 multirt_resolve_next = 6272 B_TRUE; 6273 continue; 6274 } else { 6275 freemsg(copy_mp); 6276 } 6277 } 6278 } 6279 ill_refrele(dst_ill); 6280 if (ipif_held) { 6281 ipif_refrele(ipif); 6282 ipif_held = B_FALSE; 6283 } 6284 if (src_ipif != NULL) 6285 ipif_refrele(src_ipif); 6286 return; 6287 default: 6288 /* Some transient error */ 6289 ire_refrele(save_ire); 6290 break; 6291 } 6292 break; 6293 } 6294 default: 6295 break; 6296 } 6297 if (ip6_asp_table_held) { 6298 ip6_asp_table_refrele(ipst); 6299 ip6_asp_table_held = B_FALSE; 6300 } 6301 } while (multirt_resolve_next); 6302 6303 err_ret: 6304 if (ip6_asp_table_held) 6305 ip6_asp_table_refrele(ipst); 6306 if (ire != NULL) 6307 ire_refrele(ire); 6308 if (fire != NULL) 6309 ire_refrele(fire); 6310 if (ipif != NULL && ipif_held) 6311 ipif_refrele(ipif); 6312 if (src_ipif != NULL) 6313 ipif_refrele(src_ipif); 6314 /* Multicast - no point in trying to generate ICMP error */ 6315 ASSERT((attach_ill 
== NULL) || (dst_ill == attach_ill)); 6316 if (dst_ill != NULL) { 6317 ill = dst_ill; 6318 ill_held = B_TRUE; 6319 } 6320 if (mp->b_prev || mp->b_next) { 6321 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6322 } else { 6323 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6324 } 6325 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6326 mp->b_next = NULL; 6327 mp->b_prev = NULL; 6328 freemsg(first_mp); 6329 if (ill_held) 6330 ill_refrele(ill); 6331 } 6332 6333 /* 6334 * Parse and process any hop-by-hop or destination options. 6335 * 6336 * Assumes that q is an ill read queue so that ICMP errors for link-local 6337 * destinations are sent out the correct interface. 6338 * 6339 * Returns -1 if there was an error and mp has been consumed. 6340 * Returns 0 if no special action is needed. 6341 * Returns 1 if the packet contained a router alert option for this node 6342 * which is verified to be "interesting/known" for our implementation. 6343 * 6344 * XXX Note: In future as more hbh or dest options are defined, 6345 * it may be better to have different routines for hbh and dest 6346 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6347 * may have same value in different namespaces. Or is it same namespace ?? 
6348 * Current code checks for each opt_type (other than pads) if it is in 6349 * the expected nexthdr (hbh or dest) 6350 */ 6351 static int 6352 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6353 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6354 { 6355 uint8_t opt_type; 6356 uint_t optused; 6357 int ret = 0; 6358 mblk_t *first_mp; 6359 const char *errtype; 6360 zoneid_t zoneid; 6361 ill_t *ill = q->q_ptr; 6362 6363 first_mp = mp; 6364 if (mp->b_datap->db_type == M_CTL) { 6365 mp = mp->b_cont; 6366 } 6367 6368 while (optlen != 0) { 6369 opt_type = *optptr; 6370 if (opt_type == IP6OPT_PAD1) { 6371 optused = 1; 6372 } else { 6373 if (optlen < 2) 6374 goto bad_opt; 6375 errtype = "malformed"; 6376 if (opt_type == ip6opt_ls) { 6377 optused = 2 + optptr[1]; 6378 if (optused > optlen) 6379 goto bad_opt; 6380 } else switch (opt_type) { 6381 case IP6OPT_PADN: 6382 /* 6383 * Note:We don't verify that (N-2) pad octets 6384 * are zero as required by spec. Adhere to 6385 * "be liberal in what you accept..." part of 6386 * implementation philosophy (RFC791,RFC1122) 6387 */ 6388 optused = 2 + optptr[1]; 6389 if (optused > optlen) 6390 goto bad_opt; 6391 break; 6392 6393 case IP6OPT_JUMBO: 6394 if (hdr_type != IPPROTO_HOPOPTS) 6395 goto opt_error; 6396 goto opt_error; /* XXX Not implemented! 
*/ 6397 6398 case IP6OPT_ROUTER_ALERT: { 6399 struct ip6_opt_router *or; 6400 6401 if (hdr_type != IPPROTO_HOPOPTS) 6402 goto opt_error; 6403 optused = 2 + optptr[1]; 6404 if (optused > optlen) 6405 goto bad_opt; 6406 or = (struct ip6_opt_router *)optptr; 6407 /* Check total length and alignment */ 6408 if (optused != sizeof (*or) || 6409 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6410 goto opt_error; 6411 /* Check value */ 6412 switch (*((uint16_t *)or->ip6or_value)) { 6413 case IP6_ALERT_MLD: 6414 case IP6_ALERT_RSVP: 6415 ret = 1; 6416 } 6417 break; 6418 } 6419 case IP6OPT_HOME_ADDRESS: { 6420 /* 6421 * Minimal support for the home address option 6422 * (which is required by all IPv6 nodes). 6423 * Implement by just swapping the home address 6424 * and source address. 6425 * XXX Note: this has IPsec implications since 6426 * AH needs to take this into account. 6427 * Also, when IPsec is used we need to ensure 6428 * that this is only processed once 6429 * in the received packet (to avoid swapping 6430 * back and forth). 6431 * NOTE:This option processing is considered 6432 * to be unsafe and prone to a denial of 6433 * service attack. 6434 * The current processing is not safe even with 6435 * IPsec secured IP packets. Since the home 6436 * address option processing requirement still 6437 * is in the IETF draft and in the process of 6438 * being redefined for its usage, it has been 6439 * decided to turn off the option by default. 6440 * If this section of code needs to be executed, 6441 * ndd variable ip6_ignore_home_address_opt 6442 * should be set to 0 at the user's own risk. 6443 */ 6444 struct ip6_opt_home_address *oh; 6445 in6_addr_t tmp; 6446 6447 if (ipst->ips_ipv6_ignore_home_address_opt) 6448 goto opt_error; 6449 6450 if (hdr_type != IPPROTO_DSTOPTS) 6451 goto opt_error; 6452 optused = 2 + optptr[1]; 6453 if (optused > optlen) 6454 goto bad_opt; 6455 6456 /* 6457 * We did this dest. opt the first time 6458 * around (i.e. before AH processing). 
6459 * If we've done AH... stop now. 6460 */ 6461 if (first_mp != mp) { 6462 ipsec_in_t *ii; 6463 6464 ii = (ipsec_in_t *)first_mp->b_rptr; 6465 if (ii->ipsec_in_ah_sa != NULL) 6466 break; 6467 } 6468 6469 oh = (struct ip6_opt_home_address *)optptr; 6470 /* Check total length and alignment */ 6471 if (optused < sizeof (*oh) || 6472 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6473 goto opt_error; 6474 /* Swap ip6_src and the home address */ 6475 tmp = ip6h->ip6_src; 6476 /* XXX Note: only 8 byte alignment option */ 6477 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6478 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6479 break; 6480 } 6481 6482 case IP6OPT_TUNNEL_LIMIT: 6483 if (hdr_type != IPPROTO_DSTOPTS) { 6484 goto opt_error; 6485 } 6486 optused = 2 + optptr[1]; 6487 if (optused > optlen) { 6488 goto bad_opt; 6489 } 6490 if (optused != 3) { 6491 goto opt_error; 6492 } 6493 break; 6494 6495 default: 6496 errtype = "unknown"; 6497 /* FALLTHROUGH */ 6498 opt_error: 6499 /* Determine which zone should send error */ 6500 zoneid = ipif_lookup_addr_zoneid_v6( 6501 &ip6h->ip6_dst, ill, ipst); 6502 switch (IP6OPT_TYPE(opt_type)) { 6503 case IP6OPT_TYPE_SKIP: 6504 optused = 2 + optptr[1]; 6505 if (optused > optlen) 6506 goto bad_opt; 6507 ip1dbg(("ip_process_options_v6: %s " 6508 "opt 0x%x skipped\n", 6509 errtype, opt_type)); 6510 break; 6511 case IP6OPT_TYPE_DISCARD: 6512 ip1dbg(("ip_process_options_v6: %s " 6513 "opt 0x%x; packet dropped\n", 6514 errtype, opt_type)); 6515 freemsg(first_mp); 6516 return (-1); 6517 case IP6OPT_TYPE_ICMP: 6518 if (zoneid == ALL_ZONES) { 6519 freemsg(first_mp); 6520 return (-1); 6521 } 6522 icmp_param_problem_v6(WR(q), first_mp, 6523 ICMP6_PARAMPROB_OPTION, 6524 (uint32_t)(optptr - 6525 (uint8_t *)ip6h), 6526 B_FALSE, B_FALSE, zoneid, ipst); 6527 return (-1); 6528 case IP6OPT_TYPE_FORCEICMP: 6529 if (zoneid == ALL_ZONES) { 6530 freemsg(first_mp); 6531 return (-1); 6532 } 6533 icmp_param_problem_v6(WR(q), first_mp, 6534 ICMP6_PARAMPROB_OPTION, 6535 
(uint32_t)(optptr - 6536 (uint8_t *)ip6h), 6537 B_FALSE, B_TRUE, zoneid, ipst); 6538 return (-1); 6539 default: 6540 ASSERT(0); 6541 } 6542 } 6543 } 6544 optlen -= optused; 6545 optptr += optused; 6546 } 6547 return (ret); 6548 6549 bad_opt: 6550 /* Determine which zone should send error */ 6551 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6552 if (zoneid == ALL_ZONES) { 6553 freemsg(first_mp); 6554 } else { 6555 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6556 (uint32_t)(optptr - (uint8_t *)ip6h), 6557 B_FALSE, B_FALSE, zoneid, ipst); 6558 } 6559 return (-1); 6560 } 6561 6562 /* 6563 * Process a routing header that is not yet empty. 6564 * Only handles type 0 routing headers. 6565 */ 6566 static void 6567 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6568 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6569 { 6570 ip6_rthdr0_t *rthdr; 6571 uint_t ehdrlen; 6572 uint_t numaddr; 6573 in6_addr_t *addrptr; 6574 in6_addr_t tmp; 6575 ip_stack_t *ipst = ill->ill_ipst; 6576 6577 ASSERT(rth->ip6r_segleft != 0); 6578 6579 if (!ipst->ips_ipv6_forward_src_routed) { 6580 /* XXX Check for source routed out same interface? 
*/ 6581 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6582 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6583 freemsg(hada_mp); 6584 freemsg(mp); 6585 return; 6586 } 6587 6588 if (rth->ip6r_type != 0) { 6589 if (hada_mp != NULL) 6590 goto hada_drop; 6591 /* Sent by forwarding path, and router is global zone */ 6592 icmp_param_problem_v6(WR(q), mp, 6593 ICMP6_PARAMPROB_HEADER, 6594 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6595 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6596 return; 6597 } 6598 rthdr = (ip6_rthdr0_t *)rth; 6599 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6600 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6601 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6602 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6603 if (rthdr->ip6r0_len & 0x1) { 6604 /* An odd length is impossible */ 6605 if (hada_mp != NULL) 6606 goto hada_drop; 6607 /* Sent by forwarding path, and router is global zone */ 6608 icmp_param_problem_v6(WR(q), mp, 6609 ICMP6_PARAMPROB_HEADER, 6610 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6611 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6612 return; 6613 } 6614 numaddr = rthdr->ip6r0_len / 2; 6615 if (rthdr->ip6r0_segleft > numaddr) { 6616 /* segleft exceeds number of addresses in routing header */ 6617 if (hada_mp != NULL) 6618 goto hada_drop; 6619 /* Sent by forwarding path, and router is global zone */ 6620 icmp_param_problem_v6(WR(q), mp, 6621 ICMP6_PARAMPROB_HEADER, 6622 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6623 (uchar_t *)ip6h), 6624 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6625 return; 6626 } 6627 addrptr += (numaddr - rthdr->ip6r0_segleft); 6628 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6629 IN6_IS_ADDR_MULTICAST(addrptr)) { 6630 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6631 freemsg(hada_mp); 6632 freemsg(mp); 6633 return; 6634 } 6635 /* Swap */ 6636 tmp = *addrptr; 6637 *addrptr = ip6h->ip6_dst; 6638 ip6h->ip6_dst = tmp; 6639 rthdr->ip6r0_segleft--; 
6640 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6641 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6642 if (hada_mp != NULL) 6643 goto hada_drop; 6644 /* Sent by forwarding path, and router is global zone */ 6645 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6646 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6647 return; 6648 } 6649 if (ip_check_v6_mblk(mp, ill) == 0) { 6650 ip6h = (ip6_t *)mp->b_rptr; 6651 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6652 } 6653 return; 6654 hada_drop: 6655 /* IPsec kstats: bean counter? */ 6656 freemsg(hada_mp); 6657 freemsg(mp); 6658 } 6659 6660 /* 6661 * Read side put procedure for IPv6 module. 6662 */ 6663 void 6664 ip_rput_v6(queue_t *q, mblk_t *mp) 6665 { 6666 mblk_t *first_mp; 6667 mblk_t *hada_mp = NULL; 6668 ip6_t *ip6h; 6669 boolean_t ll_multicast = B_FALSE; 6670 boolean_t mctl_present = B_FALSE; 6671 ill_t *ill; 6672 struct iocblk *iocp; 6673 uint_t flags = 0; 6674 mblk_t *dl_mp; 6675 ip_stack_t *ipst; 6676 6677 ill = (ill_t *)q->q_ptr; 6678 ipst = ill->ill_ipst; 6679 if (ill->ill_state_flags & ILL_CONDEMNED) { 6680 union DL_primitives *dl; 6681 6682 dl = (union DL_primitives *)mp->b_rptr; 6683 /* 6684 * Things are opening or closing - only accept DLPI 6685 * ack messages. If the stream is closing and ip_wsrv 6686 * has completed, ip_close is out of the qwait, but has 6687 * not yet completed qprocsoff. Don't proceed any further 6688 * because the ill has been cleaned up and things hanging 6689 * off the ill have been freed. 6690 */ 6691 if ((mp->b_datap->db_type != M_PCPROTO) || 6692 (dl->dl_primitive == DL_UNITDATA_IND)) { 6693 inet_freemsg(mp); 6694 return; 6695 } 6696 } 6697 6698 dl_mp = NULL; 6699 switch (mp->b_datap->db_type) { 6700 case M_DATA: { 6701 int hlen; 6702 uchar_t *ucp; 6703 struct ether_header *eh; 6704 dl_unitdata_ind_t *dui; 6705 6706 /* 6707 * This is a work-around for CR 6451644, a bug in Nemo. It 6708 * should be removed when that problem is fixed. 
6709 */ 6710 if (ill->ill_mactype == DL_ETHER && 6711 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6712 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6713 ucp[-2] == (IP6_DL_SAP >> 8)) { 6714 if (hlen >= sizeof (struct ether_vlan_header) && 6715 ucp[-5] == 0 && ucp[-6] == 0x81) 6716 ucp -= sizeof (struct ether_vlan_header); 6717 else 6718 ucp -= sizeof (struct ether_header); 6719 /* 6720 * If it's a group address, then fabricate a 6721 * DL_UNITDATA_IND message. 6722 */ 6723 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6724 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6725 BPRI_HI)) != NULL) { 6726 eh = (struct ether_header *)ucp; 6727 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6728 DB_TYPE(dl_mp) = M_PROTO; 6729 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6730 dui->dl_primitive = DL_UNITDATA_IND; 6731 dui->dl_dest_addr_length = 8; 6732 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6733 dui->dl_src_addr_length = 8; 6734 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6735 8; 6736 dui->dl_group_address = 1; 6737 ucp = (uchar_t *)(dui + 1); 6738 if (ill->ill_sap_length > 0) 6739 ucp += ill->ill_sap_length; 6740 bcopy(&eh->ether_dhost, ucp, 6); 6741 bcopy(&eh->ether_shost, ucp + 8, 6); 6742 ucp = (uchar_t *)(dui + 1); 6743 if (ill->ill_sap_length < 0) 6744 ucp += 8 + ill->ill_sap_length; 6745 bcopy(&eh->ether_type, ucp, 2); 6746 bcopy(&eh->ether_type, ucp + 8, 2); 6747 } 6748 } 6749 break; 6750 } 6751 6752 case M_PROTO: 6753 case M_PCPROTO: 6754 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6755 DL_UNITDATA_IND) { 6756 /* Go handle anything other than data elsewhere. */ 6757 ip_rput_dlpi(q, mp); 6758 return; 6759 } 6760 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6761 ll_multicast = dlur->dl_group_address; 6762 #undef dlur 6763 /* Save the DLPI header. 
*/ 6764 dl_mp = mp; 6765 mp = mp->b_cont; 6766 dl_mp->b_cont = NULL; 6767 break; 6768 case M_BREAK: 6769 panic("ip_rput_v6: got an M_BREAK"); 6770 /*NOTREACHED*/ 6771 case M_IOCACK: 6772 iocp = (struct iocblk *)mp->b_rptr; 6773 switch (iocp->ioc_cmd) { 6774 case DL_IOC_HDR_INFO: 6775 ill = (ill_t *)q->q_ptr; 6776 ill_fastpath_ack(ill, mp); 6777 return; 6778 6779 case SIOCGTUNPARAM: 6780 case OSIOCGTUNPARAM: 6781 ip_rput_other(NULL, q, mp, NULL); 6782 return; 6783 6784 case SIOCSTUNPARAM: 6785 case OSIOCSTUNPARAM: 6786 /* Go through qwriter */ 6787 break; 6788 default: 6789 putnext(q, mp); 6790 return; 6791 } 6792 /* FALLTHRU */ 6793 case M_ERROR: 6794 case M_HANGUP: 6795 mutex_enter(&ill->ill_lock); 6796 if (ill->ill_state_flags & ILL_CONDEMNED) { 6797 mutex_exit(&ill->ill_lock); 6798 freemsg(mp); 6799 return; 6800 } 6801 ill_refhold_locked(ill); 6802 mutex_exit(&ill->ill_lock); 6803 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6804 return; 6805 case M_CTL: 6806 if ((MBLKL(mp) > sizeof (int)) && 6807 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6808 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6809 mctl_present = B_TRUE; 6810 break; 6811 } 6812 putnext(q, mp); 6813 return; 6814 case M_IOCNAK: 6815 iocp = (struct iocblk *)mp->b_rptr; 6816 switch (iocp->ioc_cmd) { 6817 case DL_IOC_HDR_INFO: 6818 case SIOCGTUNPARAM: 6819 case OSIOCGTUNPARAM: 6820 ip_rput_other(NULL, q, mp, NULL); 6821 return; 6822 6823 case SIOCSTUNPARAM: 6824 case OSIOCSTUNPARAM: 6825 mutex_enter(&ill->ill_lock); 6826 if (ill->ill_state_flags & ILL_CONDEMNED) { 6827 mutex_exit(&ill->ill_lock); 6828 freemsg(mp); 6829 return; 6830 } 6831 ill_refhold_locked(ill); 6832 mutex_exit(&ill->ill_lock); 6833 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6834 return; 6835 default: 6836 break; 6837 } 6838 /* FALLTHRU */ 6839 default: 6840 putnext(q, mp); 6841 return; 6842 } 6843 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6844 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6845 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6846 /* 6847 * if db_ref > 1 then copymsg and free original. Packet may be 6848 * changed and do not want other entity who has a reference to this 6849 * message to trip over the changes. This is a blind change because 6850 * trying to catch all places that might change packet is too 6851 * difficult (since it may be a module above this one). 6852 */ 6853 if (mp->b_datap->db_ref > 1) { 6854 mblk_t *mp1; 6855 6856 mp1 = copymsg(mp); 6857 freemsg(mp); 6858 if (mp1 == NULL) { 6859 first_mp = NULL; 6860 goto discard; 6861 } 6862 mp = mp1; 6863 } 6864 first_mp = mp; 6865 if (mctl_present) { 6866 hada_mp = first_mp; 6867 mp = first_mp->b_cont; 6868 } 6869 6870 if (ip_check_v6_mblk(mp, ill) == -1) 6871 return; 6872 6873 ip6h = (ip6_t *)mp->b_rptr; 6874 6875 DTRACE_PROBE4(ip6__physical__in__start, 6876 ill_t *, ill, ill_t *, NULL, 6877 ip6_t *, ip6h, mblk_t *, first_mp); 6878 6879 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6880 ipst->ips_ipv6firewall_physical_in, 6881 ill, NULL, ip6h, first_mp, mp, ipst); 6882 6883 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6884 6885 if (first_mp == NULL) 6886 return; 6887 6888 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6889 IPV6_DEFAULT_VERS_AND_FLOW) { 6890 /* 6891 * It may be a bit too expensive to do this mapped address 6892 * check here, but in the interest of robustness, it seems 6893 * like the correct place. 6894 * TODO: Avoid this check for e.g. connected TCP sockets 6895 */ 6896 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6897 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6898 goto discard; 6899 } 6900 6901 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6902 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6903 goto discard; 6904 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6905 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6906 goto discard; 6907 } 6908 6909 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6910 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6911 } else { 6912 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6913 goto discard; 6914 } 6915 freemsg(dl_mp); 6916 return; 6917 6918 discard: 6919 if (dl_mp != NULL) 6920 freeb(dl_mp); 6921 freemsg(first_mp); 6922 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6923 } 6924 6925 /* 6926 * Walk through the IPv6 packet in mp and see if there's an AH header 6927 * in it. See if the AH header needs to get done before other headers in 6928 * the packet. (Worker function for ipsec_early_ah_v6().) 6929 */ 6930 #define IPSEC_HDR_DONT_PROCESS 0 6931 #define IPSEC_HDR_PROCESS 1 6932 #define IPSEC_MEMORY_ERROR 2 6933 static int 6934 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6935 { 6936 uint_t length; 6937 uint_t ehdrlen; 6938 uint8_t *whereptr; 6939 uint8_t *endptr; 6940 uint8_t *nexthdrp; 6941 ip6_dest_t *desthdr; 6942 ip6_rthdr_t *rthdr; 6943 ip6_t *ip6h; 6944 6945 /* 6946 * For now just pullup everything. In general, the less pullups, 6947 * the better, but there's so much squirrelling through anyway, 6948 * it's just easier this way. 6949 */ 6950 if (!pullupmsg(mp, -1)) { 6951 return (IPSEC_MEMORY_ERROR); 6952 } 6953 6954 ip6h = (ip6_t *)mp->b_rptr; 6955 length = IPV6_HDR_LEN; 6956 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6957 endptr = mp->b_wptr; 6958 6959 /* 6960 * We can't just use the argument nexthdr in the place 6961 * of nexthdrp becaue we don't dereference nexthdrp 6962 * till we confirm whether it is a valid address. 6963 */ 6964 nexthdrp = &ip6h->ip6_nxt; 6965 while (whereptr < endptr) { 6966 /* Is there enough left for len + nexthdr? 
*/ 6967 if (whereptr + MIN_EHDR_LEN > endptr) 6968 return (IPSEC_MEMORY_ERROR); 6969 6970 switch (*nexthdrp) { 6971 case IPPROTO_HOPOPTS: 6972 case IPPROTO_DSTOPTS: 6973 /* Assumes the headers are identical for hbh and dst */ 6974 desthdr = (ip6_dest_t *)whereptr; 6975 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6976 if ((uchar_t *)desthdr + ehdrlen > endptr) 6977 return (IPSEC_MEMORY_ERROR); 6978 /* 6979 * Return DONT_PROCESS because the destination 6980 * options header may be for each hop in a 6981 * routing-header, and we only want AH if we're 6982 * finished with routing headers. 6983 */ 6984 if (*nexthdrp == IPPROTO_DSTOPTS) 6985 return (IPSEC_HDR_DONT_PROCESS); 6986 nexthdrp = &desthdr->ip6d_nxt; 6987 break; 6988 case IPPROTO_ROUTING: 6989 rthdr = (ip6_rthdr_t *)whereptr; 6990 6991 /* 6992 * If there's more hops left on the routing header, 6993 * return now with DON'T PROCESS. 6994 */ 6995 if (rthdr->ip6r_segleft > 0) 6996 return (IPSEC_HDR_DONT_PROCESS); 6997 6998 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6999 if ((uchar_t *)rthdr + ehdrlen > endptr) 7000 return (IPSEC_MEMORY_ERROR); 7001 nexthdrp = &rthdr->ip6r_nxt; 7002 break; 7003 case IPPROTO_FRAGMENT: 7004 /* Wait for reassembly */ 7005 return (IPSEC_HDR_DONT_PROCESS); 7006 case IPPROTO_AH: 7007 *nexthdr = IPPROTO_AH; 7008 return (IPSEC_HDR_PROCESS); 7009 case IPPROTO_NONE: 7010 /* No next header means we're finished */ 7011 default: 7012 return (IPSEC_HDR_DONT_PROCESS); 7013 } 7014 length += ehdrlen; 7015 whereptr += ehdrlen; 7016 } 7017 panic("ipsec_needs_processing_v6"); 7018 /*NOTREACHED*/ 7019 } 7020 7021 /* 7022 * Path for AH if options are present. If this is the first time we are 7023 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7024 * Otherwise, just fanout. Return value answers the boolean question: 7025 * "Did I consume the mblk you sent me?" 7026 * 7027 * Sometimes AH needs to be done before other IPv6 headers for security 7028 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7029 * indicates if that is so, and fans out to the appropriate IPsec protocol 7030 * for the datagram passed in. 7031 */ 7032 static boolean_t 7033 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7034 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7035 { 7036 mblk_t *mp; 7037 uint8_t nexthdr; 7038 ipsec_in_t *ii = NULL; 7039 ah_t *ah; 7040 ipsec_status_t ipsec_rc; 7041 ip_stack_t *ipst = ill->ill_ipst; 7042 netstack_t *ns = ipst->ips_netstack; 7043 ipsec_stack_t *ipss = ns->netstack_ipsec; 7044 7045 ASSERT((hada_mp == NULL) || (!mctl_present)); 7046 7047 switch (ipsec_needs_processing_v6( 7048 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7049 case IPSEC_MEMORY_ERROR: 7050 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7051 freemsg(hada_mp); 7052 freemsg(first_mp); 7053 return (B_TRUE); 7054 case IPSEC_HDR_DONT_PROCESS: 7055 return (B_FALSE); 7056 } 7057 7058 /* Default means send it to AH! */ 7059 ASSERT(nexthdr == IPPROTO_AH); 7060 if (!mctl_present) { 7061 mp = first_mp; 7062 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7063 if (first_mp == NULL) { 7064 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7065 "allocation failure.\n")); 7066 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7067 freemsg(hada_mp); 7068 freemsg(mp); 7069 return (B_TRUE); 7070 } 7071 /* 7072 * Store the ill_index so that when we come back 7073 * from IPSEC we ride on the same queue. 7074 */ 7075 ii = (ipsec_in_t *)first_mp->b_rptr; 7076 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7077 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7078 first_mp->b_cont = mp; 7079 } 7080 /* 7081 * Cache hardware acceleration info. 
7082 */ 7083 if (hada_mp != NULL) { 7084 ASSERT(ii != NULL); 7085 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7086 "caching data attr.\n")); 7087 ii->ipsec_in_accelerated = B_TRUE; 7088 ii->ipsec_in_da = hada_mp; 7089 } 7090 7091 if (!ipsec_loaded(ipss)) { 7092 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7093 return (B_TRUE); 7094 } 7095 7096 ah = ipsec_inbound_ah_sa(first_mp, ns); 7097 if (ah == NULL) 7098 return (B_TRUE); 7099 ASSERT(ii->ipsec_in_ah_sa != NULL); 7100 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7101 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7102 7103 switch (ipsec_rc) { 7104 case IPSEC_STATUS_SUCCESS: 7105 /* we're done with IPsec processing, send it up */ 7106 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7107 break; 7108 case IPSEC_STATUS_FAILED: 7109 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7110 break; 7111 case IPSEC_STATUS_PENDING: 7112 /* no action needed */ 7113 break; 7114 } 7115 return (B_TRUE); 7116 } 7117 7118 /* 7119 * Validate the IPv6 mblk for alignment. 7120 */ 7121 int 7122 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7123 { 7124 int pkt_len, ip6_len; 7125 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7126 7127 /* check for alignment and full IPv6 header */ 7128 if (!OK_32PTR((uchar_t *)ip6h) || 7129 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7130 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7132 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7133 freemsg(mp); 7134 return (-1); 7135 } 7136 ip6h = (ip6_t *)mp->b_rptr; 7137 } 7138 7139 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7140 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7141 7142 if (mp->b_cont == NULL) 7143 pkt_len = mp->b_wptr - mp->b_rptr; 7144 else 7145 pkt_len = msgdsize(mp); 7146 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7147 7148 /* 7149 * Check for bogus (too short packet) and packet which 7150 * was padded by the link layer. 
7151 */ 7152 if (ip6_len != pkt_len) { 7153 ssize_t diff; 7154 7155 if (ip6_len > pkt_len) { 7156 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7157 ip6_len, pkt_len)); 7158 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7159 freemsg(mp); 7160 return (-1); 7161 } 7162 diff = (ssize_t)(pkt_len - ip6_len); 7163 7164 if (!adjmsg(mp, -diff)) { 7165 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7166 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7167 freemsg(mp); 7168 return (-1); 7169 } 7170 } 7171 return (0); 7172 } 7173 7174 /* 7175 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7176 * ip_rput_v6 has already verified alignment, the min length, the version, 7177 * and db_ref = 1. 7178 * 7179 * The ill passed in (the arg named inill) is the ill that the packet 7180 * actually arrived on. We need to remember this when saving the 7181 * input interface index into potential IPV6_PKTINFO data in 7182 * ip_add_info_v6(). 7183 * 7184 * This routine doesn't free dl_mp; that's the caller's responsibility on 7185 * return. (Note that the callers are complex enough that there's no tail 7186 * recursion here anyway.) 
7187 */ 7188 void 7189 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7190 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7191 { 7192 ire_t *ire = NULL; 7193 ill_t *ill = inill; 7194 ill_t *outill; 7195 ipif_t *ipif; 7196 uint8_t *whereptr; 7197 uint8_t nexthdr; 7198 uint16_t remlen; 7199 uint_t prev_nexthdr_offset; 7200 uint_t used; 7201 size_t old_pkt_len; 7202 size_t pkt_len; 7203 uint16_t ip6_len; 7204 uint_t hdr_len; 7205 boolean_t mctl_present; 7206 mblk_t *first_mp; 7207 mblk_t *first_mp1; 7208 boolean_t no_forward; 7209 ip6_hbh_t *hbhhdr; 7210 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7211 conn_t *connp; 7212 ilm_t *ilm; 7213 uint32_t ports; 7214 zoneid_t zoneid = GLOBAL_ZONEID; 7215 uint16_t hck_flags, reass_hck_flags; 7216 uint32_t reass_sum; 7217 boolean_t cksum_err; 7218 mblk_t *mp1; 7219 ip_stack_t *ipst = inill->ill_ipst; 7220 7221 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7222 7223 if (hada_mp != NULL) { 7224 /* 7225 * It's an IPsec accelerated packet. 7226 * Keep a pointer to the data attributes around until 7227 * we allocate the ipsecinfo structure. 7228 */ 7229 IPSECHW_DEBUG(IPSECHW_PKT, 7230 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7231 hada_mp->b_cont = NULL; 7232 /* 7233 * Since it is accelerated, it came directly from 7234 * the ill. 
7235 */ 7236 ASSERT(mctl_present == B_FALSE); 7237 ASSERT(mp->b_datap->db_type != M_CTL); 7238 } 7239 7240 ip6h = (ip6_t *)mp->b_rptr; 7241 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7242 old_pkt_len = pkt_len = ip6_len; 7243 7244 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7245 hck_flags = DB_CKSUMFLAGS(mp); 7246 else 7247 hck_flags = 0; 7248 7249 /* Clear checksum flags in case we need to forward */ 7250 DB_CKSUMFLAGS(mp) = 0; 7251 reass_sum = reass_hck_flags = 0; 7252 7253 nexthdr = ip6h->ip6_nxt; 7254 7255 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7256 (uchar_t *)ip6h); 7257 whereptr = (uint8_t *)&ip6h[1]; 7258 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7259 7260 /* Process hop by hop header options */ 7261 if (nexthdr == IPPROTO_HOPOPTS) { 7262 uint_t ehdrlen; 7263 uint8_t *optptr; 7264 7265 if (remlen < MIN_EHDR_LEN) 7266 goto pkt_too_short; 7267 if (mp->b_cont != NULL && 7268 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7269 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7270 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7271 freemsg(hada_mp); 7272 freemsg(first_mp); 7273 return; 7274 } 7275 ip6h = (ip6_t *)mp->b_rptr; 7276 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7277 } 7278 hbhhdr = (ip6_hbh_t *)whereptr; 7279 nexthdr = hbhhdr->ip6h_nxt; 7280 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7281 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7282 7283 if (remlen < ehdrlen) 7284 goto pkt_too_short; 7285 if (mp->b_cont != NULL && 7286 whereptr + ehdrlen > mp->b_wptr) { 7287 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7288 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7289 freemsg(hada_mp); 7290 freemsg(first_mp); 7291 return; 7292 } 7293 ip6h = (ip6_t *)mp->b_rptr; 7294 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7295 hbhhdr = (ip6_hbh_t *)whereptr; 7296 } 7297 7298 optptr = whereptr + 2; 7299 whereptr += ehdrlen; 7300 remlen -= ehdrlen; 7301 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7302 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7303 case -1: 7304 /* 7305 * Packet has been consumed and any 7306 * needed ICMP messages sent. 7307 */ 7308 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7309 freemsg(hada_mp); 7310 return; 7311 case 0: 7312 /* no action needed */ 7313 break; 7314 case 1: 7315 /* Known router alert */ 7316 goto ipv6forus; 7317 } 7318 } 7319 7320 /* 7321 * Attach any necessary label information to this packet. 7322 */ 7323 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7324 if (ip6opt_ls != 0) 7325 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7326 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7327 freemsg(hada_mp); 7328 freemsg(first_mp); 7329 return; 7330 } 7331 7332 /* 7333 * On incoming v6 multicast packets we will bypass the ire table, 7334 * and assume that the read queue corresponds to the targetted 7335 * interface. 7336 * 7337 * The effect of this is the same as the IPv4 original code, but is 7338 * much cleaner I think. See ip_rput for how that was done. 7339 */ 7340 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7341 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7342 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7343 /* 7344 * XXX TODO Give to mrouted to for multicast forwarding. 
7345 */ 7346 ILM_WALKER_HOLD(ill); 7347 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7348 ILM_WALKER_RELE(ill); 7349 if (ilm == NULL) { 7350 if (ip_debug > 3) { 7351 /* ip2dbg */ 7352 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7353 " which is not for us: %s\n", AF_INET6, 7354 &ip6h->ip6_dst); 7355 } 7356 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7357 freemsg(hada_mp); 7358 freemsg(first_mp); 7359 return; 7360 } 7361 if (ip_debug > 3) { 7362 /* ip2dbg */ 7363 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7364 AF_INET6, &ip6h->ip6_dst); 7365 } 7366 zoneid = GLOBAL_ZONEID; 7367 goto ipv6forus; 7368 } 7369 7370 ipif = ill->ill_ipif; 7371 7372 /* 7373 * If a packet was received on an interface that is a 6to4 tunnel, 7374 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7375 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7376 * the 6to4 prefix of the address configured on the receiving interface. 7377 * Otherwise, the packet was delivered to this interface in error and 7378 * the packet must be dropped. 7379 */ 7380 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7381 7382 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7383 &ip6h->ip6_dst)) { 7384 if (ip_debug > 2) { 7385 /* ip1dbg */ 7386 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7387 "addressed packet which is not for us: " 7388 "%s\n", AF_INET6, &ip6h->ip6_dst); 7389 } 7390 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7391 freemsg(first_mp); 7392 return; 7393 } 7394 } 7395 7396 /* 7397 * Find an ire that matches destination. For link-local addresses 7398 * we have to match the ill. 7399 * TBD for site local addresses. 
7400 */ 7401 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7402 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7403 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7404 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7405 } else { 7406 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7407 MBLK_GETLABEL(mp), ipst); 7408 } 7409 if (ire == NULL) { 7410 /* 7411 * No matching IRE found. Mark this packet as having 7412 * originated externally. 7413 */ 7414 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7416 if (!(ill->ill_flags & ILLF_ROUTER)) { 7417 BUMP_MIB(ill->ill_ip_mib, 7418 ipIfStatsInAddrErrors); 7419 } 7420 freemsg(hada_mp); 7421 freemsg(first_mp); 7422 return; 7423 } 7424 if (ip6h->ip6_hops <= 1) { 7425 if (hada_mp != NULL) 7426 goto hada_drop; 7427 /* Sent by forwarding path, and router is global zone */ 7428 icmp_time_exceeded_v6(WR(q), first_mp, 7429 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7430 GLOBAL_ZONEID, ipst); 7431 return; 7432 } 7433 /* 7434 * Per RFC 3513 section 2.5.2, we must not forward packets with 7435 * an unspecified source address. 7436 */ 7437 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7438 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7439 freemsg(hada_mp); 7440 freemsg(first_mp); 7441 return; 7442 } 7443 mp->b_prev = (mblk_t *)(uintptr_t) 7444 ill->ill_phyint->phyint_ifindex; 7445 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7446 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7447 ALL_ZONES, ipst); 7448 return; 7449 } 7450 /* we have a matching IRE */ 7451 if (ire->ire_stq != NULL) { 7452 ill_group_t *ill_group; 7453 ill_group_t *ire_group; 7454 7455 /* 7456 * To be quicker, we may wish not to chase pointers 7457 * (ire->ire_ipif->ipif_ill...) and instead store the 7458 * forwarding policy in the ire. An unfortunate side- 7459 * effect of this would be requiring an ire flush whenever 7460 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7461 * once and store in the boolean no_forward. 7462 * 7463 * This appears twice to keep it out of the non-forwarding, 7464 * yes-it's-for-us-on-the-right-interface case. 7465 */ 7466 no_forward = ((ill->ill_flags & 7467 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7468 7469 7470 ASSERT(first_mp == mp); 7471 /* 7472 * This ire has a send-to queue - forward the packet. 7473 */ 7474 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7475 freemsg(hada_mp); 7476 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7477 if (no_forward) { 7478 BUMP_MIB(ill->ill_ip_mib, 7479 ipIfStatsInAddrErrors); 7480 } 7481 freemsg(mp); 7482 ire_refrele(ire); 7483 return; 7484 } 7485 /* 7486 * ipIfStatsHCInForwDatagrams should only be increment if there 7487 * will be an attempt to forward the packet, which is why we 7488 * increment after the above condition has been checked. 7489 */ 7490 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7491 if (ip6h->ip6_hops <= 1) { 7492 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7493 /* Sent by forwarding path, and router is global zone */ 7494 icmp_time_exceeded_v6(WR(q), mp, 7495 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7496 GLOBAL_ZONEID, ipst); 7497 ire_refrele(ire); 7498 return; 7499 } 7500 /* 7501 * Per RFC 3513 section 2.5.2, we must not forward packets with 7502 * an unspecified source address. 
7503 */ 7504 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7505 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7506 freemsg(mp); 7507 ire_refrele(ire); 7508 return; 7509 } 7510 7511 if (is_system_labeled()) { 7512 mblk_t *mp1; 7513 7514 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7515 BUMP_MIB(ill->ill_ip_mib, 7516 ipIfStatsForwProhibits); 7517 freemsg(mp); 7518 ire_refrele(ire); 7519 return; 7520 } 7521 /* Size may have changed */ 7522 mp = mp1; 7523 ip6h = (ip6_t *)mp->b_rptr; 7524 pkt_len = msgdsize(mp); 7525 } 7526 7527 if (pkt_len > ire->ire_max_frag) { 7528 int max_frag = ire->ire_max_frag; 7529 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7530 /* 7531 * Handle labeled packet resizing. 7532 */ 7533 if (is_system_labeled()) { 7534 max_frag = tsol_pmtu_adjust(mp, max_frag, 7535 pkt_len - old_pkt_len, AF_INET6); 7536 } 7537 7538 /* Sent by forwarding path, and router is global zone */ 7539 icmp_pkt2big_v6(WR(q), mp, max_frag, 7540 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7541 ire_refrele(ire); 7542 return; 7543 } 7544 7545 /* 7546 * Check to see if we're forwarding the packet to a 7547 * different link from which it came. If so, check the 7548 * source and destination addresses since routers must not 7549 * forward any packets with link-local source or 7550 * destination addresses to other links. Otherwise (if 7551 * we're forwarding onto the same link), conditionally send 7552 * a redirect message. 7553 */ 7554 ill_group = ill->ill_group; 7555 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7556 if (ire->ire_rfq != q && (ill_group == NULL || 7557 ill_group != ire_group)) { 7558 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7559 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7560 BUMP_MIB(ill->ill_ip_mib, 7561 ipIfStatsInAddrErrors); 7562 freemsg(mp); 7563 ire_refrele(ire); 7564 return; 7565 } 7566 /* TBD add site-local check at site boundary? 
*/ 7567 } else if (ipst->ips_ipv6_send_redirects) { 7568 in6_addr_t *v6targ; 7569 in6_addr_t gw_addr_v6; 7570 ire_t *src_ire_v6 = NULL; 7571 7572 /* 7573 * Don't send a redirect when forwarding a source 7574 * routed packet. 7575 */ 7576 if (ip_source_routed_v6(ip6h, mp, ipst)) 7577 goto forward; 7578 7579 mutex_enter(&ire->ire_lock); 7580 gw_addr_v6 = ire->ire_gateway_addr_v6; 7581 mutex_exit(&ire->ire_lock); 7582 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7583 v6targ = &gw_addr_v6; 7584 /* 7585 * We won't send redirects to a router 7586 * that doesn't have a link local 7587 * address, but will forward. 7588 */ 7589 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7590 BUMP_MIB(ill->ill_ip_mib, 7591 ipIfStatsInAddrErrors); 7592 goto forward; 7593 } 7594 } else { 7595 v6targ = &ip6h->ip6_dst; 7596 } 7597 7598 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7599 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7600 ALL_ZONES, 0, NULL, 7601 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7602 ipst); 7603 7604 if (src_ire_v6 != NULL) { 7605 /* 7606 * The source is directly connected. 
7607 */ 7608 mp1 = copymsg(mp); 7609 if (mp1 != NULL) { 7610 icmp_send_redirect_v6(WR(q), 7611 mp1, v6targ, &ip6h->ip6_dst, 7612 ill, B_FALSE); 7613 } 7614 ire_refrele(src_ire_v6); 7615 } 7616 } 7617 7618 forward: 7619 /* Hoplimit verified above */ 7620 ip6h->ip6_hops--; 7621 7622 outill = ire->ire_ipif->ipif_ill; 7623 7624 DTRACE_PROBE4(ip6__forwarding__start, 7625 ill_t *, inill, ill_t *, outill, 7626 ip6_t *, ip6h, mblk_t *, mp); 7627 7628 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7629 ipst->ips_ipv6firewall_forwarding, 7630 inill, outill, ip6h, mp, mp, ipst); 7631 7632 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7633 7634 if (mp != NULL) { 7635 UPDATE_IB_PKT_COUNT(ire); 7636 ire->ire_last_used_time = lbolt; 7637 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7638 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7639 } 7640 IRE_REFRELE(ire); 7641 return; 7642 } 7643 7644 /* 7645 * Need to put on correct queue for reassembly to find it. 7646 * No need to use put() since reassembly has its own locks. 7647 * Note: multicast packets and packets destined to addresses 7648 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7649 * the arriving ill. Unlike the IPv4 case, enabling strict 7650 * destination multihoming will prevent accepting packets 7651 * addressed to an IRE_LOCAL on lo0. 7652 */ 7653 if (ire->ire_rfq != q) { 7654 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7655 == NULL) { 7656 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7657 freemsg(hada_mp); 7658 freemsg(first_mp); 7659 return; 7660 } 7661 if (ire->ire_rfq != NULL) { 7662 q = ire->ire_rfq; 7663 ill = (ill_t *)q->q_ptr; 7664 ASSERT(ill != NULL); 7665 } 7666 } 7667 7668 zoneid = ire->ire_zoneid; 7669 UPDATE_IB_PKT_COUNT(ire); 7670 ire->ire_last_used_time = lbolt; 7671 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7672 ire_refrele(ire); 7673 ire = NULL; 7674 ipv6forus: 7675 /* 7676 * Looks like this packet is for us one way or another. 
7677 * This is where we'll process destination headers etc. 7678 */ 7679 for (; ; ) { 7680 switch (nexthdr) { 7681 case IPPROTO_TCP: { 7682 uint16_t *up; 7683 uint32_t sum; 7684 int offset; 7685 7686 hdr_len = pkt_len - remlen; 7687 7688 if (hada_mp != NULL) { 7689 ip0dbg(("tcp hada drop\n")); 7690 goto hada_drop; 7691 } 7692 7693 7694 /* TCP needs all of the TCP header */ 7695 if (remlen < TCP_MIN_HEADER_LENGTH) 7696 goto pkt_too_short; 7697 if (mp->b_cont != NULL && 7698 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7699 if (!pullupmsg(mp, 7700 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7701 BUMP_MIB(ill->ill_ip_mib, 7702 ipIfStatsInDiscards); 7703 freemsg(first_mp); 7704 return; 7705 } 7706 hck_flags = 0; 7707 ip6h = (ip6_t *)mp->b_rptr; 7708 whereptr = (uint8_t *)ip6h + hdr_len; 7709 } 7710 /* 7711 * Extract the offset field from the TCP header. 7712 */ 7713 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7714 if (offset != 5) { 7715 if (offset < 5) { 7716 ip1dbg(("ip_rput_data_v6: short " 7717 "TCP data offset")); 7718 BUMP_MIB(ill->ill_ip_mib, 7719 ipIfStatsInDiscards); 7720 freemsg(first_mp); 7721 return; 7722 } 7723 /* 7724 * There must be TCP options. 7725 * Make sure we can grab them. 7726 */ 7727 offset <<= 2; 7728 if (remlen < offset) 7729 goto pkt_too_short; 7730 if (mp->b_cont != NULL && 7731 whereptr + offset > mp->b_wptr) { 7732 if (!pullupmsg(mp, 7733 hdr_len + offset)) { 7734 BUMP_MIB(ill->ill_ip_mib, 7735 ipIfStatsInDiscards); 7736 freemsg(first_mp); 7737 return; 7738 } 7739 hck_flags = 0; 7740 ip6h = (ip6_t *)mp->b_rptr; 7741 whereptr = (uint8_t *)ip6h + hdr_len; 7742 } 7743 } 7744 7745 up = (uint16_t *)&ip6h->ip6_src; 7746 /* 7747 * TCP checksum calculation. 
First sum up the 7748 * pseudo-header fields: 7749 * - Source IPv6 address 7750 * - Destination IPv6 address 7751 * - TCP payload length 7752 * - TCP protocol ID 7753 */ 7754 sum = htons(IPPROTO_TCP + remlen) + 7755 up[0] + up[1] + up[2] + up[3] + 7756 up[4] + up[5] + up[6] + up[7] + 7757 up[8] + up[9] + up[10] + up[11] + 7758 up[12] + up[13] + up[14] + up[15]; 7759 7760 /* Fold initial sum */ 7761 sum = (sum & 0xffff) + (sum >> 16); 7762 7763 mp1 = mp->b_cont; 7764 7765 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7766 IP6_STAT(ipst, ip6_in_sw_cksum); 7767 7768 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7769 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7770 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7771 mp, mp1, cksum_err); 7772 7773 if (cksum_err) { 7774 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7775 7776 if (hck_flags & HCK_FULLCKSUM) { 7777 IP6_STAT(ipst, 7778 ip6_tcp_in_full_hw_cksum_err); 7779 } else if (hck_flags & HCK_PARTIALCKSUM) { 7780 IP6_STAT(ipst, 7781 ip6_tcp_in_part_hw_cksum_err); 7782 } else { 7783 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7784 } 7785 freemsg(first_mp); 7786 return; 7787 } 7788 tcp_fanout: 7789 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7790 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7791 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7792 return; 7793 } 7794 case IPPROTO_SCTP: 7795 { 7796 sctp_hdr_t *sctph; 7797 uint32_t calcsum, pktsum; 7798 uint_t hdr_len = pkt_len - remlen; 7799 sctp_stack_t *sctps; 7800 7801 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7802 7803 /* SCTP needs all of the SCTP header */ 7804 if (remlen < sizeof (*sctph)) { 7805 goto pkt_too_short; 7806 } 7807 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7808 ASSERT(mp->b_cont != NULL); 7809 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7810 BUMP_MIB(ill->ill_ip_mib, 7811 ipIfStatsInDiscards); 7812 freemsg(mp); 7813 return; 7814 } 7815 ip6h = (ip6_t *)mp->b_rptr; 7816 whereptr = (uint8_t *)ip6h + hdr_len; 7817 } 7818 7819 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7820 /* checksum */ 7821 pktsum = sctph->sh_chksum; 7822 sctph->sh_chksum = 0; 7823 calcsum = sctp_cksum(mp, hdr_len); 7824 if (calcsum != pktsum) { 7825 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7826 freemsg(mp); 7827 return; 7828 } 7829 sctph->sh_chksum = pktsum; 7830 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7831 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7832 ports, zoneid, mp, sctps)) == NULL) { 7833 ip_fanout_sctp_raw(first_mp, ill, 7834 (ipha_t *)ip6h, B_FALSE, ports, 7835 mctl_present, 7836 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7837 B_TRUE, zoneid); 7838 return; 7839 } 7840 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7841 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7842 B_FALSE, mctl_present); 7843 return; 7844 } 7845 case IPPROTO_UDP: { 7846 uint16_t *up; 7847 uint32_t sum; 7848 7849 hdr_len = pkt_len - remlen; 7850 7851 if (hada_mp != NULL) { 7852 ip0dbg(("udp hada drop\n")); 7853 goto hada_drop; 7854 } 7855 7856 /* Verify that at least the ports are present */ 7857 if (remlen < UDPH_SIZE) 7858 goto pkt_too_short; 7859 if (mp->b_cont != NULL && 7860 whereptr + UDPH_SIZE > mp->b_wptr) { 7861 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7862 BUMP_MIB(ill->ill_ip_mib, 7863 ipIfStatsInDiscards); 7864 freemsg(first_mp); 7865 return; 7866 } 7867 hck_flags = 0; 7868 ip6h = (ip6_t *)mp->b_rptr; 7869 whereptr = (uint8_t *)ip6h + hdr_len; 7870 } 7871 7872 /* 7873 * Before going through the regular checksum 7874 * calculation, make sure the received checksum 7875 * is non-zero. RFC 2460 says, a 0x0000 checksum 7876 * in a UDP packet (within IPv6 packet) is invalid 7877 * and should be replaced by 0xffff. This makes 7878 * sense as regular checksum calculation will 7879 * pass for both the cases i.e. 0x0000 and 0xffff. 7880 * Removing one of the case makes error detection 7881 * stronger. 
7882 */ 7883 7884 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7885 /* 0x0000 checksum is invalid */ 7886 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7887 "checksum value 0x0000\n")); 7888 BUMP_MIB(ill->ill_ip_mib, 7889 udpIfStatsInCksumErrs); 7890 freemsg(first_mp); 7891 return; 7892 } 7893 7894 up = (uint16_t *)&ip6h->ip6_src; 7895 7896 /* 7897 * UDP checksum calculation. First sum up the 7898 * pseudo-header fields: 7899 * - Source IPv6 address 7900 * - Destination IPv6 address 7901 * - UDP payload length 7902 * - UDP protocol ID 7903 */ 7904 7905 sum = htons(IPPROTO_UDP + remlen) + 7906 up[0] + up[1] + up[2] + up[3] + 7907 up[4] + up[5] + up[6] + up[7] + 7908 up[8] + up[9] + up[10] + up[11] + 7909 up[12] + up[13] + up[14] + up[15]; 7910 7911 /* Fold initial sum */ 7912 sum = (sum & 0xffff) + (sum >> 16); 7913 7914 if (reass_hck_flags != 0) { 7915 hck_flags = reass_hck_flags; 7916 7917 IP_CKSUM_RECV_REASS(hck_flags, 7918 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7919 sum, reass_sum, cksum_err); 7920 } else { 7921 mp1 = mp->b_cont; 7922 7923 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7924 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7925 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7926 mp, mp1, cksum_err); 7927 } 7928 7929 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7930 IP6_STAT(ipst, ip6_in_sw_cksum); 7931 7932 if (cksum_err) { 7933 BUMP_MIB(ill->ill_ip_mib, 7934 udpIfStatsInCksumErrs); 7935 7936 if (hck_flags & HCK_FULLCKSUM) 7937 IP6_STAT(ipst, 7938 ip6_udp_in_full_hw_cksum_err); 7939 else if (hck_flags & HCK_PARTIALCKSUM) 7940 IP6_STAT(ipst, 7941 ip6_udp_in_part_hw_cksum_err); 7942 else 7943 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7944 7945 freemsg(first_mp); 7946 return; 7947 } 7948 goto udp_fanout; 7949 } 7950 case IPPROTO_ICMPV6: { 7951 uint16_t *up; 7952 uint32_t sum; 7953 uint_t hdr_len = pkt_len - remlen; 7954 7955 if (hada_mp != NULL) { 7956 ip0dbg(("icmp hada drop\n")); 7957 goto hada_drop; 7958 } 7959 7960 up = (uint16_t 
*)&ip6h->ip6_src; 7961 sum = htons(IPPROTO_ICMPV6 + remlen) + 7962 up[0] + up[1] + up[2] + up[3] + 7963 up[4] + up[5] + up[6] + up[7] + 7964 up[8] + up[9] + up[10] + up[11] + 7965 up[12] + up[13] + up[14] + up[15]; 7966 sum = (sum & 0xffff) + (sum >> 16); 7967 sum = IP_CSUM(mp, hdr_len, sum); 7968 if (sum != 0) { 7969 /* IPv6 ICMP checksum failed */ 7970 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7971 "failed %x\n", 7972 sum)); 7973 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7974 BUMP_MIB(ill->ill_icmp6_mib, 7975 ipv6IfIcmpInErrors); 7976 freemsg(first_mp); 7977 return; 7978 } 7979 7980 icmp_fanout: 7981 /* Check variable for testing applications */ 7982 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7983 freemsg(first_mp); 7984 return; 7985 } 7986 /* 7987 * Assume that there is always at least one conn for 7988 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7989 * where there is no conn. 7990 */ 7991 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7992 ASSERT(!IS_LOOPBACK((ill))); 7993 /* 7994 * In the multicast case, applications may have 7995 * joined the group from different zones, so we 7996 * need to deliver the packet to each of them. 7997 * Loop through the multicast memberships 7998 * structures (ilm) on the receive ill and send 7999 * a copy of the packet up each matching one. 
8000 */ 8001 ILM_WALKER_HOLD(ill); 8002 for (ilm = ill->ill_ilm; ilm != NULL; 8003 ilm = ilm->ilm_next) { 8004 if (ilm->ilm_flags & ILM_DELETED) 8005 continue; 8006 if (!IN6_ARE_ADDR_EQUAL( 8007 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8008 continue; 8009 if (!ipif_lookup_zoneid(ill, 8010 ilm->ilm_zoneid, IPIF_UP, NULL)) 8011 continue; 8012 8013 first_mp1 = ip_copymsg(first_mp); 8014 if (first_mp1 == NULL) 8015 continue; 8016 icmp_inbound_v6(q, first_mp1, ill, 8017 hdr_len, mctl_present, 0, 8018 ilm->ilm_zoneid, dl_mp); 8019 } 8020 ILM_WALKER_RELE(ill); 8021 } else { 8022 first_mp1 = ip_copymsg(first_mp); 8023 if (first_mp1 != NULL) 8024 icmp_inbound_v6(q, first_mp1, ill, 8025 hdr_len, mctl_present, 0, zoneid, 8026 dl_mp); 8027 } 8028 /* FALLTHRU */ 8029 default: { 8030 /* 8031 * Handle protocols with which IPv6 is less intimate. 8032 */ 8033 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8034 8035 if (hada_mp != NULL) { 8036 ip0dbg(("default hada drop\n")); 8037 goto hada_drop; 8038 } 8039 8040 /* 8041 * Enable sending ICMP for "Unknown" nexthdr 8042 * case. i.e. where we did not FALLTHRU from 8043 * IPPROTO_ICMPV6 processing case above. 8044 * If we did FALLTHRU, then the packet has already been 8045 * processed for IPPF, don't process it again in 8046 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8047 * flags 8048 */ 8049 if (nexthdr != IPPROTO_ICMPV6) 8050 proto_flags |= IP_FF_SEND_ICMP; 8051 else 8052 proto_flags |= IP6_NO_IPPOLICY; 8053 8054 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8055 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8056 mctl_present, zoneid); 8057 return; 8058 } 8059 8060 case IPPROTO_DSTOPTS: { 8061 uint_t ehdrlen; 8062 uint8_t *optptr; 8063 ip6_dest_t *desthdr; 8064 8065 /* Check if AH is present. 
*/ 8066 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8067 hada_mp, zoneid)) { 8068 ip0dbg(("dst early hada drop\n")); 8069 return; 8070 } 8071 8072 /* 8073 * Reinitialize pointers, as ipsec_early_ah_v6() does 8074 * complete pullups. We don't have to do more pullups 8075 * as a result. 8076 */ 8077 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8078 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8079 ip6h = (ip6_t *)mp->b_rptr; 8080 8081 if (remlen < MIN_EHDR_LEN) 8082 goto pkt_too_short; 8083 8084 desthdr = (ip6_dest_t *)whereptr; 8085 nexthdr = desthdr->ip6d_nxt; 8086 prev_nexthdr_offset = (uint_t)(whereptr - 8087 (uint8_t *)ip6h); 8088 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8089 if (remlen < ehdrlen) 8090 goto pkt_too_short; 8091 optptr = whereptr + 2; 8092 /* 8093 * Note: XXX This code does not seem to make 8094 * distinction between Destination Options Header 8095 * being before/after Routing Header which can 8096 * happen if we are at the end of source route. 8097 * This may become significant in future. 8098 * (No real significant Destination Options are 8099 * defined/implemented yet ). 8100 */ 8101 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8102 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8103 case -1: 8104 /* 8105 * Packet has been consumed and any needed 8106 * ICMP errors sent. 
8107 */ 8108 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8109 freemsg(hada_mp); 8110 return; 8111 case 0: 8112 /* No action needed continue */ 8113 break; 8114 case 1: 8115 /* 8116 * Unnexpected return value 8117 * (Router alert is a Hop-by-Hop option) 8118 */ 8119 #ifdef DEBUG 8120 panic("ip_rput_data_v6: router " 8121 "alert hbh opt indication in dest opt"); 8122 /*NOTREACHED*/ 8123 #else 8124 freemsg(hada_mp); 8125 freemsg(first_mp); 8126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8127 return; 8128 #endif 8129 } 8130 used = ehdrlen; 8131 break; 8132 } 8133 case IPPROTO_FRAGMENT: { 8134 ip6_frag_t *fraghdr; 8135 size_t no_frag_hdr_len; 8136 8137 if (hada_mp != NULL) { 8138 ip0dbg(("frag hada drop\n")); 8139 goto hada_drop; 8140 } 8141 8142 ASSERT(first_mp == mp); 8143 if (remlen < sizeof (ip6_frag_t)) 8144 goto pkt_too_short; 8145 8146 if (mp->b_cont != NULL && 8147 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8148 if (!pullupmsg(mp, 8149 pkt_len - remlen + sizeof (ip6_frag_t))) { 8150 BUMP_MIB(ill->ill_ip_mib, 8151 ipIfStatsInDiscards); 8152 freemsg(mp); 8153 return; 8154 } 8155 hck_flags = 0; 8156 ip6h = (ip6_t *)mp->b_rptr; 8157 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8158 } 8159 8160 fraghdr = (ip6_frag_t *)whereptr; 8161 used = (uint_t)sizeof (ip6_frag_t); 8162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8163 8164 /* 8165 * Invoke the CGTP (multirouting) filtering module to 8166 * process the incoming packet. Packets identified as 8167 * duplicates must be discarded. Filtering is active 8168 * only if the the ip_cgtp_filter ndd variable is 8169 * non-zero. 
8170 */ 8171 if (ipst->ips_ip_cgtp_filter && 8172 ipst->ips_ip_cgtp_filter_ops != NULL) { 8173 int cgtp_flt_pkt; 8174 netstackid_t stackid; 8175 8176 stackid = ipst->ips_netstack->netstack_stackid; 8177 8178 cgtp_flt_pkt = 8179 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8180 stackid, inill->ill_phyint->phyint_ifindex, 8181 ip6h, fraghdr); 8182 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8183 freemsg(mp); 8184 return; 8185 } 8186 } 8187 8188 /* Restore the flags */ 8189 DB_CKSUMFLAGS(mp) = hck_flags; 8190 8191 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8192 remlen - used, &prev_nexthdr_offset, 8193 &reass_sum, &reass_hck_flags); 8194 if (mp == NULL) { 8195 /* Reassembly is still pending */ 8196 return; 8197 } 8198 /* The first mblk are the headers before the frag hdr */ 8199 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8200 8201 first_mp = mp; /* mp has most likely changed! */ 8202 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8203 ip6h = (ip6_t *)mp->b_rptr; 8204 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8205 whereptr = mp->b_rptr + no_frag_hdr_len; 8206 remlen = ntohs(ip6h->ip6_plen) + 8207 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8208 pkt_len = msgdsize(mp); 8209 used = 0; 8210 break; 8211 } 8212 case IPPROTO_HOPOPTS: 8213 if (hada_mp != NULL) { 8214 ip0dbg(("hop hada drop\n")); 8215 goto hada_drop; 8216 } 8217 /* 8218 * Illegal header sequence. 8219 * (Hop-by-hop headers are processed above 8220 * and required to immediately follow IPv6 header) 8221 */ 8222 icmp_param_problem_v6(WR(q), first_mp, 8223 ICMP6_PARAMPROB_NEXTHEADER, 8224 prev_nexthdr_offset, 8225 B_FALSE, B_FALSE, zoneid, ipst); 8226 return; 8227 } 8228 case IPPROTO_ROUTING: { 8229 uint_t ehdrlen; 8230 ip6_rthdr_t *rthdr; 8231 8232 /* Check if AH is present. 
*/ 8233 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8234 hada_mp, zoneid)) { 8235 ip0dbg(("routing hada drop\n")); 8236 return; 8237 } 8238 8239 /* 8240 * Reinitialize pointers, as ipsec_early_ah_v6() does 8241 * complete pullups. We don't have to do more pullups 8242 * as a result. 8243 */ 8244 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8245 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8246 ip6h = (ip6_t *)mp->b_rptr; 8247 8248 if (remlen < MIN_EHDR_LEN) 8249 goto pkt_too_short; 8250 rthdr = (ip6_rthdr_t *)whereptr; 8251 nexthdr = rthdr->ip6r_nxt; 8252 prev_nexthdr_offset = (uint_t)(whereptr - 8253 (uint8_t *)ip6h); 8254 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8255 if (remlen < ehdrlen) 8256 goto pkt_too_short; 8257 if (rthdr->ip6r_segleft != 0) { 8258 /* Not end of source route */ 8259 if (ll_multicast) { 8260 BUMP_MIB(ill->ill_ip_mib, 8261 ipIfStatsForwProhibits); 8262 freemsg(hada_mp); 8263 freemsg(mp); 8264 return; 8265 } 8266 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8267 flags, hada_mp, dl_mp); 8268 return; 8269 } 8270 used = ehdrlen; 8271 break; 8272 } 8273 case IPPROTO_AH: 8274 case IPPROTO_ESP: { 8275 /* 8276 * Fast path for AH/ESP. If this is the first time 8277 * we are sending a datagram to AH/ESP, allocate 8278 * a IPSEC_IN message and prepend it. Otherwise, 8279 * just fanout. 8280 */ 8281 8282 ipsec_in_t *ii; 8283 int ipsec_rc; 8284 ipsec_stack_t *ipss; 8285 8286 ipss = ipst->ips_netstack->netstack_ipsec; 8287 if (!mctl_present) { 8288 ASSERT(first_mp == mp); 8289 first_mp = ipsec_in_alloc(B_FALSE, 8290 ipst->ips_netstack); 8291 if (first_mp == NULL) { 8292 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8293 "allocation failure.\n")); 8294 BUMP_MIB(ill->ill_ip_mib, 8295 ipIfStatsInDiscards); 8296 freemsg(mp); 8297 return; 8298 } 8299 /* 8300 * Store the ill_index so that when we come back 8301 * from IPSEC we ride on the same queue. 
8302 */ 8303 ii = (ipsec_in_t *)first_mp->b_rptr; 8304 ii->ipsec_in_ill_index = 8305 ill->ill_phyint->phyint_ifindex; 8306 ii->ipsec_in_rill_index = 8307 ii->ipsec_in_ill_index; 8308 first_mp->b_cont = mp; 8309 /* 8310 * Cache hardware acceleration info. 8311 */ 8312 if (hada_mp != NULL) { 8313 IPSECHW_DEBUG(IPSECHW_PKT, 8314 ("ip_rput_data_v6: " 8315 "caching data attr.\n")); 8316 ii->ipsec_in_accelerated = B_TRUE; 8317 ii->ipsec_in_da = hada_mp; 8318 hada_mp = NULL; 8319 } 8320 } else { 8321 ii = (ipsec_in_t *)first_mp->b_rptr; 8322 } 8323 8324 if (!ipsec_loaded(ipss)) { 8325 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8326 zoneid, ipst); 8327 return; 8328 } 8329 8330 /* select inbound SA and have IPsec process the pkt */ 8331 if (nexthdr == IPPROTO_ESP) { 8332 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8333 ipst->ips_netstack); 8334 if (esph == NULL) 8335 return; 8336 ASSERT(ii->ipsec_in_esp_sa != NULL); 8337 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8338 NULL); 8339 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8340 first_mp, esph); 8341 } else { 8342 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8343 ipst->ips_netstack); 8344 if (ah == NULL) 8345 return; 8346 ASSERT(ii->ipsec_in_ah_sa != NULL); 8347 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8348 NULL); 8349 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8350 first_mp, ah); 8351 } 8352 8353 switch (ipsec_rc) { 8354 case IPSEC_STATUS_SUCCESS: 8355 break; 8356 case IPSEC_STATUS_FAILED: 8357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8358 /* FALLTHRU */ 8359 case IPSEC_STATUS_PENDING: 8360 return; 8361 } 8362 /* we're done with IPsec processing, send it up */ 8363 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8364 return; 8365 } 8366 case IPPROTO_NONE: 8367 /* All processing is done. Count as "delivered". 
*/ 8368 freemsg(hada_mp); 8369 freemsg(first_mp); 8370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8371 return; 8372 } 8373 whereptr += used; 8374 ASSERT(remlen >= used); 8375 remlen -= used; 8376 } 8377 /* NOTREACHED */ 8378 8379 pkt_too_short: 8380 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8381 ip6_len, pkt_len, remlen)); 8382 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8383 freemsg(hada_mp); 8384 freemsg(first_mp); 8385 return; 8386 udp_fanout: 8387 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8388 connp = NULL; 8389 } else { 8390 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8391 ipst); 8392 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8393 CONN_DEC_REF(connp); 8394 connp = NULL; 8395 } 8396 } 8397 8398 if (connp == NULL) { 8399 uint32_t ports; 8400 8401 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8402 UDP_PORTS_OFFSET); 8403 IP6_STAT(ipst, ip6_udp_slow_path); 8404 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8405 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8406 zoneid); 8407 return; 8408 } 8409 8410 if (CONN_UDP_FLOWCTLD(connp)) { 8411 freemsg(first_mp); 8412 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8413 CONN_DEC_REF(connp); 8414 return; 8415 } 8416 8417 /* Initiate IPPF processing */ 8418 if (IP6_IN_IPP(flags, ipst)) { 8419 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8420 if (mp == NULL) { 8421 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8422 CONN_DEC_REF(connp); 8423 return; 8424 } 8425 } 8426 8427 if (connp->conn_ip_recvpktinfo || 8428 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8429 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8430 if (mp == NULL) { 8431 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8432 CONN_DEC_REF(connp); 8433 return; 8434 } 8435 } 8436 8437 IP6_STAT(ipst, ip6_udp_fast_path); 8438 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8439 8440 /* Send it upstream */ 8441 CONN_UDP_RECV(connp, mp); 8442 8443 
CONN_DEC_REF(connp); 8444 freemsg(hada_mp); 8445 return; 8446 8447 hada_drop: 8448 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8449 /* IPsec kstats: bump counter here */ 8450 freemsg(hada_mp); 8451 freemsg(first_mp); 8452 } 8453 8454 /* 8455 * Reassemble fragment. 8456 * When it returns a completed message the first mblk will only contain 8457 * the headers prior to the fragment header. 8458 * 8459 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8460 * of the preceding header. This is needed to patch the previous header's 8461 * nexthdr field when reassembly completes. 8462 */ 8463 static mblk_t * 8464 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8465 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8466 uint32_t *cksum_val, uint16_t *cksum_flags) 8467 { 8468 ill_t *ill = (ill_t *)q->q_ptr; 8469 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8470 uint16_t offset; 8471 boolean_t more_frags; 8472 uint8_t nexthdr = fraghdr->ip6f_nxt; 8473 in6_addr_t *v6dst_ptr; 8474 in6_addr_t *v6src_ptr; 8475 uint_t end; 8476 uint_t hdr_length; 8477 size_t count; 8478 ipf_t *ipf; 8479 ipf_t **ipfp; 8480 ipfb_t *ipfb; 8481 mblk_t *mp1; 8482 uint8_t ecn_info = 0; 8483 size_t msg_len; 8484 mblk_t *tail_mp; 8485 mblk_t *t_mp; 8486 boolean_t pruned = B_FALSE; 8487 uint32_t sum_val; 8488 uint16_t sum_flags; 8489 ip_stack_t *ipst = ill->ill_ipst; 8490 8491 if (cksum_val != NULL) 8492 *cksum_val = 0; 8493 if (cksum_flags != NULL) 8494 *cksum_flags = 0; 8495 8496 /* 8497 * We utilize hardware computed checksum info only for UDP since 8498 * IP fragmentation is a normal occurence for the protocol. In 8499 * addition, checksum offload support for IP fragments carrying 8500 * UDP payload is commonly implemented across network adapters. 
8501 */ 8502 ASSERT(ill != NULL); 8503 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8504 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8505 mblk_t *mp1 = mp->b_cont; 8506 int32_t len; 8507 8508 /* Record checksum information from the packet */ 8509 sum_val = (uint32_t)DB_CKSUM16(mp); 8510 sum_flags = DB_CKSUMFLAGS(mp); 8511 8512 /* fragmented payload offset from beginning of mblk */ 8513 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8514 8515 if ((sum_flags & HCK_PARTIALCKSUM) && 8516 (mp1 == NULL || mp1->b_cont == NULL) && 8517 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8518 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8519 uint32_t adj; 8520 /* 8521 * Partial checksum has been calculated by hardware 8522 * and attached to the packet; in addition, any 8523 * prepended extraneous data is even byte aligned. 8524 * If any such data exists, we adjust the checksum; 8525 * this would also handle any postpended data. 8526 */ 8527 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8528 mp, mp1, len, adj); 8529 8530 /* One's complement subtract extraneous checksum */ 8531 if (adj >= sum_val) 8532 sum_val = ~(adj - sum_val) & 0xFFFF; 8533 else 8534 sum_val -= adj; 8535 } 8536 } else { 8537 sum_val = 0; 8538 sum_flags = 0; 8539 } 8540 8541 /* Clear hardware checksumming flag */ 8542 DB_CKSUMFLAGS(mp) = 0; 8543 8544 /* 8545 * Note: Fragment offset in header is in 8-octet units. 8546 * Clearing least significant 3 bits not only extracts 8547 * it but also gets it in units of octets. 8548 */ 8549 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8550 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8551 8552 /* 8553 * Is the more frags flag on and the payload length not a multiple 8554 * of eight? 
8555 */ 8556 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8557 zoneid_t zoneid; 8558 8559 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8560 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8561 if (zoneid == ALL_ZONES) { 8562 freemsg(mp); 8563 return (NULL); 8564 } 8565 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8566 (uint32_t)((char *)&ip6h->ip6_plen - 8567 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8568 return (NULL); 8569 } 8570 8571 v6src_ptr = &ip6h->ip6_src; 8572 v6dst_ptr = &ip6h->ip6_dst; 8573 end = remlen; 8574 8575 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8576 end += offset; 8577 8578 /* 8579 * Would fragment cause reassembled packet to have a payload length 8580 * greater than IP_MAXPACKET - the max payload size? 8581 */ 8582 if (end > IP_MAXPACKET) { 8583 zoneid_t zoneid; 8584 8585 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8586 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8587 if (zoneid == ALL_ZONES) { 8588 freemsg(mp); 8589 return (NULL); 8590 } 8591 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8592 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8593 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8594 return (NULL); 8595 } 8596 8597 /* 8598 * This packet just has one fragment. Reassembly not 8599 * needed. 8600 */ 8601 if (!more_frags && offset == 0) { 8602 goto reass_done; 8603 } 8604 8605 /* 8606 * Drop the fragmented as early as possible, if 8607 * we don't have resource(s) to re-assemble. 8608 */ 8609 if (ipst->ips_ip_reass_queue_bytes == 0) { 8610 freemsg(mp); 8611 return (NULL); 8612 } 8613 8614 /* Record the ECN field info. */ 8615 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8616 /* 8617 * If this is not the first fragment, dump the unfragmentable 8618 * portion of the packet. 8619 */ 8620 if (offset) 8621 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8622 8623 /* 8624 * Fragmentation reassembly. 
Each ILL has a hash table for 8625 * queueing packets undergoing reassembly for all IPIFs 8626 * associated with the ILL. The hash is based on the packet 8627 * IP ident field. The ILL frag hash table was allocated 8628 * as a timer block at the time the ILL was created. Whenever 8629 * there is anything on the reassembly queue, the timer will 8630 * be running. 8631 */ 8632 msg_len = MBLKSIZE(mp); 8633 tail_mp = mp; 8634 while (tail_mp->b_cont != NULL) { 8635 tail_mp = tail_mp->b_cont; 8636 msg_len += MBLKSIZE(tail_mp); 8637 } 8638 /* 8639 * If the reassembly list for this ILL will get too big 8640 * prune it. 8641 */ 8642 8643 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8644 ipst->ips_ip_reass_queue_bytes) { 8645 ill_frag_prune(ill, 8646 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8647 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8648 pruned = B_TRUE; 8649 } 8650 8651 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8652 mutex_enter(&ipfb->ipfb_lock); 8653 8654 ipfp = &ipfb->ipfb_ipf; 8655 /* Try to find an existing fragment queue for this packet. */ 8656 for (;;) { 8657 ipf = ipfp[0]; 8658 if (ipf) { 8659 /* 8660 * It has to match on ident, source address, and 8661 * dest address. 8662 */ 8663 if (ipf->ipf_ident == ident && 8664 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8665 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8666 8667 /* 8668 * If we have received too many 8669 * duplicate fragments for this packet 8670 * free it. 8671 */ 8672 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8673 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8674 freemsg(mp); 8675 mutex_exit(&ipfb->ipfb_lock); 8676 return (NULL); 8677 } 8678 8679 break; 8680 } 8681 ipfp = &ipf->ipf_hash_next; 8682 continue; 8683 } 8684 8685 8686 /* 8687 * If we pruned the list, do we want to store this new 8688 * fragment?. We apply an optimization here based on the 8689 * fact that most fragments will be received in order. 
8690 * So if the offset of this incoming fragment is zero, 8691 * it is the first fragment of a new packet. We will 8692 * keep it. Otherwise drop the fragment, as we have 8693 * probably pruned the packet already (since the 8694 * packet cannot be found). 8695 */ 8696 8697 if (pruned && offset != 0) { 8698 mutex_exit(&ipfb->ipfb_lock); 8699 freemsg(mp); 8700 return (NULL); 8701 } 8702 8703 /* New guy. Allocate a frag message. */ 8704 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8705 if (!mp1) { 8706 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8707 freemsg(mp); 8708 partial_reass_done: 8709 mutex_exit(&ipfb->ipfb_lock); 8710 return (NULL); 8711 } 8712 8713 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8714 /* 8715 * Too many fragmented packets in this hash bucket. 8716 * Free the oldest. 8717 */ 8718 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8719 } 8720 8721 mp1->b_cont = mp; 8722 8723 /* Initialize the fragment header. */ 8724 ipf = (ipf_t *)mp1->b_rptr; 8725 ipf->ipf_mp = mp1; 8726 ipf->ipf_ptphn = ipfp; 8727 ipfp[0] = ipf; 8728 ipf->ipf_hash_next = NULL; 8729 ipf->ipf_ident = ident; 8730 ipf->ipf_v6src = *v6src_ptr; 8731 ipf->ipf_v6dst = *v6dst_ptr; 8732 /* Record reassembly start time. 
*/ 8733 ipf->ipf_timestamp = gethrestime_sec(); 8734 /* Record ipf generation and account for frag header */ 8735 ipf->ipf_gen = ill->ill_ipf_gen++; 8736 ipf->ipf_count = MBLKSIZE(mp1); 8737 ipf->ipf_protocol = nexthdr; 8738 ipf->ipf_nf_hdr_len = 0; 8739 ipf->ipf_prev_nexthdr_offset = 0; 8740 ipf->ipf_last_frag_seen = B_FALSE; 8741 ipf->ipf_ecn = ecn_info; 8742 ipf->ipf_num_dups = 0; 8743 ipfb->ipfb_frag_pkts++; 8744 ipf->ipf_checksum = 0; 8745 ipf->ipf_checksum_flags = 0; 8746 8747 /* Store checksum value in fragment header */ 8748 if (sum_flags != 0) { 8749 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8750 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8751 ipf->ipf_checksum = sum_val; 8752 ipf->ipf_checksum_flags = sum_flags; 8753 } 8754 8755 /* 8756 * We handle reassembly two ways. In the easy case, 8757 * where all the fragments show up in order, we do 8758 * minimal bookkeeping, and just clip new pieces on 8759 * the end. If we ever see a hole, then we go off 8760 * to ip_reassemble which has to mark the pieces and 8761 * keep track of the number of holes, etc. Obviously, 8762 * the point of having both mechanisms is so we can 8763 * handle the easy case as efficiently as possible. 8764 */ 8765 if (offset == 0) { 8766 /* Easy case, in-order reassembly so far. */ 8767 /* Update the byte count */ 8768 ipf->ipf_count += msg_len; 8769 ipf->ipf_tail_mp = tail_mp; 8770 /* 8771 * Keep track of next expected offset in 8772 * ipf_end. 8773 */ 8774 ipf->ipf_end = end; 8775 ipf->ipf_nf_hdr_len = hdr_length; 8776 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8777 } else { 8778 /* Hard case, hole at the beginning. */ 8779 ipf->ipf_tail_mp = NULL; 8780 /* 8781 * ipf_end == 0 means that we have given up 8782 * on easy reassembly. 8783 */ 8784 ipf->ipf_end = 0; 8785 8786 /* Forget checksum offload from now on */ 8787 ipf->ipf_checksum_flags = 0; 8788 8789 /* 8790 * ipf_hole_cnt is set by ip_reassemble. 8791 * ipf_count is updated by ip_reassemble. 
8792 * No need to check for return value here 8793 * as we don't expect reassembly to complete or 8794 * fail for the first fragment itself. 8795 */ 8796 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8797 msg_len); 8798 } 8799 /* Update per ipfb and ill byte counts */ 8800 ipfb->ipfb_count += ipf->ipf_count; 8801 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8802 ill->ill_frag_count += ipf->ipf_count; 8803 /* If the frag timer wasn't already going, start it. */ 8804 mutex_enter(&ill->ill_lock); 8805 ill_frag_timer_start(ill); 8806 mutex_exit(&ill->ill_lock); 8807 goto partial_reass_done; 8808 } 8809 8810 /* 8811 * If the packet's flag has changed (it could be coming up 8812 * from an interface different than the previous, therefore 8813 * possibly different checksum capability), then forget about 8814 * any stored checksum states. Otherwise add the value to 8815 * the existing one stored in the fragment header. 8816 */ 8817 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8818 sum_val += ipf->ipf_checksum; 8819 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8820 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8821 ipf->ipf_checksum = sum_val; 8822 } else if (ipf->ipf_checksum_flags != 0) { 8823 /* Forget checksum offload from now on */ 8824 ipf->ipf_checksum_flags = 0; 8825 } 8826 8827 /* 8828 * We have a new piece of a datagram which is already being 8829 * reassembled. Update the ECN info if all IP fragments 8830 * are ECN capable. If there is one which is not, clear 8831 * all the info. If there is at least one which has CE 8832 * code point, IP needs to report that up to transport. 
8833 */ 8834 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8835 if (ecn_info == IPH_ECN_CE) 8836 ipf->ipf_ecn = IPH_ECN_CE; 8837 } else { 8838 ipf->ipf_ecn = IPH_ECN_NECT; 8839 } 8840 8841 if (offset && ipf->ipf_end == offset) { 8842 /* The new fragment fits at the end */ 8843 ipf->ipf_tail_mp->b_cont = mp; 8844 /* Update the byte count */ 8845 ipf->ipf_count += msg_len; 8846 /* Update per ipfb and ill byte counts */ 8847 ipfb->ipfb_count += msg_len; 8848 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8849 ill->ill_frag_count += msg_len; 8850 if (more_frags) { 8851 /* More to come. */ 8852 ipf->ipf_end = end; 8853 ipf->ipf_tail_mp = tail_mp; 8854 goto partial_reass_done; 8855 } 8856 } else { 8857 /* 8858 * Go do the hard cases. 8859 * Call ip_reassemble(). 8860 */ 8861 int ret; 8862 8863 if (offset == 0) { 8864 if (ipf->ipf_prev_nexthdr_offset == 0) { 8865 ipf->ipf_nf_hdr_len = hdr_length; 8866 ipf->ipf_prev_nexthdr_offset = 8867 *prev_nexthdr_offset; 8868 } 8869 } 8870 /* Save current byte count */ 8871 count = ipf->ipf_count; 8872 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8873 8874 /* Count of bytes added and subtracted (freeb()ed) */ 8875 count = ipf->ipf_count - count; 8876 if (count) { 8877 /* Update per ipfb and ill byte counts */ 8878 ipfb->ipfb_count += count; 8879 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8880 ill->ill_frag_count += count; 8881 } 8882 if (ret == IP_REASS_PARTIAL) { 8883 goto partial_reass_done; 8884 } else if (ret == IP_REASS_FAILED) { 8885 /* Reassembly failed. Free up all resources */ 8886 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8887 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8888 IP_REASS_SET_START(t_mp, 0); 8889 IP_REASS_SET_END(t_mp, 0); 8890 } 8891 freemsg(mp); 8892 goto partial_reass_done; 8893 } 8894 8895 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8896 } 8897 /* 8898 * We have completed reassembly. Unhook the frag header from 8899 * the reassembly list. 
8900 * 8901 * Grab the unfragmentable header length next header value out 8902 * of the first fragment 8903 */ 8904 ASSERT(ipf->ipf_nf_hdr_len != 0); 8905 hdr_length = ipf->ipf_nf_hdr_len; 8906 8907 /* 8908 * Before we free the frag header, record the ECN info 8909 * to report back to the transport. 8910 */ 8911 ecn_info = ipf->ipf_ecn; 8912 8913 /* 8914 * Store the nextheader field in the header preceding the fragment 8915 * header 8916 */ 8917 nexthdr = ipf->ipf_protocol; 8918 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8919 ipfp = ipf->ipf_ptphn; 8920 8921 /* We need to supply these to caller */ 8922 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8923 sum_val = ipf->ipf_checksum; 8924 else 8925 sum_val = 0; 8926 8927 mp1 = ipf->ipf_mp; 8928 count = ipf->ipf_count; 8929 ipf = ipf->ipf_hash_next; 8930 if (ipf) 8931 ipf->ipf_ptphn = ipfp; 8932 ipfp[0] = ipf; 8933 ill->ill_frag_count -= count; 8934 ASSERT(ipfb->ipfb_count >= count); 8935 ipfb->ipfb_count -= count; 8936 ipfb->ipfb_frag_pkts--; 8937 mutex_exit(&ipfb->ipfb_lock); 8938 /* Ditch the frag header. */ 8939 mp = mp1->b_cont; 8940 freeb(mp1); 8941 8942 /* 8943 * Make sure the packet is good by doing some sanity 8944 * check. If bad we can silentely drop the packet. 8945 */ 8946 reass_done: 8947 if (hdr_length < sizeof (ip6_frag_t)) { 8948 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8949 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8950 freemsg(mp); 8951 return (NULL); 8952 } 8953 8954 /* 8955 * Remove the fragment header from the initial header by 8956 * splitting the mblk into the non-fragmentable header and 8957 * everthing after the fragment extension header. This has the 8958 * side effect of putting all the headers that need destination 8959 * processing into the b_cont block-- on return this fact is 8960 * used in order to avoid having to look at the extensions 8961 * already processed. 
8962 * 8963 * Note that this code assumes that the unfragmentable portion 8964 * of the header is in the first mblk and increments 8965 * the read pointer past it. If this assumption is broken 8966 * this code fails badly. 8967 */ 8968 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8969 mblk_t *nmp; 8970 8971 if (!(nmp = dupb(mp))) { 8972 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8973 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8974 freemsg(mp); 8975 return (NULL); 8976 } 8977 nmp->b_cont = mp->b_cont; 8978 mp->b_cont = nmp; 8979 nmp->b_rptr += hdr_length; 8980 } 8981 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8982 8983 ip6h = (ip6_t *)mp->b_rptr; 8984 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8985 8986 /* Restore original IP length in header. */ 8987 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8988 /* Record the ECN info. */ 8989 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8990 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8991 8992 /* Reassembly is successful; return checksum information if needed */ 8993 if (cksum_val != NULL) 8994 *cksum_val = sum_val; 8995 if (cksum_flags != NULL) 8996 *cksum_flags = sum_flags; 8997 8998 return (mp); 8999 } 9000 9001 /* 9002 * Walk through the options to see if there is a routing header. 9003 * If present get the destination which is the last address of 9004 * the option. 
9005 */ 9006 in6_addr_t 9007 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9008 { 9009 uint8_t nexthdr; 9010 uint8_t *whereptr; 9011 ip6_hbh_t *hbhhdr; 9012 ip6_dest_t *dsthdr; 9013 ip6_rthdr0_t *rthdr; 9014 ip6_frag_t *fraghdr; 9015 int ehdrlen; 9016 int left; 9017 in6_addr_t *ap, rv; 9018 9019 if (is_fragment != NULL) 9020 *is_fragment = B_FALSE; 9021 9022 rv = ip6h->ip6_dst; 9023 9024 nexthdr = ip6h->ip6_nxt; 9025 whereptr = (uint8_t *)&ip6h[1]; 9026 for (;;) { 9027 9028 ASSERT(nexthdr != IPPROTO_RAW); 9029 switch (nexthdr) { 9030 case IPPROTO_HOPOPTS: 9031 hbhhdr = (ip6_hbh_t *)whereptr; 9032 nexthdr = hbhhdr->ip6h_nxt; 9033 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9034 break; 9035 case IPPROTO_DSTOPTS: 9036 dsthdr = (ip6_dest_t *)whereptr; 9037 nexthdr = dsthdr->ip6d_nxt; 9038 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9039 break; 9040 case IPPROTO_ROUTING: 9041 rthdr = (ip6_rthdr0_t *)whereptr; 9042 nexthdr = rthdr->ip6r0_nxt; 9043 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9044 9045 left = rthdr->ip6r0_segleft; 9046 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9047 rv = *(ap + left - 1); 9048 /* 9049 * If the caller doesn't care whether the packet 9050 * is a fragment or not, we can stop here since 9051 * we have our destination. 9052 */ 9053 if (is_fragment == NULL) 9054 goto done; 9055 break; 9056 case IPPROTO_FRAGMENT: 9057 fraghdr = (ip6_frag_t *)whereptr; 9058 nexthdr = fraghdr->ip6f_nxt; 9059 ehdrlen = sizeof (ip6_frag_t); 9060 if (is_fragment != NULL) 9061 *is_fragment = B_TRUE; 9062 goto done; 9063 default : 9064 goto done; 9065 } 9066 whereptr += ehdrlen; 9067 } 9068 9069 done: 9070 return (rv); 9071 } 9072 9073 /* 9074 * ip_source_routed_v6: 9075 * This function is called by redirect code in ip_rput_data_v6 to 9076 * know whether this packet is source routed through this node i.e 9077 * whether this node (router) is part of the journey. 
This 9078 * function is called under two cases : 9079 * 9080 * case 1 : Routing header was processed by this node and 9081 * ip_process_rthdr replaced ip6_dst with the next hop 9082 * and we are forwarding the packet to the next hop. 9083 * 9084 * case 2 : Routing header was not processed by this node and we 9085 * are just forwarding the packet. 9086 * 9087 * For case (1) we don't want to send redirects. For case(2) we 9088 * want to send redirects. 9089 */ 9090 static boolean_t 9091 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9092 { 9093 uint8_t nexthdr; 9094 in6_addr_t *addrptr; 9095 ip6_rthdr0_t *rthdr; 9096 uint8_t numaddr; 9097 ip6_hbh_t *hbhhdr; 9098 uint_t ehdrlen; 9099 uint8_t *byteptr; 9100 9101 ip2dbg(("ip_source_routed_v6\n")); 9102 nexthdr = ip6h->ip6_nxt; 9103 ehdrlen = IPV6_HDR_LEN; 9104 9105 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9106 while (nexthdr == IPPROTO_HOPOPTS || 9107 nexthdr == IPPROTO_DSTOPTS) { 9108 byteptr = (uint8_t *)ip6h + ehdrlen; 9109 /* 9110 * Check if we have already processed 9111 * packets or we are just a forwarding 9112 * router which only pulled up msgs up 9113 * to IPV6HDR and one HBH ext header 9114 */ 9115 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9116 ip2dbg(("ip_source_routed_v6: Extension" 9117 " headers not processed\n")); 9118 return (B_FALSE); 9119 } 9120 hbhhdr = (ip6_hbh_t *)byteptr; 9121 nexthdr = hbhhdr->ip6h_nxt; 9122 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9123 } 9124 switch (nexthdr) { 9125 case IPPROTO_ROUTING: 9126 byteptr = (uint8_t *)ip6h + ehdrlen; 9127 /* 9128 * If for some reason, we haven't pulled up 9129 * the routing hdr data mblk, then we must 9130 * not have processed it at all. So for sure 9131 * we are not part of the source routed journey. 
9132 */ 9133 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9134 ip2dbg(("ip_source_routed_v6: Routing" 9135 " header not processed\n")); 9136 return (B_FALSE); 9137 } 9138 rthdr = (ip6_rthdr0_t *)byteptr; 9139 /* 9140 * Either we are an intermediate router or the 9141 * last hop before destination and we have 9142 * already processed the routing header. 9143 * If segment_left is greater than or equal to zero, 9144 * then we must be the (numaddr - segleft) entry 9145 * of the routing header. Although ip6r0_segleft 9146 * is a unit8_t variable, we still check for zero 9147 * or greater value, if in case the data type 9148 * is changed someday in future. 9149 */ 9150 if (rthdr->ip6r0_segleft > 0 || 9151 rthdr->ip6r0_segleft == 0) { 9152 ire_t *ire = NULL; 9153 9154 numaddr = rthdr->ip6r0_len / 2; 9155 addrptr = (in6_addr_t *)((char *)rthdr + 9156 sizeof (*rthdr)); 9157 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9158 if (addrptr != NULL) { 9159 ire = ire_ctable_lookup_v6(addrptr, NULL, 9160 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9161 MATCH_IRE_TYPE, 9162 ipst); 9163 if (ire != NULL) { 9164 ire_refrele(ire); 9165 return (B_TRUE); 9166 } 9167 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9168 } 9169 } 9170 /* FALLTHRU */ 9171 default: 9172 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9173 return (B_FALSE); 9174 } 9175 } 9176 9177 /* 9178 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9179 * Assumes that the following set of headers appear in the first 9180 * mblk: 9181 * ip6i_t (if present) CAN also appear as a separate mblk. 9182 * ip6_t 9183 * Any extension headers 9184 * TCP/UDP/SCTP header (if present) 9185 * The routine can handle an ICMPv6 header that is not in the first mblk. 9186 * 9187 * The order to determine the outgoing interface is as follows: 9188 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9189 * 2. If conn_nofailover_ill is set then use that ill. 9190 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9191 * 4. If q is an ill queue and (link local or multicast destination) then 9192 * use that ill. 9193 * 5. If IPV6_BOUND_IF has been set use that ill. 9194 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9195 * look for the best IRE match for the unspecified group to determine 9196 * the ill. 9197 * 7. For unicast: Just do an IRE lookup for the best match. 9198 * 9199 * arg2 is always a queue_t *. 9200 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9201 * the zoneid. 9202 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9203 */ 9204 void 9205 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9206 { 9207 conn_t *connp = NULL; 9208 queue_t *q = (queue_t *)arg2; 9209 ire_t *ire = NULL; 9210 ire_t *sctp_ire = NULL; 9211 ip6_t *ip6h; 9212 in6_addr_t *v6dstp; 9213 ill_t *ill = NULL; 9214 ipif_t *ipif; 9215 ip6i_t *ip6i; 9216 int cksum_request; /* -1 => normal. */ 9217 /* 1 => Skip TCP/UDP/SCTP checksum */ 9218 /* Otherwise contains insert offset for checksum */ 9219 int unspec_src; 9220 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9221 mib2_ipIfStatsEntry_t *mibptr; 9222 int match_flags = MATCH_IRE_ILL_GROUP; 9223 boolean_t attach_if = B_FALSE; 9224 mblk_t *first_mp; 9225 boolean_t mctl_present; 9226 ipsec_out_t *io; 9227 boolean_t drop_if_delayed = B_FALSE; 9228 boolean_t multirt_need_resolve = B_FALSE; 9229 mblk_t *copy_mp = NULL; 9230 int err; 9231 int ip6i_flags = 0; 9232 zoneid_t zoneid; 9233 ill_t *saved_ill = NULL; 9234 boolean_t conn_lock_held; 9235 boolean_t need_decref = B_FALSE; 9236 ip_stack_t *ipst; 9237 9238 if (q->q_next != NULL) { 9239 ill = (ill_t *)q->q_ptr; 9240 ipst = ill->ill_ipst; 9241 } else { 9242 connp = (conn_t *)arg; 9243 ASSERT(connp != NULL); 9244 ipst = connp->conn_netstack->netstack_ip; 9245 } 9246 9247 /* 9248 * Highest bit in version field is Reachability Confirmation bit 9249 * used by NUD in ip_xmit_v6(). 9250 */ 9251 #ifdef _BIG_ENDIAN 9252 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9253 #else 9254 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9255 #endif 9256 9257 /* 9258 * M_CTL comes from 6 places 9259 * 9260 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9261 * both V4 and V6 datagrams. 9262 * 9263 * 2) AH/ESP sends down M_CTL after doing their job with both 9264 * V4 and V6 datagrams. 9265 * 9266 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9267 * attached. 9268 * 9269 * 4) Notifications from an external resolver (for XRESOLV ifs) 9270 * 9271 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9272 * IPsec hardware acceleration support. 9273 * 9274 * 6) TUN_HELLO. 9275 * 9276 * We need to handle (1)'s IPv6 case and (3) here. For the 9277 * IPv4 case in (1), and (2), IPSEC processing has already 9278 * started. The code in ip_wput() already knows how to handle 9279 * continuing IPSEC processing (for IPv4 and IPv6). All other 9280 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9281 * for handling. 
9282 */ 9283 first_mp = mp; 9284 mctl_present = B_FALSE; 9285 io = NULL; 9286 9287 /* Multidata transmit? */ 9288 if (DB_TYPE(mp) == M_MULTIDATA) { 9289 /* 9290 * We should never get here, since all Multidata messages 9291 * originating from tcp should have been directed over to 9292 * tcp_multisend() in the first place. 9293 */ 9294 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9295 freemsg(mp); 9296 return; 9297 } else if (DB_TYPE(mp) == M_CTL) { 9298 uint32_t mctltype = 0; 9299 uint32_t mlen = MBLKL(first_mp); 9300 9301 mp = mp->b_cont; 9302 mctl_present = B_TRUE; 9303 io = (ipsec_out_t *)first_mp->b_rptr; 9304 9305 /* 9306 * Validate this M_CTL message. The only three types of 9307 * M_CTL messages we expect to see in this code path are 9308 * ipsec_out_t or ipsec_in_t structures (allocated as 9309 * ipsec_info_t unions), or ipsec_ctl_t structures. 9310 * The ipsec_out_type and ipsec_in_type overlap in the two 9311 * data structures, and they are either set to IPSEC_OUT 9312 * or IPSEC_IN depending on which data structure it is. 9313 * ipsec_ctl_t is an IPSEC_CTL. 9314 * 9315 * All other M_CTL messages are sent to ip_wput_nondata() 9316 * for handling. 9317 */ 9318 if (mlen >= sizeof (io->ipsec_out_type)) 9319 mctltype = io->ipsec_out_type; 9320 9321 if ((mlen == sizeof (ipsec_ctl_t)) && 9322 (mctltype == IPSEC_CTL)) { 9323 ip_output(arg, first_mp, arg2, caller); 9324 return; 9325 } 9326 9327 if ((mlen < sizeof (ipsec_info_t)) || 9328 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9329 mp == NULL) { 9330 ip_wput_nondata(NULL, q, first_mp, NULL); 9331 return; 9332 } 9333 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9334 if (q->q_next == NULL) { 9335 ip6h = (ip6_t *)mp->b_rptr; 9336 /* 9337 * For a freshly-generated TCP dgram that needs IPV6 9338 * processing, don't call ip_wput immediately. We can 9339 * tell this by the ipsec_out_proc_begin. 
In-progress 9340 * IPSEC_OUT messages have proc_begin set to TRUE, 9341 * and we want to send all IPSEC_IN messages to 9342 * ip_wput() for IPsec processing or finishing. 9343 */ 9344 if (mctltype == IPSEC_IN || 9345 IPVER(ip6h) != IPV6_VERSION || 9346 io->ipsec_out_proc_begin) { 9347 mibptr = &ipst->ips_ip6_mib; 9348 goto notv6; 9349 } 9350 } 9351 } else if (DB_TYPE(mp) != M_DATA) { 9352 ip_wput_nondata(NULL, q, mp, NULL); 9353 return; 9354 } 9355 9356 ip6h = (ip6_t *)mp->b_rptr; 9357 9358 if (IPVER(ip6h) != IPV6_VERSION) { 9359 mibptr = &ipst->ips_ip6_mib; 9360 goto notv6; 9361 } 9362 9363 if (q->q_next != NULL) { 9364 /* 9365 * We don't know if this ill will be used for IPv6 9366 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9367 * ipif_set_values() sets the ill_isv6 flag to true if 9368 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9369 * just drop the packet. 9370 */ 9371 if (!ill->ill_isv6) { 9372 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9373 "ILLF_IPV6 was set\n")); 9374 freemsg(first_mp); 9375 return; 9376 } 9377 /* For uniformity do a refhold */ 9378 mutex_enter(&ill->ill_lock); 9379 if (!ILL_CAN_LOOKUP(ill)) { 9380 mutex_exit(&ill->ill_lock); 9381 freemsg(first_mp); 9382 return; 9383 } 9384 ill_refhold_locked(ill); 9385 mutex_exit(&ill->ill_lock); 9386 mibptr = ill->ill_ip_mib; 9387 9388 ASSERT(mibptr != NULL); 9389 unspec_src = 0; 9390 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9391 do_outrequests = B_FALSE; 9392 zoneid = (zoneid_t)(uintptr_t)arg; 9393 } else { 9394 connp = (conn_t *)arg; 9395 ASSERT(connp != NULL); 9396 zoneid = connp->conn_zoneid; 9397 9398 /* is queue flow controlled? */ 9399 if ((q->q_first || connp->conn_draining) && 9400 (caller == IP_WPUT)) { 9401 /* 9402 * 1) TCP sends down M_CTL for detached connections. 9403 * 2) AH/ESP sends down M_CTL. 9404 * 9405 * We don't flow control either of the above. Only 9406 * UDP and others are flow controlled for which we 9407 * can't have a M_CTL. 
9408 */ 9409 ASSERT(first_mp == mp); 9410 (void) putq(q, mp); 9411 return; 9412 } 9413 mibptr = &ipst->ips_ip6_mib; 9414 unspec_src = connp->conn_unspec_src; 9415 do_outrequests = B_TRUE; 9416 if (mp->b_flag & MSGHASREF) { 9417 mp->b_flag &= ~MSGHASREF; 9418 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9419 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9420 need_decref = B_TRUE; 9421 } 9422 9423 /* 9424 * If there is a policy, try to attach an ipsec_out in 9425 * the front. At the end, first_mp either points to a 9426 * M_DATA message or IPSEC_OUT message linked to a 9427 * M_DATA message. We have to do it now as we might 9428 * lose the "conn" if we go through ip_newroute. 9429 */ 9430 if (!mctl_present && 9431 (connp->conn_out_enforce_policy || 9432 connp->conn_latch != NULL)) { 9433 ASSERT(first_mp == mp); 9434 /* XXX Any better way to get the protocol fast ? */ 9435 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9436 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9437 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9438 if (need_decref) 9439 CONN_DEC_REF(connp); 9440 return; 9441 } else { 9442 ASSERT(mp->b_datap->db_type == M_CTL); 9443 first_mp = mp; 9444 mp = mp->b_cont; 9445 mctl_present = B_TRUE; 9446 io = (ipsec_out_t *)first_mp->b_rptr; 9447 } 9448 } 9449 } 9450 9451 /* check for alignment and full IPv6 header */ 9452 if (!OK_32PTR((uchar_t *)ip6h) || 9453 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9454 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9455 if (do_outrequests) 9456 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9457 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9458 freemsg(first_mp); 9459 if (ill != NULL) 9460 ill_refrele(ill); 9461 if (need_decref) 9462 CONN_DEC_REF(connp); 9463 return; 9464 } 9465 v6dstp = &ip6h->ip6_dst; 9466 cksum_request = -1; 9467 ip6i = NULL; 9468 9469 /* 9470 * Once neighbor discovery has completed, ndp_process() will provide 9471 * locally generated packets for which processing can be reattempted. 
9472 * In these cases, connp is NULL and the original zone is part of a 9473 * prepended ipsec_out_t. 9474 */ 9475 if (io != NULL) { 9476 /* 9477 * When coming from icmp_input_v6, the zoneid might not match 9478 * for the loopback case, because inside icmp_input_v6 the 9479 * queue_t is a conn queue from the sending side. 9480 */ 9481 zoneid = io->ipsec_out_zoneid; 9482 ASSERT(zoneid != ALL_ZONES); 9483 } 9484 9485 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9486 /* 9487 * This is an ip6i_t header followed by an ip6_hdr. 9488 * Check which fields are set. 9489 * 9490 * When the packet comes from a transport we should have 9491 * all needed headers in the first mblk. However, when 9492 * going through ip_newroute*_v6 the ip6i might be in 9493 * a separate mblk when we return here. In that case 9494 * we pullup everything to ensure that extension and transport 9495 * headers "stay" in the first mblk. 9496 */ 9497 ip6i = (ip6i_t *)ip6h; 9498 ip6i_flags = ip6i->ip6i_flags; 9499 9500 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9501 ((mp->b_wptr - (uchar_t *)ip6i) >= 9502 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9503 9504 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9505 if (!pullupmsg(mp, -1)) { 9506 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9507 if (do_outrequests) { 9508 BUMP_MIB(mibptr, 9509 ipIfStatsHCOutRequests); 9510 } 9511 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9512 freemsg(first_mp); 9513 if (ill != NULL) 9514 ill_refrele(ill); 9515 if (need_decref) 9516 CONN_DEC_REF(connp); 9517 return; 9518 } 9519 ip6h = (ip6_t *)mp->b_rptr; 9520 v6dstp = &ip6h->ip6_dst; 9521 ip6i = (ip6i_t *)ip6h; 9522 } 9523 ip6h = (ip6_t *)&ip6i[1]; 9524 9525 /* 9526 * Advance rptr past the ip6i_t to get ready for 9527 * transmitting the packet. However, if the packet gets 9528 * passed to ip_newroute*_v6 then rptr is moved back so 9529 * that the ip6i_t header can be inspected when the 9530 * packet comes back here after passing through 9531 * ire_add_then_send. 
9532 */ 9533 mp->b_rptr = (uchar_t *)ip6h; 9534 9535 /* 9536 * IP6I_ATTACH_IF is set in this function when we had a 9537 * conn and it was either bound to the IPFF_NOFAILOVER address 9538 * or IPV6_BOUND_PIF was set. These options override other 9539 * options that set the ifindex. We come here with 9540 * IP6I_ATTACH_IF set when we can't find the ire and 9541 * ip_newroute_v6 is feeding the packet for second time. 9542 */ 9543 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9544 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9545 ASSERT(ip6i->ip6i_ifindex != 0); 9546 if (ill != NULL) 9547 ill_refrele(ill); 9548 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9549 NULL, NULL, NULL, NULL, ipst); 9550 if (ill == NULL) { 9551 if (do_outrequests) { 9552 BUMP_MIB(mibptr, 9553 ipIfStatsHCOutRequests); 9554 } 9555 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9556 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9557 ip6i->ip6i_ifindex)); 9558 if (need_decref) 9559 CONN_DEC_REF(connp); 9560 freemsg(first_mp); 9561 return; 9562 } 9563 mibptr = ill->ill_ip_mib; 9564 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9565 /* 9566 * Preserve the index so that when we return 9567 * from IPSEC processing, we know where to 9568 * send the packet. 9569 */ 9570 if (mctl_present) { 9571 ASSERT(io != NULL); 9572 io->ipsec_out_ill_index = 9573 ip6i->ip6i_ifindex; 9574 } 9575 } 9576 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9577 /* 9578 * This is a multipathing probe packet that has 9579 * been delayed in ND resolution. 
Drop the 9580 * packet for the reasons mentioned in 9581 * nce_queue_mp() 9582 */ 9583 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9584 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9585 freemsg(first_mp); 9586 ill_refrele(ill); 9587 if (need_decref) 9588 CONN_DEC_REF(connp); 9589 return; 9590 } 9591 } 9592 } 9593 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9594 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9595 9596 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9597 if (secpolicy_net_rawaccess(cr) != 0) { 9598 /* 9599 * Use IPCL_ZONEID to honor SO_ALLZONES. 9600 */ 9601 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9602 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9603 NULL, connp != NULL ? 9604 IPCL_ZONEID(connp) : zoneid, NULL, 9605 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9606 if (ire == NULL) { 9607 if (do_outrequests) 9608 BUMP_MIB(mibptr, 9609 ipIfStatsHCOutRequests); 9610 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9611 ip1dbg(("ip_wput_v6: bad source " 9612 "addr\n")); 9613 freemsg(first_mp); 9614 if (ill != NULL) 9615 ill_refrele(ill); 9616 if (need_decref) 9617 CONN_DEC_REF(connp); 9618 return; 9619 } 9620 ire_refrele(ire); 9621 } 9622 /* No need to verify again when using ip_newroute */ 9623 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9624 } 9625 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9626 /* 9627 * Make sure they match since ip_newroute*_v6 etc might 9628 * (unknown to them) inspect ip6i_nexthop when 9629 * they think they access ip6_dst. 9630 */ 9631 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9632 } 9633 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9634 cksum_request = 1; 9635 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9636 cksum_request = ip6i->ip6i_checksum_off; 9637 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9638 unspec_src = 1; 9639 9640 if (do_outrequests && ill != NULL) { 9641 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9642 do_outrequests = B_FALSE; 9643 } 9644 /* 9645 * Store ip6i_t info that we need after we come back 9646 * from IPSEC processing. 
9647 */ 9648 if (mctl_present) { 9649 ASSERT(io != NULL); 9650 io->ipsec_out_unspec_src = unspec_src; 9651 } 9652 } 9653 if (connp != NULL && connp->conn_dontroute) 9654 ip6h->ip6_hops = 1; 9655 9656 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9657 goto ipv6multicast; 9658 9659 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9660 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9661 ill_t *conn_outgoing_pill; 9662 9663 conn_outgoing_pill = conn_get_held_ill(connp, 9664 &connp->conn_outgoing_pill, &err); 9665 if (err == ILL_LOOKUP_FAILED) { 9666 if (ill != NULL) 9667 ill_refrele(ill); 9668 if (need_decref) 9669 CONN_DEC_REF(connp); 9670 freemsg(first_mp); 9671 return; 9672 } 9673 if (conn_outgoing_pill != NULL) { 9674 if (ill != NULL) 9675 ill_refrele(ill); 9676 ill = conn_outgoing_pill; 9677 attach_if = B_TRUE; 9678 match_flags = MATCH_IRE_ILL; 9679 mibptr = ill->ill_ip_mib; 9680 9681 /* 9682 * Check if we need an ire that will not be 9683 * looked up by anybody else i.e. HIDDEN. 9684 */ 9685 if (ill_is_probeonly(ill)) 9686 match_flags |= MATCH_IRE_MARK_HIDDEN; 9687 goto send_from_ill; 9688 } 9689 } 9690 9691 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9692 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9693 ill_t *conn_nofailover_ill; 9694 9695 conn_nofailover_ill = conn_get_held_ill(connp, 9696 &connp->conn_nofailover_ill, &err); 9697 if (err == ILL_LOOKUP_FAILED) { 9698 if (ill != NULL) 9699 ill_refrele(ill); 9700 if (need_decref) 9701 CONN_DEC_REF(connp); 9702 freemsg(first_mp); 9703 return; 9704 } 9705 if (conn_nofailover_ill != NULL) { 9706 if (ill != NULL) 9707 ill_refrele(ill); 9708 ill = conn_nofailover_ill; 9709 attach_if = B_TRUE; 9710 /* 9711 * Assumes that ipc_nofailover_ill is used only for 9712 * multipathing probe packets. These packets are better 9713 * dropped, if they are delayed in ND resolution, for 9714 * the reasons described in nce_queue_mp(). 
9715 * IP6I_DROP_IFDELAYED will be set later on in this 9716 * function for this packet. 9717 */ 9718 drop_if_delayed = B_TRUE; 9719 match_flags = MATCH_IRE_ILL; 9720 mibptr = ill->ill_ip_mib; 9721 9722 /* 9723 * Check if we need an ire that will not be 9724 * looked up by anybody else i.e. HIDDEN. 9725 */ 9726 if (ill_is_probeonly(ill)) 9727 match_flags |= MATCH_IRE_MARK_HIDDEN; 9728 goto send_from_ill; 9729 } 9730 } 9731 9732 /* 9733 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9734 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9735 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9736 */ 9737 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9738 ASSERT(ip6i->ip6i_ifindex != 0); 9739 attach_if = B_TRUE; 9740 ASSERT(ill != NULL); 9741 match_flags = MATCH_IRE_ILL; 9742 9743 /* 9744 * Check if we need an ire that will not be 9745 * looked up by anybody else i.e. HIDDEN. 9746 */ 9747 if (ill_is_probeonly(ill)) 9748 match_flags |= MATCH_IRE_MARK_HIDDEN; 9749 goto send_from_ill; 9750 } 9751 9752 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9753 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9754 ASSERT(ill != NULL); 9755 goto send_from_ill; 9756 } 9757 9758 /* 9759 * 4. If q is an ill queue and (link local or multicast destination) 9760 * then use that ill. 9761 */ 9762 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9763 goto send_from_ill; 9764 } 9765 9766 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9767 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9768 ill_t *conn_outgoing_ill; 9769 9770 conn_outgoing_ill = conn_get_held_ill(connp, 9771 &connp->conn_outgoing_ill, &err); 9772 if (err == ILL_LOOKUP_FAILED) { 9773 if (ill != NULL) 9774 ill_refrele(ill); 9775 if (need_decref) 9776 CONN_DEC_REF(connp); 9777 freemsg(first_mp); 9778 return; 9779 } 9780 if (ill != NULL) 9781 ill_refrele(ill); 9782 ill = conn_outgoing_ill; 9783 mibptr = ill->ill_ip_mib; 9784 goto send_from_ill; 9785 } 9786 9787 /* 9788 * 6. For unicast: Just do an IRE lookup for the best match. 9789 * If we get here for a link-local address it is rather random 9790 * what interface we pick on a multihomed host. 9791 * *If* there is an IRE_CACHE (and the link-local address 9792 * isn't duplicated on multi links) this will find the IRE_CACHE. 9793 * Otherwise it will use one of the matching IRE_INTERFACE routes 9794 * for the link-local prefix. Hence, applications 9795 * *should* be encouraged to specify an outgoing interface when sending 9796 * to a link local address. 9797 */ 9798 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9799 !connp->conn_fully_bound)) { 9800 /* 9801 * We cache IRE_CACHEs to avoid lookups. We don't do 9802 * this for the tcp global queue and listen end point 9803 * as it does not really have a real destination to 9804 * talk to. 9805 */ 9806 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9807 ipst); 9808 } else { 9809 /* 9810 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9811 * grab a lock here to check for CONDEMNED as it is okay 9812 * to send a packet or two with the IRE_CACHE that is going 9813 * away. 9814 */ 9815 mutex_enter(&connp->conn_lock); 9816 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9817 if (ire != NULL && 9818 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9819 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9820 9821 IRE_REFHOLD(ire); 9822 mutex_exit(&connp->conn_lock); 9823 9824 } else { 9825 boolean_t cached = B_FALSE; 9826 9827 connp->conn_ire_cache = NULL; 9828 mutex_exit(&connp->conn_lock); 9829 /* Release the old ire */ 9830 if (ire != NULL && sctp_ire == NULL) 9831 IRE_REFRELE_NOTR(ire); 9832 9833 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9834 MBLK_GETLABEL(mp), ipst); 9835 if (ire != NULL) { 9836 IRE_REFHOLD_NOTR(ire); 9837 9838 mutex_enter(&connp->conn_lock); 9839 if (CONN_CACHE_IRE(connp) && 9840 (connp->conn_ire_cache == NULL)) { 9841 rw_enter(&ire->ire_bucket->irb_lock, 9842 RW_READER); 9843 if (!(ire->ire_marks & 9844 IRE_MARK_CONDEMNED)) { 9845 connp->conn_ire_cache = ire; 9846 cached = B_TRUE; 9847 } 9848 rw_exit(&ire->ire_bucket->irb_lock); 9849 } 9850 mutex_exit(&connp->conn_lock); 9851 9852 /* 9853 * We can continue to use the ire but since it 9854 * was not cached, we should drop the extra 9855 * reference. 9856 */ 9857 if (!cached) 9858 IRE_REFRELE_NOTR(ire); 9859 } 9860 } 9861 } 9862 9863 if (ire != NULL) { 9864 if (do_outrequests) { 9865 /* Handle IRE_LOCAL's that might appear here */ 9866 if (ire->ire_type == IRE_CACHE) { 9867 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9868 ill_ip_mib; 9869 } else { 9870 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9871 } 9872 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9873 } 9874 ASSERT(!attach_if); 9875 9876 /* 9877 * Check if the ire has the RTF_MULTIRT flag, inherited 9878 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9879 */ 9880 if (ire->ire_flags & RTF_MULTIRT) { 9881 /* 9882 * Force hop limit of multirouted packets if required. 9883 * The hop limit of such packets is bounded by the 9884 * ip_multirt_ttl ndd variable. 9885 * NDP packets must have a hop limit of 255; don't 9886 * change the hop limit in that case. 
9887 */ 9888 if ((ipst->ips_ip_multirt_ttl > 0) && 9889 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9890 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9891 if (ip_debug > 3) { 9892 ip2dbg(("ip_wput_v6: forcing multirt " 9893 "hop limit to %d (was %d) ", 9894 ipst->ips_ip_multirt_ttl, 9895 ip6h->ip6_hops)); 9896 pr_addr_dbg("v6dst %s\n", AF_INET6, 9897 &ire->ire_addr_v6); 9898 } 9899 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9900 } 9901 9902 /* 9903 * We look at this point if there are pending 9904 * unresolved routes. ire_multirt_need_resolve_v6() 9905 * checks in O(n) that all IRE_OFFSUBNET ire 9906 * entries for the packet's destination and 9907 * flagged RTF_MULTIRT are currently resolved. 9908 * If some remain unresolved, we do a copy 9909 * of the current message. It will be used 9910 * to initiate additional route resolutions. 9911 */ 9912 multirt_need_resolve = 9913 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9914 MBLK_GETLABEL(first_mp), ipst); 9915 ip2dbg(("ip_wput_v6: ire %p, " 9916 "multirt_need_resolve %d, first_mp %p\n", 9917 (void *)ire, multirt_need_resolve, 9918 (void *)first_mp)); 9919 if (multirt_need_resolve) { 9920 copy_mp = copymsg(first_mp); 9921 if (copy_mp != NULL) { 9922 MULTIRT_DEBUG_TAG(copy_mp); 9923 } 9924 } 9925 } 9926 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9927 connp, caller, 0, ip6i_flags, zoneid); 9928 if (need_decref) { 9929 CONN_DEC_REF(connp); 9930 connp = NULL; 9931 } 9932 IRE_REFRELE(ire); 9933 9934 /* 9935 * Try to resolve another multiroute if 9936 * ire_multirt_need_resolve_v6() deemed it necessary. 9937 * copy_mp will be consumed (sent or freed) by 9938 * ip_newroute_v6(). 
9939 */ 9940 if (copy_mp != NULL) { 9941 if (mctl_present) { 9942 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9943 } else { 9944 ip6h = (ip6_t *)copy_mp->b_rptr; 9945 } 9946 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9947 &ip6h->ip6_src, NULL, zoneid, ipst); 9948 } 9949 if (ill != NULL) 9950 ill_refrele(ill); 9951 return; 9952 } 9953 9954 /* 9955 * No full IRE for this destination. Send it to 9956 * ip_newroute_v6 to see if anything else matches. 9957 * Mark this packet as having originated on this 9958 * machine. 9959 * Update rptr if there was an ip6i_t header. 9960 */ 9961 mp->b_prev = NULL; 9962 mp->b_next = NULL; 9963 if (ip6i != NULL) 9964 mp->b_rptr -= sizeof (ip6i_t); 9965 9966 if (unspec_src) { 9967 if (ip6i == NULL) { 9968 /* 9969 * Add ip6i_t header to carry unspec_src 9970 * until the packet comes back in ip_wput_v6. 9971 */ 9972 mp = ip_add_info_v6(mp, NULL, v6dstp); 9973 if (mp == NULL) { 9974 if (do_outrequests) 9975 BUMP_MIB(mibptr, 9976 ipIfStatsHCOutRequests); 9977 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9978 if (mctl_present) 9979 freeb(first_mp); 9980 if (ill != NULL) 9981 ill_refrele(ill); 9982 if (need_decref) 9983 CONN_DEC_REF(connp); 9984 return; 9985 } 9986 ip6i = (ip6i_t *)mp->b_rptr; 9987 9988 if (mctl_present) { 9989 ASSERT(first_mp != mp); 9990 first_mp->b_cont = mp; 9991 } else { 9992 first_mp = mp; 9993 } 9994 9995 if ((mp->b_wptr - (uchar_t *)ip6i) == 9996 sizeof (ip6i_t)) { 9997 /* 9998 * ndp_resolver called from ip_newroute_v6 9999 * expects pulled up message. 
10000 */ 10001 if (!pullupmsg(mp, -1)) { 10002 ip1dbg(("ip_wput_v6: pullupmsg" 10003 " failed\n")); 10004 if (do_outrequests) { 10005 BUMP_MIB(mibptr, 10006 ipIfStatsHCOutRequests); 10007 } 10008 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10009 freemsg(first_mp); 10010 if (ill != NULL) 10011 ill_refrele(ill); 10012 if (need_decref) 10013 CONN_DEC_REF(connp); 10014 return; 10015 } 10016 ip6i = (ip6i_t *)mp->b_rptr; 10017 } 10018 ip6h = (ip6_t *)&ip6i[1]; 10019 v6dstp = &ip6h->ip6_dst; 10020 } 10021 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10022 if (mctl_present) { 10023 ASSERT(io != NULL); 10024 io->ipsec_out_unspec_src = unspec_src; 10025 } 10026 } 10027 if (do_outrequests) 10028 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10029 if (need_decref) 10030 CONN_DEC_REF(connp); 10031 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10032 if (ill != NULL) 10033 ill_refrele(ill); 10034 return; 10035 10036 10037 /* 10038 * Handle multicast packets with or without an conn. 10039 * Assumes that the transports set ip6_hops taking 10040 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10041 * into account. 10042 */ 10043 ipv6multicast: 10044 ip2dbg(("ip_wput_v6: multicast\n")); 10045 10046 /* 10047 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10048 * 2. If conn_nofailover_ill is set then use that ill. 10049 * 10050 * Hold the conn_lock till we refhold the ill of interest that is 10051 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10052 * while holding any locks, postpone the refrele until after the 10053 * conn_lock is dropped. 
10054 */ 10055 if (connp != NULL) { 10056 mutex_enter(&connp->conn_lock); 10057 conn_lock_held = B_TRUE; 10058 } else { 10059 conn_lock_held = B_FALSE; 10060 } 10061 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10062 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10063 if (err == ILL_LOOKUP_FAILED) { 10064 ip1dbg(("ip_output_v6: multicast" 10065 " conn_outgoing_pill no ipif\n")); 10066 multicast_discard: 10067 ASSERT(saved_ill == NULL); 10068 if (conn_lock_held) 10069 mutex_exit(&connp->conn_lock); 10070 if (ill != NULL) 10071 ill_refrele(ill); 10072 freemsg(first_mp); 10073 if (do_outrequests) 10074 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10075 if (need_decref) 10076 CONN_DEC_REF(connp); 10077 return; 10078 } 10079 saved_ill = ill; 10080 ill = connp->conn_outgoing_pill; 10081 attach_if = B_TRUE; 10082 match_flags = MATCH_IRE_ILL; 10083 mibptr = ill->ill_ip_mib; 10084 10085 /* 10086 * Check if we need an ire that will not be 10087 * looked up by anybody else i.e. HIDDEN. 10088 */ 10089 if (ill_is_probeonly(ill)) 10090 match_flags |= MATCH_IRE_MARK_HIDDEN; 10091 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10092 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10093 if (err == ILL_LOOKUP_FAILED) { 10094 ip1dbg(("ip_output_v6: multicast" 10095 " conn_nofailover_ill no ipif\n")); 10096 goto multicast_discard; 10097 } 10098 saved_ill = ill; 10099 ill = connp->conn_nofailover_ill; 10100 attach_if = B_TRUE; 10101 match_flags = MATCH_IRE_ILL; 10102 10103 /* 10104 * Check if we need an ire that will not be 10105 * looked up by anybody else i.e. HIDDEN. 10106 */ 10107 if (ill_is_probeonly(ill)) 10108 match_flags |= MATCH_IRE_MARK_HIDDEN; 10109 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10110 /* 10111 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10112 * we should have an ip6i_t with IP6I_ATTACH_IF if 10113 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10114 * used on this endpoint. 10115 */ 10116 ASSERT(ip6i->ip6i_ifindex != 0); 10117 attach_if = B_TRUE; 10118 ASSERT(ill != NULL); 10119 match_flags = MATCH_IRE_ILL; 10120 10121 /* 10122 * Check if we need an ire that will not be 10123 * looked up by anybody else i.e. HIDDEN. 10124 */ 10125 if (ill_is_probeonly(ill)) 10126 match_flags |= MATCH_IRE_MARK_HIDDEN; 10127 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10128 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10129 10130 ASSERT(ill != NULL); 10131 } else if (ill != NULL) { 10132 /* 10133 * 4. If q is an ill queue and (link local or multicast 10134 * destination) then use that ill. 10135 * We don't need the ipif initialization here. 10136 * This useless assert below is just to prevent lint from 10137 * reporting a null body if statement. 10138 */ 10139 ASSERT(ill != NULL); 10140 } else if (connp != NULL) { 10141 /* 10142 * 5. If IPV6_BOUND_IF has been set use that ill. 10143 * 10144 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10145 * Otherwise look for the best IRE match for the unspecified 10146 * group to determine the ill. 10147 * 10148 * conn_multicast_ill is used for only IPv6 packets. 10149 * conn_multicast_ipif is used for only IPv4 packets. 10150 * Thus a PF_INET6 socket send both IPv4 and IPv6 10151 * multicast packets using different IP*_MULTICAST_IF 10152 * interfaces. 
10153 */ 10154 if (connp->conn_outgoing_ill != NULL) { 10155 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10156 if (err == ILL_LOOKUP_FAILED) { 10157 ip1dbg(("ip_output_v6: multicast" 10158 " conn_outgoing_ill no ipif\n")); 10159 goto multicast_discard; 10160 } 10161 ill = connp->conn_outgoing_ill; 10162 } else if (connp->conn_multicast_ill != NULL) { 10163 err = ill_check_and_refhold(connp->conn_multicast_ill); 10164 if (err == ILL_LOOKUP_FAILED) { 10165 ip1dbg(("ip_output_v6: multicast" 10166 " conn_multicast_ill no ipif\n")); 10167 goto multicast_discard; 10168 } 10169 ill = connp->conn_multicast_ill; 10170 } else { 10171 mutex_exit(&connp->conn_lock); 10172 conn_lock_held = B_FALSE; 10173 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10174 if (ipif == NULL) { 10175 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10176 goto multicast_discard; 10177 } 10178 /* 10179 * We have a ref to this ipif, so we can safely 10180 * access ipif_ill. 10181 */ 10182 ill = ipif->ipif_ill; 10183 mutex_enter(&ill->ill_lock); 10184 if (!ILL_CAN_LOOKUP(ill)) { 10185 mutex_exit(&ill->ill_lock); 10186 ipif_refrele(ipif); 10187 ill = NULL; 10188 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10189 goto multicast_discard; 10190 } 10191 ill_refhold_locked(ill); 10192 mutex_exit(&ill->ill_lock); 10193 ipif_refrele(ipif); 10194 /* 10195 * Save binding until IPV6_MULTICAST_IF 10196 * changes it 10197 */ 10198 mutex_enter(&connp->conn_lock); 10199 connp->conn_multicast_ill = ill; 10200 connp->conn_orig_multicast_ifindex = 10201 ill->ill_phyint->phyint_ifindex; 10202 mutex_exit(&connp->conn_lock); 10203 } 10204 } 10205 if (conn_lock_held) 10206 mutex_exit(&connp->conn_lock); 10207 10208 if (saved_ill != NULL) 10209 ill_refrele(saved_ill); 10210 10211 ASSERT(ill != NULL); 10212 /* 10213 * For multicast loopback interfaces replace the multicast address 10214 * with a unicast address for the ire lookup. 
10215 */ 10216 if (IS_LOOPBACK(ill)) 10217 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10218 10219 mibptr = ill->ill_ip_mib; 10220 if (do_outrequests) { 10221 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10222 do_outrequests = B_FALSE; 10223 } 10224 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10225 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10226 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10227 10228 /* 10229 * As we may lose the conn by the time we reach ip_wput_ire_v6 10230 * we copy conn_multicast_loop and conn_dontroute on to an 10231 * ipsec_out. In case if this datagram goes out secure, 10232 * we need the ill_index also. Copy that also into the 10233 * ipsec_out. 10234 */ 10235 if (mctl_present) { 10236 io = (ipsec_out_t *)first_mp->b_rptr; 10237 ASSERT(first_mp->b_datap->db_type == M_CTL); 10238 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10239 } else { 10240 ASSERT(mp == first_mp); 10241 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10242 NULL) { 10243 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10244 freemsg(mp); 10245 if (ill != NULL) 10246 ill_refrele(ill); 10247 if (need_decref) 10248 CONN_DEC_REF(connp); 10249 return; 10250 } 10251 io = (ipsec_out_t *)first_mp->b_rptr; 10252 /* This is not a secure packet */ 10253 io->ipsec_out_secure = B_FALSE; 10254 io->ipsec_out_use_global_policy = B_TRUE; 10255 io->ipsec_out_zoneid = 10256 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10257 first_mp->b_cont = mp; 10258 mctl_present = B_TRUE; 10259 } 10260 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10261 io->ipsec_out_unspec_src = unspec_src; 10262 if (connp != NULL) 10263 io->ipsec_out_dontroute = connp->conn_dontroute; 10264 10265 send_from_ill: 10266 ASSERT(ill != NULL); 10267 ASSERT(mibptr == ill->ill_ip_mib); 10268 if (do_outrequests) { 10269 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10270 do_outrequests = B_FALSE; 10271 } 10272 10273 if (io != NULL) 10274 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10275 10276 /* 10277 * When a specific ill is specified (using IPV6_PKTINFO, 10278 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10279 * on routing entries (ftable and ctable) that have a matching 10280 * ire->ire_ipif->ipif_ill. Thus this can only be used 10281 * for destinations that are on-link for the specific ill 10282 * and that can appear on multiple links. Thus it is useful 10283 * for multicast destinations, link-local destinations, and 10284 * at some point perhaps for site-local destinations (if the 10285 * node sits at a site boundary). 10286 * We create the cache entries in the regular ctable since 10287 * it can not "confuse" things for other destinations. 10288 * table. 10289 * 10290 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10291 * It is used only when ire_cache_lookup is used above. 10292 */ 10293 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10294 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10295 if (ire != NULL) { 10296 /* 10297 * Check if the ire has the RTF_MULTIRT flag, inherited 10298 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10299 */ 10300 if (ire->ire_flags & RTF_MULTIRT) { 10301 /* 10302 * Force hop limit of multirouted packets if required. 10303 * The hop limit of such packets is bounded by the 10304 * ip_multirt_ttl ndd variable. 
10305 * NDP packets must have a hop limit of 255; don't 10306 * change the hop limit in that case. 10307 */ 10308 if ((ipst->ips_ip_multirt_ttl > 0) && 10309 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10310 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10311 if (ip_debug > 3) { 10312 ip2dbg(("ip_wput_v6: forcing multirt " 10313 "hop limit to %d (was %d) ", 10314 ipst->ips_ip_multirt_ttl, 10315 ip6h->ip6_hops)); 10316 pr_addr_dbg("v6dst %s\n", AF_INET6, 10317 &ire->ire_addr_v6); 10318 } 10319 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10320 } 10321 10322 /* 10323 * We look at this point if there are pending 10324 * unresolved routes. ire_multirt_need_resolve_v6() 10325 * checks in O(n) that all IRE_OFFSUBNET ire 10326 * entries for the packet's destination and 10327 * flagged RTF_MULTIRT are currently resolved. 10328 * If some remain unresolved, we make a copy 10329 * of the current message. It will be used 10330 * to initiate additional route resolutions. 10331 */ 10332 multirt_need_resolve = 10333 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10334 MBLK_GETLABEL(first_mp), ipst); 10335 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10336 "multirt_need_resolve %d, first_mp %p\n", 10337 (void *)ire, multirt_need_resolve, 10338 (void *)first_mp)); 10339 if (multirt_need_resolve) { 10340 copy_mp = copymsg(first_mp); 10341 if (copy_mp != NULL) { 10342 MULTIRT_DEBUG_TAG(copy_mp); 10343 } 10344 } 10345 } 10346 10347 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10348 ill->ill_name, (void *)ire, 10349 ill->ill_phyint->phyint_ifindex)); 10350 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10351 connp, caller, 10352 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10353 ip6i_flags, zoneid); 10354 ire_refrele(ire); 10355 if (need_decref) { 10356 CONN_DEC_REF(connp); 10357 connp = NULL; 10358 } 10359 10360 /* 10361 * Try to resolve another multiroute if 10362 * ire_multirt_need_resolve_v6() deemed it necessary. 
10363 * copy_mp will be consumed (sent or freed) by 10364 * ip_newroute_[ipif_]v6(). 10365 */ 10366 if (copy_mp != NULL) { 10367 if (mctl_present) { 10368 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10369 } else { 10370 ip6h = (ip6_t *)copy_mp->b_rptr; 10371 } 10372 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10373 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10374 zoneid, ipst); 10375 if (ipif == NULL) { 10376 ip1dbg(("ip_wput_v6: No ipif for " 10377 "multicast\n")); 10378 MULTIRT_DEBUG_UNTAG(copy_mp); 10379 freemsg(copy_mp); 10380 return; 10381 } 10382 ip_newroute_ipif_v6(q, copy_mp, ipif, 10383 ip6h->ip6_dst, unspec_src, zoneid); 10384 ipif_refrele(ipif); 10385 } else { 10386 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10387 &ip6h->ip6_src, ill, zoneid, ipst); 10388 } 10389 } 10390 ill_refrele(ill); 10391 return; 10392 } 10393 if (need_decref) { 10394 CONN_DEC_REF(connp); 10395 connp = NULL; 10396 } 10397 10398 /* Update rptr if there was an ip6i_t header. */ 10399 if (ip6i != NULL) 10400 mp->b_rptr -= sizeof (ip6i_t); 10401 if (unspec_src || attach_if) { 10402 if (ip6i == NULL) { 10403 /* 10404 * Add ip6i_t header to carry unspec_src 10405 * or attach_if until the packet comes back in 10406 * ip_wput_v6. 10407 */ 10408 if (mctl_present) { 10409 first_mp->b_cont = 10410 ip_add_info_v6(mp, NULL, v6dstp); 10411 mp = first_mp->b_cont; 10412 if (mp == NULL) 10413 freeb(first_mp); 10414 } else { 10415 first_mp = mp = ip_add_info_v6(mp, NULL, 10416 v6dstp); 10417 } 10418 if (mp == NULL) { 10419 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10420 ill_refrele(ill); 10421 return; 10422 } 10423 ip6i = (ip6i_t *)mp->b_rptr; 10424 if ((mp->b_wptr - (uchar_t *)ip6i) == 10425 sizeof (ip6i_t)) { 10426 /* 10427 * ndp_resolver called from ip_newroute_v6 10428 * expects a pulled up message. 
10429 */ 10430 if (!pullupmsg(mp, -1)) { 10431 ip1dbg(("ip_wput_v6: pullupmsg" 10432 " failed\n")); 10433 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10434 freemsg(first_mp); 10435 return; 10436 } 10437 ip6i = (ip6i_t *)mp->b_rptr; 10438 } 10439 ip6h = (ip6_t *)&ip6i[1]; 10440 v6dstp = &ip6h->ip6_dst; 10441 } 10442 if (unspec_src) 10443 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10444 if (attach_if) { 10445 /* 10446 * Bind to nofailover/BOUND_PIF overrides ifindex. 10447 */ 10448 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10449 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10450 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10451 if (drop_if_delayed) { 10452 /* This is a multipathing probe packet */ 10453 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10454 } 10455 } 10456 if (mctl_present) { 10457 ASSERT(io != NULL); 10458 io->ipsec_out_unspec_src = unspec_src; 10459 } 10460 } 10461 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10462 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10463 unspec_src, zoneid); 10464 } else { 10465 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10466 zoneid, ipst); 10467 } 10468 ill_refrele(ill); 10469 return; 10470 10471 notv6: 10472 /* 10473 * XXX implement a IPv4 and IPv6 packet counter per conn and 10474 * switch when ratio exceeds e.g. 10:1 10475 */ 10476 if (q->q_next == NULL) { 10477 connp = Q_TO_CONN(q); 10478 10479 if (IPCL_IS_TCP(connp)) { 10480 /* change conn_send for the tcp_v4_connections */ 10481 connp->conn_send = ip_output; 10482 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10483 /* The 'q' is the default SCTP queue */ 10484 connp = (conn_t *)arg; 10485 } else { 10486 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 10487 } 10488 } 10489 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10490 (void) ip_output(arg, first_mp, arg2, caller); 10491 if (ill != NULL) 10492 ill_refrele(ill); 10493 } 10494 10495 /* 10496 * If this is a conn_t queue, then we pass in the conn. This includes the 10497 * zoneid. 
10498 * Otherwise, this is a message for an ill_t queue, 10499 * in which case we use the global zoneid since those are all part of 10500 * the global zone. 10501 */ 10502 static void 10503 ip_wput_v6(queue_t *q, mblk_t *mp) 10504 { 10505 if (CONN_Q(q)) 10506 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10507 else 10508 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10509 } 10510 10511 static void 10512 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10513 { 10514 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10515 io->ipsec_out_attach_if = B_TRUE; 10516 io->ipsec_out_ill_index = attach_index; 10517 } 10518 10519 /* 10520 * NULL send-to queue - packet is to be delivered locally. 10521 */ 10522 void 10523 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10524 ire_t *ire, int fanout_flags) 10525 { 10526 uint32_t ports; 10527 mblk_t *mp = first_mp, *first_mp1; 10528 boolean_t mctl_present; 10529 uint8_t nexthdr; 10530 uint16_t hdr_length; 10531 ipsec_out_t *io; 10532 mib2_ipIfStatsEntry_t *mibptr; 10533 ilm_t *ilm; 10534 uint_t nexthdr_offset; 10535 ip_stack_t *ipst = ill->ill_ipst; 10536 10537 if (DB_TYPE(mp) == M_CTL) { 10538 io = (ipsec_out_t *)mp->b_rptr; 10539 if (!io->ipsec_out_secure) { 10540 mp = mp->b_cont; 10541 freeb(first_mp); 10542 first_mp = mp; 10543 mctl_present = B_FALSE; 10544 } else { 10545 mctl_present = B_TRUE; 10546 mp = first_mp->b_cont; 10547 ipsec_out_to_in(first_mp); 10548 } 10549 } else { 10550 mctl_present = B_FALSE; 10551 } 10552 10553 /* 10554 * Remove reachability confirmation bit from version field 10555 * before passing the packet on to any firewall hooks or 10556 * looping back the packet. 
10557 */ 10558 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10559 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10560 10561 DTRACE_PROBE4(ip6__loopback__in__start, 10562 ill_t *, ill, ill_t *, NULL, 10563 ip6_t *, ip6h, mblk_t *, first_mp); 10564 10565 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10566 ipst->ips_ipv6firewall_loopback_in, 10567 ill, NULL, ip6h, first_mp, mp, ipst); 10568 10569 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10570 10571 if (first_mp == NULL) 10572 return; 10573 10574 nexthdr = ip6h->ip6_nxt; 10575 mibptr = ill->ill_ip_mib; 10576 10577 /* Fastpath */ 10578 switch (nexthdr) { 10579 case IPPROTO_TCP: 10580 case IPPROTO_UDP: 10581 case IPPROTO_ICMPV6: 10582 case IPPROTO_SCTP: 10583 hdr_length = IPV6_HDR_LEN; 10584 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10585 (uchar_t *)ip6h); 10586 break; 10587 default: { 10588 uint8_t *nexthdrp; 10589 10590 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10591 &hdr_length, &nexthdrp)) { 10592 /* Malformed packet */ 10593 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10594 freemsg(first_mp); 10595 return; 10596 } 10597 nexthdr = *nexthdrp; 10598 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10599 break; 10600 } 10601 } 10602 10603 UPDATE_OB_PKT_COUNT(ire); 10604 ire->ire_last_used_time = lbolt; 10605 10606 switch (nexthdr) { 10607 case IPPROTO_TCP: 10608 if (DB_TYPE(mp) == M_DATA) { 10609 /* 10610 * M_DATA mblk, so init mblk (chain) for 10611 * no struio(). 
10612 */ 10613 mblk_t *mp1 = mp; 10614 10615 do { 10616 mp1->b_datap->db_struioflag = 0; 10617 } while ((mp1 = mp1->b_cont) != NULL); 10618 } 10619 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10620 TCP_PORTS_OFFSET); 10621 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10622 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10623 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10624 hdr_length, mctl_present, ire->ire_zoneid); 10625 return; 10626 10627 case IPPROTO_UDP: 10628 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10629 UDP_PORTS_OFFSET); 10630 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10631 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10632 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10633 return; 10634 10635 case IPPROTO_SCTP: 10636 { 10637 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10638 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10639 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10640 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10641 return; 10642 } 10643 case IPPROTO_ICMPV6: { 10644 icmp6_t *icmp6; 10645 10646 /* check for full IPv6+ICMPv6 header */ 10647 if ((mp->b_wptr - mp->b_rptr) < 10648 (hdr_length + ICMP6_MINLEN)) { 10649 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10650 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10651 " failed\n")); 10652 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10653 freemsg(first_mp); 10654 return; 10655 } 10656 ip6h = (ip6_t *)mp->b_rptr; 10657 } 10658 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10659 10660 /* Update output mib stats */ 10661 icmp_update_out_mib_v6(ill, icmp6); 10662 10663 /* Check variable for testing applications */ 10664 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10665 freemsg(first_mp); 10666 return; 10667 } 10668 /* 10669 * Assume that there is always at least one conn for 10670 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10671 * where there is no conn. 
10672 */ 10673 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10674 !IS_LOOPBACK(ill)) { 10675 /* 10676 * In the multicast case, applications may have 10677 * joined the group from different zones, so we 10678 * need to deliver the packet to each of them. 10679 * Loop through the multicast memberships 10680 * structures (ilm) on the receive ill and send 10681 * a copy of the packet up each matching one. 10682 * However, we don't do this for multicasts sent 10683 * on the loopback interface (PHYI_LOOPBACK flag 10684 * set) as they must stay in the sender's zone. 10685 */ 10686 ILM_WALKER_HOLD(ill); 10687 for (ilm = ill->ill_ilm; ilm != NULL; 10688 ilm = ilm->ilm_next) { 10689 if (ilm->ilm_flags & ILM_DELETED) 10690 continue; 10691 if (!IN6_ARE_ADDR_EQUAL( 10692 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10693 continue; 10694 if ((fanout_flags & 10695 IP_FF_NO_MCAST_LOOP) && 10696 ilm->ilm_zoneid == ire->ire_zoneid) 10697 continue; 10698 if (!ipif_lookup_zoneid(ill, 10699 ilm->ilm_zoneid, IPIF_UP, NULL)) 10700 continue; 10701 10702 first_mp1 = ip_copymsg(first_mp); 10703 if (first_mp1 == NULL) 10704 continue; 10705 icmp_inbound_v6(q, first_mp1, ill, 10706 hdr_length, mctl_present, 10707 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10708 NULL); 10709 } 10710 ILM_WALKER_RELE(ill); 10711 } else { 10712 first_mp1 = ip_copymsg(first_mp); 10713 if (first_mp1 != NULL) 10714 icmp_inbound_v6(q, first_mp1, ill, 10715 hdr_length, mctl_present, 10716 IP6_NO_IPPOLICY, ire->ire_zoneid, 10717 NULL); 10718 } 10719 } 10720 /* FALLTHRU */ 10721 default: { 10722 /* 10723 * Handle protocols with which IPv6 is less intimate. 10724 */ 10725 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10726 10727 /* 10728 * Enable sending ICMP for "Unknown" nexthdr 10729 * case. i.e. where we did not FALLTHRU from 10730 * IPPROTO_ICMPV6 processing case above. 
10731 */ 10732 if (nexthdr != IPPROTO_ICMPV6) 10733 fanout_flags |= IP_FF_SEND_ICMP; 10734 /* 10735 * Note: There can be more than one stream bound 10736 * to a particular protocol. When this is the case, 10737 * each one gets a copy of any incoming packets. 10738 */ 10739 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10740 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10741 mctl_present, ire->ire_zoneid); 10742 return; 10743 } 10744 } 10745 } 10746 10747 /* 10748 * Send packet using IRE. 10749 * Checksumming is controlled by cksum_request: 10750 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10751 * 1 => Skip TCP/UDP/SCTP checksum 10752 * Otherwise => checksum_request contains insert offset for checksum 10753 * 10754 * Assumes that the following set of headers appear in the first 10755 * mblk: 10756 * ip6_t 10757 * Any extension headers 10758 * TCP/UDP/SCTP header (if present) 10759 * The routine can handle an ICMPv6 header that is not in the first mblk. 10760 * 10761 * NOTE : This function does not ire_refrele the ire passed in as the 10762 * argument unlike ip_wput_ire where the REFRELE is done. 10763 * Refer to ip_wput_ire for more on this. 10764 */ 10765 static void 10766 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10767 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10768 zoneid_t zoneid) 10769 { 10770 ip6_t *ip6h; 10771 uint8_t nexthdr; 10772 uint16_t hdr_length; 10773 uint_t reachable = 0x0; 10774 ill_t *ill; 10775 mib2_ipIfStatsEntry_t *mibptr; 10776 mblk_t *first_mp; 10777 boolean_t mctl_present; 10778 ipsec_out_t *io; 10779 boolean_t conn_dontroute; /* conn value for multicast */ 10780 boolean_t conn_multicast_loop; /* conn value for multicast */ 10781 boolean_t multicast_forward; /* Should we forward ? 
*/ 10782 int max_frag; 10783 ip_stack_t *ipst = ire->ire_ipst; 10784 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10785 10786 ill = ire_to_ill(ire); 10787 first_mp = mp; 10788 multicast_forward = B_FALSE; 10789 10790 if (mp->b_datap->db_type != M_CTL) { 10791 ip6h = (ip6_t *)first_mp->b_rptr; 10792 } else { 10793 io = (ipsec_out_t *)first_mp->b_rptr; 10794 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10795 /* 10796 * Grab the zone id now because the M_CTL can be discarded by 10797 * ip_wput_ire_parse_ipsec_out() below. 10798 */ 10799 ASSERT(zoneid == io->ipsec_out_zoneid); 10800 ASSERT(zoneid != ALL_ZONES); 10801 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10802 /* 10803 * For the multicast case, ipsec_out carries conn_dontroute and 10804 * conn_multicast_loop as conn may not be available here. We 10805 * need this for multicast loopback and forwarding which is done 10806 * later in the code. 10807 */ 10808 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10809 conn_dontroute = io->ipsec_out_dontroute; 10810 conn_multicast_loop = io->ipsec_out_multicast_loop; 10811 /* 10812 * If conn_dontroute is not set or conn_multicast_loop 10813 * is set, we need to do forwarding/loopback. For 10814 * datagrams from ip_wput_multicast, conn_dontroute is 10815 * set to B_TRUE and conn_multicast_loop is set to 10816 * B_FALSE so that we neither do forwarding nor 10817 * loopback. 10818 */ 10819 if (!conn_dontroute || conn_multicast_loop) 10820 multicast_forward = B_TRUE; 10821 } 10822 } 10823 10824 /* 10825 * If the sender didn't supply the hop limit and there is a default 10826 * unicast hop limit associated with the output interface, we use 10827 * that if the packet is unicast. Interface specific unicast hop 10828 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10829 */ 10830 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10831 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10832 ip6h->ip6_hops = ill->ill_max_hops; 10833 } 10834 10835 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10836 ire->ire_zoneid != ALL_ZONES) { 10837 /* 10838 * When a zone sends a packet to another zone, we try to deliver 10839 * the packet under the same conditions as if the destination 10840 * was a real node on the network. To do so, we look for a 10841 * matching route in the forwarding table. 10842 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10843 * ip_newroute_v6() does. 10844 * Note that IRE_LOCAL are special, since they are used 10845 * when the zoneid doesn't match in some cases. This means that 10846 * we need to handle ipha_src differently since ire_src_addr 10847 * belongs to the receiving zone instead of the sending zone. 10848 * When ip_restrict_interzone_loopback is set, then 10849 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10850 * for loopback between zones when the logical "Ethernet" would 10851 * have looped them back. 
10852 */ 10853 ire_t *src_ire; 10854 10855 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10856 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10857 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10858 if (src_ire != NULL && 10859 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10860 (!ipst->ips_ip_restrict_interzone_loopback || 10861 ire_local_same_ill_group(ire, src_ire))) { 10862 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10863 !unspec_src) { 10864 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10865 } 10866 ire_refrele(src_ire); 10867 } else { 10868 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10869 if (src_ire != NULL) { 10870 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10871 ire_refrele(src_ire); 10872 freemsg(first_mp); 10873 return; 10874 } 10875 ire_refrele(src_ire); 10876 } 10877 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10878 /* Failed */ 10879 freemsg(first_mp); 10880 return; 10881 } 10882 icmp_unreachable_v6(q, first_mp, 10883 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10884 zoneid, ipst); 10885 return; 10886 } 10887 } 10888 10889 if (mp->b_datap->db_type == M_CTL || 10890 ipss->ipsec_outbound_v6_policy_present) { 10891 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10892 connp, unspec_src, zoneid); 10893 if (mp == NULL) { 10894 return; 10895 } 10896 } 10897 10898 first_mp = mp; 10899 if (mp->b_datap->db_type == M_CTL) { 10900 io = (ipsec_out_t *)mp->b_rptr; 10901 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10902 mp = mp->b_cont; 10903 mctl_present = B_TRUE; 10904 } else { 10905 mctl_present = B_FALSE; 10906 } 10907 10908 ip6h = (ip6_t *)mp->b_rptr; 10909 nexthdr = ip6h->ip6_nxt; 10910 mibptr = ill->ill_ip_mib; 10911 10912 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10913 ipif_t *ipif; 10914 10915 /* 10916 * Select the source address using ipif_select_source_v6. 
10917 */ 10918 if (attach_index != 0) { 10919 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10920 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10921 } else { 10922 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10923 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10924 } 10925 if (ipif == NULL) { 10926 if (ip_debug > 2) { 10927 /* ip1dbg */ 10928 pr_addr_dbg("ip_wput_ire_v6: no src for " 10929 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10930 printf("ip_wput_ire_v6: interface name %s\n", 10931 ill->ill_name); 10932 } 10933 freemsg(first_mp); 10934 return; 10935 } 10936 ip6h->ip6_src = ipif->ipif_v6src_addr; 10937 ipif_refrele(ipif); 10938 } 10939 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10940 if ((connp != NULL && connp->conn_multicast_loop) || 10941 !IS_LOOPBACK(ill)) { 10942 ilm_t *ilm; 10943 10944 ILM_WALKER_HOLD(ill); 10945 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10946 ILM_WALKER_RELE(ill); 10947 if (ilm != NULL) { 10948 mblk_t *nmp; 10949 int fanout_flags = 0; 10950 10951 if (connp != NULL && 10952 !connp->conn_multicast_loop) { 10953 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10954 } 10955 ip1dbg(("ip_wput_ire_v6: " 10956 "Loopback multicast\n")); 10957 nmp = ip_copymsg(first_mp); 10958 if (nmp != NULL) { 10959 ip6_t *nip6h; 10960 mblk_t *mp_ip6h; 10961 10962 if (mctl_present) { 10963 nip6h = (ip6_t *) 10964 nmp->b_cont->b_rptr; 10965 mp_ip6h = nmp->b_cont; 10966 } else { 10967 nip6h = (ip6_t *)nmp->b_rptr; 10968 mp_ip6h = nmp; 10969 } 10970 10971 DTRACE_PROBE4( 10972 ip6__loopback__out__start, 10973 ill_t *, NULL, 10974 ill_t *, ill, 10975 ip6_t *, nip6h, 10976 mblk_t *, nmp); 10977 10978 FW_HOOKS6( 10979 ipst->ips_ip6_loopback_out_event, 10980 ipst->ips_ipv6firewall_loopback_out, 10981 NULL, ill, nip6h, nmp, mp_ip6h, 10982 ipst); 10983 10984 DTRACE_PROBE1( 10985 ip6__loopback__out__end, 10986 mblk_t *, nmp); 10987 10988 if (nmp != NULL) { 10989 /* 10990 * Deliver locally and to 10991 * every local zone, except 10992 * the 
sending zone when 10993 * IPV6_MULTICAST_LOOP is 10994 * disabled. 10995 */ 10996 ip_wput_local_v6(RD(q), ill, 10997 nip6h, nmp, 10998 ire, fanout_flags); 10999 } 11000 } else { 11001 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11002 ip1dbg(("ip_wput_ire_v6: " 11003 "copymsg failed\n")); 11004 } 11005 } 11006 } 11007 if (ip6h->ip6_hops == 0 || 11008 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11009 IS_LOOPBACK(ill)) { 11010 /* 11011 * Local multicast or just loopback on loopback 11012 * interface. 11013 */ 11014 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11015 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11016 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11017 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11018 freemsg(first_mp); 11019 return; 11020 } 11021 } 11022 11023 if (ire->ire_stq != NULL) { 11024 uint32_t sum; 11025 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11026 ill_phyint->phyint_ifindex; 11027 queue_t *dev_q = ire->ire_stq->q_next; 11028 11029 /* 11030 * non-NULL send-to queue - packet is to be sent 11031 * out an interface. 11032 */ 11033 11034 /* Driver is flow-controlling? */ 11035 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11036 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11037 /* 11038 * Queue packet if we have an conn to give back 11039 * pressure. We can't queue packets intended for 11040 * hardware acceleration since we've tossed that 11041 * state already. If the packet is being fed back 11042 * from ire_send_v6, we don't know the position in 11043 * the queue to enqueue the packet and we discard 11044 * the packet. 11045 */ 11046 if (ipst->ips_ip_output_queue && connp != NULL && 11047 !mctl_present && caller != IRE_SEND) { 11048 if (caller == IP_WSRV) { 11049 connp->conn_did_putbq = 1; 11050 (void) putbq(connp->conn_wq, mp); 11051 conn_drain_insert(connp); 11052 /* 11053 * caller == IP_WSRV implies we are 11054 * the service thread, and the 11055 * queue is already noenabled. 
11056 * The check for canput and 11057 * the putbq is not atomic. 11058 * So we need to check again. 11059 */ 11060 if (canput(dev_q)) 11061 connp->conn_did_putbq = 0; 11062 } else { 11063 (void) putq(connp->conn_wq, mp); 11064 } 11065 return; 11066 } 11067 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11068 freemsg(first_mp); 11069 return; 11070 } 11071 11072 /* 11073 * Look for reachability confirmations from the transport. 11074 */ 11075 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11076 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11077 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11078 if (mctl_present) 11079 io->ipsec_out_reachable = B_TRUE; 11080 } 11081 /* Fastpath */ 11082 switch (nexthdr) { 11083 case IPPROTO_TCP: 11084 case IPPROTO_UDP: 11085 case IPPROTO_ICMPV6: 11086 case IPPROTO_SCTP: 11087 hdr_length = IPV6_HDR_LEN; 11088 break; 11089 default: { 11090 uint8_t *nexthdrp; 11091 11092 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11093 &hdr_length, &nexthdrp)) { 11094 /* Malformed packet */ 11095 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11096 freemsg(first_mp); 11097 return; 11098 } 11099 nexthdr = *nexthdrp; 11100 break; 11101 } 11102 } 11103 11104 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11105 uint16_t *up; 11106 uint16_t *insp; 11107 11108 /* 11109 * The packet header is processed once for all, even 11110 * in the multirouting case. We disable hardware 11111 * checksum if the packet is multirouted, as it will be 11112 * replicated via several interfaces, and not all of 11113 * them may have this capability. 11114 */ 11115 if (cksum_request == 1 && 11116 !(ire->ire_flags & RTF_MULTIRT)) { 11117 /* Skip the transport checksum */ 11118 goto cksum_done; 11119 } 11120 /* 11121 * Do user-configured raw checksum. 
11122 * Compute checksum and insert at offset "cksum_request" 11123 */ 11124 11125 /* check for enough headers for checksum */ 11126 cksum_request += hdr_length; /* offset from rptr */ 11127 if ((mp->b_wptr - mp->b_rptr) < 11128 (cksum_request + sizeof (int16_t))) { 11129 if (!pullupmsg(mp, 11130 cksum_request + sizeof (int16_t))) { 11131 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11132 " failed\n")); 11133 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11134 freemsg(first_mp); 11135 return; 11136 } 11137 ip6h = (ip6_t *)mp->b_rptr; 11138 } 11139 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11140 ASSERT(((uintptr_t)insp & 0x1) == 0); 11141 up = (uint16_t *)&ip6h->ip6_src; 11142 /* 11143 * icmp has placed length and routing 11144 * header adjustment in *insp. 11145 */ 11146 sum = htons(nexthdr) + 11147 up[0] + up[1] + up[2] + up[3] + 11148 up[4] + up[5] + up[6] + up[7] + 11149 up[8] + up[9] + up[10] + up[11] + 11150 up[12] + up[13] + up[14] + up[15]; 11151 sum = (sum & 0xffff) + (sum >> 16); 11152 *insp = IP_CSUM(mp, hdr_length, sum); 11153 } else if (nexthdr == IPPROTO_TCP) { 11154 uint16_t *up; 11155 11156 /* 11157 * Check for full IPv6 header + enough TCP header 11158 * to get at the checksum field. 11159 */ 11160 if ((mp->b_wptr - mp->b_rptr) < 11161 (hdr_length + TCP_CHECKSUM_OFFSET + 11162 TCP_CHECKSUM_SIZE)) { 11163 if (!pullupmsg(mp, hdr_length + 11164 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11165 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11166 " failed\n")); 11167 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11168 freemsg(first_mp); 11169 return; 11170 } 11171 ip6h = (ip6_t *)mp->b_rptr; 11172 } 11173 11174 up = (uint16_t *)&ip6h->ip6_src; 11175 /* 11176 * Note: The TCP module has stored the length value 11177 * into the tcp checksum field, so we don't 11178 * need to explicitly sum it in here. 
11179 */ 11180 sum = up[0] + up[1] + up[2] + up[3] + 11181 up[4] + up[5] + up[6] + up[7] + 11182 up[8] + up[9] + up[10] + up[11] + 11183 up[12] + up[13] + up[14] + up[15]; 11184 11185 /* Fold the initial sum */ 11186 sum = (sum & 0xffff) + (sum >> 16); 11187 11188 up = (uint16_t *)(((uchar_t *)ip6h) + 11189 hdr_length + TCP_CHECKSUM_OFFSET); 11190 11191 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11192 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11193 ire->ire_max_frag, mctl_present, sum); 11194 11195 /* Software checksum? */ 11196 if (DB_CKSUMFLAGS(mp) == 0) { 11197 IP6_STAT(ipst, ip6_out_sw_cksum); 11198 IP6_STAT_UPDATE(ipst, 11199 ip6_tcp_out_sw_cksum_bytes, 11200 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11201 hdr_length); 11202 } 11203 } else if (nexthdr == IPPROTO_UDP) { 11204 uint16_t *up; 11205 11206 /* 11207 * check for full IPv6 header + enough UDP header 11208 * to get at the UDP checksum field 11209 */ 11210 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11211 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11212 if (!pullupmsg(mp, hdr_length + 11213 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11214 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11215 " failed\n")); 11216 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11217 freemsg(first_mp); 11218 return; 11219 } 11220 ip6h = (ip6_t *)mp->b_rptr; 11221 } 11222 up = (uint16_t *)&ip6h->ip6_src; 11223 /* 11224 * Note: The UDP module has stored the length value 11225 * into the udp checksum field, so we don't 11226 * need to explicitly sum it in here. 
11227 */ 11228 sum = up[0] + up[1] + up[2] + up[3] + 11229 up[4] + up[5] + up[6] + up[7] + 11230 up[8] + up[9] + up[10] + up[11] + 11231 up[12] + up[13] + up[14] + up[15]; 11232 11233 /* Fold the initial sum */ 11234 sum = (sum & 0xffff) + (sum >> 16); 11235 11236 up = (uint16_t *)(((uchar_t *)ip6h) + 11237 hdr_length + UDP_CHECKSUM_OFFSET); 11238 11239 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11240 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11241 ire->ire_max_frag, mctl_present, sum); 11242 11243 /* Software checksum? */ 11244 if (DB_CKSUMFLAGS(mp) == 0) { 11245 IP6_STAT(ipst, ip6_out_sw_cksum); 11246 IP6_STAT_UPDATE(ipst, 11247 ip6_udp_out_sw_cksum_bytes, 11248 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11249 hdr_length); 11250 } 11251 } else if (nexthdr == IPPROTO_ICMPV6) { 11252 uint16_t *up; 11253 icmp6_t *icmp6; 11254 11255 /* check for full IPv6+ICMPv6 header */ 11256 if ((mp->b_wptr - mp->b_rptr) < 11257 (hdr_length + ICMP6_MINLEN)) { 11258 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11259 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11260 " failed\n")); 11261 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11262 freemsg(first_mp); 11263 return; 11264 } 11265 ip6h = (ip6_t *)mp->b_rptr; 11266 } 11267 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11268 up = (uint16_t *)&ip6h->ip6_src; 11269 /* 11270 * icmp has placed length and routing 11271 * header adjustment in icmp6_cksum. 
11272 */ 11273 sum = htons(IPPROTO_ICMPV6) + 11274 up[0] + up[1] + up[2] + up[3] + 11275 up[4] + up[5] + up[6] + up[7] + 11276 up[8] + up[9] + up[10] + up[11] + 11277 up[12] + up[13] + up[14] + up[15]; 11278 sum = (sum & 0xffff) + (sum >> 16); 11279 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11280 11281 /* Update output mib stats */ 11282 icmp_update_out_mib_v6(ill, icmp6); 11283 } else if (nexthdr == IPPROTO_SCTP) { 11284 sctp_hdr_t *sctph; 11285 11286 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11287 if (!pullupmsg(mp, hdr_length + 11288 sizeof (*sctph))) { 11289 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11290 " failed\n")); 11291 BUMP_MIB(ill->ill_ip_mib, 11292 ipIfStatsOutDiscards); 11293 freemsg(mp); 11294 return; 11295 } 11296 ip6h = (ip6_t *)mp->b_rptr; 11297 } 11298 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11299 sctph->sh_chksum = 0; 11300 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11301 } 11302 11303 cksum_done: 11304 /* 11305 * We force the insertion of a fragment header using the 11306 * IPH_FRAG_HDR flag in two cases: 11307 * - after reception of an ICMPv6 "packet too big" message 11308 * with a MTU < 1280 (cf. RFC 2460 section 5) 11309 * - for multirouted IPv6 packets, so that the receiver can 11310 * discard duplicates according to their fragment identifier 11311 * 11312 * Two flags modifed from the API can modify this behavior. 11313 * The first is IPV6_USE_MIN_MTU. With this API the user 11314 * can specify how to manage PMTUD for unicast and multicast. 11315 * 11316 * IPV6_DONTFRAG disallows fragmentation. 
11317 */ 11318 max_frag = ire->ire_max_frag; 11319 switch (IP6I_USE_MIN_MTU_API(flags)) { 11320 case IPV6_USE_MIN_MTU_DEFAULT: 11321 case IPV6_USE_MIN_MTU_UNICAST: 11322 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11323 max_frag = IPV6_MIN_MTU; 11324 } 11325 break; 11326 11327 case IPV6_USE_MIN_MTU_NEVER: 11328 max_frag = IPV6_MIN_MTU; 11329 break; 11330 } 11331 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11332 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11333 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11334 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11335 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11336 return; 11337 } 11338 11339 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11340 (mp->b_cont ? msgdsize(mp) : 11341 mp->b_wptr - (uchar_t *)ip6h)) { 11342 ip0dbg(("Packet length mismatch: %d, %ld\n", 11343 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11344 msgdsize(mp))); 11345 freemsg(first_mp); 11346 return; 11347 } 11348 /* Do IPSEC processing first */ 11349 if (mctl_present) { 11350 if (attach_index != 0) 11351 ipsec_out_attach_if(io, attach_index); 11352 ipsec_out_process(q, first_mp, ire, ill_index); 11353 return; 11354 } 11355 ASSERT(mp->b_prev == NULL); 11356 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11357 ntohs(ip6h->ip6_plen) + 11358 IPV6_HDR_LEN, max_frag)); 11359 ASSERT(mp == first_mp); 11360 /* Initiate IPPF processing */ 11361 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11362 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11363 if (mp == NULL) { 11364 return; 11365 } 11366 } 11367 ip_wput_frag_v6(mp, ire, reachable, connp, 11368 caller, max_frag); 11369 return; 11370 } 11371 /* Do IPSEC processing first */ 11372 if (mctl_present) { 11373 int extra_len = ipsec_out_extra_length(first_mp); 11374 11375 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11376 max_frag) { 11377 /* 11378 * IPsec headers will push the packet over the 11379 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11380 * message for this packet if the upper-layer 11381 * that issued this packet will be able to 11382 * react to the icmp_pkt2big_v6() that we'll 11383 * generate. 11384 */ 11385 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11386 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11387 return; 11388 } 11389 if (attach_index != 0) 11390 ipsec_out_attach_if(io, attach_index); 11391 ipsec_out_process(q, first_mp, ire, ill_index); 11392 return; 11393 } 11394 /* 11395 * XXX multicast: add ip_mforward_v6() here. 11396 * Check conn_dontroute 11397 */ 11398 #ifdef lint 11399 /* 11400 * XXX The only purpose of this statement is to avoid lint 11401 * errors. See the above "XXX multicast". When that gets 11402 * fixed, remove this whole #ifdef lint section. 11403 */ 11404 ip3dbg(("multicast forward is %s.\n", 11405 (multicast_forward ? "TRUE" : "FALSE"))); 11406 #endif 11407 11408 UPDATE_OB_PKT_COUNT(ire); 11409 ire->ire_last_used_time = lbolt; 11410 ASSERT(mp == first_mp); 11411 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11412 } else { 11413 DTRACE_PROBE4(ip6__loopback__out__start, 11414 ill_t *, NULL, ill_t *, ill, 11415 ip6_t *, ip6h, mblk_t *, first_mp); 11416 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11417 ipst->ips_ipv6firewall_loopback_out, 11418 NULL, ill, ip6h, first_mp, mp, ipst); 11419 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11420 if (first_mp != NULL) 11421 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11422 } 11423 } 11424 11425 /* 11426 * Outbound IPv6 fragmentation routine using MDT. 
11427 */ 11428 static void 11429 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11430 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11431 { 11432 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11433 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11434 mblk_t *hdr_mp, *md_mp = NULL; 11435 int i1; 11436 multidata_t *mmd; 11437 unsigned char *hdr_ptr, *pld_ptr; 11438 ip_pdescinfo_t pdi; 11439 uint32_t ident; 11440 size_t len; 11441 uint16_t offset; 11442 queue_t *stq = ire->ire_stq; 11443 ill_t *ill = (ill_t *)stq->q_ptr; 11444 ip_stack_t *ipst = ill->ill_ipst; 11445 11446 ASSERT(DB_TYPE(mp) == M_DATA); 11447 ASSERT(MBLKL(mp) > unfragmentable_len); 11448 11449 /* 11450 * Move read ptr past unfragmentable portion, we don't want this part 11451 * of the data in our fragments. 11452 */ 11453 mp->b_rptr += unfragmentable_len; 11454 11455 /* Calculate how many packets we will send out */ 11456 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11457 pkts = (i1 + max_chunk - 1) / max_chunk; 11458 ASSERT(pkts > 1); 11459 11460 /* Allocate a message block which will hold all the IP Headers. */ 11461 wroff = ipst->ips_ip_wroff_extra; 11462 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11463 11464 i1 = pkts * hdr_chunk_len; 11465 /* 11466 * Create the header buffer, Multidata and destination address 11467 * and SAP attribute that should be associated with it. 
11468 */ 11469 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11470 ((hdr_mp->b_wptr += i1), 11471 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11472 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11473 freemsg(mp); 11474 if (md_mp == NULL) { 11475 freemsg(hdr_mp); 11476 } else { 11477 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11478 freemsg(md_mp); 11479 } 11480 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11481 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11482 return; 11483 } 11484 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11485 11486 /* 11487 * Add a payload buffer to the Multidata; this operation must not 11488 * fail, or otherwise our logic in this routine is broken. There 11489 * is no memory allocation done by the routine, so any returned 11490 * failure simply tells us that we've done something wrong. 11491 * 11492 * A failure tells us that either we're adding the same payload 11493 * buffer more than once, or we're trying to add more buffers than 11494 * allowed. None of the above cases should happen, and we panic 11495 * because either there's horrible heap corruption, and/or 11496 * programming mistake. 11497 */ 11498 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11499 goto pbuf_panic; 11500 } 11501 11502 hdr_ptr = hdr_mp->b_rptr; 11503 pld_ptr = mp->b_rptr; 11504 11505 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11506 11507 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11508 11509 /* 11510 * len is the total length of the fragmentable data in this 11511 * datagram. For each fragment sent, we will decrement len 11512 * by the amount of fragmentable data sent in that fragment 11513 * until len reaches zero. 
11514 */ 11515 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11516 11517 offset = 0; 11518 prev_nexthdr_offset += wroff; 11519 11520 while (len != 0) { 11521 size_t mlen; 11522 ip6_t *fip6h; 11523 ip6_frag_t *fraghdr; 11524 int error; 11525 11526 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11527 mlen = MIN(len, max_chunk); 11528 len -= mlen; 11529 11530 fip6h = (ip6_t *)(hdr_ptr + wroff); 11531 ASSERT(OK_32PTR(fip6h)); 11532 bcopy(ip6h, fip6h, unfragmentable_len); 11533 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11534 11535 fip6h->ip6_plen = htons((uint16_t)(mlen + 11536 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11537 11538 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11539 unfragmentable_len); 11540 fraghdr->ip6f_nxt = nexthdr; 11541 fraghdr->ip6f_reserved = 0; 11542 fraghdr->ip6f_offlg = htons(offset) | 11543 ((len != 0) ? IP6F_MORE_FRAG : 0); 11544 fraghdr->ip6f_ident = ident; 11545 11546 /* 11547 * Record offset and size of header and data of the next packet 11548 * in the multidata message. 11549 */ 11550 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11551 unfragmentable_len + sizeof (ip6_frag_t), 0); 11552 PDESC_PLD_INIT(&pdi); 11553 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11554 ASSERT(i1 > 0); 11555 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11556 if (i1 == mlen) { 11557 pld_ptr += mlen; 11558 } else { 11559 i1 = mlen - i1; 11560 mp = mp->b_cont; 11561 ASSERT(mp != NULL); 11562 ASSERT(MBLKL(mp) >= i1); 11563 /* 11564 * Attach the next payload message block to the 11565 * multidata message. 
11566 */ 11567 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11568 goto pbuf_panic; 11569 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11570 pld_ptr = mp->b_rptr + i1; 11571 } 11572 11573 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11574 KM_NOSLEEP)) == NULL) { 11575 /* 11576 * Any failure other than ENOMEM indicates that we 11577 * have passed in invalid pdesc info or parameters 11578 * to mmd_addpdesc, which must not happen. 11579 * 11580 * EINVAL is a result of failure on boundary checks 11581 * against the pdesc info contents. It should not 11582 * happen, and we panic because either there's 11583 * horrible heap corruption, and/or programming 11584 * mistake. 11585 */ 11586 if (error != ENOMEM) { 11587 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11588 "pdesc logic error detected for " 11589 "mmd %p pinfo %p (%d)\n", 11590 (void *)mmd, (void *)&pdi, error); 11591 /* NOTREACHED */ 11592 } 11593 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11594 /* Free unattached payload message blocks as well */ 11595 md_mp->b_cont = mp->b_cont; 11596 goto free_mmd; 11597 } 11598 11599 /* Advance fragment offset. */ 11600 offset += mlen; 11601 11602 /* Advance to location for next header in the buffer. */ 11603 hdr_ptr += hdr_chunk_len; 11604 11605 /* Did we reach the next payload message block? */ 11606 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11607 mp = mp->b_cont; 11608 /* 11609 * Attach the next message block with payload 11610 * data to the multidata message. 
11611 */ 11612 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11613 goto pbuf_panic; 11614 pld_ptr = mp->b_rptr; 11615 } 11616 } 11617 11618 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11619 ASSERT(mp->b_wptr == pld_ptr); 11620 11621 /* Update IP statistics */ 11622 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11623 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11624 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11625 /* 11626 * The ipv6 header len is accounted for in unfragmentable_len so 11627 * when calculating the fragmentation overhead just add the frag 11628 * header len. 11629 */ 11630 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11631 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11632 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11633 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11634 11635 ire->ire_ob_pkt_count += pkts; 11636 if (ire->ire_ipif != NULL) 11637 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11638 11639 ire->ire_last_used_time = lbolt; 11640 /* Send it down */ 11641 putnext(stq, md_mp); 11642 return; 11643 11644 pbuf_panic: 11645 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11646 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11647 pbuf_idx); 11648 /* NOTREACHED */ 11649 } 11650 11651 /* 11652 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11653 * We have not optimized this in terms of number of mblks 11654 * allocated. For instance, for each fragment sent we always allocate a 11655 * mblk to hold the IPv6 header and fragment header. 11656 * 11657 * Assumes that all the extension headers are contained in the first mblk. 11658 * 11659 * The fragment header is inserted after an hop-by-hop options header 11660 * and after [an optional destinations header followed by] a routing header. 11661 * 11662 * NOTE : This function does not ire_refrele the ire passed in as 11663 * the argument. 
11664 */ 11665 void 11666 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11667 int caller, int max_frag) 11668 { 11669 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11670 ip6_t *fip6h; 11671 mblk_t *hmp; 11672 mblk_t *hmp0; 11673 mblk_t *dmp; 11674 ip6_frag_t *fraghdr; 11675 size_t unfragmentable_len; 11676 size_t len; 11677 size_t mlen; 11678 size_t max_chunk; 11679 uint32_t ident; 11680 uint16_t off_flags; 11681 uint16_t offset = 0; 11682 ill_t *ill; 11683 uint8_t nexthdr; 11684 uint_t prev_nexthdr_offset; 11685 uint8_t *ptr; 11686 ip_stack_t *ipst = ire->ire_ipst; 11687 11688 ASSERT(ire->ire_type == IRE_CACHE); 11689 ill = (ill_t *)ire->ire_stq->q_ptr; 11690 11691 if (max_frag <= 0) { 11692 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11693 freemsg(mp); 11694 return; 11695 } 11696 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11697 11698 /* 11699 * Determine the length of the unfragmentable portion of this 11700 * datagram. This consists of the IPv6 header, a potential 11701 * hop-by-hop options header, a potential pre-routing-header 11702 * destination options header, and a potential routing header. 
11703 */ 11704 nexthdr = ip6h->ip6_nxt; 11705 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11706 ptr = (uint8_t *)&ip6h[1]; 11707 11708 if (nexthdr == IPPROTO_HOPOPTS) { 11709 ip6_hbh_t *hbh_hdr; 11710 uint_t hdr_len; 11711 11712 hbh_hdr = (ip6_hbh_t *)ptr; 11713 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11714 nexthdr = hbh_hdr->ip6h_nxt; 11715 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11716 - (uint8_t *)ip6h; 11717 ptr += hdr_len; 11718 } 11719 if (nexthdr == IPPROTO_DSTOPTS) { 11720 ip6_dest_t *dest_hdr; 11721 uint_t hdr_len; 11722 11723 dest_hdr = (ip6_dest_t *)ptr; 11724 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11725 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11726 nexthdr = dest_hdr->ip6d_nxt; 11727 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11728 - (uint8_t *)ip6h; 11729 ptr += hdr_len; 11730 } 11731 } 11732 if (nexthdr == IPPROTO_ROUTING) { 11733 ip6_rthdr_t *rthdr; 11734 uint_t hdr_len; 11735 11736 rthdr = (ip6_rthdr_t *)ptr; 11737 nexthdr = rthdr->ip6r_nxt; 11738 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11739 - (uint8_t *)ip6h; 11740 hdr_len = 8 * (rthdr->ip6r_len + 1); 11741 ptr += hdr_len; 11742 } 11743 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11744 11745 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11746 sizeof (ip6_frag_t)) & ~7; 11747 11748 /* Check if we can use MDT to send out the frags. */ 11749 ASSERT(!IRE_IS_LOCAL(ire)); 11750 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11751 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11752 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11753 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11754 nexthdr, prev_nexthdr_offset); 11755 return; 11756 } 11757 11758 /* 11759 * Allocate an mblk with enough room for the link-layer 11760 * header, the unfragmentable part of the datagram, and the 11761 * fragment header. 
This (or a copy) will be used as the 11762 * first mblk for each fragment we send. 11763 */ 11764 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11765 ipst->ips_ip_wroff_extra, BPRI_HI); 11766 if (hmp == NULL) { 11767 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11768 freemsg(mp); 11769 return; 11770 } 11771 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11772 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11773 11774 fip6h = (ip6_t *)hmp->b_rptr; 11775 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11776 11777 bcopy(ip6h, fip6h, unfragmentable_len); 11778 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11779 11780 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11781 11782 fraghdr->ip6f_nxt = nexthdr; 11783 fraghdr->ip6f_reserved = 0; 11784 fraghdr->ip6f_offlg = 0; 11785 fraghdr->ip6f_ident = htonl(ident); 11786 11787 /* 11788 * len is the total length of the fragmentable data in this 11789 * datagram. For each fragment sent, we will decrement len 11790 * by the amount of fragmentable data sent in that fragment 11791 * until len reaches zero. 11792 */ 11793 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11794 11795 /* 11796 * Move read ptr past unfragmentable portion, we don't want this part 11797 * of the data in our fragments. 
11798 */ 11799 mp->b_rptr += unfragmentable_len; 11800 11801 while (len != 0) { 11802 mlen = MIN(len, max_chunk); 11803 len -= mlen; 11804 if (len != 0) { 11805 /* Not last */ 11806 hmp0 = copyb(hmp); 11807 if (hmp0 == NULL) { 11808 freeb(hmp); 11809 freemsg(mp); 11810 BUMP_MIB(ill->ill_ip_mib, 11811 ipIfStatsOutFragFails); 11812 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11813 return; 11814 } 11815 off_flags = IP6F_MORE_FRAG; 11816 } else { 11817 /* Last fragment */ 11818 hmp0 = hmp; 11819 hmp = NULL; 11820 off_flags = 0; 11821 } 11822 fip6h = (ip6_t *)(hmp0->b_rptr); 11823 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11824 11825 fip6h->ip6_plen = htons((uint16_t)(mlen + 11826 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11827 /* 11828 * Note: Optimization alert. 11829 * In IPv6 (and IPv4) protocol header, Fragment Offset 11830 * ("offset") is 13 bits wide and in 8-octet units. 11831 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11832 * it occupies the most significant 13 bits. 11833 * (least significant 13 bits in IPv4). 11834 * We do not do any shifts here. Not shifting is same effect 11835 * as taking offset value in octet units, dividing by 8 and 11836 * then shifting 3 bits left to line it up in place in proper 11837 * place protocol header. 
11838 */ 11839 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11840 11841 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11842 /* mp has already been freed by ip_carve_mp() */ 11843 if (hmp != NULL) 11844 freeb(hmp); 11845 freeb(hmp0); 11846 ip1dbg(("ip_carve_mp: failed\n")); 11847 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11848 return; 11849 } 11850 hmp0->b_cont = dmp; 11851 /* Get the priority marking, if any */ 11852 hmp0->b_band = dmp->b_band; 11853 UPDATE_OB_PKT_COUNT(ire); 11854 ire->ire_last_used_time = lbolt; 11855 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11856 caller, NULL); 11857 reachable = 0; /* No need to redo state machine in loop */ 11858 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11859 offset += mlen; 11860 } 11861 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11862 } 11863 11864 /* 11865 * Determine if the ill and multicast aspects of that packets 11866 * "matches" the conn. 11867 */ 11868 boolean_t 11869 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11870 zoneid_t zoneid) 11871 { 11872 ill_t *in_ill; 11873 boolean_t wantpacket = B_TRUE; 11874 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11875 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11876 11877 /* 11878 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11879 * unicast and multicast reception to conn_incoming_ill. 11880 * conn_wantpacket_v6 is called both for unicast and 11881 * multicast. 11882 * 11883 * 1) The unicast copy of the packet can come anywhere in 11884 * the ill group if it is part of the group. Thus, we 11885 * need to check to see whether the ill group matches 11886 * if in_ill is part of a group. 11887 * 11888 * 2) ip_rput does not suppress duplicate multicast packets. 
11889 * If there are two interfaces in a ill group and we have 11890 * 2 applications (conns) joined a multicast group G on 11891 * both the interfaces, ilm_lookup_ill filter in ip_rput 11892 * will give us two packets because we join G on both the 11893 * interfaces rather than nominating just one interface 11894 * for receiving multicast like broadcast above. So, 11895 * we have to call ilg_lookup_ill to filter out duplicate 11896 * copies, if ill is part of a group, to supress duplicates. 11897 */ 11898 in_ill = connp->conn_incoming_ill; 11899 if (in_ill != NULL) { 11900 mutex_enter(&connp->conn_lock); 11901 in_ill = connp->conn_incoming_ill; 11902 mutex_enter(&ill->ill_lock); 11903 /* 11904 * No IPMP, and the packet did not arrive on conn_incoming_ill 11905 * OR, IPMP in use and the packet arrived on an IPMP group 11906 * different from the conn_incoming_ill's IPMP group. 11907 * Reject the packet. 11908 */ 11909 if ((in_ill->ill_group == NULL && in_ill != ill) || 11910 (in_ill->ill_group != NULL && 11911 in_ill->ill_group != ill->ill_group)) { 11912 wantpacket = B_FALSE; 11913 } 11914 mutex_exit(&ill->ill_lock); 11915 mutex_exit(&connp->conn_lock); 11916 if (!wantpacket) 11917 return (B_FALSE); 11918 } 11919 11920 if (connp->conn_multi_router) 11921 return (B_TRUE); 11922 11923 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11924 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11925 /* 11926 * Unicast case: we match the conn only if it's in the specified 11927 * zone. 11928 */ 11929 return (IPCL_ZONE_MATCH(connp, zoneid)); 11930 } 11931 11932 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11933 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11934 /* 11935 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11936 * disabled, therefore we don't dispatch the multicast packet to 11937 * the sending zone. 
11938 */ 11939 return (B_FALSE); 11940 } 11941 11942 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11943 zoneid != ALL_ZONES) { 11944 /* 11945 * Multicast packet on the loopback interface: we only match 11946 * conns who joined the group in the specified zone. 11947 */ 11948 return (B_FALSE); 11949 } 11950 11951 mutex_enter(&connp->conn_lock); 11952 wantpacket = 11953 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11954 mutex_exit(&connp->conn_lock); 11955 11956 return (wantpacket); 11957 } 11958 11959 11960 /* 11961 * Transmit a packet and update any NUD state based on the flags 11962 * XXX need to "recover" any ip6i_t when doing putq! 11963 * 11964 * NOTE : This function does not ire_refrele the ire passed in as the 11965 * argument. 11966 */ 11967 void 11968 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11969 int caller, ipsec_out_t *io) 11970 { 11971 mblk_t *mp1; 11972 nce_t *nce = ire->ire_nce; 11973 ill_t *ill; 11974 ill_t *out_ill; 11975 uint64_t delta; 11976 ip6_t *ip6h; 11977 queue_t *stq = ire->ire_stq; 11978 ire_t *ire1 = NULL; 11979 ire_t *save_ire = ire; 11980 boolean_t multirt_send = B_FALSE; 11981 mblk_t *next_mp = NULL; 11982 ip_stack_t *ipst = ire->ire_ipst; 11983 11984 ip6h = (ip6_t *)mp->b_rptr; 11985 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11986 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11987 ASSERT(nce != NULL); 11988 ASSERT(mp->b_datap->db_type == M_DATA); 11989 ASSERT(stq != NULL); 11990 11991 ill = ire_to_ill(ire); 11992 if (!ill) { 11993 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11994 freemsg(mp); 11995 return; 11996 } 11997 11998 /* 11999 * If a packet is to be sent out an interface that is a 6to4 12000 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12001 * destination, must be checked to have a 6to4 prefix 12002 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12003 * address configured on the sending interface. 
Otherwise, 12004 * the packet was delivered to this interface in error and the 12005 * packet must be dropped. 12006 */ 12007 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12008 ipif_t *ipif = ill->ill_ipif; 12009 12010 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12011 &ip6h->ip6_dst)) { 12012 if (ip_debug > 2) { 12013 /* ip1dbg */ 12014 pr_addr_dbg("ip_xmit_v6: attempting to " 12015 "send 6to4 addressed IPv6 " 12016 "destination (%s) out the wrong " 12017 "interface.\n", AF_INET6, 12018 &ip6h->ip6_dst); 12019 } 12020 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12021 freemsg(mp); 12022 return; 12023 } 12024 } 12025 12026 /* Flow-control check has been done in ip_wput_ire_v6 */ 12027 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12028 caller == IP_WSRV || canput(stq->q_next)) { 12029 uint32_t ill_index; 12030 12031 /* 12032 * In most cases, the emission loop below is entered only 12033 * once. Only in the case where the ire holds the 12034 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12035 * flagged ires in the bucket, and send the packet 12036 * through all crossed RTF_MULTIRT routes. 12037 */ 12038 if (ire->ire_flags & RTF_MULTIRT) { 12039 /* 12040 * Multirouting case. The bucket where ire is stored 12041 * probably holds other RTF_MULTIRT flagged ires 12042 * to the destination. In this call to ip_xmit_v6, 12043 * we attempt to send the packet through all 12044 * those ires. Thus, we first ensure that ire is the 12045 * first RTF_MULTIRT ire in the bucket, 12046 * before walking the ire list. 12047 */ 12048 ire_t *first_ire; 12049 irb_t *irb = ire->ire_bucket; 12050 ASSERT(irb != NULL); 12051 multirt_send = B_TRUE; 12052 12053 /* Make sure we do not omit any multiroute ire. 
*/ 12054 IRB_REFHOLD(irb); 12055 for (first_ire = irb->irb_ire; 12056 first_ire != NULL; 12057 first_ire = first_ire->ire_next) { 12058 if ((first_ire->ire_flags & RTF_MULTIRT) && 12059 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12060 &ire->ire_addr_v6)) && 12061 !(first_ire->ire_marks & 12062 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12063 break; 12064 } 12065 12066 if ((first_ire != NULL) && (first_ire != ire)) { 12067 IRE_REFHOLD(first_ire); 12068 /* ire will be released by the caller */ 12069 ire = first_ire; 12070 nce = ire->ire_nce; 12071 stq = ire->ire_stq; 12072 ill = ire_to_ill(ire); 12073 } 12074 IRB_REFRELE(irb); 12075 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12076 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12077 ILL_MDT_USABLE(ill)) { 12078 /* 12079 * This tcp connection was marked as MDT-capable, but 12080 * it has been turned off due changes in the interface. 12081 * Now that the interface support is back, turn it on 12082 * by notifying tcp. We don't directly modify tcp_mdt, 12083 * since we leave all the details to the tcp code that 12084 * knows better. 12085 */ 12086 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12087 12088 if (mdimp == NULL) { 12089 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12090 "connp %p (ENOMEM)\n", (void *)connp)); 12091 } else { 12092 CONN_INC_REF(connp); 12093 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12094 connp, SQTAG_TCP_INPUT_MCTL); 12095 } 12096 } 12097 12098 do { 12099 mblk_t *mp_ip6h; 12100 12101 if (multirt_send) { 12102 irb_t *irb; 12103 /* 12104 * We are in a multiple send case, need to get 12105 * the next ire and make a duplicate of the 12106 * packet. ire1 holds here the next ire to 12107 * process in the bucket. If multirouting is 12108 * expected, any non-RTF_MULTIRT ire that has 12109 * the right destination address is ignored. 
12110 */ 12111 irb = ire->ire_bucket; 12112 ASSERT(irb != NULL); 12113 12114 IRB_REFHOLD(irb); 12115 for (ire1 = ire->ire_next; 12116 ire1 != NULL; 12117 ire1 = ire1->ire_next) { 12118 if (!(ire1->ire_flags & RTF_MULTIRT)) 12119 continue; 12120 if (!IN6_ARE_ADDR_EQUAL( 12121 &ire1->ire_addr_v6, 12122 &ire->ire_addr_v6)) 12123 continue; 12124 if (ire1->ire_marks & 12125 (IRE_MARK_CONDEMNED| 12126 IRE_MARK_HIDDEN)) 12127 continue; 12128 12129 /* Got one */ 12130 if (ire1 != save_ire) { 12131 IRE_REFHOLD(ire1); 12132 } 12133 break; 12134 } 12135 IRB_REFRELE(irb); 12136 12137 if (ire1 != NULL) { 12138 next_mp = copyb(mp); 12139 if ((next_mp == NULL) || 12140 ((mp->b_cont != NULL) && 12141 ((next_mp->b_cont = 12142 dupmsg(mp->b_cont)) == NULL))) { 12143 freemsg(next_mp); 12144 next_mp = NULL; 12145 ire_refrele(ire1); 12146 ire1 = NULL; 12147 } 12148 } 12149 12150 /* Last multiroute ire; don't loop anymore. */ 12151 if (ire1 == NULL) { 12152 multirt_send = B_FALSE; 12153 } 12154 } 12155 12156 ill_index = 12157 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12158 12159 /* Initiate IPPF processing */ 12160 if (IP6_OUT_IPP(flags, ipst)) { 12161 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12162 if (mp == NULL) { 12163 BUMP_MIB(ill->ill_ip_mib, 12164 ipIfStatsOutDiscards); 12165 if (next_mp != NULL) 12166 freemsg(next_mp); 12167 if (ire != save_ire) { 12168 ire_refrele(ire); 12169 } 12170 return; 12171 } 12172 ip6h = (ip6_t *)mp->b_rptr; 12173 } 12174 mp_ip6h = mp; 12175 12176 /* 12177 * Check for fastpath, we need to hold nce_lock to 12178 * prevent fastpath update from chaining nce_fp_mp. 
12179 */ 12180 12181 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12182 mutex_enter(&nce->nce_lock); 12183 if ((mp1 = nce->nce_fp_mp) != NULL) { 12184 uint32_t hlen; 12185 uchar_t *rptr; 12186 12187 hlen = MBLKL(mp1); 12188 rptr = mp->b_rptr - hlen; 12189 /* 12190 * make sure there is room for the fastpath 12191 * datalink header 12192 */ 12193 if (rptr < mp->b_datap->db_base) { 12194 mp1 = copyb(mp1); 12195 mutex_exit(&nce->nce_lock); 12196 if (mp1 == NULL) { 12197 BUMP_MIB(ill->ill_ip_mib, 12198 ipIfStatsOutDiscards); 12199 freemsg(mp); 12200 if (next_mp != NULL) 12201 freemsg(next_mp); 12202 if (ire != save_ire) { 12203 ire_refrele(ire); 12204 } 12205 return; 12206 } 12207 mp1->b_cont = mp; 12208 12209 /* Get the priority marking, if any */ 12210 mp1->b_band = mp->b_band; 12211 mp = mp1; 12212 } else { 12213 mp->b_rptr = rptr; 12214 /* 12215 * fastpath - pre-pend datalink 12216 * header 12217 */ 12218 bcopy(mp1->b_rptr, rptr, hlen); 12219 mutex_exit(&nce->nce_lock); 12220 } 12221 } else { 12222 /* 12223 * Get the DL_UNITDATA_REQ. 12224 */ 12225 mp1 = nce->nce_res_mp; 12226 if (mp1 == NULL) { 12227 mutex_exit(&nce->nce_lock); 12228 ip1dbg(("ip_xmit_v6: No resolution " 12229 "block ire = %p\n", (void *)ire)); 12230 freemsg(mp); 12231 if (next_mp != NULL) 12232 freemsg(next_mp); 12233 if (ire != save_ire) { 12234 ire_refrele(ire); 12235 } 12236 return; 12237 } 12238 /* 12239 * Prepend the DL_UNITDATA_REQ. 
12240 */ 12241 mp1 = copyb(mp1); 12242 mutex_exit(&nce->nce_lock); 12243 if (mp1 == NULL) { 12244 BUMP_MIB(ill->ill_ip_mib, 12245 ipIfStatsOutDiscards); 12246 freemsg(mp); 12247 if (next_mp != NULL) 12248 freemsg(next_mp); 12249 if (ire != save_ire) { 12250 ire_refrele(ire); 12251 } 12252 return; 12253 } 12254 mp1->b_cont = mp; 12255 12256 /* Get the priority marking, if any */ 12257 mp1->b_band = mp->b_band; 12258 mp = mp1; 12259 } 12260 12261 out_ill = (ill_t *)stq->q_ptr; 12262 12263 DTRACE_PROBE4(ip6__physical__out__start, 12264 ill_t *, NULL, ill_t *, out_ill, 12265 ip6_t *, ip6h, mblk_t *, mp); 12266 12267 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12268 ipst->ips_ipv6firewall_physical_out, 12269 NULL, out_ill, ip6h, mp, mp_ip6h, ipst); 12270 12271 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12272 12273 if (mp == NULL) { 12274 if (multirt_send) { 12275 ASSERT(ire1 != NULL); 12276 if (ire != save_ire) { 12277 ire_refrele(ire); 12278 } 12279 /* 12280 * Proceed with the next RTF_MULTIRT 12281 * ire, also set up the send-to queue 12282 * accordingly. 12283 */ 12284 ire = ire1; 12285 ire1 = NULL; 12286 stq = ire->ire_stq; 12287 nce = ire->ire_nce; 12288 ill = ire_to_ill(ire); 12289 mp = next_mp; 12290 next_mp = NULL; 12291 continue; 12292 } else { 12293 ASSERT(next_mp == NULL); 12294 ASSERT(ire1 == NULL); 12295 break; 12296 } 12297 } 12298 12299 /* 12300 * Update ire and MIB counters; for save_ire, this has 12301 * been done by the caller. 12302 */ 12303 if (ire != save_ire) { 12304 UPDATE_OB_PKT_COUNT(ire); 12305 ire->ire_last_used_time = lbolt; 12306 12307 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12308 BUMP_MIB(ill->ill_ip_mib, 12309 ipIfStatsHCOutMcastPkts); 12310 UPDATE_MIB(ill->ill_ip_mib, 12311 ipIfStatsHCOutMcastOctets, 12312 ntohs(ip6h->ip6_plen) + 12313 IPV6_HDR_LEN); 12314 } 12315 } 12316 12317 /* 12318 * Send it down. XXX Do we want to flow control AH/ESP 12319 * packets that carry TCP payloads? 
We don't flow 12320 * control TCP packets, but we should also not 12321 * flow-control TCP packets that have been protected. 12322 * We don't have an easy way to find out if an AH/ESP 12323 * packet was originally TCP or not currently. 12324 */ 12325 if (io == NULL) { 12326 BUMP_MIB(ill->ill_ip_mib, 12327 ipIfStatsHCOutTransmits); 12328 UPDATE_MIB(ill->ill_ip_mib, 12329 ipIfStatsHCOutOctets, 12330 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12331 putnext(stq, mp); 12332 } else { 12333 /* 12334 * Safety Pup says: make sure this is 12335 * going to the right interface! 12336 */ 12337 if (io->ipsec_out_capab_ill_index != 12338 ill_index) { 12339 /* IPsec kstats: bump lose counter */ 12340 freemsg(mp1); 12341 } else { 12342 BUMP_MIB(ill->ill_ip_mib, 12343 ipIfStatsHCOutTransmits); 12344 UPDATE_MIB(ill->ill_ip_mib, 12345 ipIfStatsHCOutOctets, 12346 ntohs(ip6h->ip6_plen) + 12347 IPV6_HDR_LEN); 12348 ipsec_hw_putnext(stq, mp); 12349 } 12350 } 12351 12352 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12353 if (ire != save_ire) { 12354 ire_refrele(ire); 12355 } 12356 if (multirt_send) { 12357 ASSERT(ire1 != NULL); 12358 /* 12359 * Proceed with the next RTF_MULTIRT 12360 * ire, also set up the send-to queue 12361 * accordingly. 12362 */ 12363 ire = ire1; 12364 ire1 = NULL; 12365 stq = ire->ire_stq; 12366 nce = ire->ire_nce; 12367 ill = ire_to_ill(ire); 12368 mp = next_mp; 12369 next_mp = NULL; 12370 continue; 12371 } 12372 ASSERT(next_mp == NULL); 12373 ASSERT(ire1 == NULL); 12374 return; 12375 } 12376 12377 ASSERT(nce->nce_state != ND_INCOMPLETE); 12378 12379 /* 12380 * Check for upper layer advice 12381 */ 12382 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12383 /* 12384 * It should be o.k. to check the state without 12385 * a lock here, at most we lose an advice. 
12386 */ 12387 nce->nce_last = TICK_TO_MSEC(lbolt64); 12388 if (nce->nce_state != ND_REACHABLE) { 12389 12390 mutex_enter(&nce->nce_lock); 12391 nce->nce_state = ND_REACHABLE; 12392 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12393 mutex_exit(&nce->nce_lock); 12394 (void) untimeout(nce->nce_timeout_id); 12395 if (ip_debug > 2) { 12396 /* ip1dbg */ 12397 pr_addr_dbg("ip_xmit_v6: state" 12398 " for %s changed to" 12399 " REACHABLE\n", AF_INET6, 12400 &ire->ire_addr_v6); 12401 } 12402 } 12403 if (ire != save_ire) { 12404 ire_refrele(ire); 12405 } 12406 if (multirt_send) { 12407 ASSERT(ire1 != NULL); 12408 /* 12409 * Proceed with the next RTF_MULTIRT 12410 * ire, also set up the send-to queue 12411 * accordingly. 12412 */ 12413 ire = ire1; 12414 ire1 = NULL; 12415 stq = ire->ire_stq; 12416 nce = ire->ire_nce; 12417 ill = ire_to_ill(ire); 12418 mp = next_mp; 12419 next_mp = NULL; 12420 continue; 12421 } 12422 ASSERT(next_mp == NULL); 12423 ASSERT(ire1 == NULL); 12424 return; 12425 } 12426 12427 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12428 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12429 " ill_reachable_time = %d \n", delta, 12430 ill->ill_reachable_time)); 12431 if (delta > (uint64_t)ill->ill_reachable_time) { 12432 nce = ire->ire_nce; 12433 mutex_enter(&nce->nce_lock); 12434 switch (nce->nce_state) { 12435 case ND_REACHABLE: 12436 case ND_STALE: 12437 /* 12438 * ND_REACHABLE is identical to 12439 * ND_STALE in this specific case. If 12440 * reachable time has expired for this 12441 * neighbor (delta is greater than 12442 * reachable time), conceptually, the 12443 * neighbor cache is no longer in 12444 * REACHABLE state, but already in 12445 * STALE state. So the correct 12446 * transition here is to ND_DELAY. 
12447 */ 12448 nce->nce_state = ND_DELAY; 12449 mutex_exit(&nce->nce_lock); 12450 NDP_RESTART_TIMER(nce, 12451 ipst->ips_delay_first_probe_time); 12452 if (ip_debug > 3) { 12453 /* ip2dbg */ 12454 pr_addr_dbg("ip_xmit_v6: state" 12455 " for %s changed to" 12456 " DELAY\n", AF_INET6, 12457 &ire->ire_addr_v6); 12458 } 12459 break; 12460 case ND_DELAY: 12461 case ND_PROBE: 12462 mutex_exit(&nce->nce_lock); 12463 /* Timers have already started */ 12464 break; 12465 case ND_UNREACHABLE: 12466 /* 12467 * ndp timer has detected that this nce 12468 * is unreachable and initiated deleting 12469 * this nce and all its associated IREs. 12470 * This is a race where we found the 12471 * ire before it was deleted and have 12472 * just sent out a packet using this 12473 * unreachable nce. 12474 */ 12475 mutex_exit(&nce->nce_lock); 12476 break; 12477 default: 12478 ASSERT(0); 12479 } 12480 } 12481 12482 if (multirt_send) { 12483 ASSERT(ire1 != NULL); 12484 /* 12485 * Proceed with the next RTF_MULTIRT ire, 12486 * Also set up the send-to queue accordingly. 12487 */ 12488 if (ire != save_ire) { 12489 ire_refrele(ire); 12490 } 12491 ire = ire1; 12492 ire1 = NULL; 12493 stq = ire->ire_stq; 12494 nce = ire->ire_nce; 12495 ill = ire_to_ill(ire); 12496 mp = next_mp; 12497 next_mp = NULL; 12498 } 12499 } while (multirt_send); 12500 /* 12501 * In the multirouting case, release the last ire used for 12502 * emission. save_ire will be released by the caller. 12503 */ 12504 if (ire != save_ire) { 12505 ire_refrele(ire); 12506 } 12507 } else { 12508 /* 12509 * Queue packet if we have an conn to give back pressure. 12510 * We can't queue packets intended for hardware acceleration 12511 * since we've tossed that state already. If the packet is 12512 * being fed back from ire_send_v6, we don't know the 12513 * position in the queue to enqueue the packet and we discard 12514 * the packet. 
12515 */ 12516 if (ipst->ips_ip_output_queue && (connp != NULL) && 12517 (io == NULL) && (caller != IRE_SEND)) { 12518 if (caller == IP_WSRV) { 12519 connp->conn_did_putbq = 1; 12520 (void) putbq(connp->conn_wq, mp); 12521 conn_drain_insert(connp); 12522 /* 12523 * caller == IP_WSRV implies we are 12524 * the service thread, and the 12525 * queue is already noenabled. 12526 * The check for canput and 12527 * the putbq is not atomic. 12528 * So we need to check again. 12529 */ 12530 if (canput(stq->q_next)) 12531 connp->conn_did_putbq = 0; 12532 } else { 12533 (void) putq(connp->conn_wq, mp); 12534 } 12535 return; 12536 } 12537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12538 freemsg(mp); 12539 return; 12540 } 12541 } 12542 12543 /* 12544 * pr_addr_dbg function provides the needed buffer space to call 12545 * inet_ntop() function's 3rd argument. This function should be 12546 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12547 * stack buffer space in it's own stack frame. This function uses 12548 * a buffer from it's own stack and prints the information. 12549 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12550 * 12551 * Note: This function can call inet_ntop() once. 12552 */ 12553 void 12554 pr_addr_dbg(char *fmt1, int af, const void *addr) 12555 { 12556 char buf[INET6_ADDRSTRLEN]; 12557 12558 if (fmt1 == NULL) { 12559 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12560 return; 12561 } 12562 12563 /* 12564 * This does not compare debug level and just prints 12565 * out. Thus it is the responsibility of the caller 12566 * to check the appropriate debug-level before calling 12567 * this function. 
12568 */ 12569 if (ip_debug > 0) { 12570 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12571 } 12572 12573 12574 } 12575 12576 12577 /* 12578 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12579 * if needed and extension headers) that will be needed based on the 12580 * ip6_pkt_t structure passed by the caller. 12581 * 12582 * The returned length does not include the length of the upper level 12583 * protocol (ULP) header. 12584 */ 12585 int 12586 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12587 { 12588 int len; 12589 12590 len = IPV6_HDR_LEN; 12591 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12592 len += sizeof (ip6i_t); 12593 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12594 ASSERT(ipp->ipp_hopoptslen != 0); 12595 len += ipp->ipp_hopoptslen; 12596 } 12597 if (ipp->ipp_fields & IPPF_RTHDR) { 12598 ASSERT(ipp->ipp_rthdrlen != 0); 12599 len += ipp->ipp_rthdrlen; 12600 } 12601 /* 12602 * En-route destination options 12603 * Only do them if there's a routing header as well 12604 */ 12605 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12606 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12607 ASSERT(ipp->ipp_rtdstoptslen != 0); 12608 len += ipp->ipp_rtdstoptslen; 12609 } 12610 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12611 ASSERT(ipp->ipp_dstoptslen != 0); 12612 len += ipp->ipp_dstoptslen; 12613 } 12614 return (len); 12615 } 12616 12617 /* 12618 * All-purpose routine to build a header chain of an IPv6 header 12619 * followed by any required extension headers and a proto header, 12620 * preceeded (where necessary) by an ip6i_t private header. 12621 * 12622 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12623 * will be filled in appropriately. 12624 * Thus the caller must fill in the rest of the IPv6 header, such as 12625 * traffic class/flowid, source address (if not set here), hoplimit (if not 12626 * set here) and destination address. 12627 * 12628 * The extension headers and ip6i_t header will all be fully filled in. 
12629 */ 12630 void 12631 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12632 ip6_pkt_t *ipp, uint8_t protocol) 12633 { 12634 uint8_t *nxthdr_ptr; 12635 uint8_t *cp; 12636 ip6i_t *ip6i; 12637 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12638 12639 /* 12640 * If sending private ip6i_t header down (checksum info, nexthop, 12641 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12642 * then fill it in. (The checksum info will be filled in by icmp). 12643 */ 12644 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12645 ip6i = (ip6i_t *)ip6h; 12646 ip6h = (ip6_t *)&ip6i[1]; 12647 12648 ip6i->ip6i_flags = 0; 12649 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12650 if (ipp->ipp_fields & IPPF_IFINDEX || 12651 ipp->ipp_fields & IPPF_SCOPE_ID) { 12652 ASSERT(ipp->ipp_ifindex != 0); 12653 ip6i->ip6i_flags |= IP6I_IFINDEX; 12654 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12655 } 12656 if (ipp->ipp_fields & IPPF_ADDR) { 12657 /* 12658 * Enable per-packet source address verification if 12659 * IPV6_PKTINFO specified the source address. 12660 * ip6_src is set in the transport's _wput function. 12661 */ 12662 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12663 &ipp->ipp_addr)); 12664 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12665 } 12666 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12667 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12668 /* 12669 * We need to set this flag so that IP doesn't 12670 * rewrite the IPv6 header's hoplimit with the 12671 * current default value. 
12672 */ 12673 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12674 } 12675 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12676 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12677 &ipp->ipp_nexthop)); 12678 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12679 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12680 } 12681 /* 12682 * tell IP this is an ip6i_t private header 12683 */ 12684 ip6i->ip6i_nxt = IPPROTO_RAW; 12685 } 12686 /* Initialize IPv6 header */ 12687 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12688 if (ipp->ipp_fields & IPPF_TCLASS) { 12689 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12690 (ipp->ipp_tclass << 20); 12691 } 12692 if (ipp->ipp_fields & IPPF_ADDR) 12693 ip6h->ip6_src = ipp->ipp_addr; 12694 12695 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12696 cp = (uint8_t *)&ip6h[1]; 12697 /* 12698 * Here's where we have to start stringing together 12699 * any extension headers in the right order: 12700 * Hop-by-hop, destination, routing, and final destination opts. 12701 */ 12702 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12703 /* Hop-by-hop options */ 12704 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12705 12706 *nxthdr_ptr = IPPROTO_HOPOPTS; 12707 nxthdr_ptr = &hbh->ip6h_nxt; 12708 12709 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12710 cp += ipp->ipp_hopoptslen; 12711 } 12712 /* 12713 * En-route destination options 12714 * Only do them if there's a routing header as well 12715 */ 12716 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12717 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12718 ip6_dest_t *dst = (ip6_dest_t *)cp; 12719 12720 *nxthdr_ptr = IPPROTO_DSTOPTS; 12721 nxthdr_ptr = &dst->ip6d_nxt; 12722 12723 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12724 cp += ipp->ipp_rtdstoptslen; 12725 } 12726 /* 12727 * Routing header next 12728 */ 12729 if (ipp->ipp_fields & IPPF_RTHDR) { 12730 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12731 12732 *nxthdr_ptr = IPPROTO_ROUTING; 12733 nxthdr_ptr = &rt->ip6r_nxt; 12734 12735 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12736 cp += ipp->ipp_rthdrlen; 12737 } 
12738 /* 12739 * Do ultimate destination options 12740 */ 12741 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12742 ip6_dest_t *dest = (ip6_dest_t *)cp; 12743 12744 *nxthdr_ptr = IPPROTO_DSTOPTS; 12745 nxthdr_ptr = &dest->ip6d_nxt; 12746 12747 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12748 cp += ipp->ipp_dstoptslen; 12749 } 12750 /* 12751 * Now set the last header pointer to the proto passed in 12752 */ 12753 *nxthdr_ptr = protocol; 12754 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12755 } 12756 12757 /* 12758 * Return a pointer to the routing header extension header 12759 * in the IPv6 header(s) chain passed in. 12760 * If none found, return NULL 12761 * Assumes that all extension headers are in same mblk as the v6 header 12762 */ 12763 ip6_rthdr_t * 12764 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12765 { 12766 ip6_dest_t *desthdr; 12767 ip6_frag_t *fraghdr; 12768 uint_t hdrlen; 12769 uint8_t nexthdr; 12770 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12771 12772 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12773 return ((ip6_rthdr_t *)ptr); 12774 12775 /* 12776 * The routing header will precede all extension headers 12777 * other than the hop-by-hop and destination options 12778 * extension headers, so if we see anything other than those, 12779 * we're done and didn't find it. 12780 * We could see a destination options header alone but no 12781 * routing header, in which case we'll return NULL as soon as 12782 * we see anything after that. 12783 * Hop-by-hop and destination option headers are identical, 12784 * so we can use either one we want as a template. 12785 */ 12786 nexthdr = ip6h->ip6_nxt; 12787 while (ptr < endptr) { 12788 /* Is there enough left for len + nexthdr? 
*/ 12789 if (ptr + MIN_EHDR_LEN > endptr) 12790 return (NULL); 12791 12792 switch (nexthdr) { 12793 case IPPROTO_HOPOPTS: 12794 case IPPROTO_DSTOPTS: 12795 /* Assumes the headers are identical for hbh and dst */ 12796 desthdr = (ip6_dest_t *)ptr; 12797 hdrlen = 8 * (desthdr->ip6d_len + 1); 12798 nexthdr = desthdr->ip6d_nxt; 12799 break; 12800 12801 case IPPROTO_ROUTING: 12802 return ((ip6_rthdr_t *)ptr); 12803 12804 case IPPROTO_FRAGMENT: 12805 fraghdr = (ip6_frag_t *)ptr; 12806 hdrlen = sizeof (ip6_frag_t); 12807 nexthdr = fraghdr->ip6f_nxt; 12808 break; 12809 12810 default: 12811 return (NULL); 12812 } 12813 ptr += hdrlen; 12814 } 12815 return (NULL); 12816 } 12817 12818 /* 12819 * Called for source-routed packets originating on this node. 12820 * Manipulates the original routing header by moving every entry up 12821 * one slot, placing the first entry in the v6 header's v6_dst field, 12822 * and placing the ultimate destination in the routing header's last 12823 * slot. 12824 * 12825 * Returns the checksum diference between the ultimate destination 12826 * (last hop in the routing header when the packet is sent) and 12827 * the first hop (ip6_dst when the packet is sent) 12828 */ 12829 /* ARGSUSED2 */ 12830 uint32_t 12831 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12832 { 12833 uint_t numaddr; 12834 uint_t i; 12835 in6_addr_t *addrptr; 12836 in6_addr_t tmp; 12837 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12838 uint32_t cksm; 12839 uint32_t addrsum = 0; 12840 uint16_t *ptr; 12841 12842 /* 12843 * Perform any processing needed for source routing. 12844 * We know that all extension headers will be in the same mblk 12845 * as the IPv6 header. 12846 */ 12847 12848 /* 12849 * If no segments left in header, or the header length field is zero, 12850 * don't move hop addresses around; 12851 * Checksum difference is zero. 
12852 */ 12853 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12854 return (0); 12855 12856 ptr = (uint16_t *)&ip6h->ip6_dst; 12857 cksm = 0; 12858 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12859 cksm += ptr[i]; 12860 } 12861 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12862 12863 /* 12864 * Here's where the fun begins - we have to 12865 * move all addresses up one spot, take the 12866 * first hop and make it our first ip6_dst, 12867 * and place the ultimate destination in the 12868 * newly-opened last slot. 12869 */ 12870 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12871 numaddr = rthdr->ip6r0_len / 2; 12872 tmp = *addrptr; 12873 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12874 *addrptr = addrptr[1]; 12875 } 12876 *addrptr = ip6h->ip6_dst; 12877 ip6h->ip6_dst = tmp; 12878 12879 /* 12880 * From the checksummed ultimate destination subtract the checksummed 12881 * current ip6_dst (the first hop address). Return that number. 12882 * (In the v4 case, the second part of this is done in each routine 12883 * that calls ip_massage_options(). We do it all in this one place 12884 * for v6). 12885 */ 12886 ptr = (uint16_t *)&ip6h->ip6_dst; 12887 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12888 addrsum += ptr[i]; 12889 } 12890 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12891 if ((int)cksm < 0) 12892 cksm--; 12893 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12894 12895 return (cksm); 12896 } 12897 12898 /* 12899 * Propagate a multicast group membership operation (join/leave) (*fn) on 12900 * all interfaces crossed by the related multirt routes. 12901 * The call is considered successful if the operation succeeds 12902 * on at least one interface. 12903 * The function is called if the destination address in the packet to send 12904 * is multirouted. 
12905 */ 12906 int 12907 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12908 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12909 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12910 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12911 { 12912 ire_t *ire_gw; 12913 irb_t *irb; 12914 int index, error = 0; 12915 opt_restart_t *or; 12916 ip_stack_t *ipst = ire->ire_ipst; 12917 12918 irb = ire->ire_bucket; 12919 ASSERT(irb != NULL); 12920 12921 ASSERT(DB_TYPE(first_mp) == M_CTL); 12922 or = (opt_restart_t *)first_mp->b_rptr; 12923 12924 IRB_REFHOLD(irb); 12925 for (; ire != NULL; ire = ire->ire_next) { 12926 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12927 continue; 12928 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12929 continue; 12930 12931 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12932 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12933 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12934 /* No resolver exists for the gateway; skip this ire. */ 12935 if (ire_gw == NULL) 12936 continue; 12937 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12938 /* 12939 * A resolver exists: we can get the interface on which we have 12940 * to apply the operation. 12941 */ 12942 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12943 first_mp); 12944 if (error == 0) 12945 or->or_private = CGTP_MCAST_SUCCESS; 12946 12947 if (ip_debug > 0) { 12948 ulong_t off; 12949 char *ksym; 12950 12951 ksym = kobj_getsymname((uintptr_t)fn, &off); 12952 ip2dbg(("ip_multirt_apply_membership_v6: " 12953 "called %s, multirt group 0x%08x via itf 0x%08x, " 12954 "error %d [success %u]\n", 12955 ksym ? 
ksym : "?", 12956 ntohl(V4_PART_OF_V6((*v6grp))), 12957 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12958 error, or->or_private)); 12959 } 12960 12961 ire_refrele(ire_gw); 12962 if (error == EINPROGRESS) { 12963 IRB_REFRELE(irb); 12964 return (error); 12965 } 12966 } 12967 IRB_REFRELE(irb); 12968 /* 12969 * Consider the call as successful if we succeeded on at least 12970 * one interface. Otherwise, return the last encountered error. 12971 */ 12972 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12973 } 12974 12975 void 12976 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12977 { 12978 kstat_t *ksp; 12979 12980 ip6_stat_t template = { 12981 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12982 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12983 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12984 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12985 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12986 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12987 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12988 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12989 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12990 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12991 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12992 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12993 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12994 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12995 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12996 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12997 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12998 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12999 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13000 }; 13001 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13002 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13003 KSTAT_FLAG_VIRTUAL, stackid); 13004 13005 if (ksp == NULL) 13006 return (NULL); 13007 13008 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13009 ksp->ks_data = (void *)ip6_statisticsp; 13010 ksp->ks_private = (void *)(uintptr_t)stackid; 13011 13012 kstat_install(ksp); 13013 return (ksp); 13014 } 13015 13016 void 13017 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13018 { 13019 if (ksp != NULL) { 13020 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13021 kstat_delete_netstack(ksp, stackid); 13022 } 13023 } 13024 13025 /* 13026 * The following two functions set and get the value for the 13027 * IPV6_SRC_PREFERENCES socket option. 13028 */ 13029 int 13030 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13031 { 13032 /* 13033 * We only support preferences that are covered by 13034 * IPV6_PREFER_SRC_MASK. 13035 */ 13036 if (prefs & ~IPV6_PREFER_SRC_MASK) 13037 return (EINVAL); 13038 13039 /* 13040 * Look for conflicting preferences or default preferences. If 13041 * both bits of a related pair are clear, the application wants the 13042 * system's default value for that pair. Both bits in a pair can't 13043 * be set. 
13044 */ 13045 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13046 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13047 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13048 IPV6_PREFER_SRC_MIPMASK) { 13049 return (EINVAL); 13050 } 13051 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13052 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13053 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13054 IPV6_PREFER_SRC_TMPMASK) { 13055 return (EINVAL); 13056 } 13057 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13058 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13059 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13060 IPV6_PREFER_SRC_CGAMASK) { 13061 return (EINVAL); 13062 } 13063 13064 connp->conn_src_preferences = prefs; 13065 return (0); 13066 } 13067 13068 size_t 13069 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13070 { 13071 *val = connp->conn_src_preferences; 13072 return (sizeof (connp->conn_src_preferences)); 13073 } 13074 13075 int 13076 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13077 { 13078 ill_t *ill; 13079 ire_t *ire; 13080 int error; 13081 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13082 13083 /* 13084 * Verify the source address and ifindex. Privileged users can use 13085 * any source address. For ancillary data the source address is 13086 * checked in ip_wput_v6. 13087 */ 13088 if (pkti->ipi6_ifindex != 0) { 13089 ASSERT(connp != NULL); 13090 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13091 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13092 if (ill == NULL) { 13093 /* 13094 * We just want to know if the interface exists, we 13095 * don't really care about the ill pointer itself. 
13096 */ 13097 if (error != EINPROGRESS) 13098 return (error); 13099 error = 0; /* Ensure we don't use it below */ 13100 } else { 13101 ill_refrele(ill); 13102 } 13103 } 13104 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13105 secpolicy_net_rawaccess(cr) != 0) { 13106 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13107 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13108 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13109 if (ire != NULL) 13110 ire_refrele(ire); 13111 else 13112 return (ENXIO); 13113 } 13114 return (0); 13115 } 13116 13117 /* 13118 * Get the size of the IP options (including the IP headers size) 13119 * without including the AH header's size. If till_ah is B_FALSE, 13120 * and if AH header is present, dest options beyond AH header will 13121 * also be included in the returned size. 13122 */ 13123 int 13124 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13125 { 13126 ip6_t *ip6h; 13127 uint8_t nexthdr; 13128 uint8_t *whereptr; 13129 ip6_hbh_t *hbhhdr; 13130 ip6_dest_t *dsthdr; 13131 ip6_rthdr_t *rthdr; 13132 int ehdrlen; 13133 int size; 13134 ah_t *ah; 13135 13136 ip6h = (ip6_t *)mp->b_rptr; 13137 size = IPV6_HDR_LEN; 13138 nexthdr = ip6h->ip6_nxt; 13139 whereptr = (uint8_t *)&ip6h[1]; 13140 for (;;) { 13141 /* Assume IP has already stripped it */ 13142 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13143 switch (nexthdr) { 13144 case IPPROTO_HOPOPTS: 13145 hbhhdr = (ip6_hbh_t *)whereptr; 13146 nexthdr = hbhhdr->ip6h_nxt; 13147 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13148 break; 13149 case IPPROTO_DSTOPTS: 13150 dsthdr = (ip6_dest_t *)whereptr; 13151 nexthdr = dsthdr->ip6d_nxt; 13152 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13153 break; 13154 case IPPROTO_ROUTING: 13155 rthdr = (ip6_rthdr_t *)whereptr; 13156 nexthdr = rthdr->ip6r_nxt; 13157 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13158 break; 13159 default : 13160 if (till_ah) { 13161 ASSERT(nexthdr == IPPROTO_AH); 13162 return (size); 13163 } 13164 /* 13165 * If we don't have a 
AH header to traverse, 13166 * return now. This happens normally for 13167 * outbound datagrams where we have not inserted 13168 * the AH header. 13169 */ 13170 if (nexthdr != IPPROTO_AH) { 13171 return (size); 13172 } 13173 13174 /* 13175 * We don't include the AH header's size 13176 * to be symmetrical with other cases where 13177 * we either don't have a AH header (outbound) 13178 * or peek into the AH header yet (inbound and 13179 * not pulled up yet). 13180 */ 13181 ah = (ah_t *)whereptr; 13182 nexthdr = ah->ah_nexthdr; 13183 ehdrlen = (ah->ah_length << 2) + 8; 13184 13185 if (nexthdr == IPPROTO_DSTOPTS) { 13186 if (whereptr + ehdrlen >= mp->b_wptr) { 13187 /* 13188 * The destination options header 13189 * is not part of the first mblk. 13190 */ 13191 whereptr = mp->b_cont->b_rptr; 13192 } else { 13193 whereptr += ehdrlen; 13194 } 13195 13196 dsthdr = (ip6_dest_t *)whereptr; 13197 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13198 size += ehdrlen; 13199 } 13200 return (size); 13201 } 13202 whereptr += ehdrlen; 13203 size += ehdrlen; 13204 } 13205 } 13206