1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/optcom.h> 73 #include <inet/mib2.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/tcp_impl.h> 83 #include <inet/udp_impl.h> 84 #include <inet/ipp_common.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <inet/rawip_impl.h> 102 #include <inet/rts_impl.h> 103 #include <sys/squeue.h> 104 105 #include <sys/tsol/label.h> 106 
#include <sys/tsol/tnet.h>

#include <rpc/pmap_prot.h>

/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>

extern squeue_func_t ip_input_proc;

/*
 * Naming conventions:
 * These rules should be judiciously applied
 * if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

/*
 * Well-known IPv6 address constants.
 *
 * Each initializer supplies four values (presumably the four 32-bit words of
 * in6_addr_t -- confirm against its definition).  Where a word is neither
 * all-zeros nor all-ones, a _BIG_ENDIAN and a little-endian variant are
 * given whose values are byte-swapped images of each other, so that the
 * bytes in memory are the same (network order) on either kind of machine.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 -- the low 24 bits are left zero in this template */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * True when mp is non-NULL and its first block holds at least
 * 2 * IPV6_ADDR_LEN bytes between b_rptr and b_wptr.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations of the file-local (static) routines. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *, ip_stack_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in
by the code */ 229 }; 230 231 /* 232 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 233 * The message has already been checksummed and if needed, 234 * a copy has been made to be sent any interested ICMP client (conn) 235 * Note that this is different than icmp_inbound() which does the fanout 236 * to conn's as well as local processing of the ICMP packets. 237 * 238 * All error messages are passed to the matching transport stream. 239 * 240 * Zones notes: 241 * The packet is only processed in the context of the specified zone: typically 242 * only this zone will reply to an echo request. This means that the caller must 243 * call icmp_inbound_v6() for each relevant zone. 244 */ 245 static void 246 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 247 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 248 { 249 icmp6_t *icmp6; 250 ip6_t *ip6h; 251 boolean_t interested; 252 ip6i_t *ip6i; 253 in6_addr_t origsrc; 254 ire_t *ire; 255 mblk_t *first_mp; 256 ipsec_in_t *ii; 257 ip_stack_t *ipst = ill->ill_ipst; 258 259 ASSERT(ill != NULL); 260 first_mp = mp; 261 if (mctl_present) { 262 mp = first_mp->b_cont; 263 ASSERT(mp != NULL); 264 265 ii = (ipsec_in_t *)first_mp->b_rptr; 266 ASSERT(ii->ipsec_in_type == IPSEC_IN); 267 } 268 269 ip6h = (ip6_t *)mp->b_rptr; 270 271 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 272 273 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 274 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 275 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 277 freemsg(first_mp); 278 return; 279 } 280 ip6h = (ip6_t *)mp->b_rptr; 281 } 282 if (ipst->ips_icmp_accept_clear_messages == 0) { 283 first_mp = ipsec_check_global_policy(first_mp, NULL, 284 NULL, ip6h, mctl_present, ipst->ips_netstack); 285 if (first_mp == NULL) 286 return; 287 } 288 289 /* 290 * On a labeled system, we have to check whether the zone itself is 291 * 
permitted to receive raw traffic. 292 */ 293 if (is_system_labeled()) { 294 if (zoneid == ALL_ZONES) 295 zoneid = tsol_packet_to_zoneid(mp); 296 if (!tsol_can_accept_raw(mp, B_FALSE)) { 297 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 298 zoneid)); 299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 300 freemsg(first_mp); 301 return; 302 } 303 } 304 305 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 306 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 307 icmp6->icmp6_code)); 308 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 309 310 /* Initiate IPPF processing here */ 311 if (IP6_IN_IPP(flags, ipst)) { 312 313 /* 314 * If the ifindex changes due to SIOCSLIFINDEX 315 * packet may return to IP on the wrong ill. 316 */ 317 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 318 if (mp == NULL) { 319 if (mctl_present) { 320 freeb(first_mp); 321 } 322 return; 323 } 324 } 325 326 switch (icmp6->icmp6_type) { 327 case ICMP6_DST_UNREACH: 328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 329 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 331 break; 332 333 case ICMP6_TIME_EXCEEDED: 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 335 break; 336 337 case ICMP6_PARAM_PROB: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 339 break; 340 341 case ICMP6_PACKET_TOO_BIG: 342 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 343 zoneid); 344 return; 345 case ICMP6_ECHO_REQUEST: 346 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 347 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 348 !ipst->ips_ipv6_resp_echo_mcast) 349 break; 350 351 /* 352 * We must have exclusive use of the mblk to convert it to 353 * a response. 354 * If not, we copy it. 
355 */ 356 if (mp->b_datap->db_ref > 1) { 357 mblk_t *mp1; 358 359 mp1 = copymsg(mp); 360 freemsg(mp); 361 if (mp1 == NULL) { 362 BUMP_MIB(ill->ill_icmp6_mib, 363 ipv6IfIcmpInErrors); 364 if (mctl_present) 365 freeb(first_mp); 366 return; 367 } 368 mp = mp1; 369 ip6h = (ip6_t *)mp->b_rptr; 370 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 371 if (mctl_present) 372 first_mp->b_cont = mp; 373 else 374 first_mp = mp; 375 } 376 377 /* 378 * Turn the echo into an echo reply. 379 * Remove any extension headers (do not reverse a source route) 380 * and clear the flow id (keep traffic class for now). 381 */ 382 if (hdr_length != IPV6_HDR_LEN) { 383 int i; 384 385 for (i = 0; i < IPV6_HDR_LEN; i++) 386 mp->b_rptr[hdr_length - i - 1] = 387 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 388 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 389 ip6h = (ip6_t *)mp->b_rptr; 390 ip6h->ip6_nxt = IPPROTO_ICMPV6; 391 hdr_length = IPV6_HDR_LEN; 392 } 393 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 394 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 395 396 ip6h->ip6_plen = 397 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 398 origsrc = ip6h->ip6_src; 399 /* 400 * Reverse the source and destination addresses. 401 * If the return address is a multicast, zero out the source 402 * (ip_wput_v6 will set an address). 403 */ 404 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 405 ip6h->ip6_src = ipv6_all_zeros; 406 ip6h->ip6_dst = origsrc; 407 } else { 408 ip6h->ip6_src = ip6h->ip6_dst; 409 ip6h->ip6_dst = origsrc; 410 } 411 412 /* set the hop limit */ 413 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 414 415 /* 416 * Prepare for checksum by putting icmp length in the icmp 417 * checksum field. The checksum is calculated in ip_wput_v6. 418 */ 419 icmp6->icmp6_cksum = ip6h->ip6_plen; 420 /* 421 * ICMP echo replies should go out on the same interface 422 * the request came on as probes used by in.mpathd for 423 * detecting NIC failures are ECHO packets. 
We turn-off load 424 * spreading by allocating a ip6i and setting ip6i_attach_if 425 * to B_TRUE which is handled both by ip_wput_v6 and 426 * ip_newroute_v6. If we don't turnoff load spreading, 427 * the packets might get dropped if there are no 428 * non-FAILED/INACTIVE interfaces for it to go out on and 429 * in.mpathd would wrongly detect a failure or mis-detect 430 * a NIC failure as a link failure. As load spreading can 431 * happen only if ill_group is not NULL, we do only for 432 * that case and this does not affect the normal case. 433 * 434 * We force this only on echo packets that came from on-link 435 * hosts. We restrict this to link-local addresses which 436 * is used by in.mpathd for probing. In the IPv6 case, 437 * default routes typically have an ire_ipif pointer and 438 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 439 * might work. As a default route out of this interface 440 * may not be present, enforcing this packet to go out in 441 * this case may not work. 442 */ 443 if (ill->ill_group != NULL && 444 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 445 /* 446 * If we are sending replies to ourselves, don't 447 * set ATTACH_IF as we may not be able to find 448 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 449 * causes ip_wput_v6 to look for an IRE_LOCAL on 450 * "ill" which it may not find and will try to 451 * create an IRE_CACHE for our local address. Once 452 * we do this, we will try to forward all packets 453 * meant to our LOCAL address. 
454 */ 455 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 456 NULL, ipst); 457 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 458 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 459 if (mp == NULL) { 460 BUMP_MIB(ill->ill_icmp6_mib, 461 ipv6IfIcmpInErrors); 462 if (ire != NULL) 463 ire_refrele(ire); 464 if (mctl_present) 465 freeb(first_mp); 466 return; 467 } else if (mctl_present) { 468 first_mp->b_cont = mp; 469 } else { 470 first_mp = mp; 471 } 472 ip6i = (ip6i_t *)mp->b_rptr; 473 ip6i->ip6i_flags = IP6I_ATTACH_IF; 474 ip6i->ip6i_ifindex = 475 ill->ill_phyint->phyint_ifindex; 476 } 477 if (ire != NULL) 478 ire_refrele(ire); 479 } 480 481 if (!mctl_present) { 482 /* 483 * This packet should go out the same way as it 484 * came in i.e in clear. To make sure that global 485 * policy will not be applied to this in ip_wput, 486 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 487 */ 488 ASSERT(first_mp == mp); 489 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 490 if (first_mp == NULL) { 491 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 492 freemsg(mp); 493 return; 494 } 495 ii = (ipsec_in_t *)first_mp->b_rptr; 496 497 /* This is not a secure packet */ 498 ii->ipsec_in_secure = B_FALSE; 499 first_mp->b_cont = mp; 500 } 501 ii->ipsec_in_zoneid = zoneid; 502 ASSERT(zoneid != ALL_ZONES); 503 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 504 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 505 return; 506 } 507 put(WR(q), first_mp); 508 return; 509 510 case ICMP6_ECHO_REPLY: 511 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 512 break; 513 514 case ND_ROUTER_SOLICIT: 515 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 516 break; 517 518 case ND_ROUTER_ADVERT: 519 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 520 break; 521 522 case ND_NEIGHBOR_SOLICIT: 523 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 524 if (mctl_present) 525 freeb(first_mp); 526 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 527 ndp_input(ill, mp, dl_mp); 528 return; 529 530 case ND_NEIGHBOR_ADVERT: 531 BUMP_MIB(ill->ill_icmp6_mib, 532 ipv6IfIcmpInNeighborAdvertisements); 533 if (mctl_present) 534 freeb(first_mp); 535 /* XXX may wish to pass first_mp up to ndp_input someday. */ 536 ndp_input(ill, mp, dl_mp); 537 return; 538 539 case ND_REDIRECT: { 540 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 541 542 if (ipst->ips_ipv6_ignore_redirect) 543 break; 544 545 /* 546 * As there is no upper client to deliver, we don't 547 * need the first_mp any more. 548 */ 549 if (mctl_present) 550 freeb(first_mp); 551 if (!pullupmsg(mp, -1)) { 552 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 553 break; 554 } 555 icmp_redirect_v6(q, mp, ill); 556 return; 557 } 558 559 /* 560 * The next three icmp messages will be handled by MLD. 561 * Pass all valid MLD packets up to any process(es) 562 * listening on a raw ICMP socket. MLD messages are 563 * freed by mld_input function. 564 */ 565 case MLD_LISTENER_QUERY: 566 case MLD_LISTENER_REPORT: 567 case MLD_LISTENER_REDUCTION: 568 if (mctl_present) 569 freeb(first_mp); 570 mld_input(q, mp, ill); 571 return; 572 default: 573 break; 574 } 575 if (interested) { 576 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 577 mctl_present, zoneid); 578 } else { 579 freemsg(first_mp); 580 } 581 } 582 583 /* 584 * Process received IPv6 ICMP Packet too big. 585 * After updating any IRE it does the fanout to any matching transport streams. 586 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
587 */ 588 /* ARGSUSED */ 589 static void 590 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 591 boolean_t mctl_present, zoneid_t zoneid) 592 { 593 ip6_t *ip6h; 594 ip6_t *inner_ip6h; 595 icmp6_t *icmp6; 596 uint16_t hdr_length; 597 uint32_t mtu; 598 ire_t *ire, *first_ire; 599 mblk_t *first_mp; 600 ip_stack_t *ipst = ill->ill_ipst; 601 602 first_mp = mp; 603 if (mctl_present) 604 mp = first_mp->b_cont; 605 /* 606 * We must have exclusive use of the mblk to update the MTU 607 * in the packet. 608 * If not, we copy it. 609 * 610 * If there's an M_CTL present, we know that allocated first_mp 611 * earlier in this function, so we know first_mp has refcnt of one. 612 */ 613 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 614 if (mp->b_datap->db_ref > 1) { 615 mblk_t *mp1; 616 617 mp1 = copymsg(mp); 618 freemsg(mp); 619 if (mp1 == NULL) { 620 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 621 if (mctl_present) 622 freeb(first_mp); 623 return; 624 } 625 mp = mp1; 626 if (mctl_present) 627 first_mp->b_cont = mp; 628 else 629 first_mp = mp; 630 } 631 ip6h = (ip6_t *)mp->b_rptr; 632 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 633 hdr_length = ip_hdr_length_v6(mp, ip6h); 634 else 635 hdr_length = IPV6_HDR_LEN; 636 637 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 638 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 639 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 640 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 641 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 642 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 643 freemsg(first_mp); 644 return; 645 } 646 ip6h = (ip6_t *)mp->b_rptr; 647 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 648 inner_ip6h = (ip6_t *)&icmp6[1]; 649 } 650 651 /* 652 * For link local destinations matching simply on IRE type is not 653 * sufficient. Same link local addresses for different ILL's is 654 * possible. 
655 */ 656 657 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 658 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 659 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 660 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 661 662 if (first_ire == NULL) { 663 if (ip_debug > 2) { 664 /* ip1dbg */ 665 pr_addr_dbg("icmp_inbound_too_big_v6:" 666 "no ire for dst %s\n", AF_INET6, 667 &inner_ip6h->ip6_dst); 668 } 669 freemsg(first_mp); 670 return; 671 } 672 673 mtu = ntohl(icmp6->icmp6_mtu); 674 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 675 for (ire = first_ire; ire != NULL && 676 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 677 ire = ire->ire_next) { 678 mutex_enter(&ire->ire_lock); 679 if (mtu < IPV6_MIN_MTU) { 680 ip1dbg(("Received mtu less than IPv6 " 681 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 682 mtu = IPV6_MIN_MTU; 683 /* 684 * If an mtu less than IPv6 min mtu is received, 685 * we must include a fragment header in 686 * subsequent packets. 687 */ 688 ire->ire_frag_flag |= IPH_FRAG_HDR; 689 } 690 ip1dbg(("Received mtu from router: %d\n", mtu)); 691 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 692 /* Record the new max frag size for the ULP. */ 693 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 694 /* 695 * If we need a fragment header in every packet 696 * (above case or multirouting), make sure the 697 * ULP takes it into account when computing the 698 * payload size. 
699 */ 700 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 701 sizeof (ip6_frag_t)); 702 } else { 703 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 704 } 705 mutex_exit(&ire->ire_lock); 706 } 707 rw_exit(&first_ire->ire_bucket->irb_lock); 708 ire_refrele(first_ire); 709 } else { 710 irb_t *irb = NULL; 711 /* 712 * for non-link local destinations we match only on the IRE type 713 */ 714 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 715 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 716 ipst); 717 if (ire == NULL) { 718 if (ip_debug > 2) { 719 /* ip1dbg */ 720 pr_addr_dbg("icmp_inbound_too_big_v6:" 721 "no ire for dst %s\n", 722 AF_INET6, &inner_ip6h->ip6_dst); 723 } 724 freemsg(first_mp); 725 return; 726 } 727 irb = ire->ire_bucket; 728 ire_refrele(ire); 729 rw_enter(&irb->irb_lock, RW_READER); 730 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 731 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 732 &inner_ip6h->ip6_dst)) { 733 mtu = ntohl(icmp6->icmp6_mtu); 734 mutex_enter(&ire->ire_lock); 735 if (mtu < IPV6_MIN_MTU) { 736 ip1dbg(("Received mtu less than IPv6" 737 "min mtu %d: %d\n", 738 IPV6_MIN_MTU, mtu)); 739 mtu = IPV6_MIN_MTU; 740 /* 741 * If an mtu less than IPv6 min mtu is 742 * received, we must include a fragment 743 * header in subsequent packets. 744 */ 745 ire->ire_frag_flag |= IPH_FRAG_HDR; 746 } 747 748 ip1dbg(("Received mtu from router: %d\n", mtu)); 749 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 750 /* Record the new max frag size for the ULP. */ 751 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 752 /* 753 * If we need a fragment header in 754 * every packet (above case or 755 * multirouting), make sure the ULP 756 * takes it into account when computing 757 * the payload size. 
758 */ 759 icmp6->icmp6_mtu = 760 htonl(ire->ire_max_frag - 761 sizeof (ip6_frag_t)); 762 } else { 763 icmp6->icmp6_mtu = 764 htonl(ire->ire_max_frag); 765 } 766 mutex_exit(&ire->ire_lock); 767 } 768 } 769 rw_exit(&irb->irb_lock); 770 } 771 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 772 mctl_present, zoneid); 773 } 774 775 /* 776 * Fanout received ICMPv6 error packets to the transports. 777 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 778 */ 779 void 780 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 781 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 782 { 783 uint16_t *up; /* Pointer to ports in ULP header */ 784 uint32_t ports; /* reversed ports for fanout */ 785 ip6_t rip6h; /* With reversed addresses */ 786 uint16_t hdr_length; 787 uint8_t *nexthdrp; 788 uint8_t nexthdr; 789 mblk_t *first_mp; 790 ipsec_in_t *ii; 791 tcpha_t *tcpha; 792 conn_t *connp; 793 ip_stack_t *ipst = ill->ill_ipst; 794 795 first_mp = mp; 796 if (mctl_present) { 797 mp = first_mp->b_cont; 798 ASSERT(mp != NULL); 799 800 ii = (ipsec_in_t *)first_mp->b_rptr; 801 ASSERT(ii->ipsec_in_type == IPSEC_IN); 802 } else { 803 ii = NULL; 804 } 805 806 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 807 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 808 809 /* 810 * Need to pullup everything in order to use 811 * ip_hdr_length_nexthdr_v6() 812 */ 813 if (mp->b_cont != NULL) { 814 if (!pullupmsg(mp, -1)) { 815 ip1dbg(("icmp_inbound_error_fanout_v6: " 816 "pullupmsg failed\n")); 817 goto drop_pkt; 818 } 819 ip6h = (ip6_t *)mp->b_rptr; 820 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 821 } 822 823 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 824 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 825 goto drop_pkt; 826 827 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 828 goto drop_pkt; 829 nexthdr = *nexthdrp; 830 831 /* Set message type, must be done after pullups */ 832 
	mp->b_datap->db_type = M_CTL;

	/*
	 * Try to pass the ICMP message to clients who need it.
	 * From here on ip6h is the header of the packet in error (the one
	 * embedded in the ICMPv6 payload), hdr_length is the length reported
	 * for it by ip_hdr_length_nexthdr_v6(), and nexthdr is the protocol
	 * that follows it.
	 */
	switch (nexthdr) {
	case IPPROTO_UDP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * UDP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Leaves the switch and lands on drop_pkt below. */
			break;
		}
		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IPCL_UDP_MATCH_V6
		 * and we only set the src and dst addresses and nexthdr.
		 */
		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		/* Swap the two 16-bit port fields of the embedded header. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];

		ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	case IPPROTO_TCP: {
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the TCP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Leaves the switch and lands on drop_pkt below. */
			break;
		}

		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and
		 * we only set the src and dst addresses and nexthdr.
		 */

		tcpha = (tcpha_t *)((char *)ip6h + hdr_length);
		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
		if (connp == NULL) {
			goto drop_pkt;
		}

		/* Hand the message to tcp_input on the connection's squeue. */
		squeue_fill(connp->conn_sqp, first_mp, tcp_input,
		    connp, SQTAG_TCP6_INPUT_ICMP_ERR);
		return;

	}
	case IPPROTO_SCTP:
		/*
		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
		 * the SCTP header to get the port information.
		 */
		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
		    mp->b_wptr) {
			/* Leaves the switch and lands on drop_pkt below. */
			break;
		}

		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		/* Swap the two 16-bit port fields of the embedded header. */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];
		ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0,
		    mctl_present, IP6_NO_IPPOLICY, zoneid);
		return;
	case IPPROTO_ESP:
	case IPPROTO_AH: {
		int ipsec_rc;
		ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

		/*
		 * We need a IPSEC_IN in the front to fanout to AH/ESP.
		 * We will re-use the IPSEC_IN if it is already present as
		 * AH/ESP will not affect any fields in the IPSEC_IN for
		 * ICMP errors. If there is no IPSEC_IN, allocate a new
		 * one and attach it in the front.
		 */
		if (ii != NULL) {
			/*
			 * ip_fanout_proto_again converts the ICMP errors
			 * that come back from AH/ESP to M_DATA so that
			 * if it is non-AH/ESP and we do a pullupmsg in
			 * this function, it would work. Convert it back
			 * to M_CTL before we send up as this is a ICMP
			 * error. This could have been generated locally or
			 * by some router. Validate the inner IPSEC
			 * headers.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(ill != NULL);
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
			first_mp->b_cont->b_datap->db_type = M_CTL;
		} else {
			/*
			 * IPSEC_IN is not present. We attach a ipsec_in
			 * message and send up to IPSEC for validating
			 * and removing the IPSEC headers. Clear
			 * ipsec_in_secure so that when we return
			 * from IPSEC, we don't mistakenly think that this
			 * is a secure packet came from the network.
			 *
			 * NOTE : ill_index is used by ip_fanout_proto_again
			 * to locate the ill.
			 */
			ASSERT(first_mp == mp);
			first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
			ASSERT(ill != NULL);
			if (first_mp == NULL) {
				freemsg(mp);
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				return;
			}
			ii = (ipsec_in_t *)first_mp->b_rptr;

			/* This is not a secure packet */
			ii->ipsec_in_secure = B_FALSE;
			first_mp->b_cont = mp;
			mp->b_datap->db_type = M_CTL;
			ii->ipsec_in_ill_index =
			    ill->ill_phyint->phyint_ifindex;
			ii->ipsec_in_rill_index = ii->ipsec_in_ill_index;
		}

		if (!ipsec_loaded(ipss)) {
			ip_proto_not_sup(q, first_mp, 0, zoneid, ipst);
			return;
		}

		if (nexthdr == IPPROTO_ESP)
			ipsec_rc = ipsecesp_icmp_error(first_mp);
		else
			ipsec_rc = ipsecah_icmp_error(first_mp);
		/*
		 * NOTE(review): on IPSEC_STATUS_FAILED first_mp is not freed
		 * here; presumably the callee has consumed it -- confirm.
		 */
		if (ipsec_rc == IPSEC_STATUS_FAILED)
			return;

		ip_fanout_proto_again(first_mp, ill, ill, NULL);
		return;
	}
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/* The encapsulated IPv4 or IPv6 header must be fully present. */
		if ((uint8_t *)ip6h + hdr_length +
		    (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) :
		    sizeof (ip6_t)) > mp->b_wptr) {
			goto drop_pkt;
		}

		/*
		 * IPv4-in-IPv6, or IPv6-in-IPv6 whose inner addresses differ
		 * from the outer ones, takes the first branch; an inner IPv6
		 * header with identical src and dst takes the else branch.
		 */
		if (nexthdr == IPPROTO_ENCAP ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src,
		    &ip6h->ip6_src) ||
		    !IN6_ARE_ADDR_EQUAL(
		    &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst,
		    &ip6h->ip6_dst)) {
			/*
			 * For tunnels that have used IPsec protection,
			 * we need to adjust the MTU to take into account
			 * the IPsec overhead.
			 */
			if (ii != NULL)
				icmp6->icmp6_mtu = htonl(
				    ntohl(icmp6->icmp6_mtu) -
				    ipsec_in_extra_length(first_mp));
		} else {
			/*
			 * Self-encapsulated case. As in the ipv4 case,
			 * we need to strip the 2nd IP header. Since mp
			 * is already pulled-up, we can simply bcopy
			 * the 3rd header + data over the 2nd header.
			 */
			uint16_t unused_len;
			ip6_t *inner_ip6h = (ip6_t *)
			    ((uchar_t *)ip6h + hdr_length);

			/*
			 * Make sure we don't do recursion more than once.
			 */
			if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h,
			    &unused_len, &nexthdrp) ||
			    *nexthdrp == IPPROTO_IPV6) {
				goto drop_pkt;
			}

			/*
			 * We are about to modify the packet. Make a copy if
			 * someone else has a reference to it.
			 */
			if (DB_REF(mp) > 1) {
				mblk_t	*mp1;
				uint16_t icmp6_offset;

				mp1 = copymsg(mp);
				if (mp1 == NULL) {
					goto drop_pkt;
				}
				/*
				 * Remember where the ICMPv6 header sits so
				 * the pointers can be rebuilt in the copy.
				 */
				icmp6_offset = (uint16_t)
				    ((uchar_t *)icmp6 - mp->b_rptr);
				freemsg(mp);
				mp = mp1;

				icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset);
				ip6h = (ip6_t *)&icmp6[1];
				inner_ip6h = (ip6_t *)
				    ((uchar_t *)ip6h + hdr_length);

				if (mctl_present)
					first_mp->b_cont = mp;
				else
					first_mp = mp;
			}

			/*
			 * Need to set db_type back to M_DATA before
			 * refeeding mp into this function.
			 */
			DB_TYPE(mp) = M_DATA;

			/*
			 * Copy the 3rd header + remaining data on top
			 * of the 2nd header.
			 */
			bcopy(inner_ip6h, ip6h,
			    mp->b_wptr - (uchar_t *)inner_ip6h);

			/*
			 * Subtract length of the 2nd header.
			 */
			mp->b_wptr -= hdr_length;

			/*
			 * Now recurse, and see what I _really_ should be
			 * doing here.
			 */
			icmp_inbound_error_fanout_v6(q, first_mp,
			    (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present,
			    zoneid);
			return;
		}
		/* FALLTHRU */
	default:
		/*
		 * The rip6h header is only used for the lookup and we
		 * only set the src and dst addresses and nexthdr.
		 */
		rip6h.ip6_src = ip6h->ip6_dst;
		rip6h.ip6_dst = ip6h->ip6_src;
		rip6h.ip6_nxt = nexthdr;
		ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	/* NOTREACHED */
	/*
	 * Despite the annotation above, this point is reached both by the
	 * explicit gotos and by the "break" statements taken in the UDP, TCP
	 * and SCTP cases when the embedded transport header is too short.
	 */
drop_pkt:
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
	freemsg(first_mp);
}

/*
 * Process received IPv6 ICMP Redirect messages.
 */
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h;
	uint16_t	hdr_length;
	nd_redirect_t	*rd;
	ire_t		*ire;
	ire_t		*prev_ire;
	ire_t		*redir_ire;
	in6_addr_t	*src, *dst, *gateway;
	nd_opt_hdr_t	*opt;
	nce_t		*nce;
	int		nce_flags = 0;
	int		err = 0;
	boolean_t	redirect_to_router = B_FALSE;
	int		len;
	int		optlen;
	iulp_t		ulp_info = { 0 };
	ill_t		*prev_ire_ill;
	ipif_t		*ipif;
	ip_stack_t	*ipst = ill->ill_ipst;

	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
		hdr_length = ip_hdr_length_v6(mp, ip6h);
	else
		hdr_length = IPV6_HDR_LEN;

	rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
	len = mp->b_wptr - mp->b_rptr -  hdr_length;
	src = &ip6h->ip6_src;
	dst = &rd->nd_rd_dst;
	gateway = &rd->nd_rd_target;

	/* Verify if it is a valid redirect */
	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
	    (rd->nd_rd_code != 0) ||
	    (len < sizeof (nd_redirect_t)) ||
	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
	    (IN6_IS_ADDR_MULTICAST(dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	if (len > sizeof
(nd_redirect_t)) { 1163 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1164 len - sizeof (nd_redirect_t))) { 1165 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1166 freemsg(mp); 1167 return; 1168 } 1169 } 1170 1171 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1172 redirect_to_router = B_TRUE; 1173 nce_flags |= NCE_F_ISROUTER; 1174 } 1175 1176 /* ipif will be refreleased afterwards */ 1177 ipif = ipif_get_next_ipif(NULL, ill); 1178 if (ipif == NULL) { 1179 freemsg(mp); 1180 return; 1181 } 1182 1183 /* 1184 * Verify that the IP source address of the redirect is 1185 * the same as the current first-hop router for the specified 1186 * ICMP destination address. 1187 * Also, Make sure we had a route for the dest in question and 1188 * that route was pointing to the old gateway (the source of the 1189 * redirect packet.) 1190 */ 1191 1192 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1193 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1194 MATCH_IRE_DEFAULT, ipst); 1195 1196 /* 1197 * Check that 1198 * the redirect was not from ourselves 1199 * old gateway is still directly reachable 1200 */ 1201 if (prev_ire == NULL || 1202 prev_ire->ire_type == IRE_LOCAL) { 1203 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1204 ipif_refrele(ipif); 1205 goto fail_redirect; 1206 } 1207 prev_ire_ill = ire_to_ill(prev_ire); 1208 ASSERT(prev_ire_ill != NULL); 1209 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1210 nce_flags |= NCE_F_NONUD; 1211 1212 /* 1213 * Should we use the old ULP info to create the new gateway? From 1214 * a user's perspective, we should inherit the info so that it 1215 * is a "smooth" transition. If we do not do that, then new 1216 * connections going thru the new gateway will have no route metrics, 1217 * which is counter-intuitive to user. From a network point of 1218 * view, this may or may not make sense even though the new gateway 1219 * is still directly connected to us so the route metrics should not 1220 * change much. 
1221 * 1222 * But if the old ire_uinfo is not initialized, we do another 1223 * recursive lookup on the dest using the new gateway. There may 1224 * be a route to that. If so, use it to initialize the redirect 1225 * route. 1226 */ 1227 if (prev_ire->ire_uinfo.iulp_set) { 1228 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1229 } else if (redirect_to_router) { 1230 /* 1231 * Only do the following if the redirection is really to 1232 * a router. 1233 */ 1234 ire_t *tmp_ire; 1235 ire_t *sire; 1236 1237 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1238 ALL_ZONES, 0, NULL, 1239 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1240 ipst); 1241 if (sire != NULL) { 1242 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1243 ASSERT(tmp_ire != NULL); 1244 ire_refrele(tmp_ire); 1245 ire_refrele(sire); 1246 } else if (tmp_ire != NULL) { 1247 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1248 sizeof (iulp_t)); 1249 ire_refrele(tmp_ire); 1250 } 1251 } 1252 1253 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1254 opt = (nd_opt_hdr_t *)&rd[1]; 1255 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1256 if (opt != NULL) { 1257 err = ndp_lookup_then_add_v6(ill, 1258 (uchar_t *)&opt[1], /* Link layer address */ 1259 gateway, 1260 &ipv6_all_ones, /* prefix mask */ 1261 &ipv6_all_zeros, /* Mapping mask */ 1262 0, 1263 nce_flags, 1264 ND_STALE, 1265 &nce); 1266 switch (err) { 1267 case 0: 1268 NCE_REFRELE(nce); 1269 break; 1270 case EEXIST: 1271 /* 1272 * Check to see if link layer address has changed and 1273 * process the nce_state accordingly. 
1274 */ 1275 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1276 NCE_REFRELE(nce); 1277 break; 1278 default: 1279 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1280 err)); 1281 ipif_refrele(ipif); 1282 goto fail_redirect; 1283 } 1284 } 1285 if (redirect_to_router) { 1286 /* icmp_redirect_ok_v6() must have already verified this */ 1287 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1288 1289 /* 1290 * Create a Route Association. This will allow us to remember 1291 * a router told us to use the particular gateway. 1292 */ 1293 ire = ire_create_v6( 1294 dst, 1295 &ipv6_all_ones, /* mask */ 1296 &prev_ire->ire_src_addr_v6, /* source addr */ 1297 gateway, /* gateway addr */ 1298 &prev_ire->ire_max_frag, /* max frag */ 1299 NULL, /* no src nce */ 1300 NULL, /* no rfq */ 1301 NULL, /* no stq */ 1302 IRE_HOST, 1303 prev_ire->ire_ipif, 1304 NULL, 1305 0, 1306 0, 1307 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1308 &ulp_info, 1309 NULL, 1310 NULL, 1311 ipst); 1312 } else { 1313 queue_t *stq; 1314 1315 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1316 ? ipif->ipif_rq : ipif->ipif_wq; 1317 1318 /* 1319 * Just create an on link entry, i.e. interface route. 
1320 */ 1321 ire = ire_create_v6( 1322 dst, /* gateway == dst */ 1323 &ipv6_all_ones, /* mask */ 1324 &prev_ire->ire_src_addr_v6, /* source addr */ 1325 &ipv6_all_zeros, /* gateway addr */ 1326 &prev_ire->ire_max_frag, /* max frag */ 1327 NULL, /* no src nce */ 1328 NULL, /* ire rfq */ 1329 stq, /* ire stq */ 1330 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1331 prev_ire->ire_ipif, 1332 &ipv6_all_ones, 1333 0, 1334 0, 1335 (RTF_DYNAMIC | RTF_HOST), 1336 &ulp_info, 1337 NULL, 1338 NULL, 1339 ipst); 1340 } 1341 1342 /* Release reference from earlier ipif_get_next_ipif() */ 1343 ipif_refrele(ipif); 1344 1345 if (ire == NULL) 1346 goto fail_redirect; 1347 1348 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1349 1350 /* tell routing sockets that we received a redirect */ 1351 ip_rts_change_v6(RTM_REDIRECT, 1352 &rd->nd_rd_dst, 1353 &rd->nd_rd_target, 1354 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1355 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1356 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1357 1358 /* 1359 * Delete any existing IRE_HOST type ires for this destination. 1360 * This together with the added IRE has the effect of 1361 * modifying an existing redirect. 
1362 */ 1363 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1364 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1365 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1366 ipst); 1367 1368 ire_refrele(ire); /* Held in ire_add_v6 */ 1369 1370 if (redir_ire != NULL) { 1371 if (redir_ire->ire_flags & RTF_DYNAMIC) 1372 ire_delete(redir_ire); 1373 ire_refrele(redir_ire); 1374 } 1375 } 1376 1377 if (prev_ire->ire_type == IRE_CACHE) 1378 ire_delete(prev_ire); 1379 ire_refrele(prev_ire); 1380 prev_ire = NULL; 1381 1382 fail_redirect: 1383 if (prev_ire != NULL) 1384 ire_refrele(prev_ire); 1385 freemsg(mp); 1386 } 1387 1388 static ill_t * 1389 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1390 { 1391 ill_t *ill; 1392 1393 ASSERT(WR(q) == q); 1394 1395 if (q->q_next != NULL) { 1396 ill = (ill_t *)q->q_ptr; 1397 if (ILL_CAN_LOOKUP(ill)) 1398 ill_refhold(ill); 1399 else 1400 ill = NULL; 1401 } else { 1402 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1403 NULL, NULL, NULL, NULL, NULL, ipst); 1404 } 1405 if (ill == NULL) 1406 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1407 return (ill); 1408 } 1409 1410 /* 1411 * Assigns an appropriate source address to the packet. 1412 * If origdst is one of our IP addresses that use it as the source. 1413 * If the queue is an ill queue then select a source from that ill. 1414 * Otherwise pick a source based on a route lookup back to the origsrc. 1415 * 1416 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1417 */ 1418 static in6_addr_t * 1419 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1420 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1421 { 1422 ill_t *ill; 1423 ire_t *ire; 1424 ipif_t *ipif; 1425 1426 ASSERT(!(wq->q_flag & QREADR)); 1427 if (wq->q_next != NULL) { 1428 ill = (ill_t *)wq->q_ptr; 1429 } else { 1430 ill = NULL; 1431 } 1432 1433 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1434 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1435 ipst); 1436 if (ire != NULL) { 1437 /* Destined to one of our addresses */ 1438 *src = *origdst; 1439 ire_refrele(ire); 1440 return (src); 1441 } 1442 if (ire != NULL) { 1443 ire_refrele(ire); 1444 ire = NULL; 1445 } 1446 if (ill == NULL) { 1447 /* What is the route back to the original source? */ 1448 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1449 NULL, NULL, zoneid, NULL, 1450 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1451 if (ire == NULL) { 1452 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1453 return (NULL); 1454 } 1455 /* 1456 * Does not matter whether we use ire_stq or ire_ipif here. 1457 * Just pick an ill for ICMP replies. 1458 */ 1459 ASSERT(ire->ire_ipif != NULL); 1460 ill = ire->ire_ipif->ipif_ill; 1461 ire_refrele(ire); 1462 } 1463 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1464 IPV6_PREFER_SRC_DEFAULT, zoneid); 1465 if (ipif != NULL) { 1466 *src = ipif->ipif_v6src_addr; 1467 ipif_refrele(ipif); 1468 return (src); 1469 } 1470 /* 1471 * Unusual case - can't find a usable source address to reach the 1472 * original source. Use what in the route to the source. 
1473 */ 1474 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1475 NULL, NULL, zoneid, NULL, 1476 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1477 if (ire == NULL) { 1478 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1479 return (NULL); 1480 } 1481 ASSERT(ire != NULL); 1482 *src = ire->ire_src_addr_v6; 1483 ire_refrele(ire); 1484 return (src); 1485 } 1486 1487 /* 1488 * Build and ship an IPv6 ICMP message using the packet data in mp, 1489 * and the ICMP header pointed to by "stuff". (May be called as 1490 * writer.) 1491 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1492 * verify that an icmp error packet can be sent. 1493 * 1494 * If q is an ill write side queue (which is the case when packets 1495 * arrive from ip_rput) then ip_wput code will ensure that packets to 1496 * link-local destinations are sent out that ill. 1497 * 1498 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1499 * source address (see above function). 1500 */ 1501 static void 1502 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1503 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1504 ip_stack_t *ipst) 1505 { 1506 ip6_t *ip6h; 1507 in6_addr_t v6dst; 1508 size_t len_needed; 1509 size_t msg_len; 1510 mblk_t *mp1; 1511 icmp6_t *icmp6; 1512 ill_t *ill; 1513 in6_addr_t v6src; 1514 mblk_t *ipsec_mp; 1515 ipsec_out_t *io; 1516 1517 ill = ip_queue_to_ill_v6(q, ipst); 1518 if (ill == NULL) { 1519 freemsg(mp); 1520 return; 1521 } 1522 1523 if (mctl_present) { 1524 /* 1525 * If it is : 1526 * 1527 * 1) a IPSEC_OUT, then this is caused by outbound 1528 * datagram originating on this host. IPSEC processing 1529 * may or may not have been done. Refer to comments above 1530 * icmp_inbound_error_fanout for details. 1531 * 1532 * 2) a IPSEC_IN if we are generating a icmp_message 1533 * for an incoming datagram destined for us i.e called 1534 * from ip_fanout_send_icmp. 
1535 */ 1536 ipsec_info_t *in; 1537 1538 ipsec_mp = mp; 1539 mp = ipsec_mp->b_cont; 1540 1541 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1542 ip6h = (ip6_t *)mp->b_rptr; 1543 1544 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1545 in->ipsec_info_type == IPSEC_IN); 1546 1547 if (in->ipsec_info_type == IPSEC_IN) { 1548 /* 1549 * Convert the IPSEC_IN to IPSEC_OUT. 1550 */ 1551 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1552 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1553 ill_refrele(ill); 1554 return; 1555 } 1556 } else { 1557 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1558 io = (ipsec_out_t *)in; 1559 /* 1560 * Clear out ipsec_out_proc_begin, so we do a fresh 1561 * ire lookup. 1562 */ 1563 io->ipsec_out_proc_begin = B_FALSE; 1564 } 1565 } else { 1566 /* 1567 * This is in clear. The icmp message we are building 1568 * here should go out in clear. 1569 */ 1570 ipsec_in_t *ii; 1571 ASSERT(mp->b_datap->db_type == M_DATA); 1572 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1573 if (ipsec_mp == NULL) { 1574 freemsg(mp); 1575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1576 ill_refrele(ill); 1577 return; 1578 } 1579 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1580 1581 /* This is not a secure packet */ 1582 ii->ipsec_in_secure = B_FALSE; 1583 /* 1584 * For trusted extensions using a shared IP address we can 1585 * send using any zoneid. 1586 */ 1587 if (zoneid == ALL_ZONES) 1588 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1589 else 1590 ii->ipsec_in_zoneid = zoneid; 1591 ipsec_mp->b_cont = mp; 1592 ip6h = (ip6_t *)mp->b_rptr; 1593 /* 1594 * Convert the IPSEC_IN to IPSEC_OUT. 
1595 */ 1596 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1597 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1598 ill_refrele(ill); 1599 return; 1600 } 1601 } 1602 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1603 1604 if (v6src_ptr != NULL) { 1605 v6src = *v6src_ptr; 1606 } else { 1607 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1608 &v6src, zoneid, ipst) == NULL) { 1609 freemsg(ipsec_mp); 1610 ill_refrele(ill); 1611 return; 1612 } 1613 } 1614 v6dst = ip6h->ip6_src; 1615 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1616 msg_len = msgdsize(mp); 1617 if (msg_len > len_needed) { 1618 if (!adjmsg(mp, len_needed - msg_len)) { 1619 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1620 freemsg(ipsec_mp); 1621 ill_refrele(ill); 1622 return; 1623 } 1624 msg_len = len_needed; 1625 } 1626 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1627 if (mp1 == NULL) { 1628 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1629 freemsg(ipsec_mp); 1630 ill_refrele(ill); 1631 return; 1632 } 1633 ill_refrele(ill); 1634 mp1->b_cont = mp; 1635 mp = mp1; 1636 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1637 io->ipsec_out_type == IPSEC_OUT); 1638 ipsec_mp->b_cont = mp; 1639 1640 /* 1641 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1642 * node generates be accepted in peace by all on-host destinations. 1643 * If we do NOT assume that all on-host destinations trust 1644 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1645 * (Look for ipsec_out_icmp_loopback). 
1646 */ 1647 io->ipsec_out_icmp_loopback = B_TRUE; 1648 1649 ip6h = (ip6_t *)mp->b_rptr; 1650 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1651 1652 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1653 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1654 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1655 ip6h->ip6_dst = v6dst; 1656 ip6h->ip6_src = v6src; 1657 msg_len += IPV6_HDR_LEN + len; 1658 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1659 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1660 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1661 } 1662 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1663 icmp6 = (icmp6_t *)&ip6h[1]; 1664 bcopy(stuff, (char *)icmp6, len); 1665 /* 1666 * Prepare for checksum by putting icmp length in the icmp 1667 * checksum field. The checksum is calculated in ip_wput_v6. 1668 */ 1669 icmp6->icmp6_cksum = ip6h->ip6_plen; 1670 if (icmp6->icmp6_type == ND_REDIRECT) { 1671 ip6h->ip6_hops = IPV6_MAX_HOPS; 1672 } 1673 /* Send to V6 writeside put routine */ 1674 put(q, ipsec_mp); 1675 } 1676 1677 /* 1678 * Update the output mib when ICMPv6 packets are sent. 
1679 */ 1680 static void 1681 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1682 { 1683 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1684 1685 switch (icmp6->icmp6_type) { 1686 case ICMP6_DST_UNREACH: 1687 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1688 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1690 break; 1691 1692 case ICMP6_TIME_EXCEEDED: 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1694 break; 1695 1696 case ICMP6_PARAM_PROB: 1697 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1698 break; 1699 1700 case ICMP6_PACKET_TOO_BIG: 1701 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1702 break; 1703 1704 case ICMP6_ECHO_REQUEST: 1705 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1706 break; 1707 1708 case ICMP6_ECHO_REPLY: 1709 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1710 break; 1711 1712 case ND_ROUTER_SOLICIT: 1713 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1714 break; 1715 1716 case ND_ROUTER_ADVERT: 1717 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1718 break; 1719 1720 case ND_NEIGHBOR_SOLICIT: 1721 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1722 break; 1723 1724 case ND_NEIGHBOR_ADVERT: 1725 BUMP_MIB(ill->ill_icmp6_mib, 1726 ipv6IfIcmpOutNeighborAdvertisements); 1727 break; 1728 1729 case ND_REDIRECT: 1730 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1731 break; 1732 1733 case MLD_LISTENER_QUERY: 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1735 break; 1736 1737 case MLD_LISTENER_REPORT: 1738 case MLD_V2_LISTENER_REPORT: 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1740 break; 1741 1742 case MLD_LISTENER_REDUCTION: 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1744 break; 1745 } 1746 } 1747 1748 /* 1749 * Check if it is ok to send an ICMPv6 error packet in 1750 * response to the IP packet in 
mp. 1751 * Free the message and return null if no 1752 * ICMP error packet should be sent. 1753 */ 1754 static mblk_t * 1755 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1756 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1757 { 1758 ip6_t *ip6h; 1759 1760 if (!mp) 1761 return (NULL); 1762 1763 ip6h = (ip6_t *)mp->b_rptr; 1764 1765 /* Check if source address uniquely identifies the host */ 1766 1767 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1768 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1769 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1770 freemsg(mp); 1771 return (NULL); 1772 } 1773 1774 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1775 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1776 icmp6_t *icmp6; 1777 1778 if (mp->b_wptr - mp->b_rptr < len_needed) { 1779 if (!pullupmsg(mp, len_needed)) { 1780 ill_t *ill; 1781 1782 ill = ip_queue_to_ill_v6(q, ipst); 1783 if (ill == NULL) { 1784 BUMP_MIB(&ipst->ips_icmp6_mib, 1785 ipv6IfIcmpInErrors); 1786 } else { 1787 BUMP_MIB(ill->ill_icmp6_mib, 1788 ipv6IfIcmpInErrors); 1789 ill_refrele(ill); 1790 } 1791 freemsg(mp); 1792 return (NULL); 1793 } 1794 ip6h = (ip6_t *)mp->b_rptr; 1795 } 1796 icmp6 = (icmp6_t *)&ip6h[1]; 1797 /* Explicitly do not generate errors in response to redirects */ 1798 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1799 icmp6->icmp6_type == ND_REDIRECT) { 1800 freemsg(mp); 1801 return (NULL); 1802 } 1803 } 1804 /* 1805 * Check that the destination is not multicast and that the packet 1806 * was not sent on link layer broadcast or multicast. (Exception 1807 * is Packet too big message as per the draft - when mcast_ok is set.) 1808 */ 1809 if (!mcast_ok && 1810 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1811 freemsg(mp); 1812 return (NULL); 1813 } 1814 if (icmp_err_rate_limit(ipst)) { 1815 /* 1816 * Only send ICMP error packets every so often. 1817 * This should be done on a per port/source basis, 1818 * but for now this will suffice. 
1819 */ 1820 freemsg(mp); 1821 return (NULL); 1822 } 1823 return (mp); 1824 } 1825 1826 /* 1827 * Generate an ICMPv6 redirect message. 1828 * Include target link layer address option if it exits. 1829 * Always include redirect header. 1830 */ 1831 static void 1832 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1833 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1834 { 1835 nd_redirect_t *rd; 1836 nd_opt_rd_hdr_t *rdh; 1837 uchar_t *buf; 1838 nce_t *nce = NULL; 1839 nd_opt_hdr_t *opt; 1840 int len; 1841 int ll_opt_len = 0; 1842 int max_redir_hdr_data_len; 1843 int pkt_len; 1844 in6_addr_t *srcp; 1845 ip_stack_t *ipst = ill->ill_ipst; 1846 1847 /* 1848 * We are called from ip_rput where we could 1849 * not have attached an IPSEC_IN. 1850 */ 1851 ASSERT(mp->b_datap->db_type == M_DATA); 1852 1853 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1854 if (mp == NULL) 1855 return; 1856 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1857 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1858 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1859 ill->ill_phys_addr_length + 7)/8 * 8; 1860 } 1861 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1862 ASSERT(len % 4 == 0); 1863 buf = kmem_alloc(len, KM_NOSLEEP); 1864 if (buf == NULL) { 1865 if (nce != NULL) 1866 NCE_REFRELE(nce); 1867 freemsg(mp); 1868 return; 1869 } 1870 1871 rd = (nd_redirect_t *)buf; 1872 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1873 rd->nd_rd_code = 0; 1874 rd->nd_rd_reserved = 0; 1875 rd->nd_rd_target = *targetp; 1876 rd->nd_rd_dst = *dest; 1877 1878 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1879 if (nce != NULL && ll_opt_len != 0) { 1880 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1881 opt->nd_opt_len = ll_opt_len/8; 1882 bcopy((char *)nce->nce_res_mp->b_rptr + 1883 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1884 ill->ill_phys_addr_length); 1885 } 1886 if (nce != NULL) 1887 NCE_REFRELE(nce); 1888 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1889 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1890 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1891 max_redir_hdr_data_len = 1892 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1893 pkt_len = msgdsize(mp); 1894 /* Make sure mp is 8 byte aligned */ 1895 if (pkt_len > max_redir_hdr_data_len) { 1896 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1897 sizeof (nd_opt_rd_hdr_t))/8; 1898 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1899 } else { 1900 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1901 (void) adjmsg(mp, -(pkt_len % 8)); 1902 } 1903 rdh->nd_opt_rh_reserved1 = 0; 1904 rdh->nd_opt_rh_reserved2 = 0; 1905 /* ipif_v6src_addr contains the link-local source address */ 1906 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1907 if (ill->ill_group != NULL) { 1908 /* 1909 * The receiver of the redirect will verify whether it 1910 * had a route through us (srcp that we will use in 1911 * the redirect) or not. As we load spread even link-locals, 1912 * we don't know which source address the receiver of 1913 * redirect has in its route for communicating with us. 1914 * Thus we randomly choose a source here and finally we 1915 * should get to the right one and it will eventually 1916 * accept the redirect from us. We can't call 1917 * ip_lookup_scope_v6 because we don't have the right 1918 * link-local address here. Thus we randomly choose one. 
1919 */ 1920 int cnt = ill->ill_group->illgrp_ill_count; 1921 1922 ill = ill->ill_group->illgrp_ill; 1923 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1924 while (cnt--) 1925 ill = ill->ill_group_next; 1926 srcp = &ill->ill_ipif->ipif_v6src_addr; 1927 } else { 1928 srcp = &ill->ill_ipif->ipif_v6src_addr; 1929 } 1930 rw_exit(&ipst->ips_ill_g_lock); 1931 /* Redirects sent by router, and router is global zone */ 1932 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1933 kmem_free(buf, len); 1934 } 1935 1936 1937 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1938 void 1939 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1940 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1941 ip_stack_t *ipst) 1942 { 1943 icmp6_t icmp6; 1944 boolean_t mctl_present; 1945 mblk_t *first_mp; 1946 1947 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1948 1949 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1950 if (mp == NULL) { 1951 if (mctl_present) 1952 freeb(first_mp); 1953 return; 1954 } 1955 bzero(&icmp6, sizeof (icmp6_t)); 1956 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1957 icmp6.icmp6_code = code; 1958 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1959 zoneid, ipst); 1960 } 1961 1962 /* 1963 * Generate an ICMP unreachable message. 
1964 */ 1965 void 1966 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1967 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1968 ip_stack_t *ipst) 1969 { 1970 icmp6_t icmp6; 1971 boolean_t mctl_present; 1972 mblk_t *first_mp; 1973 1974 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1975 1976 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1977 if (mp == NULL) { 1978 if (mctl_present) 1979 freeb(first_mp); 1980 return; 1981 } 1982 bzero(&icmp6, sizeof (icmp6_t)); 1983 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1984 icmp6.icmp6_code = code; 1985 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1986 zoneid, ipst); 1987 } 1988 1989 /* 1990 * Generate an ICMP pkt too big message. 1991 */ 1992 static void 1993 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1994 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1995 { 1996 icmp6_t icmp6; 1997 mblk_t *first_mp; 1998 boolean_t mctl_present; 1999 2000 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2001 2002 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2003 if (mp == NULL) { 2004 if (mctl_present) 2005 freeb(first_mp); 2006 return; 2007 } 2008 bzero(&icmp6, sizeof (icmp6_t)); 2009 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2010 icmp6.icmp6_code = 0; 2011 icmp6.icmp6_mtu = htonl(mtu); 2012 2013 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2014 zoneid, ipst); 2015 } 2016 2017 /* 2018 * Generate an ICMP parameter problem message. (May be called as writer.) 2019 * 'offset' is the offset from the beginning of the packet in error. 
2020 */ 2021 static void 2022 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2023 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2024 ip_stack_t *ipst) 2025 { 2026 icmp6_t icmp6; 2027 boolean_t mctl_present; 2028 mblk_t *first_mp; 2029 2030 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2031 2032 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2033 if (mp == NULL) { 2034 if (mctl_present) 2035 freeb(first_mp); 2036 return; 2037 } 2038 bzero((char *)&icmp6, sizeof (icmp6_t)); 2039 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2040 icmp6.icmp6_code = code; 2041 icmp6.icmp6_pptr = htonl(offset); 2042 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2043 zoneid, ipst); 2044 } 2045 2046 /* 2047 * This code will need to take into account the possibility of binding 2048 * to a link local address on a multi-homed host, in which case the 2049 * outgoing interface (from the conn) will need to be used when getting 2050 * an ire for the dst. Going through proper outgoing interface and 2051 * choosing the source address corresponding to the outgoing interface 2052 * is necessary when the destination address is a link-local address and 2053 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2054 * This can happen when active connection is setup; thus ipp pointer 2055 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2056 * pointer is passed as ipp pointer. 
2057 */ 2058 mblk_t * 2059 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2060 { 2061 ssize_t len; 2062 int protocol; 2063 struct T_bind_req *tbr; 2064 sin6_t *sin6; 2065 ipa6_conn_t *ac6; 2066 in6_addr_t *v6srcp; 2067 in6_addr_t *v6dstp; 2068 uint16_t lport; 2069 uint16_t fport; 2070 uchar_t *ucp; 2071 mblk_t *mp1; 2072 boolean_t ire_requested; 2073 boolean_t ipsec_policy_set; 2074 int error = 0; 2075 boolean_t local_bind; 2076 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2077 ipa6_conn_x_t *acx6; 2078 boolean_t verify_dst; 2079 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2080 2081 ASSERT(connp->conn_af_isv6); 2082 len = mp->b_wptr - mp->b_rptr; 2083 if (len < (sizeof (*tbr) + 1)) { 2084 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2085 "ip_bind_v6: bogus msg, len %ld", len); 2086 goto bad_addr; 2087 } 2088 /* Back up and extract the protocol identifier. */ 2089 mp->b_wptr--; 2090 tbr = (struct T_bind_req *)mp->b_rptr; 2091 /* Reset the message type in preparation for shipping it back. */ 2092 mp->b_datap->db_type = M_PCPROTO; 2093 2094 protocol = *mp->b_wptr & 0xFF; 2095 connp->conn_ulp = (uint8_t)protocol; 2096 2097 /* 2098 * Check for a zero length address. This is from a protocol that 2099 * wants to register to receive all packets of its type. 2100 */ 2101 if (tbr->ADDR_length == 0) { 2102 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2103 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2104 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2105 NULL) { 2106 /* 2107 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2108 * Do not allow others to bind to these. 2109 */ 2110 goto bad_addr; 2111 } 2112 2113 /* 2114 * 2115 * The udp module never sends down a zero-length address, 2116 * and allowing this on a labeled system will break MLP 2117 * functionality. 
2118 */ 2119 if (is_system_labeled() && protocol == IPPROTO_UDP) 2120 goto bad_addr; 2121 2122 /* Allow ipsec plumbing */ 2123 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2124 protocol != IPPROTO_ESP) 2125 goto bad_addr; 2126 2127 connp->conn_srcv6 = ipv6_all_zeros; 2128 ipcl_proto_insert_v6(connp, protocol); 2129 2130 tbr->PRIM_type = T_BIND_ACK; 2131 return (mp); 2132 } 2133 2134 /* Extract the address pointer from the message. */ 2135 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2136 tbr->ADDR_length); 2137 if (ucp == NULL) { 2138 ip1dbg(("ip_bind_v6: no address\n")); 2139 goto bad_addr; 2140 } 2141 if (!OK_32PTR(ucp)) { 2142 ip1dbg(("ip_bind_v6: unaligned address\n")); 2143 goto bad_addr; 2144 } 2145 mp1 = mp->b_cont; /* trailing mp if any */ 2146 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2147 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2148 2149 switch (tbr->ADDR_length) { 2150 default: 2151 ip1dbg(("ip_bind_v6: bad address length %d\n", 2152 (int)tbr->ADDR_length)); 2153 goto bad_addr; 2154 2155 case IPV6_ADDR_LEN: 2156 /* Verification of local address only */ 2157 v6srcp = (in6_addr_t *)ucp; 2158 lport = 0; 2159 local_bind = B_TRUE; 2160 break; 2161 2162 case sizeof (sin6_t): 2163 sin6 = (sin6_t *)ucp; 2164 v6srcp = &sin6->sin6_addr; 2165 lport = sin6->sin6_port; 2166 local_bind = B_TRUE; 2167 break; 2168 2169 case sizeof (ipa6_conn_t): 2170 /* 2171 * Verify that both the source and destination addresses 2172 * are valid. 2173 * Note that we allow connect to broadcast and multicast 2174 * addresses when ire_requested is set. Thus the ULP 2175 * has to check for IRE_BROADCAST and multicast. 2176 */ 2177 ac6 = (ipa6_conn_t *)ucp; 2178 v6srcp = &ac6->ac6_laddr; 2179 v6dstp = &ac6->ac6_faddr; 2180 fport = ac6->ac6_fport; 2181 /* For raw socket, the local port is not set. */ 2182 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2183 connp->conn_lport; 2184 local_bind = B_FALSE; 2185 /* Always verify destination reachability. */ 2186 verify_dst = B_TRUE; 2187 break; 2188 2189 case sizeof (ipa6_conn_x_t): 2190 /* 2191 * Verify that the source address is valid. 2192 * Note that we allow connect to broadcast and multicast 2193 * addresses when ire_requested is set. Thus the ULP 2194 * has to check for IRE_BROADCAST and multicast. 2195 */ 2196 acx6 = (ipa6_conn_x_t *)ucp; 2197 ac6 = &acx6->ac6x_conn; 2198 v6srcp = &ac6->ac6_laddr; 2199 v6dstp = &ac6->ac6_faddr; 2200 fport = ac6->ac6_fport; 2201 lport = ac6->ac6_lport; 2202 local_bind = B_FALSE; 2203 /* 2204 * Client that passed ipa6_conn_x_t to us specifies whether to 2205 * verify destination reachability. 2206 */ 2207 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2208 break; 2209 } 2210 if (local_bind) { 2211 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2212 /* Bind to IPv4 address */ 2213 ipaddr_t v4src; 2214 2215 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2216 2217 error = ip_bind_laddr(connp, mp, v4src, lport, 2218 ire_requested, ipsec_policy_set, 2219 tbr->ADDR_length != IPV6_ADDR_LEN); 2220 if (error != 0) 2221 goto bad_addr; 2222 connp->conn_pkt_isv6 = B_FALSE; 2223 } else { 2224 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2225 error = 0; 2226 goto bad_addr; 2227 } 2228 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2229 ire_requested, ipsec_policy_set, 2230 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2231 if (error != 0) 2232 goto bad_addr; 2233 connp->conn_pkt_isv6 = B_TRUE; 2234 } 2235 } else { 2236 /* 2237 * Bind to local and remote address. Local might be 2238 * unspecified in which case it will be extracted from 2239 * ire_src_addr_v6 2240 */ 2241 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2242 /* Connect to IPv4 address */ 2243 ipaddr_t v4src; 2244 ipaddr_t v4dst; 2245 2246 /* Is the source unspecified or mapped? 
*/ 2247 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2248 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2249 ip1dbg(("ip_bind_v6: " 2250 "dst is mapped, but not the src\n")); 2251 goto bad_addr; 2252 } 2253 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2254 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2255 2256 /* 2257 * XXX Fix needed. Need to pass ipsec_policy_set 2258 * instead of B_FALSE. 2259 */ 2260 2261 /* Always verify destination reachability. */ 2262 error = ip_bind_connected(connp, mp, &v4src, lport, 2263 v4dst, fport, ire_requested, ipsec_policy_set, 2264 B_TRUE, B_TRUE); 2265 if (error != 0) 2266 goto bad_addr; 2267 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2268 connp->conn_pkt_isv6 = B_FALSE; 2269 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2270 ip1dbg(("ip_bind_v6: " 2271 "src is mapped, but not the dst\n")); 2272 goto bad_addr; 2273 } else { 2274 error = ip_bind_connected_v6(connp, mp, v6srcp, 2275 lport, v6dstp, ipp, fport, ire_requested, 2276 ipsec_policy_set, B_TRUE, verify_dst); 2277 if (error != 0) 2278 goto bad_addr; 2279 connp->conn_pkt_isv6 = B_TRUE; 2280 } 2281 } 2282 2283 /* Update conn_send and pktversion if v4/v6 changed */ 2284 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2285 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2286 } 2287 /* 2288 * Pass the IPSEC headers size in ire_ipsec_overhead. 2289 * We can't do this in ip_bind_insert_ire because the policy 2290 * may not have been inherited at that point in time and hence 2291 * conn_out_enforce_policy may not be set. 2292 */ 2293 mp1 = mp->b_cont; 2294 if (ire_requested && connp->conn_out_enforce_policy && 2295 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2296 ire_t *ire = (ire_t *)mp1->b_rptr; 2297 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2298 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2299 } 2300 2301 /* Send it home. 
*/ 2302 mp->b_datap->db_type = M_PCPROTO; 2303 tbr->PRIM_type = T_BIND_ACK; 2304 return (mp); 2305 2306 bad_addr: 2307 if (error == EINPROGRESS) 2308 return (NULL); 2309 if (error > 0) 2310 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2311 else 2312 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2313 return (mp); 2314 } 2315 2316 /* 2317 * Here address is verified to be a valid local address. 2318 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2319 * address is also considered a valid local address. 2320 * In the case of a multicast address, however, the 2321 * upper protocol is expected to reset the src address 2322 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2323 * no packets are emitted with multicast address as 2324 * source address. 2325 * The addresses valid for bind are: 2326 * (1) - in6addr_any 2327 * (2) - IP address of an UP interface 2328 * (3) - IP address of a DOWN interface 2329 * (4) - a multicast address. In this case 2330 * the conn will only receive packets destined to 2331 * the specified multicast address. Note: the 2332 * application still has to issue an 2333 * IPV6_JOIN_GROUP socket option. 2334 * 2335 * In all the above cases, the bound address must be valid in the current zone. 2336 * When the address is loopback or multicast, there might be many matching IREs 2337 * so bind has to look up based on the zone. 2338 */ 2339 static int 2340 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2341 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2342 boolean_t fanout_insert) 2343 { 2344 int error = 0; 2345 ire_t *src_ire = NULL; 2346 ipif_t *ipif = NULL; 2347 mblk_t *policy_mp; 2348 zoneid_t zoneid; 2349 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2350 2351 if (ipsec_policy_set) 2352 policy_mp = mp->b_cont; 2353 2354 /* 2355 * If it was previously connected, conn_fully_bound would have 2356 * been set. 
2357 */ 2358 connp->conn_fully_bound = B_FALSE; 2359 2360 zoneid = connp->conn_zoneid; 2361 2362 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2363 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2364 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2365 /* 2366 * If an address other than in6addr_any is requested, 2367 * we verify that it is a valid address for bind 2368 * Note: Following code is in if-else-if form for 2369 * readability compared to a condition check. 2370 */ 2371 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2372 if (IRE_IS_LOCAL(src_ire)) { 2373 /* 2374 * (2) Bind to address of local UP interface 2375 */ 2376 ipif = src_ire->ire_ipif; 2377 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2378 ipif_t *multi_ipif = NULL; 2379 ire_t *save_ire; 2380 /* 2381 * (4) bind to multicast address. 2382 * Fake out the IRE returned to upper 2383 * layer to be a broadcast IRE in 2384 * ip_bind_insert_ire_v6(). 2385 * Pass other information that matches 2386 * the ipif (e.g. the source address). 
2387 * conn_multicast_ill is only used for 2388 * IPv6 packets 2389 */ 2390 mutex_enter(&connp->conn_lock); 2391 if (connp->conn_multicast_ill != NULL) { 2392 (void) ipif_lookup_zoneid( 2393 connp->conn_multicast_ill, zoneid, 0, 2394 &multi_ipif); 2395 } else { 2396 /* 2397 * Look for default like 2398 * ip_wput_v6 2399 */ 2400 multi_ipif = ipif_lookup_group_v6( 2401 &ipv6_unspecified_group, zoneid, ipst); 2402 } 2403 mutex_exit(&connp->conn_lock); 2404 save_ire = src_ire; 2405 src_ire = NULL; 2406 if (multi_ipif == NULL || !ire_requested || 2407 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2408 src_ire = save_ire; 2409 error = EADDRNOTAVAIL; 2410 } else { 2411 ASSERT(src_ire != NULL); 2412 if (save_ire != NULL) 2413 ire_refrele(save_ire); 2414 } 2415 if (multi_ipif != NULL) 2416 ipif_refrele(multi_ipif); 2417 } else { 2418 *mp->b_wptr++ = (char)connp->conn_ulp; 2419 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2420 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2421 ipst); 2422 if (ipif == NULL) { 2423 if (error == EINPROGRESS) { 2424 if (src_ire != NULL) 2425 ire_refrele(src_ire); 2426 return (error); 2427 } 2428 /* 2429 * Not a valid address for bind 2430 */ 2431 error = EADDRNOTAVAIL; 2432 } else { 2433 ipif_refrele(ipif); 2434 } 2435 /* 2436 * Just to keep it consistent with the processing in 2437 * ip_bind_v6(). 2438 */ 2439 mp->b_wptr--; 2440 } 2441 2442 if (error != 0) { 2443 /* Red Alert! Attempting to be a bogon! */ 2444 if (ip_debug > 2) { 2445 /* ip1dbg */ 2446 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2447 " address %s\n", AF_INET6, v6src); 2448 } 2449 goto bad_addr; 2450 } 2451 } 2452 2453 /* 2454 * Allow setting new policies. For example, disconnects come 2455 * down as ipa_t bind. As we would have set conn_policy_cached 2456 * to B_TRUE before, we should set it to B_FALSE, so that policy 2457 * can change after the disconnect. 
2458 */ 2459 connp->conn_policy_cached = B_FALSE; 2460 2461 /* If not fanout_insert this was just an address verification */ 2462 if (fanout_insert) { 2463 /* 2464 * The addresses have been verified. Time to insert in 2465 * the correct fanout list. 2466 */ 2467 connp->conn_srcv6 = *v6src; 2468 connp->conn_remv6 = ipv6_all_zeros; 2469 connp->conn_lport = lport; 2470 connp->conn_fport = 0; 2471 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2472 } 2473 if (error == 0) { 2474 if (ire_requested) { 2475 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2476 ipst)) { 2477 error = -1; 2478 goto bad_addr; 2479 } 2480 } else if (ipsec_policy_set) { 2481 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2482 error = -1; 2483 goto bad_addr; 2484 } 2485 } 2486 } 2487 bad_addr: 2488 if (error != 0) { 2489 if (connp->conn_anon_port) { 2490 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2491 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2492 B_FALSE); 2493 } 2494 connp->conn_mlp_type = mlptSingle; 2495 } 2496 2497 if (src_ire != NULL) 2498 ire_refrele(src_ire); 2499 2500 if (ipsec_policy_set) { 2501 ASSERT(policy_mp != NULL); 2502 freeb(policy_mp); 2503 /* 2504 * As of now assume that nothing else accompanies 2505 * IPSEC_POLICY_SET. 
2506 */ 2507 mp->b_cont = NULL; 2508 } 2509 return (error); 2510 } 2511 2512 /* ARGSUSED */ 2513 static void 2514 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2515 void *dummy_arg) 2516 { 2517 conn_t *connp = NULL; 2518 t_scalar_t prim; 2519 2520 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2521 2522 if (CONN_Q(q)) 2523 connp = Q_TO_CONN(q); 2524 ASSERT(connp != NULL); 2525 2526 prim = ((union T_primitives *)mp->b_rptr)->type; 2527 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2528 2529 if (IPCL_IS_TCP(connp)) { 2530 /* Pass sticky_ipp for scope_id and pktinfo */ 2531 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2532 } else { 2533 /* For UDP and ICMP */ 2534 mp = ip_bind_v6(q, mp, connp, NULL); 2535 } 2536 if (mp != NULL) { 2537 if (IPCL_IS_TCP(connp)) { 2538 CONN_INC_REF(connp); 2539 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2540 connp, SQTAG_TCP_RPUTOTHER); 2541 } else if (IPCL_IS_UDP(connp)) { 2542 udp_resume_bind(connp, mp); 2543 } else { 2544 ASSERT(IPCL_IS_RAWIP(connp)); 2545 rawip_resume_bind(connp, mp); 2546 } 2547 } 2548 } 2549 2550 /* 2551 * Verify that both the source and destination addresses 2552 * are valid. If verify_dst, then destination address must also be reachable, 2553 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2554 * It takes ip6_pkt_t * as one of the arguments to determine correct 2555 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2556 * destination address. Note that parameter ipp is only useful for TCP connect 2557 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2558 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2559 * 2560 */ 2561 static int 2562 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2563 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2564 boolean_t ire_requested, boolean_t ipsec_policy_set, 2565 boolean_t fanout_insert, boolean_t verify_dst) 2566 { 2567 ire_t *src_ire; 2568 ire_t *dst_ire; 2569 int error = 0; 2570 int protocol; 2571 mblk_t *policy_mp; 2572 ire_t *sire = NULL; 2573 ire_t *md_dst_ire = NULL; 2574 ill_t *md_ill = NULL; 2575 ill_t *dst_ill = NULL; 2576 ipif_t *src_ipif = NULL; 2577 zoneid_t zoneid; 2578 boolean_t ill_held = B_FALSE; 2579 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2580 2581 src_ire = dst_ire = NULL; 2582 /* 2583 * NOTE: The protocol is beyond the wptr because that's how 2584 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2585 */ 2586 protocol = *mp->b_wptr & 0xFF; 2587 2588 /* 2589 * If we never got a disconnect before, clear it now. 2590 */ 2591 connp->conn_fully_bound = B_FALSE; 2592 2593 if (ipsec_policy_set) { 2594 policy_mp = mp->b_cont; 2595 } 2596 2597 zoneid = connp->conn_zoneid; 2598 2599 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2600 ipif_t *ipif; 2601 2602 /* 2603 * Use an "emulated" IRE_BROADCAST to tell the transport it 2604 * is a multicast. 2605 * Pass other information that matches 2606 * the ipif (e.g. the source address). 
2607 * 2608 * conn_multicast_ill is only used for IPv6 packets 2609 */ 2610 mutex_enter(&connp->conn_lock); 2611 if (connp->conn_multicast_ill != NULL) { 2612 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2613 zoneid, 0, &ipif); 2614 } else { 2615 /* Look for default like ip_wput_v6 */ 2616 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2617 } 2618 mutex_exit(&connp->conn_lock); 2619 if (ipif == NULL || !ire_requested || 2620 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2621 if (ipif != NULL) 2622 ipif_refrele(ipif); 2623 if (ip_debug > 2) { 2624 /* ip1dbg */ 2625 pr_addr_dbg("ip_bind_connected_v6: bad " 2626 "connected multicast %s\n", AF_INET6, 2627 v6dst); 2628 } 2629 error = ENETUNREACH; 2630 goto bad_addr; 2631 } 2632 if (ipif != NULL) 2633 ipif_refrele(ipif); 2634 } else { 2635 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2636 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2637 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2638 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2639 ipst); 2640 /* 2641 * We also prevent ire's with src address INADDR_ANY to 2642 * be used, which are created temporarily for 2643 * sending out packets from endpoints that have 2644 * conn_unspec_src set. 2645 */ 2646 if (dst_ire == NULL || 2647 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2648 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2649 /* 2650 * When verifying destination reachability, we always 2651 * complain. 2652 * 2653 * When not verifying destination reachability but we 2654 * found an IRE, i.e. the destination is reachable, 2655 * then the other tests still apply and we complain. 
2656 */ 2657 if (verify_dst || (dst_ire != NULL)) { 2658 if (ip_debug > 2) { 2659 /* ip1dbg */ 2660 pr_addr_dbg("ip_bind_connected_v6: bad" 2661 " connected dst %s\n", AF_INET6, 2662 v6dst); 2663 } 2664 if (dst_ire == NULL || 2665 !(dst_ire->ire_type & IRE_HOST)) { 2666 error = ENETUNREACH; 2667 } else { 2668 error = EHOSTUNREACH; 2669 } 2670 goto bad_addr; 2671 } 2672 } 2673 } 2674 2675 /* 2676 * We now know that routing will allow us to reach the destination. 2677 * Check whether Trusted Solaris policy allows communication with this 2678 * host, and pretend that the destination is unreachable if not. 2679 * 2680 * This is never a problem for TCP, since that transport is known to 2681 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2682 * handling. If the remote is unreachable, it will be detected at that 2683 * point, so there's no reason to check it here. 2684 * 2685 * Note that for sendto (and other datagram-oriented friends), this 2686 * check is done as part of the data path label computation instead. 2687 * The check here is just to make non-TCP connect() report the right 2688 * error. 2689 */ 2690 if (dst_ire != NULL && is_system_labeled() && 2691 !IPCL_IS_TCP(connp) && 2692 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2693 connp->conn_mac_exempt, ipst) != 0) { 2694 error = EHOSTUNREACH; 2695 if (ip_debug > 2) { 2696 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2697 AF_INET6, v6dst); 2698 } 2699 goto bad_addr; 2700 } 2701 2702 /* 2703 * If the app does a connect(), it means that it will most likely 2704 * send more than 1 packet to the destination. It makes sense 2705 * to clear the temporary flag. 
2706 */ 2707 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2708 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2709 irb_t *irb = dst_ire->ire_bucket; 2710 2711 rw_enter(&irb->irb_lock, RW_WRITER); 2712 /* 2713 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2714 * the lock in order to guarantee irb_tmp_ire_cnt. 2715 */ 2716 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2717 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2718 irb->irb_tmp_ire_cnt--; 2719 } 2720 rw_exit(&irb->irb_lock); 2721 } 2722 2723 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2724 2725 /* 2726 * See if we should notify ULP about MDT; we do this whether or not 2727 * ire_requested is TRUE, in order to handle active connects; MDT 2728 * eligibility tests for passive connects are handled separately 2729 * through tcp_adapt_ire(). We do this before the source address 2730 * selection, because dst_ire may change after a call to 2731 * ipif_select_source_v6(). This is a best-effort check, as the 2732 * packet for this connection may not actually go through 2733 * dst_ire->ire_stq, and the exact IRE can only be known after 2734 * calling ip_newroute_v6(). This is why we further check on the 2735 * IRE during Multidata packet transmission in tcp_multisend(). 
2736 */ 2737 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2738 dst_ire != NULL && 2739 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2740 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2741 ILL_MDT_CAPABLE(md_ill)) { 2742 md_dst_ire = dst_ire; 2743 IRE_REFHOLD(md_dst_ire); 2744 } 2745 2746 if (dst_ire != NULL && 2747 dst_ire->ire_type == IRE_LOCAL && 2748 dst_ire->ire_zoneid != zoneid && 2749 dst_ire->ire_zoneid != ALL_ZONES) { 2750 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2751 zoneid, 0, NULL, 2752 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2753 MATCH_IRE_RJ_BHOLE, ipst); 2754 if (src_ire == NULL) { 2755 error = EHOSTUNREACH; 2756 goto bad_addr; 2757 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2758 if (!(src_ire->ire_type & IRE_HOST)) 2759 error = ENETUNREACH; 2760 else 2761 error = EHOSTUNREACH; 2762 goto bad_addr; 2763 } 2764 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2765 src_ipif = src_ire->ire_ipif; 2766 ipif_refhold(src_ipif); 2767 *v6src = src_ipif->ipif_v6lcl_addr; 2768 } 2769 ire_refrele(src_ire); 2770 src_ire = NULL; 2771 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2772 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2773 *v6src = sire->ire_src_addr_v6; 2774 ire_refrele(dst_ire); 2775 dst_ire = sire; 2776 sire = NULL; 2777 } else if (dst_ire->ire_type == IRE_CACHE && 2778 (dst_ire->ire_flags & RTF_SETSRC)) { 2779 ASSERT(dst_ire->ire_zoneid == zoneid || 2780 dst_ire->ire_zoneid == ALL_ZONES); 2781 *v6src = dst_ire->ire_src_addr_v6; 2782 } else { 2783 /* 2784 * Pick a source address so that a proper inbound load 2785 * spreading would happen. Use dst_ill specified by the 2786 * app. when socket option or scopeid is set. 
2787 */ 2788 int err; 2789 2790 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2791 uint_t if_index; 2792 2793 /* 2794 * Scope id or IPV6_PKTINFO 2795 */ 2796 2797 if_index = ipp->ipp_ifindex; 2798 dst_ill = ill_lookup_on_ifindex( 2799 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2800 ipst); 2801 if (dst_ill == NULL) { 2802 ip1dbg(("ip_bind_connected_v6:" 2803 " bad ifindex %d\n", if_index)); 2804 error = EADDRNOTAVAIL; 2805 goto bad_addr; 2806 } 2807 ill_held = B_TRUE; 2808 } else if (connp->conn_outgoing_ill != NULL) { 2809 /* 2810 * For IPV6_BOUND_IF socket option, 2811 * conn_outgoing_ill should be set 2812 * already in TCP or UDP/ICMP. 2813 */ 2814 dst_ill = conn_get_held_ill(connp, 2815 &connp->conn_outgoing_ill, &err); 2816 if (err == ILL_LOOKUP_FAILED) { 2817 ip1dbg(("ip_bind_connected_v6:" 2818 "no ill for bound_if\n")); 2819 error = EADDRNOTAVAIL; 2820 goto bad_addr; 2821 } 2822 ill_held = B_TRUE; 2823 } else if (dst_ire->ire_stq != NULL) { 2824 /* No need to hold ill here */ 2825 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2826 } else { 2827 /* No need to hold ill here */ 2828 dst_ill = dst_ire->ire_ipif->ipif_ill; 2829 } 2830 if (!ip6_asp_can_lookup(ipst)) { 2831 *mp->b_wptr++ = (char)protocol; 2832 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2833 ip_bind_connected_resume_v6); 2834 error = EINPROGRESS; 2835 goto refrele_and_quit; 2836 } 2837 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2838 RESTRICT_TO_NONE, connp->conn_src_preferences, 2839 zoneid); 2840 ip6_asp_table_refrele(ipst); 2841 if (src_ipif == NULL) { 2842 pr_addr_dbg("ip_bind_connected_v6: " 2843 "no usable source address for " 2844 "connection to %s\n", AF_INET6, v6dst); 2845 error = EADDRNOTAVAIL; 2846 goto bad_addr; 2847 } 2848 *v6src = src_ipif->ipif_v6lcl_addr; 2849 } 2850 } 2851 2852 /* 2853 * We do ire_route_lookup_v6() here (and not an interface lookup) 2854 * as we assert that v6src should only come from an 2855 * UP interface for hard binding. 
2856 */ 2857 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2858 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2859 2860 /* src_ire must be a local|loopback */ 2861 if (!IRE_IS_LOCAL(src_ire)) { 2862 if (ip_debug > 2) { 2863 /* ip1dbg */ 2864 pr_addr_dbg("ip_bind_connected_v6: bad " 2865 "connected src %s\n", AF_INET6, v6src); 2866 } 2867 error = EADDRNOTAVAIL; 2868 goto bad_addr; 2869 } 2870 2871 /* 2872 * If the source address is a loopback address, the 2873 * destination had best be local or multicast. 2874 * The transports that can't handle multicast will reject 2875 * those addresses. 2876 */ 2877 if (src_ire->ire_type == IRE_LOOPBACK && 2878 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2879 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2880 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2881 error = -1; 2882 goto bad_addr; 2883 } 2884 /* 2885 * Allow setting new policies. For example, disconnects come 2886 * down as ipa_t bind. As we would have set conn_policy_cached 2887 * to B_TRUE before, we should set it to B_FALSE, so that policy 2888 * can change after the disconnect. 2889 */ 2890 connp->conn_policy_cached = B_FALSE; 2891 2892 /* 2893 * The addresses have been verified. Initialize the conn 2894 * before calling the policy as they expect the conns 2895 * initialized. 2896 */ 2897 connp->conn_srcv6 = *v6src; 2898 connp->conn_remv6 = *v6dst; 2899 connp->conn_lport = lport; 2900 connp->conn_fport = fport; 2901 2902 ASSERT(!(ipsec_policy_set && ire_requested)); 2903 if (ire_requested) { 2904 iulp_t *ulp_info = NULL; 2905 2906 /* 2907 * Note that sire will not be NULL if this is an off-link 2908 * connection and there is not cache for that dest yet. 2909 * 2910 * XXX Because of an existing bug, if there are multiple 2911 * default routes, the IRE returned now may not be the actual 2912 * default route used (default routes are chosen in a 2913 * round robin fashion). 
So if the metrics for different 2914 * default routes are different, we may return the wrong 2915 * metrics. This will not be a problem if the existing 2916 * bug is fixed. 2917 */ 2918 if (sire != NULL) 2919 ulp_info = &(sire->ire_uinfo); 2920 2921 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2922 ipst)) { 2923 error = -1; 2924 goto bad_addr; 2925 } 2926 } else if (ipsec_policy_set) { 2927 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2928 error = -1; 2929 goto bad_addr; 2930 } 2931 } 2932 2933 /* 2934 * Cache IPsec policy in this conn. If we have per-socket policy, 2935 * we'll cache that. If we don't, we'll inherit global policy. 2936 * 2937 * We can't insert until the conn reflects the policy. Note that 2938 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2939 * connections where we don't have a policy. This is to prevent 2940 * global policy lookups in the inbound path. 2941 * 2942 * If we insert before we set conn_policy_cached, 2943 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2944 * because global policy cound be non-empty. We normally call 2945 * ipsec_check_policy() for conn_policy_cached connections only if 2946 * conn_in_enforce_policy is set. But in this case, 2947 * conn_policy_cached can get set anytime since we made the 2948 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2949 * is called, which will make the above assumption false. Thus, we 2950 * need to insert after we set conn_policy_cached. 2951 */ 2952 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2953 goto bad_addr; 2954 2955 /* If not fanout_insert this was just an address verification */ 2956 if (fanout_insert) { 2957 /* 2958 * The addresses have been verified. Time to insert in 2959 * the correct fanout list. 2960 */ 2961 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2962 connp->conn_ports, 2963 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2964 } 2965 if (error == 0) { 2966 connp->conn_fully_bound = B_TRUE; 2967 /* 2968 * Our initial checks for MDT have passed; the IRE is not 2969 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2970 * be supporting MDT. Pass the IRE, IPC and ILL into 2971 * ip_mdinfo_return(), which performs further checks 2972 * against them and upon success, returns the MDT info 2973 * mblk which we will attach to the bind acknowledgment. 2974 */ 2975 if (md_dst_ire != NULL) { 2976 mblk_t *mdinfo_mp; 2977 2978 ASSERT(md_ill != NULL); 2979 ASSERT(md_ill->ill_mdt_capab != NULL); 2980 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2981 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2982 linkb(mp, mdinfo_mp); 2983 } 2984 } 2985 bad_addr: 2986 if (ipsec_policy_set) { 2987 ASSERT(policy_mp != NULL); 2988 freeb(policy_mp); 2989 /* 2990 * As of now assume that nothing else accompanies 2991 * IPSEC_POLICY_SET. 2992 */ 2993 mp->b_cont = NULL; 2994 } 2995 refrele_and_quit: 2996 if (src_ire != NULL) 2997 IRE_REFRELE(src_ire); 2998 if (dst_ire != NULL) 2999 IRE_REFRELE(dst_ire); 3000 if (sire != NULL) 3001 IRE_REFRELE(sire); 3002 if (src_ipif != NULL) 3003 ipif_refrele(src_ipif); 3004 if (md_dst_ire != NULL) 3005 IRE_REFRELE(md_dst_ire); 3006 if (ill_held && dst_ill != NULL) 3007 ill_refrele(dst_ill); 3008 return (error); 3009 } 3010 3011 /* 3012 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3013 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3014 */ 3015 /* ARGSUSED4 */ 3016 static boolean_t 3017 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3018 iulp_t *ulp_info, ip_stack_t *ipst) 3019 { 3020 mblk_t *mp1; 3021 ire_t *ret_ire; 3022 3023 mp1 = mp->b_cont; 3024 ASSERT(mp1 != NULL); 3025 3026 if (ire != NULL) { 3027 /* 3028 * mp1 initialized above to IRE_DB_REQ_TYPE 3029 * appended mblk. Its <upper protocol>'s 3030 * job to make sure there is room. 
3031 */ 3032 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3033 return (B_FALSE); 3034 3035 mp1->b_datap->db_type = IRE_DB_TYPE; 3036 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3037 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3038 ret_ire = (ire_t *)mp1->b_rptr; 3039 if (IN6_IS_ADDR_MULTICAST(dst) || 3040 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3041 ret_ire->ire_type = IRE_BROADCAST; 3042 ret_ire->ire_addr_v6 = *dst; 3043 } 3044 if (ulp_info != NULL) { 3045 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3046 sizeof (iulp_t)); 3047 } 3048 ret_ire->ire_mp = mp1; 3049 } else { 3050 /* 3051 * No IRE was found. Remove IRE mblk. 3052 */ 3053 mp->b_cont = mp1->b_cont; 3054 freeb(mp1); 3055 } 3056 return (B_TRUE); 3057 } 3058 3059 /* 3060 * Add an ip6i_t header to the front of the mblk. 3061 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3062 * Returns NULL if allocation fails (and frees original message). 3063 * Used in outgoing path when going through ip_newroute_*v6(). 3064 * Used in incoming path to pass ifindex to transports. 
3065 */ 3066 mblk_t * 3067 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3068 { 3069 mblk_t *mp1; 3070 ip6i_t *ip6i; 3071 ip6_t *ip6h; 3072 3073 ip6h = (ip6_t *)mp->b_rptr; 3074 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3075 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3076 mp->b_datap->db_ref > 1) { 3077 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3078 if (mp1 == NULL) { 3079 freemsg(mp); 3080 return (NULL); 3081 } 3082 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3083 mp1->b_cont = mp; 3084 mp = mp1; 3085 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3086 } 3087 mp->b_rptr = (uchar_t *)ip6i; 3088 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3089 ip6i->ip6i_nxt = IPPROTO_RAW; 3090 if (ill != NULL) { 3091 ip6i->ip6i_flags = IP6I_IFINDEX; 3092 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3093 } else { 3094 ip6i->ip6i_flags = 0; 3095 } 3096 ip6i->ip6i_nexthop = *dst; 3097 return (mp); 3098 } 3099 3100 /* 3101 * Handle protocols with which IP is less intimate. There 3102 * can be more than one stream bound to a particular 3103 * protocol. When this is the case, normally each one gets a copy 3104 * of any incoming packets. 3105 * However, if the packet was tunneled and not multicast we only send to it 3106 * the first match. 3107 * 3108 * Zones notes: 3109 * Packets will be distributed to streams in all zones. This is really only 3110 * useful for ICMPv6 as only applications in the global zone can create raw 3111 * sockets for other protocols. 
3112 */ 3113 static void 3114 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3115 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3116 boolean_t mctl_present, zoneid_t zoneid) 3117 { 3118 queue_t *rq; 3119 mblk_t *mp1, *first_mp1; 3120 in6_addr_t dst = ip6h->ip6_dst; 3121 in6_addr_t src = ip6h->ip6_src; 3122 boolean_t one_only; 3123 mblk_t *first_mp = mp; 3124 boolean_t secure, shared_addr; 3125 conn_t *connp, *first_connp, *next_connp; 3126 connf_t *connfp; 3127 ip_stack_t *ipst = inill->ill_ipst; 3128 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3129 3130 if (mctl_present) { 3131 mp = first_mp->b_cont; 3132 secure = ipsec_in_is_secure(first_mp); 3133 ASSERT(mp != NULL); 3134 } else { 3135 secure = B_FALSE; 3136 } 3137 3138 /* 3139 * If the packet was tunneled and not multicast we only send to it 3140 * the first match. 3141 */ 3142 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3143 !IN6_IS_ADDR_MULTICAST(&dst)); 3144 3145 shared_addr = (zoneid == ALL_ZONES); 3146 if (shared_addr) { 3147 /* 3148 * We don't allow multilevel ports for raw IP, so no need to 3149 * check for that here. 3150 */ 3151 zoneid = tsol_packet_to_zoneid(mp); 3152 } 3153 3154 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3155 mutex_enter(&connfp->connf_lock); 3156 connp = connfp->connf_head; 3157 for (connp = connfp->connf_head; connp != NULL; 3158 connp = connp->conn_next) { 3159 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3160 zoneid) && 3161 (!is_system_labeled() || 3162 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3163 connp))) 3164 break; 3165 } 3166 3167 if (connp == NULL || connp->conn_upq == NULL) { 3168 /* 3169 * No one bound to this port. Is 3170 * there a client that wants all 3171 * unclaimed datagrams? 
3172 */ 3173 mutex_exit(&connfp->connf_lock); 3174 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3175 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3176 nexthdr_offset, mctl_present, zoneid, ipst)) { 3177 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3178 } 3179 3180 return; 3181 } 3182 3183 CONN_INC_REF(connp); 3184 first_connp = connp; 3185 3186 /* 3187 * XXX: Fix the multiple protocol listeners case. We should not 3188 * be walking the conn->next list here. 3189 */ 3190 if (one_only) { 3191 /* 3192 * Only send message to one tunnel driver by immediately 3193 * terminating the loop. 3194 */ 3195 connp = NULL; 3196 } else { 3197 connp = connp->conn_next; 3198 3199 } 3200 for (;;) { 3201 while (connp != NULL) { 3202 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3203 flags, zoneid) && 3204 (!is_system_labeled() || 3205 tsol_receive_local(mp, &dst, IPV6_VERSION, 3206 shared_addr, connp))) 3207 break; 3208 connp = connp->conn_next; 3209 } 3210 3211 /* 3212 * Just copy the data part alone. The mctl part is 3213 * needed just for verifying policy and it is never 3214 * sent up. 3215 */ 3216 if (connp == NULL || connp->conn_upq == NULL || 3217 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3218 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3219 /* 3220 * No more intested clients or memory 3221 * allocation failed 3222 */ 3223 connp = first_connp; 3224 break; 3225 } 3226 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3227 CONN_INC_REF(connp); 3228 mutex_exit(&connfp->connf_lock); 3229 rq = connp->conn_rq; 3230 /* 3231 * For link-local always add ifindex so that transport can set 3232 * sin6_scope_id. Avoid it for ICMP error fanout. 
3233 */ 3234 if ((connp->conn_ip_recvpktinfo || 3235 IN6_IS_ADDR_LINKLOCAL(&src)) && 3236 (flags & IP_FF_IPINFO)) { 3237 /* Add header */ 3238 mp1 = ip_add_info_v6(mp1, inill, &dst); 3239 } 3240 if (mp1 == NULL) { 3241 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3242 } else if (!canputnext(rq)) { 3243 if (flags & IP_FF_RAWIP) { 3244 BUMP_MIB(ill->ill_ip_mib, 3245 rawipIfStatsInOverflows); 3246 } else { 3247 BUMP_MIB(ill->ill_icmp6_mib, 3248 ipv6IfIcmpInOverflows); 3249 } 3250 3251 freemsg(mp1); 3252 } else { 3253 /* 3254 * Don't enforce here if we're a tunnel - let "tun" do 3255 * it instead. 3256 */ 3257 if (!IPCL_IS_IPTUN(connp) && 3258 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3259 secure)) { 3260 first_mp1 = ipsec_check_inbound_policy 3261 (first_mp1, connp, NULL, ip6h, 3262 mctl_present); 3263 } 3264 if (first_mp1 != NULL) { 3265 if (mctl_present) 3266 freeb(first_mp1); 3267 BUMP_MIB(ill->ill_ip_mib, 3268 ipIfStatsHCInDelivers); 3269 (connp->conn_recv)(connp, mp1, NULL); 3270 } 3271 } 3272 mutex_enter(&connfp->connf_lock); 3273 /* Follow the next pointer before releasing the conn. */ 3274 next_connp = connp->conn_next; 3275 CONN_DEC_REF(connp); 3276 connp = next_connp; 3277 } 3278 3279 /* Last one. Send it upstream. */ 3280 mutex_exit(&connfp->connf_lock); 3281 3282 /* Initiate IPPF processing */ 3283 if (IP6_IN_IPP(flags, ipst)) { 3284 uint_t ifindex; 3285 3286 mutex_enter(&ill->ill_lock); 3287 ifindex = ill->ill_phyint->phyint_ifindex; 3288 mutex_exit(&ill->ill_lock); 3289 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3290 if (mp == NULL) { 3291 CONN_DEC_REF(connp); 3292 if (mctl_present) 3293 freeb(first_mp); 3294 return; 3295 } 3296 } 3297 3298 /* 3299 * For link-local always add ifindex so that transport can set 3300 * sin6_scope_id. Avoid it for ICMP error fanout. 
3301 */ 3302 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3303 (flags & IP_FF_IPINFO)) { 3304 /* Add header */ 3305 mp = ip_add_info_v6(mp, inill, &dst); 3306 if (mp == NULL) { 3307 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3308 CONN_DEC_REF(connp); 3309 if (mctl_present) 3310 freeb(first_mp); 3311 return; 3312 } else if (mctl_present) { 3313 first_mp->b_cont = mp; 3314 } else { 3315 first_mp = mp; 3316 } 3317 } 3318 3319 rq = connp->conn_rq; 3320 if (!canputnext(rq)) { 3321 if (flags & IP_FF_RAWIP) { 3322 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3323 } else { 3324 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3325 } 3326 3327 freemsg(first_mp); 3328 } else { 3329 if (IPCL_IS_IPTUN(connp)) { 3330 /* 3331 * Tunneled packet. We enforce policy in the tunnel 3332 * module itself. 3333 * 3334 * Send the WHOLE packet up (incl. IPSEC_IN) without 3335 * a policy check. 3336 */ 3337 putnext(rq, first_mp); 3338 CONN_DEC_REF(connp); 3339 return; 3340 } 3341 /* 3342 * Don't enforce here if we're a tunnel - let "tun" do 3343 * it instead. 3344 */ 3345 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3346 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3347 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3348 NULL, ip6h, mctl_present); 3349 if (first_mp == NULL) { 3350 CONN_DEC_REF(connp); 3351 return; 3352 } 3353 } 3354 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3355 (connp->conn_recv)(connp, mp, NULL); 3356 if (mctl_present) 3357 freeb(first_mp); 3358 } 3359 CONN_DEC_REF(connp); 3360 } 3361 3362 /* 3363 * Send an ICMP error after patching up the packet appropriately. Returns 3364 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3365 */ 3366 int 3367 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3368 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3369 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3370 { 3371 ip6_t *ip6h; 3372 mblk_t *first_mp; 3373 boolean_t secure; 3374 unsigned char db_type; 3375 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3376 3377 first_mp = mp; 3378 if (mctl_present) { 3379 mp = mp->b_cont; 3380 secure = ipsec_in_is_secure(first_mp); 3381 ASSERT(mp != NULL); 3382 } else { 3383 /* 3384 * If this is an ICMP error being reported - which goes 3385 * up as M_CTLs, we need to convert them to M_DATA till 3386 * we finish checking with global policy because 3387 * ipsec_check_global_policy() assumes M_DATA as clear 3388 * and M_CTL as secure. 3389 */ 3390 db_type = mp->b_datap->db_type; 3391 mp->b_datap->db_type = M_DATA; 3392 secure = B_FALSE; 3393 } 3394 /* 3395 * We are generating an icmp error for some inbound packet. 3396 * Called from all ip_fanout_(udp, tcp, proto) functions. 3397 * Before we generate an error, check with global policy 3398 * to see whether this is allowed to enter the system. As 3399 * there is no "conn", we are checking with global policy. 
3400 */ 3401 ip6h = (ip6_t *)mp->b_rptr; 3402 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3403 first_mp = ipsec_check_global_policy(first_mp, NULL, 3404 NULL, ip6h, mctl_present, ipst->ips_netstack); 3405 if (first_mp == NULL) 3406 return (0); 3407 } 3408 3409 if (!mctl_present) 3410 mp->b_datap->db_type = db_type; 3411 3412 if (flags & IP_FF_SEND_ICMP) { 3413 if (flags & IP_FF_HDR_COMPLETE) { 3414 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3415 freemsg(first_mp); 3416 return (1); 3417 } 3418 } 3419 switch (icmp_type) { 3420 case ICMP6_DST_UNREACH: 3421 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3422 B_FALSE, B_FALSE, zoneid, ipst); 3423 break; 3424 case ICMP6_PARAM_PROB: 3425 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3426 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3427 break; 3428 default: 3429 #ifdef DEBUG 3430 panic("ip_fanout_send_icmp_v6: wrong type"); 3431 /*NOTREACHED*/ 3432 #else 3433 freemsg(first_mp); 3434 break; 3435 #endif 3436 } 3437 } else { 3438 freemsg(first_mp); 3439 return (0); 3440 } 3441 3442 return (1); 3443 } 3444 3445 3446 /* 3447 * Fanout for TCP packets 3448 * The caller puts <fport, lport> in the ports parameter. 
 */
static void
ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill,
    uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid)
{
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	tcph_t		*tcph;
	boolean_t	syn_present = B_FALSE;
	ip_stack_t	*ipst = inill->ill_ipst;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;

	/*
	 * With an M_CTL in front, first_mp is the M_CTL and mp the data
	 * block chained behind it; otherwise both name the data block.
	 */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/*
	 * A non-NULL connp is released with CONN_DEC_REF on every exit path
	 * below except the final hand-off through ip_input_proc.
	 */
	connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst);

	if (connp == NULL ||
	    !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) {
		/*
		 * No hard-bound match.  Send Reset.
		 */
		dblk_t *dp = mp->b_datap;
		uint32_t ill_index;

		ASSERT((dp->db_struioflag & STRUIO_IP) == 0);

		/*
		 * Initiate IPPf processing, if needed.
		 * NOTE(review): this tests that IP6_NO_IPPOLICY is SET,
		 * while the delivery path below goes through IP6_IN_IPP();
		 * confirm the polarity here is intended.
		 */
		if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
		    (flags & IP6_NO_IPPOLICY)) {
			ill_index = ill->ill_phyint->phyint_ifindex;
			ip_process(IPP_LOCAL_IN, &first_mp, ill_index);
			if (first_mp == NULL) {
				if (connp != NULL)
					CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid,
		    ipst->ips_netstack->netstack_tcp, connp);
		if (connp != NULL)
			CONN_DEC_REF(connp);
		return;
	}

	/* A segment with SYN set and none of ACK/RST/URG set. */
	tcph = (tcph_t *)&mp->b_rptr[hdr_len];
	if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) {
		if (connp->conn_flags & IPCL_TCP) {
			squeue_t *sqp;

			/*
			 * For fused tcp loopback, assign the eager's
			 * squeue to be that of the active connect's.
			 */
			if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion &&
			    !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) &&
			    !secure &&
			    !IP6_IN_IPP(flags, ipst)) {
				ASSERT(Q_TO_CONN(q) != NULL);
				sqp = Q_TO_CONN(q)->conn_sqp;
			} else {
				sqp = IP_SQUEUE_GET(lbolt);
			}

			/* Stash the chosen squeue in the data block. */
			mp->b_datap->db_struioflag |= STRUIO_EAGER;
			DB_CKSUMSTART(mp) = (intptr_t)sqp;

			/*
			 * db_cksumstuff is unused in the incoming
			 * path; Thus store the ifindex here. It will
			 * be cleared in tcp_conn_create_v6().
			 */
			DB_CKSUMSTUFF(mp) =
			    (intptr_t)ill->ill_phyint->phyint_ifindex;
			syn_present = B_TRUE;
		}
	}

	/*
	 * The conn is TCP and IPCL_IS_BOUND, but the segment was not taken
	 * as a SYN above: segments carrying ACK (and neither RST nor URG)
	 * are answered with a reset, everything else is dropped.
	 */
	if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) {
		/* Note: this local shadows the 'flags' parameter. */
		uint_t	flags = (unsigned int)tcph->th_flags[0] & 0xFF;
		if ((flags & TH_RST) || (flags & TH_URG)) {
			CONN_DEC_REF(connp);
			freemsg(first_mp);
			return;
		}
		if (flags & TH_ACK) {
			tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid,
			    ipst->ips_netstack->netstack_tcp, connp);
			CONN_DEC_REF(connp);
			return;
		}

		CONN_DEC_REF(connp);
		freemsg(first_mp);
		return;
	}

	if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    NULL, ip6h, mctl_present);
		if (first_mp == NULL) {
			CONN_DEC_REF(connp);
			return;
		}
		if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) {
			ASSERT(syn_present);
			/* Flag the message with STRUIO_POLICY for tcp. */
			if (mctl_present) {
				ASSERT(first_mp != mp);
				first_mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			} else {
				ASSERT(first_mp == mp);
				mp->b_datap->db_struioflag &=
				    ~STRUIO_EAGER;
				mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			}
		} else {
			/*
			 * Discard first_mp early since we're dealing with a
			 * fully-connected conn_t and tcp doesn't do policy in
			 * this case. Also, if someone is bound to IPPROTO_TCP
			 * over raw IP, they don't expect to see a M_CTL.
			 */
			if (mctl_present) {
				freeb(first_mp);
				mctl_present = B_FALSE;
			}
			first_mp = mp;
		}
	}

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags, ipst)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		} else if (mctl_present) {
			/*
			 * ip_process() was handed &mp and so may have
			 * replaced it; re-link it behind the M_CTL.
			 * NOTE(review): the comment here previously named
			 * ip_add_info_v6, which is not called until below.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/*
	 * For link-local always add ifindex so that TCP can bind to that
	 * interface. Avoid it for ICMP error fanout.
	 * Skipped for SYNs, where the ifindex was stored in db_cksumstuff
	 * above instead.
	 */
	if (!syn_present && ((connp->conn_ip_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) &&
	    (flags & IP_FF_IPINFO))) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
	if (IPCL_IS_TCP(connp)) {
		/*
		 * Handed off with the conn reference still held (no
		 * CONN_DEC_REF on this path).
		 */
		(*ip_input_proc)(connp->conn_sqp, first_mp,
		    connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT);
	} else {
		/* SOCK_RAW, IPPROTO_TCP case */
		(connp->conn_recv)(connp, first_mp, NULL);
		CONN_DEC_REF(connp);
	}
}

/*
 * Fanout for UDP packets.
 * The caller puts <fport, lport> in the ports parameter.
 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
 *
 * If SO_REUSEADDR is set all multicast and broadcast packets
 * will be delivered to all streams bound to the same port.
 *
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t	dstport, srcport;
	in6_addr_t	dst;
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	connf_t		*connfp;
	conn_t		*first_conn;
	conn_t		*next_conn;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	src;
	boolean_t	shared_addr;
	ip_stack_t	*ipst = inill->ill_ipst;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;

	/*
	 * With an M_CTL in front, first_mp is the M_CTL and mp the data
	 * block chained behind it; otherwise both name the data block.
	 */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/* Extract ports in net byte order */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		/*
		 * No need to handle exclusive-stack zones since ALL_ZONES
		 * only applies to the shared stack.
		 */
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && IPCL_ZONE_MATCH(connp, zoneid) &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		/* Receiver is flow-controlled: drop the whole message. */
		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags, ipst)) {
			uint_t	ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ip_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IPINFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		(connp->conn_recv)(connp, mp, NULL);

		IP6_STAT(ipst, ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data went up; release the M_CTL. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/*
	 * Multicast destination: find the first match.  Unlike the
	 * non-multicast search above, no IPCL_ZONE_MATCH is applied here.
	 */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	/*
	 * Hold the first match; it receives the original message after all
	 * later matches have been handed duplicates.
	 */
	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		/* The list lock is dropped while the duplicate is delivered. */
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ip_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IPINFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			/* Data is gone; release the duplicated M_CTL too. */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			/* Only the data part is sent up. */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

			/* Send it upstream */
			(connp->conn_recv)(connp, mp1, NULL);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ipst, ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags, ipst)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ip_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
		/* Data freed here; any M_CTL is released at the end. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		(connp->conn_recv)(connp, mp, NULL);
	}
	IP6_STAT(ipst, ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 * If the raw IPPROTO_UDP fanout list is non-empty, hand the packet
	 * to ip_fanout_proto_v6(); otherwise go through
	 * ip_fanout_send_icmp_v6() with a port-unreachable code.
	 */
	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid, ipst)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 *	- Set extension header pointers to appropriate locations
 *	- Determine IPv6 header length and return it
 *	- Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  It also
 * skips fragment headers.  This routine deals with malformed packets
 * of various sorts in which case the returned length is up to the
 * malformed part.
 *
 * Only bytes up to mp->b_wptr are examined.  The last next-header value
 * seen is stored through nexthdrp when nexthdrp is non-NULL.
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* Length field counts 8-byte units beyond the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts + ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			tmpfraghdr = (ip6_frag_t *)whereptr;
			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
				ipp->ipp_fields |= IPPF_FRAGHDR;
				ipp->ipp_fraghdr = tmpfraghdr;
				ipp->ipp_fraghdrlen = ehdrlen;
			}
			break;
		case IPPROTO_NONE:
		default:
			/* Not an extension header handled here: stop. */
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in header fields of an IPv6 header in place.
 *
 * If the source address is unspecified it is replaced with the address of
 * the IRE returned by ire_lookup_local_v6() for the zone.  ip6_vcf is set to
 * IPV6_DEFAULT_VERS_AND_FLOW and ip6_hops to the stack's ips_ipv6_def_hops.
 *
 * Returns 0 on success, or 1 (leaving the header untouched) when a source
 * address is needed and no IRE is found.
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst)
{
	ire_t	*ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid, ipst);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4130 */ 4131 boolean_t 4132 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4133 uint8_t **nexthdrpp) 4134 { 4135 uint16_t length; 4136 uint_t ehdrlen; 4137 uint8_t *nexthdrp; 4138 uint8_t *whereptr; 4139 uint8_t *endptr; 4140 ip6_dest_t *desthdr; 4141 ip6_rthdr_t *rthdr; 4142 ip6_frag_t *fraghdr; 4143 4144 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4145 length = IPV6_HDR_LEN; 4146 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4147 endptr = mp->b_wptr; 4148 4149 nexthdrp = &ip6h->ip6_nxt; 4150 while (whereptr < endptr) { 4151 /* Is there enough left for len + nexthdr? */ 4152 if (whereptr + MIN_EHDR_LEN > endptr) 4153 break; 4154 4155 switch (*nexthdrp) { 4156 case IPPROTO_HOPOPTS: 4157 case IPPROTO_DSTOPTS: 4158 /* Assumes the headers are identical for hbh and dst */ 4159 desthdr = (ip6_dest_t *)whereptr; 4160 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4161 if ((uchar_t *)desthdr + ehdrlen > endptr) 4162 return (B_FALSE); 4163 nexthdrp = &desthdr->ip6d_nxt; 4164 break; 4165 case IPPROTO_ROUTING: 4166 rthdr = (ip6_rthdr_t *)whereptr; 4167 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4168 if ((uchar_t *)rthdr + ehdrlen > endptr) 4169 return (B_FALSE); 4170 nexthdrp = &rthdr->ip6r_nxt; 4171 break; 4172 case IPPROTO_FRAGMENT: 4173 fraghdr = (ip6_frag_t *)whereptr; 4174 ehdrlen = sizeof (ip6_frag_t); 4175 if ((uchar_t *)&fraghdr[1] > endptr) 4176 return (B_FALSE); 4177 nexthdrp = &fraghdr->ip6f_nxt; 4178 break; 4179 case IPPROTO_NONE: 4180 /* No next header means we're finished */ 4181 default: 4182 *hdr_length_ptr = length; 4183 *nexthdrpp = nexthdrp; 4184 return (B_TRUE); 4185 } 4186 length += ehdrlen; 4187 whereptr += ehdrlen; 4188 *hdr_length_ptr = length; 4189 *nexthdrpp = nexthdrp; 4190 } 4191 switch (*nexthdrp) { 4192 case IPPROTO_HOPOPTS: 4193 case IPPROTO_DSTOPTS: 4194 case IPPROTO_ROUTING: 4195 case IPPROTO_FRAGMENT: 4196 /* 4197 * If any know extension headers are still to be processed, 4198 * the 
packet's malformed (or at least all the IP header(s) are 4199 * not in the same mblk - and that should never happen. 4200 */ 4201 return (B_FALSE); 4202 4203 default: 4204 /* 4205 * If we get here, we know that all of the IP headers were in 4206 * the same mblk, even if the ULP header is in the next mblk. 4207 */ 4208 *hdr_length_ptr = length; 4209 *nexthdrpp = nexthdrp; 4210 return (B_TRUE); 4211 } 4212 } 4213 4214 /* 4215 * Return the length of the IPv6 related headers (including extension headers) 4216 * Returns a length even if the packet is malformed. 4217 */ 4218 int 4219 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4220 { 4221 uint16_t hdr_len; 4222 uint8_t *nexthdrp; 4223 4224 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4225 return (hdr_len); 4226 } 4227 4228 /* 4229 * Select an ill for the packet by considering load spreading across 4230 * a different ill in the group if dst_ill is part of some group. 4231 */ 4232 static ill_t * 4233 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4234 { 4235 ill_t *ill; 4236 4237 /* 4238 * We schedule irrespective of whether the source address is 4239 * INADDR_UNSPECIED or not. 4240 */ 4241 ill = illgrp_scheduler(dst_ill); 4242 if (ill == NULL) 4243 return (NULL); 4244 4245 /* 4246 * For groups with names ip_sioctl_groupname ensures that all 4247 * ills are of same type. For groups without names, ifgrp_insert 4248 * ensures this. 4249 */ 4250 ASSERT(dst_ill->ill_type == ill->ill_type); 4251 4252 return (ill); 4253 } 4254 4255 /* 4256 * IPv6 - 4257 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4258 * to send out a packet to a destination address for which we do not have 4259 * specific routing information. 4260 * 4261 * Handle non-multicast packets. If ill is non-NULL the match is done 4262 * for that ill. 
4263 * 4264 * When a specific ill is specified (using IPV6_PKTINFO, 4265 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4266 * on routing entries (ftable and ctable) that have a matching 4267 * ire->ire_ipif->ipif_ill. Thus this can only be used 4268 * for destinations that are on-link for the specific ill 4269 * and that can appear on multiple links. Thus it is useful 4270 * for multicast destinations, link-local destinations, and 4271 * at some point perhaps for site-local destinations (if the 4272 * node sits at a site boundary). 4273 * We create the cache entries in the regular ctable since 4274 * it can not "confuse" things for other destinations. 4275 * table. 4276 * 4277 * When ill is part of a ill group, we subject the packets 4278 * to load spreading even if the ill is specified by the 4279 * means described above. We disable only for IPV6_BOUND_PIF 4280 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4281 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4282 * set. 4283 * 4284 * NOTE : These are the scopes of some of the variables that point at IRE, 4285 * which needs to be followed while making any future modifications 4286 * to avoid memory leaks. 4287 * 4288 * - ire and sire are the entries looked up initially by 4289 * ire_ftable_lookup_v6. 4290 * - ipif_ire is used to hold the interface ire associated with 4291 * the new cache ire. But it's scope is limited, so we always REFRELE 4292 * it before branching out to error paths. 4293 * - save_ire is initialized before ire_create, so that ire returned 4294 * by ire_create will not over-write the ire. We REFRELE save_ire 4295 * before breaking out of the switch. 4296 * 4297 * Thus on failures, we have to REFRELE only ire and sire, if they 4298 * are not NULL. 4299 * 4300 * v6srcp may be used in the future. Currently unused. 
4301 */ 4302 /* ARGSUSED */ 4303 void 4304 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4305 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4306 { 4307 in6_addr_t v6gw; 4308 in6_addr_t dst; 4309 ire_t *ire = NULL; 4310 ipif_t *src_ipif = NULL; 4311 ill_t *dst_ill = NULL; 4312 ire_t *sire = NULL; 4313 ire_t *save_ire; 4314 ip6_t *ip6h; 4315 int err = 0; 4316 mblk_t *first_mp; 4317 ipsec_out_t *io; 4318 ill_t *attach_ill = NULL; 4319 ushort_t ire_marks = 0; 4320 int match_flags; 4321 boolean_t ip6i_present; 4322 ire_t *first_sire = NULL; 4323 mblk_t *copy_mp = NULL; 4324 mblk_t *xmit_mp = NULL; 4325 in6_addr_t save_dst; 4326 uint32_t multirt_flags = 4327 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4328 boolean_t multirt_is_resolvable; 4329 boolean_t multirt_resolve_next; 4330 boolean_t need_rele = B_FALSE; 4331 boolean_t do_attach_ill = B_FALSE; 4332 boolean_t ip6_asp_table_held = B_FALSE; 4333 tsol_ire_gw_secattr_t *attrp = NULL; 4334 tsol_gcgrp_t *gcgrp = NULL; 4335 tsol_gcgrp_addr_t ga; 4336 4337 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4338 4339 first_mp = mp; 4340 if (mp->b_datap->db_type == M_CTL) { 4341 mp = mp->b_cont; 4342 io = (ipsec_out_t *)first_mp->b_rptr; 4343 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4344 } else { 4345 io = NULL; 4346 } 4347 4348 /* 4349 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4350 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4351 * could be NULL. 4352 * 4353 * This information can appear either in an ip6i_t or an IPSEC_OUT 4354 * message. 4355 */ 4356 ip6h = (ip6_t *)mp->b_rptr; 4357 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4358 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4359 if (!ip6i_present || 4360 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4361 attach_ill = ip_grab_attach_ill(ill, first_mp, 4362 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4363 io->ipsec_out_ill_index), B_TRUE, ipst); 4364 /* Failure case frees things for us. */ 4365 if (attach_ill == NULL) 4366 return; 4367 4368 /* 4369 * Check if we need an ire that will not be 4370 * looked up by anybody else i.e. HIDDEN. 4371 */ 4372 if (ill_is_probeonly(attach_ill)) 4373 ire_marks = IRE_MARK_HIDDEN; 4374 } 4375 } 4376 4377 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4378 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4379 goto icmp_err_ret; 4380 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4381 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4382 goto icmp_err_ret; 4383 } 4384 4385 /* 4386 * If this IRE is created for forwarding or it is not for 4387 * TCP traffic, mark it as temporary. 4388 * 4389 * Is it sufficient just to check the next header?? 4390 */ 4391 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4392 ire_marks |= IRE_MARK_TEMPORARY; 4393 4394 /* 4395 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4396 * chain until it gets the most specific information available. 4397 * For example, we know that there is no IRE_CACHE for this dest, 4398 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4399 * ire_ftable_lookup_v6 will look up the gateway, etc. 4400 */ 4401 4402 if (ill == NULL) { 4403 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4404 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4405 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4406 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4407 match_flags, ipst); 4408 /* 4409 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4410 * in a NULL ill, but the packet could be a neighbor 4411 * solicitation/advertisment and could have a valid attach_ill. 4412 */ 4413 if (attach_ill != NULL) 4414 ill_refrele(attach_ill); 4415 } else { 4416 if (attach_ill != NULL) { 4417 /* 4418 * attach_ill is set only for communicating with 4419 * on-link hosts. 
So, don't look for DEFAULT. 4420 * ip_wput_v6 passes the right ill in this case and 4421 * hence we can assert. 4422 */ 4423 ASSERT(ill == attach_ill); 4424 ill_refrele(attach_ill); 4425 do_attach_ill = B_TRUE; 4426 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4427 } else { 4428 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4429 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4430 } 4431 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4432 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4433 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4434 } 4435 4436 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4437 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4438 4439 /* 4440 * We enter a loop that will be run only once in most cases. 4441 * The loop is re-entered in the case where the destination 4442 * can be reached through multiple RTF_MULTIRT-flagged routes. 4443 * The intention is to compute multiple routes to a single 4444 * destination in a single ip_newroute_v6 call. 4445 * The information is contained in sire->ire_flags. 4446 */ 4447 do { 4448 multirt_resolve_next = B_FALSE; 4449 4450 if (dst_ill != NULL) { 4451 ill_refrele(dst_ill); 4452 dst_ill = NULL; 4453 } 4454 if (src_ipif != NULL) { 4455 ipif_refrele(src_ipif); 4456 src_ipif = NULL; 4457 } 4458 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4459 ip3dbg(("ip_newroute_v6: starting new resolution " 4460 "with first_mp %p, tag %d\n", 4461 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4462 4463 /* 4464 * We check if there are trailing unresolved routes for 4465 * the destination contained in sire. 
4466 */ 4467 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4468 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4469 4470 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4471 "ire %p, sire %p\n", 4472 multirt_is_resolvable, (void *)ire, (void *)sire)); 4473 4474 if (!multirt_is_resolvable) { 4475 /* 4476 * No more multirt routes to resolve; give up 4477 * (all routes resolved or no more resolvable 4478 * routes). 4479 */ 4480 if (ire != NULL) { 4481 ire_refrele(ire); 4482 ire = NULL; 4483 } 4484 } else { 4485 ASSERT(sire != NULL); 4486 ASSERT(ire != NULL); 4487 /* 4488 * We simply use first_sire as a flag that 4489 * indicates if a resolvable multirt route has 4490 * already been found during the preceding 4491 * loops. If it is not the case, we may have 4492 * to send an ICMP error to report that the 4493 * destination is unreachable. We do not 4494 * IRE_REFHOLD first_sire. 4495 */ 4496 if (first_sire == NULL) { 4497 first_sire = sire; 4498 } 4499 } 4500 } 4501 if ((ire == NULL) || (ire == sire)) { 4502 /* 4503 * either ire == NULL (the destination cannot be 4504 * resolved) or ire == sire (the gateway cannot be 4505 * resolved). At this point, there are no more routes 4506 * to resolve for the destination, thus we exit. 4507 */ 4508 if (ip_debug > 3) { 4509 /* ip2dbg */ 4510 pr_addr_dbg("ip_newroute_v6: " 4511 "can't resolve %s\n", AF_INET6, v6dstp); 4512 } 4513 ip3dbg(("ip_newroute_v6: " 4514 "ire %p, sire %p, first_sire %p\n", 4515 (void *)ire, (void *)sire, (void *)first_sire)); 4516 4517 if (sire != NULL) { 4518 ire_refrele(sire); 4519 sire = NULL; 4520 } 4521 4522 if (first_sire != NULL) { 4523 /* 4524 * At least one multirt route has been found 4525 * in the same ip_newroute() call; there is no 4526 * need to report an ICMP error. 4527 * first_sire was not IRE_REFHOLDed. 
4528 */ 4529 MULTIRT_DEBUG_UNTAG(first_mp); 4530 freemsg(first_mp); 4531 return; 4532 } 4533 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4534 RTA_DST, ipst); 4535 goto icmp_err_ret; 4536 } 4537 4538 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4539 4540 /* 4541 * Verify that the returned IRE does not have either the 4542 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4543 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4544 */ 4545 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4546 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4547 goto icmp_err_ret; 4548 4549 /* 4550 * Increment the ire_ob_pkt_count field for ire if it is an 4551 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4552 * increment the same for the parent IRE, sire, if it is some 4553 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4554 */ 4555 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4556 UPDATE_OB_PKT_COUNT(ire); 4557 ire->ire_last_used_time = lbolt; 4558 } 4559 4560 if (sire != NULL) { 4561 mutex_enter(&sire->ire_lock); 4562 v6gw = sire->ire_gateway_addr_v6; 4563 mutex_exit(&sire->ire_lock); 4564 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4565 IRE_INTERFACE)) == 0); 4566 UPDATE_OB_PKT_COUNT(sire); 4567 sire->ire_last_used_time = lbolt; 4568 } else { 4569 v6gw = ipv6_all_zeros; 4570 } 4571 4572 /* 4573 * We have a route to reach the destination. 4574 * 4575 * 1) If the interface is part of ill group, try to get a new 4576 * ill taking load spreading into account. 4577 * 4578 * 2) After selecting the ill, get a source address that might 4579 * create good inbound load spreading and that matches the 4580 * right scope. ipif_select_source_v6 does this for us. 4581 * 4582 * If the application specified the ill (ifindex), we still 4583 * load spread. Only if the packets needs to go out specifically 4584 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4585 * IPV6_BOUND_PIF we don't try to use a different ill for load 4586 * spreading. 4587 */ 4588 if (!do_attach_ill) { 4589 /* 4590 * If the interface belongs to an interface group, 4591 * make sure the next possible interface in the group 4592 * is used. This encourages load spreading among 4593 * peers in an interface group. However, in the case 4594 * of multirouting, load spreading is not used, as we 4595 * actually want to replicate outgoing packets through 4596 * particular interfaces. 4597 * 4598 * Note: While we pick a dst_ill we are really only 4599 * interested in the ill for load spreading. 4600 * The source ipif is determined by source address 4601 * selection below. 4602 */ 4603 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4604 dst_ill = ire->ire_ipif->ipif_ill; 4605 /* For uniformity do a refhold */ 4606 ill_refhold(dst_ill); 4607 } else { 4608 /* 4609 * If we are here trying to create an IRE_CACHE 4610 * for an offlink destination and have the 4611 * IRE_CACHE for the next hop and the latter is 4612 * using virtual IP source address selection i.e 4613 * it's ire->ire_ipif is pointing to a virtual 4614 * network interface (vni) then 4615 * ip_newroute_get_dst_ll() will return the vni 4616 * interface as the dst_ill. Since the vni is 4617 * virtual i.e not associated with any physical 4618 * interface, it cannot be the dst_ill, hence 4619 * in such a case call ip_newroute_get_dst_ll() 4620 * with the stq_ill instead of the ire_ipif ILL. 4621 * The function returns a refheld ill. 
4622 */ 4623 if ((ire->ire_type == IRE_CACHE) && 4624 IS_VNI(ire->ire_ipif->ipif_ill)) 4625 dst_ill = ip_newroute_get_dst_ill_v6( 4626 ire->ire_stq->q_ptr); 4627 else 4628 dst_ill = ip_newroute_get_dst_ill_v6( 4629 ire->ire_ipif->ipif_ill); 4630 } 4631 if (dst_ill == NULL) { 4632 if (ip_debug > 2) { 4633 pr_addr_dbg("ip_newroute_v6 : no dst " 4634 "ill for dst %s\n", 4635 AF_INET6, v6dstp); 4636 } 4637 goto icmp_err_ret; 4638 } else if (dst_ill->ill_group == NULL && ill != NULL && 4639 dst_ill != ill) { 4640 /* 4641 * If "ill" is not part of any group, we should 4642 * have found a route matching "ill" as we 4643 * called ire_ftable_lookup_v6 with 4644 * MATCH_IRE_ILL_GROUP. 4645 * Rather than asserting when there is a 4646 * mismatch, we just drop the packet. 4647 */ 4648 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4649 "dst_ill %s ill %s\n", 4650 dst_ill->ill_name, 4651 ill->ill_name)); 4652 goto icmp_err_ret; 4653 } 4654 } else { 4655 dst_ill = ire->ire_ipif->ipif_ill; 4656 /* For uniformity do refhold */ 4657 ill_refhold(dst_ill); 4658 /* 4659 * We should have found a route matching ill as we 4660 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4661 * Rather than asserting, while there is a mismatch, 4662 * we just drop the packet. 4663 */ 4664 if (dst_ill != ill) { 4665 ip0dbg(("ip_newroute_v6: Packet dropped as " 4666 "IP6I_ATTACH_IF ill is %s, " 4667 "ire->ire_ipif->ipif_ill is %s\n", 4668 ill->ill_name, 4669 dst_ill->ill_name)); 4670 goto icmp_err_ret; 4671 } 4672 } 4673 /* 4674 * Pick a source address which matches the scope of the 4675 * destination address. 4676 * For RTF_SETSRC routes, the source address is imposed by the 4677 * parent ire (sire). 4678 */ 4679 ASSERT(src_ipif == NULL); 4680 if (ire->ire_type == IRE_IF_RESOLVER && 4681 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4682 ip6_asp_can_lookup(ipst)) { 4683 /* 4684 * The ire cache entry we're adding is for the 4685 * gateway itself. 
The source address in this case 4686 * is relative to the gateway's address. 4687 */ 4688 ip6_asp_table_held = B_TRUE; 4689 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4690 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4691 if (src_ipif != NULL) 4692 ire_marks |= IRE_MARK_USESRC_CHECK; 4693 } else { 4694 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4695 /* 4696 * Check that the ipif matching the requested 4697 * source address still exists. 4698 */ 4699 src_ipif = ipif_lookup_addr_v6( 4700 &sire->ire_src_addr_v6, NULL, zoneid, 4701 NULL, NULL, NULL, NULL, ipst); 4702 } 4703 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4704 uint_t restrict_ill = RESTRICT_TO_NONE; 4705 4706 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4707 & IP6I_ATTACH_IF) 4708 restrict_ill = RESTRICT_TO_ILL; 4709 ip6_asp_table_held = B_TRUE; 4710 src_ipif = ipif_select_source_v6(dst_ill, 4711 v6dstp, restrict_ill, 4712 IPV6_PREFER_SRC_DEFAULT, zoneid); 4713 if (src_ipif != NULL) 4714 ire_marks |= IRE_MARK_USESRC_CHECK; 4715 } 4716 } 4717 4718 if (src_ipif == NULL) { 4719 if (ip_debug > 2) { 4720 /* ip1dbg */ 4721 pr_addr_dbg("ip_newroute_v6: no src for " 4722 "dst %s\n, ", AF_INET6, v6dstp); 4723 printf("ip_newroute_v6: interface name %s\n", 4724 dst_ill->ill_name); 4725 } 4726 goto icmp_err_ret; 4727 } 4728 4729 if (ip_debug > 3) { 4730 /* ip2dbg */ 4731 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4732 AF_INET6, &v6gw); 4733 } 4734 ip2dbg(("\tire type %s (%d)\n", 4735 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4736 4737 /* 4738 * At this point in ip_newroute_v6(), ire is either the 4739 * IRE_CACHE of the next-hop gateway for an off-subnet 4740 * destination or an IRE_INTERFACE type that should be used 4741 * to resolve an on-subnet destination or an on-subnet 4742 * next-hop gateway. 4743 * 4744 * In the IRE_CACHE case, we have the following : 4745 * 4746 * 1) src_ipif - used for getting a source address. 
4747 * 4748 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4749 * means packets using this IRE_CACHE will go out on dst_ill. 4750 * 4751 * 3) The IRE sire will point to the prefix that is the longest 4752 * matching route for the destination. These prefix types 4753 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4754 * 4755 * The newly created IRE_CACHE entry for the off-subnet 4756 * destination is tied to both the prefix route and the 4757 * interface route used to resolve the next-hop gateway 4758 * via the ire_phandle and ire_ihandle fields, respectively. 4759 * 4760 * In the IRE_INTERFACE case, we have the following : 4761 * 4762 * 1) src_ipif - used for getting a source address. 4763 * 4764 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4765 * means packets using the IRE_CACHE that we will build 4766 * here will go out on dst_ill. 4767 * 4768 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4769 * to be created will only be tied to the IRE_INTERFACE that 4770 * was derived from the ire_ihandle field. 4771 * 4772 * If sire is non-NULL, it means the destination is off-link 4773 * and we will first create the IRE_CACHE for the gateway. 4774 * Next time through ip_newroute_v6, we will create the 4775 * IRE_CACHE for the final destination as described above. 4776 */ 4777 save_ire = ire; 4778 switch (ire->ire_type) { 4779 case IRE_CACHE: { 4780 ire_t *ipif_ire; 4781 4782 ASSERT(sire != NULL); 4783 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4784 mutex_enter(&ire->ire_lock); 4785 v6gw = ire->ire_gateway_addr_v6; 4786 mutex_exit(&ire->ire_lock); 4787 } 4788 /* 4789 * We need 3 ire's to create a new cache ire for an 4790 * off-link destination from the cache ire of the 4791 * gateway. 4792 * 4793 * 1. The prefix ire 'sire' 4794 * 2. The cache ire of the gateway 'ire' 4795 * 3. The interface ire 'ipif_ire' 4796 * 4797 * We have (1) and (2). We lookup (3) below. 
4798 * 4799 * If there is no interface route to the gateway, 4800 * it is a race condition, where we found the cache 4801 * but the inteface route has been deleted. 4802 */ 4803 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4804 if (ipif_ire == NULL) { 4805 ip1dbg(("ip_newroute_v6:" 4806 "ire_ihandle_lookup_offlink_v6 failed\n")); 4807 goto icmp_err_ret; 4808 } 4809 /* 4810 * Assume DL_UNITDATA_REQ is same for all physical 4811 * interfaces in the ifgrp. If it isn't, this code will 4812 * have to be seriously rewhacked to allow the 4813 * fastpath probing (such that I cache the link 4814 * header in the IRE_CACHE) to work over ifgrps. 4815 * We have what we need to build an IRE_CACHE. 4816 */ 4817 /* 4818 * Note: the new ire inherits RTF_SETSRC 4819 * and RTF_MULTIRT to propagate these flags from prefix 4820 * to cache. 4821 */ 4822 4823 /* 4824 * Check cached gateway IRE for any security 4825 * attributes; if found, associate the gateway 4826 * credentials group to the destination IRE. 
4827 */ 4828 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4829 mutex_enter(&attrp->igsa_lock); 4830 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4831 GCGRP_REFHOLD(gcgrp); 4832 mutex_exit(&attrp->igsa_lock); 4833 } 4834 4835 ire = ire_create_v6( 4836 v6dstp, /* dest address */ 4837 &ipv6_all_ones, /* mask */ 4838 &src_ipif->ipif_v6src_addr, /* source address */ 4839 &v6gw, /* gateway address */ 4840 &save_ire->ire_max_frag, 4841 NULL, /* src nce */ 4842 dst_ill->ill_rq, /* recv-from queue */ 4843 dst_ill->ill_wq, /* send-to queue */ 4844 IRE_CACHE, 4845 src_ipif, 4846 &sire->ire_mask_v6, /* Parent mask */ 4847 sire->ire_phandle, /* Parent handle */ 4848 ipif_ire->ire_ihandle, /* Interface handle */ 4849 sire->ire_flags & /* flags if any */ 4850 (RTF_SETSRC | RTF_MULTIRT), 4851 &(sire->ire_uinfo), 4852 NULL, 4853 gcgrp, 4854 ipst); 4855 4856 if (ire == NULL) { 4857 if (gcgrp != NULL) { 4858 GCGRP_REFRELE(gcgrp); 4859 gcgrp = NULL; 4860 } 4861 ire_refrele(save_ire); 4862 ire_refrele(ipif_ire); 4863 break; 4864 } 4865 4866 /* reference now held by IRE */ 4867 gcgrp = NULL; 4868 4869 ire->ire_marks |= ire_marks; 4870 4871 /* 4872 * Prevent sire and ipif_ire from getting deleted. The 4873 * newly created ire is tied to both of them via the 4874 * phandle and ihandle respectively. 4875 */ 4876 IRB_REFHOLD(sire->ire_bucket); 4877 /* Has it been removed already ? */ 4878 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4879 IRB_REFRELE(sire->ire_bucket); 4880 ire_refrele(ipif_ire); 4881 ire_refrele(save_ire); 4882 break; 4883 } 4884 4885 IRB_REFHOLD(ipif_ire->ire_bucket); 4886 /* Has it been removed already ? 
*/ 4887 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4888 IRB_REFRELE(ipif_ire->ire_bucket); 4889 IRB_REFRELE(sire->ire_bucket); 4890 ire_refrele(ipif_ire); 4891 ire_refrele(save_ire); 4892 break; 4893 } 4894 4895 xmit_mp = first_mp; 4896 if (ire->ire_flags & RTF_MULTIRT) { 4897 copy_mp = copymsg(first_mp); 4898 if (copy_mp != NULL) { 4899 xmit_mp = copy_mp; 4900 MULTIRT_DEBUG_TAG(first_mp); 4901 } 4902 } 4903 ire_add_then_send(q, ire, xmit_mp); 4904 if (ip6_asp_table_held) { 4905 ip6_asp_table_refrele(ipst); 4906 ip6_asp_table_held = B_FALSE; 4907 } 4908 ire_refrele(save_ire); 4909 4910 /* Assert that sire is not deleted yet. */ 4911 ASSERT(sire->ire_ptpn != NULL); 4912 IRB_REFRELE(sire->ire_bucket); 4913 4914 /* Assert that ipif_ire is not deleted yet. */ 4915 ASSERT(ipif_ire->ire_ptpn != NULL); 4916 IRB_REFRELE(ipif_ire->ire_bucket); 4917 ire_refrele(ipif_ire); 4918 4919 if (copy_mp != NULL) { 4920 /* 4921 * Search for the next unresolved 4922 * multirt route. 4923 */ 4924 copy_mp = NULL; 4925 ipif_ire = NULL; 4926 ire = NULL; 4927 /* re-enter the loop */ 4928 multirt_resolve_next = B_TRUE; 4929 continue; 4930 } 4931 ire_refrele(sire); 4932 ill_refrele(dst_ill); 4933 ipif_refrele(src_ipif); 4934 return; 4935 } 4936 case IRE_IF_NORESOLVER: 4937 /* 4938 * We have what we need to build an IRE_CACHE. 4939 * 4940 * handle the Gated case, where we create 4941 * a NORESOLVER route for loopback. 4942 */ 4943 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4944 break; 4945 /* 4946 * TSol note: We are creating the ire cache for the 4947 * destination 'dst'. If 'dst' is offlink, going 4948 * through the first hop 'gw', the security attributes 4949 * of 'dst' must be set to point to the gateway 4950 * credentials of gateway 'gw'. If 'dst' is onlink, it 4951 * is possible that 'dst' is a potential gateway that is 4952 * referenced by some route that has some security 4953 * attributes. 
Thus in the former case, we need to do a 4954 * gcgrp_lookup of 'gw' while in the latter case we 4955 * need to do gcgrp_lookup of 'dst' itself. 4956 */ 4957 ga.ga_af = AF_INET6; 4958 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4959 ga.ga_addr = v6gw; 4960 else 4961 ga.ga_addr = *v6dstp; 4962 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4963 4964 /* 4965 * Note: the new ire inherits sire flags RTF_SETSRC 4966 * and RTF_MULTIRT to propagate those rules from prefix 4967 * to cache. 4968 */ 4969 ire = ire_create_v6( 4970 v6dstp, /* dest address */ 4971 &ipv6_all_ones, /* mask */ 4972 &src_ipif->ipif_v6src_addr, /* source address */ 4973 &v6gw, /* gateway address */ 4974 &save_ire->ire_max_frag, 4975 NULL, /* no src nce */ 4976 dst_ill->ill_rq, /* recv-from queue */ 4977 dst_ill->ill_wq, /* send-to queue */ 4978 IRE_CACHE, 4979 src_ipif, 4980 &save_ire->ire_mask_v6, /* Parent mask */ 4981 (sire != NULL) ? /* Parent handle */ 4982 sire->ire_phandle : 0, 4983 save_ire->ire_ihandle, /* Interface handle */ 4984 (sire != NULL) ? /* flags if any */ 4985 sire->ire_flags & 4986 (RTF_SETSRC | RTF_MULTIRT) : 0, 4987 &(save_ire->ire_uinfo), 4988 NULL, 4989 gcgrp, 4990 ipst); 4991 4992 if (ire == NULL) { 4993 if (gcgrp != NULL) { 4994 GCGRP_REFRELE(gcgrp); 4995 gcgrp = NULL; 4996 } 4997 ire_refrele(save_ire); 4998 break; 4999 } 5000 5001 /* reference now held by IRE */ 5002 gcgrp = NULL; 5003 5004 ire->ire_marks |= ire_marks; 5005 5006 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5007 dst = v6gw; 5008 else 5009 dst = *v6dstp; 5010 err = ndp_noresolver(dst_ill, &dst); 5011 if (err != 0) { 5012 ire_refrele(save_ire); 5013 break; 5014 } 5015 5016 /* Prevent save_ire from getting deleted */ 5017 IRB_REFHOLD(save_ire->ire_bucket); 5018 /* Has it been removed already ? 
*/ 5019 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5020 IRB_REFRELE(save_ire->ire_bucket); 5021 ire_refrele(save_ire); 5022 break; 5023 } 5024 5025 xmit_mp = first_mp; 5026 /* 5027 * In case of MULTIRT, a copy of the current packet 5028 * to send is made to further re-enter the 5029 * loop and attempt another route resolution 5030 */ 5031 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5032 copy_mp = copymsg(first_mp); 5033 if (copy_mp != NULL) { 5034 xmit_mp = copy_mp; 5035 MULTIRT_DEBUG_TAG(first_mp); 5036 } 5037 } 5038 ire_add_then_send(q, ire, xmit_mp); 5039 if (ip6_asp_table_held) { 5040 ip6_asp_table_refrele(ipst); 5041 ip6_asp_table_held = B_FALSE; 5042 } 5043 5044 /* Assert that it is not deleted yet. */ 5045 ASSERT(save_ire->ire_ptpn != NULL); 5046 IRB_REFRELE(save_ire->ire_bucket); 5047 ire_refrele(save_ire); 5048 5049 if (copy_mp != NULL) { 5050 /* 5051 * If we found a (no)resolver, we ignore any 5052 * trailing top priority IRE_CACHE in 5053 * further loops. This ensures that we do not 5054 * omit any (no)resolver despite the priority 5055 * in this call. 5056 * IRE_CACHE, if any, will be processed 5057 * by another thread entering ip_newroute(), 5058 * (on resolver response, for example). 5059 * We use this to force multiple parallel 5060 * resolution as soon as a packet needs to be 5061 * sent. The result is, after one packet 5062 * emission all reachable routes are generally 5063 * resolved. 5064 * Otherwise, complete resolution of MULTIRT 5065 * routes would require several emissions as 5066 * side effect. 5067 */ 5068 multirt_flags &= ~MULTIRT_CACHEGW; 5069 5070 /* 5071 * Search for the next unresolved multirt 5072 * route. 
5073 */ 5074 copy_mp = NULL; 5075 save_ire = NULL; 5076 ire = NULL; 5077 /* re-enter the loop */ 5078 multirt_resolve_next = B_TRUE; 5079 continue; 5080 } 5081 5082 /* Don't need sire anymore */ 5083 if (sire != NULL) 5084 ire_refrele(sire); 5085 ill_refrele(dst_ill); 5086 ipif_refrele(src_ipif); 5087 return; 5088 5089 case IRE_IF_RESOLVER: 5090 /* 5091 * We can't build an IRE_CACHE yet, but at least we 5092 * found a resolver that can help. 5093 */ 5094 dst = *v6dstp; 5095 5096 /* 5097 * To be at this point in the code with a non-zero gw 5098 * means that dst is reachable through a gateway that 5099 * we have never resolved. By changing dst to the gw 5100 * addr we resolve the gateway first. When 5101 * ire_add_then_send() tries to put the IP dg to dst, 5102 * it will reenter ip_newroute() at which time we will 5103 * find the IRE_CACHE for the gw and create another 5104 * IRE_CACHE above (for dst itself). 5105 */ 5106 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5107 save_dst = dst; 5108 dst = v6gw; 5109 v6gw = ipv6_all_zeros; 5110 } 5111 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5112 /* 5113 * Ask the external resolver to do its thing. 
5114 * Make an mblk chain in the following form: 5115 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5116 */ 5117 mblk_t *ire_mp; 5118 mblk_t *areq_mp; 5119 areq_t *areq; 5120 in6_addr_t *addrp; 5121 5122 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5123 if (ip6_asp_table_held) { 5124 ip6_asp_table_refrele(ipst); 5125 ip6_asp_table_held = B_FALSE; 5126 } 5127 ire = ire_create_mp_v6( 5128 &dst, /* dest address */ 5129 &ipv6_all_ones, /* mask */ 5130 &src_ipif->ipif_v6src_addr, 5131 /* source address */ 5132 &v6gw, /* gateway address */ 5133 NULL, /* no src nce */ 5134 dst_ill->ill_rq, /* recv-from queue */ 5135 dst_ill->ill_wq, /* send-to queue */ 5136 IRE_CACHE, 5137 src_ipif, 5138 &save_ire->ire_mask_v6, /* Parent mask */ 5139 0, 5140 save_ire->ire_ihandle, 5141 /* Interface handle */ 5142 0, /* flags if any */ 5143 &(save_ire->ire_uinfo), 5144 NULL, 5145 NULL, 5146 ipst); 5147 5148 ire_refrele(save_ire); 5149 if (ire == NULL) { 5150 ip1dbg(("ip_newroute_v6:" 5151 "ire is NULL\n")); 5152 break; 5153 } 5154 5155 if ((sire != NULL) && 5156 (sire->ire_flags & RTF_MULTIRT)) { 5157 /* 5158 * processing a copy of the packet to 5159 * send for further resolution loops 5160 */ 5161 copy_mp = copymsg(first_mp); 5162 if (copy_mp != NULL) 5163 MULTIRT_DEBUG_TAG(copy_mp); 5164 } 5165 ire->ire_marks |= ire_marks; 5166 ire_mp = ire->ire_mp; 5167 /* 5168 * Now create or find an nce for this interface. 5169 * The hw addr will need to to be set from 5170 * the reply to the AR_ENTRY_QUERY that 5171 * we're about to send. This will be done in 5172 * ire_add_v6(). 5173 */ 5174 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5175 switch (err) { 5176 case 0: 5177 /* 5178 * New cache entry created. 5179 * Break, then ask the external 5180 * resolver. 5181 */ 5182 break; 5183 case EINPROGRESS: 5184 /* 5185 * Resolution in progress; 5186 * packet has been queued by 5187 * ndp_resolver(). 5188 */ 5189 ire_delete(ire); 5190 ire = NULL; 5191 /* 5192 * Check if another multirt 5193 * route must be resolved. 
5194 */ 5195 if (copy_mp != NULL) { 5196 /* 5197 * If we found a resolver, we 5198 * ignore any trailing top 5199 * priority IRE_CACHE in 5200 * further loops. The reason is 5201 * the same as for noresolver. 5202 */ 5203 multirt_flags &= 5204 ~MULTIRT_CACHEGW; 5205 /* 5206 * Search for the next 5207 * unresolved multirt route. 5208 */ 5209 first_mp = copy_mp; 5210 copy_mp = NULL; 5211 mp = first_mp; 5212 if (mp->b_datap->db_type == 5213 M_CTL) { 5214 mp = mp->b_cont; 5215 } 5216 ASSERT(sire != NULL); 5217 dst = save_dst; 5218 /* 5219 * re-enter the loop 5220 */ 5221 multirt_resolve_next = 5222 B_TRUE; 5223 continue; 5224 } 5225 5226 if (sire != NULL) 5227 ire_refrele(sire); 5228 ill_refrele(dst_ill); 5229 ipif_refrele(src_ipif); 5230 return; 5231 default: 5232 /* 5233 * Transient error; packet will be 5234 * freed. 5235 */ 5236 ire_delete(ire); 5237 ire = NULL; 5238 break; 5239 } 5240 if (err != 0) 5241 break; 5242 /* 5243 * Now set up the AR_ENTRY_QUERY and send it. 5244 */ 5245 areq_mp = ill_arp_alloc(dst_ill, 5246 (uchar_t *)&ipv6_areq_template, 5247 (caddr_t)&dst); 5248 if (areq_mp == NULL) { 5249 ip1dbg(("ip_newroute_v6:" 5250 "areq_mp is NULL\n")); 5251 freemsg(ire_mp); 5252 break; 5253 } 5254 areq = (areq_t *)areq_mp->b_rptr; 5255 addrp = (in6_addr_t *)((char *)areq + 5256 areq->areq_target_addr_offset); 5257 *addrp = dst; 5258 addrp = (in6_addr_t *)((char *)areq + 5259 areq->areq_sender_addr_offset); 5260 *addrp = src_ipif->ipif_v6src_addr; 5261 /* 5262 * link the chain, then send up to the resolver. 5263 */ 5264 linkb(areq_mp, ire_mp); 5265 linkb(areq_mp, mp); 5266 ip1dbg(("ip_newroute_v6:" 5267 "putnext to resolver\n")); 5268 putnext(dst_ill->ill_rq, areq_mp); 5269 /* 5270 * Check if another multirt route 5271 * must be resolved. 5272 */ 5273 ire = NULL; 5274 if (copy_mp != NULL) { 5275 /* 5276 * If we find a resolver, we ignore any 5277 * trailing top priority IRE_CACHE in 5278 * further loops. The reason is the 5279 * same as for noresolver. 
5280 */ 5281 multirt_flags &= ~MULTIRT_CACHEGW; 5282 /* 5283 * Search for the next unresolved 5284 * multirt route. 5285 */ 5286 first_mp = copy_mp; 5287 copy_mp = NULL; 5288 mp = first_mp; 5289 if (mp->b_datap->db_type == M_CTL) { 5290 mp = mp->b_cont; 5291 } 5292 ASSERT(sire != NULL); 5293 dst = save_dst; 5294 /* 5295 * re-enter the loop 5296 */ 5297 multirt_resolve_next = B_TRUE; 5298 continue; 5299 } 5300 5301 if (sire != NULL) 5302 ire_refrele(sire); 5303 ill_refrele(dst_ill); 5304 ipif_refrele(src_ipif); 5305 return; 5306 } 5307 /* 5308 * Non-external resolver case. 5309 * 5310 * TSol note: Please see the note above the 5311 * IRE_IF_NORESOLVER case. 5312 */ 5313 ga.ga_af = AF_INET6; 5314 ga.ga_addr = dst; 5315 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5316 5317 ire = ire_create_v6( 5318 &dst, /* dest address */ 5319 &ipv6_all_ones, /* mask */ 5320 &src_ipif->ipif_v6src_addr, /* source address */ 5321 &v6gw, /* gateway address */ 5322 &save_ire->ire_max_frag, 5323 NULL, /* no src nce */ 5324 dst_ill->ill_rq, /* recv-from queue */ 5325 dst_ill->ill_wq, /* send-to queue */ 5326 IRE_CACHE, 5327 src_ipif, 5328 &save_ire->ire_mask_v6, /* Parent mask */ 5329 0, 5330 save_ire->ire_ihandle, /* Interface handle */ 5331 0, /* flags if any */ 5332 &(save_ire->ire_uinfo), 5333 NULL, 5334 gcgrp, 5335 ipst); 5336 5337 if (ire == NULL) { 5338 if (gcgrp != NULL) { 5339 GCGRP_REFRELE(gcgrp); 5340 gcgrp = NULL; 5341 } 5342 ire_refrele(save_ire); 5343 break; 5344 } 5345 5346 /* reference now held by IRE */ 5347 gcgrp = NULL; 5348 5349 if ((sire != NULL) && 5350 (sire->ire_flags & RTF_MULTIRT)) { 5351 copy_mp = copymsg(first_mp); 5352 if (copy_mp != NULL) 5353 MULTIRT_DEBUG_TAG(copy_mp); 5354 } 5355 5356 ire->ire_marks |= ire_marks; 5357 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5358 switch (err) { 5359 case 0: 5360 /* Prevent save_ire from getting deleted */ 5361 IRB_REFHOLD(save_ire->ire_bucket); 5362 /* Has it been removed already ? 
*/ 5363 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5364 IRB_REFRELE(save_ire->ire_bucket); 5365 ire_refrele(save_ire); 5366 break; 5367 } 5368 5369 /* 5370 * We have a resolved cache entry, 5371 * add in the IRE. 5372 */ 5373 ire_add_then_send(q, ire, first_mp); 5374 if (ip6_asp_table_held) { 5375 ip6_asp_table_refrele(ipst); 5376 ip6_asp_table_held = B_FALSE; 5377 } 5378 5379 /* Assert that it is not deleted yet. */ 5380 ASSERT(save_ire->ire_ptpn != NULL); 5381 IRB_REFRELE(save_ire->ire_bucket); 5382 ire_refrele(save_ire); 5383 /* 5384 * Check if another multirt route 5385 * must be resolved. 5386 */ 5387 ire = NULL; 5388 if (copy_mp != NULL) { 5389 /* 5390 * If we find a resolver, we ignore any 5391 * trailing top priority IRE_CACHE in 5392 * further loops. The reason is the 5393 * same as for noresolver. 5394 */ 5395 multirt_flags &= ~MULTIRT_CACHEGW; 5396 /* 5397 * Search for the next unresolved 5398 * multirt route. 5399 */ 5400 first_mp = copy_mp; 5401 copy_mp = NULL; 5402 mp = first_mp; 5403 if (mp->b_datap->db_type == M_CTL) { 5404 mp = mp->b_cont; 5405 } 5406 ASSERT(sire != NULL); 5407 dst = save_dst; 5408 /* 5409 * re-enter the loop 5410 */ 5411 multirt_resolve_next = B_TRUE; 5412 continue; 5413 } 5414 5415 if (sire != NULL) 5416 ire_refrele(sire); 5417 ill_refrele(dst_ill); 5418 ipif_refrele(src_ipif); 5419 return; 5420 5421 case EINPROGRESS: 5422 /* 5423 * mp was consumed - presumably queued. 5424 * No need for ire, presumably resolution is 5425 * in progress, and ire will be added when the 5426 * address is resolved. 5427 */ 5428 if (ip6_asp_table_held) { 5429 ip6_asp_table_refrele(ipst); 5430 ip6_asp_table_held = B_FALSE; 5431 } 5432 ASSERT(ire->ire_nce == NULL); 5433 ire_delete(ire); 5434 ire_refrele(save_ire); 5435 /* 5436 * Check if another multirt route 5437 * must be resolved. 
5438 */ 5439 ire = NULL; 5440 if (copy_mp != NULL) { 5441 /* 5442 * If we find a resolver, we ignore any 5443 * trailing top priority IRE_CACHE in 5444 * further loops. The reason is the 5445 * same as for noresolver. 5446 */ 5447 multirt_flags &= ~MULTIRT_CACHEGW; 5448 /* 5449 * Search for the next unresolved 5450 * multirt route. 5451 */ 5452 first_mp = copy_mp; 5453 copy_mp = NULL; 5454 mp = first_mp; 5455 if (mp->b_datap->db_type == M_CTL) { 5456 mp = mp->b_cont; 5457 } 5458 ASSERT(sire != NULL); 5459 dst = save_dst; 5460 /* 5461 * re-enter the loop 5462 */ 5463 multirt_resolve_next = B_TRUE; 5464 continue; 5465 } 5466 if (sire != NULL) 5467 ire_refrele(sire); 5468 ill_refrele(dst_ill); 5469 ipif_refrele(src_ipif); 5470 return; 5471 default: 5472 /* Some transient error */ 5473 ASSERT(ire->ire_nce == NULL); 5474 ire_refrele(save_ire); 5475 break; 5476 } 5477 break; 5478 default: 5479 break; 5480 } 5481 if (ip6_asp_table_held) { 5482 ip6_asp_table_refrele(ipst); 5483 ip6_asp_table_held = B_FALSE; 5484 } 5485 } while (multirt_resolve_next); 5486 5487 err_ret: 5488 ip1dbg(("ip_newroute_v6: dropped\n")); 5489 if (src_ipif != NULL) 5490 ipif_refrele(src_ipif); 5491 if (dst_ill != NULL) { 5492 need_rele = B_TRUE; 5493 ill = dst_ill; 5494 } 5495 if (ill != NULL) { 5496 if (mp->b_prev != NULL) { 5497 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5498 } else { 5499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5500 } 5501 5502 if (need_rele) 5503 ill_refrele(ill); 5504 } else { 5505 if (mp->b_prev != NULL) { 5506 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5507 } else { 5508 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5509 } 5510 } 5511 /* Did this packet originate externally? 
*/ 5512 if (mp->b_prev) { 5513 mp->b_next = NULL; 5514 mp->b_prev = NULL; 5515 } 5516 if (copy_mp != NULL) { 5517 MULTIRT_DEBUG_UNTAG(copy_mp); 5518 freemsg(copy_mp); 5519 } 5520 MULTIRT_DEBUG_UNTAG(first_mp); 5521 freemsg(first_mp); 5522 if (ire != NULL) 5523 ire_refrele(ire); 5524 if (sire != NULL) 5525 ire_refrele(sire); 5526 return; 5527 5528 icmp_err_ret: 5529 if (ip6_asp_table_held) 5530 ip6_asp_table_refrele(ipst); 5531 if (src_ipif != NULL) 5532 ipif_refrele(src_ipif); 5533 if (dst_ill != NULL) { 5534 need_rele = B_TRUE; 5535 ill = dst_ill; 5536 } 5537 ip1dbg(("ip_newroute_v6: no route\n")); 5538 if (sire != NULL) 5539 ire_refrele(sire); 5540 /* 5541 * We need to set sire to NULL to avoid double freeing if we 5542 * ever goto err_ret from below. 5543 */ 5544 sire = NULL; 5545 ip6h = (ip6_t *)mp->b_rptr; 5546 /* Skip ip6i_t header if present */ 5547 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5548 /* Make sure the IPv6 header is present */ 5549 if ((mp->b_wptr - (uchar_t *)ip6h) < 5550 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5551 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5552 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5553 goto err_ret; 5554 } 5555 } 5556 mp->b_rptr += sizeof (ip6i_t); 5557 ip6h = (ip6_t *)mp->b_rptr; 5558 } 5559 /* Did this packet originate externally? 
*/ 5560 if (mp->b_prev) { 5561 if (ill != NULL) { 5562 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5563 } else { 5564 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5565 } 5566 mp->b_next = NULL; 5567 mp->b_prev = NULL; 5568 q = WR(q); 5569 } else { 5570 if (ill != NULL) { 5571 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5572 } else { 5573 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5574 } 5575 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5576 /* Failed */ 5577 if (copy_mp != NULL) { 5578 MULTIRT_DEBUG_UNTAG(copy_mp); 5579 freemsg(copy_mp); 5580 } 5581 MULTIRT_DEBUG_UNTAG(first_mp); 5582 freemsg(first_mp); 5583 if (ire != NULL) 5584 ire_refrele(ire); 5585 if (need_rele) 5586 ill_refrele(ill); 5587 return; 5588 } 5589 } 5590 5591 if (need_rele) 5592 ill_refrele(ill); 5593 5594 /* 5595 * At this point we will have ire only if RTF_BLACKHOLE 5596 * or RTF_REJECT flags are set on the IRE. It will not 5597 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5598 */ 5599 if (ire != NULL) { 5600 if (ire->ire_flags & RTF_BLACKHOLE) { 5601 ire_refrele(ire); 5602 if (copy_mp != NULL) { 5603 MULTIRT_DEBUG_UNTAG(copy_mp); 5604 freemsg(copy_mp); 5605 } 5606 MULTIRT_DEBUG_UNTAG(first_mp); 5607 freemsg(first_mp); 5608 return; 5609 } 5610 ire_refrele(ire); 5611 } 5612 if (ip_debug > 3) { 5613 /* ip2dbg */ 5614 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5615 AF_INET6, v6dstp); 5616 } 5617 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5618 B_FALSE, B_FALSE, zoneid, ipst); 5619 } 5620 5621 /* 5622 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5623 * we need to send out a packet to a destination address for which we do not 5624 * have specific routing information. It is only used for multicast packets. 5625 * 5626 * If unspec_src we allow creating an IRE with source address zero. 5627 * ire_send_v6() will delete it after the packet is sent. 
5628 */ 5629 void 5630 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5631 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5632 { 5633 ire_t *ire = NULL; 5634 ipif_t *src_ipif = NULL; 5635 int err = 0; 5636 ill_t *dst_ill = NULL; 5637 ire_t *save_ire; 5638 ushort_t ire_marks = 0; 5639 ipsec_out_t *io; 5640 ill_t *attach_ill = NULL; 5641 ill_t *ill; 5642 ip6_t *ip6h; 5643 mblk_t *first_mp; 5644 boolean_t ip6i_present; 5645 ire_t *fire = NULL; 5646 mblk_t *copy_mp = NULL; 5647 boolean_t multirt_resolve_next; 5648 in6_addr_t *v6dstp = &v6dst; 5649 boolean_t ipif_held = B_FALSE; 5650 boolean_t ill_held = B_FALSE; 5651 boolean_t ip6_asp_table_held = B_FALSE; 5652 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5653 5654 /* 5655 * This loop is run only once in most cases. 5656 * We loop to resolve further routes only when the destination 5657 * can be reached through multiple RTF_MULTIRT-flagged ires. 5658 */ 5659 do { 5660 multirt_resolve_next = B_FALSE; 5661 if (dst_ill != NULL) { 5662 ill_refrele(dst_ill); 5663 dst_ill = NULL; 5664 } 5665 5666 if (src_ipif != NULL) { 5667 ipif_refrele(src_ipif); 5668 src_ipif = NULL; 5669 } 5670 ASSERT(ipif != NULL); 5671 ill = ipif->ipif_ill; 5672 5673 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5674 if (ip_debug > 2) { 5675 /* ip1dbg */ 5676 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5677 AF_INET6, v6dstp); 5678 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5679 ill->ill_name, ipif->ipif_isv6); 5680 } 5681 5682 first_mp = mp; 5683 if (mp->b_datap->db_type == M_CTL) { 5684 mp = mp->b_cont; 5685 io = (ipsec_out_t *)first_mp->b_rptr; 5686 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5687 } else { 5688 io = NULL; 5689 } 5690 5691 /* 5692 * If the interface is a pt-pt interface we look for an 5693 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5694 * local_address and the pt-pt destination address. 5695 * Otherwise we just match the local address. 
5696 */ 5697 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5698 goto err_ret; 5699 } 5700 /* 5701 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5702 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5703 * as it could be NULL. 5704 * 5705 * This information can appear either in an ip6i_t or an 5706 * IPSEC_OUT message. 5707 */ 5708 ip6h = (ip6_t *)mp->b_rptr; 5709 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5710 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5711 if (!ip6i_present || 5712 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5713 attach_ill = ip_grab_attach_ill(ill, first_mp, 5714 (ip6i_present ? 5715 ((ip6i_t *)ip6h)->ip6i_ifindex : 5716 io->ipsec_out_ill_index), B_TRUE, ipst); 5717 /* Failure case frees things for us. */ 5718 if (attach_ill == NULL) 5719 return; 5720 5721 /* 5722 * Check if we need an ire that will not be 5723 * looked up by anybody else i.e. HIDDEN. 5724 */ 5725 if (ill_is_probeonly(attach_ill)) 5726 ire_marks = IRE_MARK_HIDDEN; 5727 } 5728 } 5729 5730 /* 5731 * We check if an IRE_OFFSUBNET for the addr that goes through 5732 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5733 * RTF_MULTIRT flags must be honored. 5734 */ 5735 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5736 ip2dbg(("ip_newroute_ipif_v6: " 5737 "ipif_lookup_multi_ire_v6(" 5738 "ipif %p, dst %08x) = fire %p\n", 5739 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5740 (void *)fire)); 5741 5742 /* 5743 * If the application specified the ill (ifindex), we still 5744 * load spread. Only if the packets needs to go out specifically 5745 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5746 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5747 * multirouting, then we don't try to use a different ill for 5748 * load spreading. 
5749 */ 5750 if (attach_ill == NULL) { 5751 /* 5752 * If the interface belongs to an interface group, 5753 * make sure the next possible interface in the group 5754 * is used. This encourages load spreading among peers 5755 * in an interface group. 5756 * 5757 * Note: While we pick a dst_ill we are really only 5758 * interested in the ill for load spreading. The source 5759 * ipif is determined by source address selection below. 5760 */ 5761 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5762 dst_ill = ipif->ipif_ill; 5763 /* For uniformity do a refhold */ 5764 ill_refhold(dst_ill); 5765 } else { 5766 /* refheld by ip_newroute_get_dst_ill_v6 */ 5767 dst_ill = 5768 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5769 } 5770 if (dst_ill == NULL) { 5771 if (ip_debug > 2) { 5772 pr_addr_dbg("ip_newroute_ipif_v6: " 5773 "no dst ill for dst %s\n", 5774 AF_INET6, v6dstp); 5775 } 5776 goto err_ret; 5777 } 5778 } else { 5779 dst_ill = ipif->ipif_ill; 5780 /* 5781 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5782 * and IPV6_BOUND_PIF case. 5783 */ 5784 ASSERT(dst_ill == attach_ill); 5785 /* attach_ill is already refheld */ 5786 } 5787 /* 5788 * Pick a source address which matches the scope of the 5789 * destination address. 5790 * For RTF_SETSRC routes, the source address is imposed by the 5791 * parent ire (fire). 5792 */ 5793 ASSERT(src_ipif == NULL); 5794 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5795 /* 5796 * Check that the ipif matching the requested source 5797 * address still exists. 
5798 */ 5799 src_ipif = 5800 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5801 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5802 } 5803 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5804 ip6_asp_table_held = B_TRUE; 5805 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5806 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5807 } 5808 5809 if (src_ipif == NULL) { 5810 if (!unspec_src) { 5811 if (ip_debug > 2) { 5812 /* ip1dbg */ 5813 pr_addr_dbg("ip_newroute_ipif_v6: " 5814 "no src for dst %s\n,", 5815 AF_INET6, v6dstp); 5816 printf(" through interface %s\n", 5817 dst_ill->ill_name); 5818 } 5819 goto err_ret; 5820 } 5821 src_ipif = ipif; 5822 ipif_refhold(src_ipif); 5823 } 5824 ire = ipif_to_ire_v6(ipif); 5825 if (ire == NULL) { 5826 if (ip_debug > 2) { 5827 /* ip1dbg */ 5828 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5829 AF_INET6, &ipif->ipif_v6lcl_addr); 5830 printf("ip_newroute_ipif_v6: " 5831 "if %s\n", dst_ill->ill_name); 5832 } 5833 goto err_ret; 5834 } 5835 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5836 goto err_ret; 5837 5838 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5839 5840 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5841 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5842 if (ip_debug > 2) { 5843 /* ip1dbg */ 5844 pr_addr_dbg(" address %s\n", 5845 AF_INET6, &ire->ire_src_addr_v6); 5846 } 5847 save_ire = ire; 5848 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5849 (void *)ire, (void *)ipif)); 5850 5851 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5852 /* 5853 * an IRE_OFFSUBET was looked up 5854 * on that interface. 5855 * this ire has RTF_MULTIRT flag, 5856 * so the resolution loop 5857 * will be re-entered to resolve 5858 * additional routes on other 5859 * interfaces. For that purpose, 5860 * a copy of the packet is 5861 * made at this point. 
5862 */ 5863 fire->ire_last_used_time = lbolt; 5864 copy_mp = copymsg(first_mp); 5865 if (copy_mp) { 5866 MULTIRT_DEBUG_TAG(copy_mp); 5867 } 5868 } 5869 5870 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5871 switch (ire->ire_type) { 5872 case IRE_IF_NORESOLVER: { 5873 /* 5874 * We have what we need to build an IRE_CACHE. 5875 * 5876 * handle the Gated case, where we create 5877 * a NORESOLVER route for loopback. 5878 */ 5879 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5880 break; 5881 /* 5882 * The newly created ire will inherit the flags of the 5883 * parent ire, if any. 5884 */ 5885 ire = ire_create_v6( 5886 v6dstp, /* dest address */ 5887 &ipv6_all_ones, /* mask */ 5888 &src_ipif->ipif_v6src_addr, /* source address */ 5889 NULL, /* gateway address */ 5890 &save_ire->ire_max_frag, 5891 NULL, /* no src nce */ 5892 dst_ill->ill_rq, /* recv-from queue */ 5893 dst_ill->ill_wq, /* send-to queue */ 5894 IRE_CACHE, 5895 src_ipif, 5896 NULL, 5897 (fire != NULL) ? /* Parent handle */ 5898 fire->ire_phandle : 0, 5899 save_ire->ire_ihandle, /* Interface handle */ 5900 (fire != NULL) ? 5901 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5902 0, 5903 &ire_uinfo_null, 5904 NULL, 5905 NULL, 5906 ipst); 5907 5908 if (ire == NULL) { 5909 ire_refrele(save_ire); 5910 break; 5911 } 5912 5913 ire->ire_marks |= ire_marks; 5914 5915 err = ndp_noresolver(dst_ill, v6dstp); 5916 if (err != 0) { 5917 ire_refrele(save_ire); 5918 break; 5919 } 5920 5921 /* Prevent save_ire from getting deleted */ 5922 IRB_REFHOLD(save_ire->ire_bucket); 5923 /* Has it been removed already ? */ 5924 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5925 IRB_REFRELE(save_ire->ire_bucket); 5926 ire_refrele(save_ire); 5927 break; 5928 } 5929 5930 ire_add_then_send(q, ire, first_mp); 5931 if (ip6_asp_table_held) { 5932 ip6_asp_table_refrele(ipst); 5933 ip6_asp_table_held = B_FALSE; 5934 } 5935 5936 /* Assert that it is not deleted yet. 
*/ 5937 ASSERT(save_ire->ire_ptpn != NULL); 5938 IRB_REFRELE(save_ire->ire_bucket); 5939 ire_refrele(save_ire); 5940 if (fire != NULL) { 5941 ire_refrele(fire); 5942 fire = NULL; 5943 } 5944 5945 /* 5946 * The resolution loop is re-entered if we 5947 * actually are in a multirouting case. 5948 */ 5949 if (copy_mp != NULL) { 5950 boolean_t need_resolve = 5951 ire_multirt_need_resolve_v6(v6dstp, 5952 MBLK_GETLABEL(copy_mp), ipst); 5953 if (!need_resolve) { 5954 MULTIRT_DEBUG_UNTAG(copy_mp); 5955 freemsg(copy_mp); 5956 copy_mp = NULL; 5957 } else { 5958 /* 5959 * ipif_lookup_group_v6() calls 5960 * ire_lookup_multi_v6() that uses 5961 * ire_ftable_lookup_v6() to find 5962 * an IRE_INTERFACE for the group. 5963 * In the multirt case, 5964 * ire_lookup_multi_v6() then invokes 5965 * ire_multirt_lookup_v6() to find 5966 * the next resolvable ire. 5967 * As a result, we obtain a new 5968 * interface, derived from the 5969 * next ire. 5970 */ 5971 if (ipif_held) { 5972 ipif_refrele(ipif); 5973 ipif_held = B_FALSE; 5974 } 5975 ipif = ipif_lookup_group_v6(v6dstp, 5976 zoneid, ipst); 5977 ip2dbg(("ip_newroute_ipif: " 5978 "multirt dst %08x, ipif %p\n", 5979 ntohl(V4_PART_OF_V6((*v6dstp))), 5980 (void *)ipif)); 5981 if (ipif != NULL) { 5982 ipif_held = B_TRUE; 5983 mp = copy_mp; 5984 copy_mp = NULL; 5985 multirt_resolve_next = 5986 B_TRUE; 5987 continue; 5988 } else { 5989 freemsg(copy_mp); 5990 } 5991 } 5992 } 5993 ill_refrele(dst_ill); 5994 if (ipif_held) { 5995 ipif_refrele(ipif); 5996 ipif_held = B_FALSE; 5997 } 5998 if (src_ipif != NULL) 5999 ipif_refrele(src_ipif); 6000 return; 6001 } 6002 case IRE_IF_RESOLVER: { 6003 6004 ASSERT(dst_ill->ill_isv6); 6005 6006 /* 6007 * We obtain a partial IRE_CACHE which we will pass 6008 * along with the resolver query. When the response 6009 * comes back it will be there ready for us to add. 6010 */ 6011 /* 6012 * the newly created ire will inherit the flags of the 6013 * parent ire, if any. 
6014 */ 6015 ire = ire_create_v6( 6016 v6dstp, /* dest address */ 6017 &ipv6_all_ones, /* mask */ 6018 &src_ipif->ipif_v6src_addr, /* source address */ 6019 NULL, /* gateway address */ 6020 &save_ire->ire_max_frag, 6021 NULL, /* src nce */ 6022 dst_ill->ill_rq, /* recv-from queue */ 6023 dst_ill->ill_wq, /* send-to queue */ 6024 IRE_CACHE, 6025 src_ipif, 6026 NULL, 6027 (fire != NULL) ? /* Parent handle */ 6028 fire->ire_phandle : 0, 6029 save_ire->ire_ihandle, /* Interface handle */ 6030 (fire != NULL) ? 6031 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6032 0, 6033 &ire_uinfo_null, 6034 NULL, 6035 NULL, 6036 ipst); 6037 6038 if (ire == NULL) { 6039 ire_refrele(save_ire); 6040 break; 6041 } 6042 6043 ire->ire_marks |= ire_marks; 6044 6045 /* Resolve and add ire to the ctable */ 6046 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6047 switch (err) { 6048 case 0: 6049 /* Prevent save_ire from getting deleted */ 6050 IRB_REFHOLD(save_ire->ire_bucket); 6051 /* Has it been removed already ? */ 6052 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6053 IRB_REFRELE(save_ire->ire_bucket); 6054 ire_refrele(save_ire); 6055 break; 6056 } 6057 /* 6058 * We have a resolved cache entry, 6059 * add in the IRE. 6060 */ 6061 ire_add_then_send(q, ire, first_mp); 6062 if (ip6_asp_table_held) { 6063 ip6_asp_table_refrele(ipst); 6064 ip6_asp_table_held = B_FALSE; 6065 } 6066 6067 /* Assert that it is not deleted yet. */ 6068 ASSERT(save_ire->ire_ptpn != NULL); 6069 IRB_REFRELE(save_ire->ire_bucket); 6070 ire_refrele(save_ire); 6071 if (fire != NULL) { 6072 ire_refrele(fire); 6073 fire = NULL; 6074 } 6075 6076 /* 6077 * The resolution loop is re-entered if we 6078 * actually are in a multirouting case. 
6079 */ 6080 if (copy_mp != NULL) { 6081 boolean_t need_resolve = 6082 ire_multirt_need_resolve_v6(v6dstp, 6083 MBLK_GETLABEL(copy_mp), ipst); 6084 if (!need_resolve) { 6085 MULTIRT_DEBUG_UNTAG(copy_mp); 6086 freemsg(copy_mp); 6087 copy_mp = NULL; 6088 } else { 6089 /* 6090 * ipif_lookup_group_v6() calls 6091 * ire_lookup_multi_v6() that 6092 * uses ire_ftable_lookup_v6() 6093 * to find an IRE_INTERFACE for 6094 * the group. In the multirt 6095 * case, ire_lookup_multi_v6() 6096 * then invokes 6097 * ire_multirt_lookup_v6() to 6098 * find the next resolvable ire. 6099 * As a result, we obtain a new 6100 * interface, derived from the 6101 * next ire. 6102 */ 6103 if (ipif_held) { 6104 ipif_refrele(ipif); 6105 ipif_held = B_FALSE; 6106 } 6107 ipif = ipif_lookup_group_v6( 6108 v6dstp, zoneid, ipst); 6109 ip2dbg(("ip_newroute_ipif: " 6110 "multirt dst %08x, " 6111 "ipif %p\n", 6112 ntohl(V4_PART_OF_V6( 6113 (*v6dstp))), 6114 (void *)ipif)); 6115 if (ipif != NULL) { 6116 ipif_held = B_TRUE; 6117 mp = copy_mp; 6118 copy_mp = NULL; 6119 multirt_resolve_next = 6120 B_TRUE; 6121 continue; 6122 } else { 6123 freemsg(copy_mp); 6124 } 6125 } 6126 } 6127 ill_refrele(dst_ill); 6128 if (ipif_held) { 6129 ipif_refrele(ipif); 6130 ipif_held = B_FALSE; 6131 } 6132 if (src_ipif != NULL) 6133 ipif_refrele(src_ipif); 6134 return; 6135 6136 case EINPROGRESS: 6137 /* 6138 * mp was consumed - presumably queued. 6139 * No need for ire, presumably resolution is 6140 * in progress, and ire will be added when the 6141 * address is resolved. 6142 */ 6143 if (ip6_asp_table_held) { 6144 ip6_asp_table_refrele(ipst); 6145 ip6_asp_table_held = B_FALSE; 6146 } 6147 ire_delete(ire); 6148 ire_refrele(save_ire); 6149 if (fire != NULL) { 6150 ire_refrele(fire); 6151 fire = NULL; 6152 } 6153 6154 /* 6155 * The resolution loop is re-entered if we 6156 * actually are in a multirouting case. 
6157 */ 6158 if (copy_mp != NULL) { 6159 boolean_t need_resolve = 6160 ire_multirt_need_resolve_v6(v6dstp, 6161 MBLK_GETLABEL(copy_mp), ipst); 6162 if (!need_resolve) { 6163 MULTIRT_DEBUG_UNTAG(copy_mp); 6164 freemsg(copy_mp); 6165 copy_mp = NULL; 6166 } else { 6167 /* 6168 * ipif_lookup_group_v6() calls 6169 * ire_lookup_multi_v6() that 6170 * uses ire_ftable_lookup_v6() 6171 * to find an IRE_INTERFACE for 6172 * the group. In the multirt 6173 * case, ire_lookup_multi_v6() 6174 * then invokes 6175 * ire_multirt_lookup_v6() to 6176 * find the next resolvable ire. 6177 * As a result, we obtain a new 6178 * interface, derived from the 6179 * next ire. 6180 */ 6181 if (ipif_held) { 6182 ipif_refrele(ipif); 6183 ipif_held = B_FALSE; 6184 } 6185 ipif = ipif_lookup_group_v6( 6186 v6dstp, zoneid, ipst); 6187 ip2dbg(("ip_newroute_ipif: " 6188 "multirt dst %08x, " 6189 "ipif %p\n", 6190 ntohl(V4_PART_OF_V6( 6191 (*v6dstp))), 6192 (void *)ipif)); 6193 if (ipif != NULL) { 6194 ipif_held = B_TRUE; 6195 mp = copy_mp; 6196 copy_mp = NULL; 6197 multirt_resolve_next = 6198 B_TRUE; 6199 continue; 6200 } else { 6201 freemsg(copy_mp); 6202 } 6203 } 6204 } 6205 ill_refrele(dst_ill); 6206 if (ipif_held) { 6207 ipif_refrele(ipif); 6208 ipif_held = B_FALSE; 6209 } 6210 if (src_ipif != NULL) 6211 ipif_refrele(src_ipif); 6212 return; 6213 default: 6214 /* Some transient error */ 6215 ire_refrele(save_ire); 6216 break; 6217 } 6218 break; 6219 } 6220 default: 6221 break; 6222 } 6223 if (ip6_asp_table_held) { 6224 ip6_asp_table_refrele(ipst); 6225 ip6_asp_table_held = B_FALSE; 6226 } 6227 } while (multirt_resolve_next); 6228 6229 err_ret: 6230 if (ip6_asp_table_held) 6231 ip6_asp_table_refrele(ipst); 6232 if (ire != NULL) 6233 ire_refrele(ire); 6234 if (fire != NULL) 6235 ire_refrele(fire); 6236 if (ipif != NULL && ipif_held) 6237 ipif_refrele(ipif); 6238 if (src_ipif != NULL) 6239 ipif_refrele(src_ipif); 6240 /* Multicast - no point in trying to generate ICMP error */ 6241 ASSERT((attach_ill 
== NULL) || (dst_ill == attach_ill)); 6242 if (dst_ill != NULL) { 6243 ill = dst_ill; 6244 ill_held = B_TRUE; 6245 } 6246 if (mp->b_prev || mp->b_next) { 6247 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6248 } else { 6249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6250 } 6251 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6252 mp->b_next = NULL; 6253 mp->b_prev = NULL; 6254 freemsg(first_mp); 6255 if (ill_held) 6256 ill_refrele(ill); 6257 } 6258 6259 /* 6260 * Parse and process any hop-by-hop or destination options. 6261 * 6262 * Assumes that q is an ill read queue so that ICMP errors for link-local 6263 * destinations are sent out the correct interface. 6264 * 6265 * Returns -1 if there was an error and mp has been consumed. 6266 * Returns 0 if no special action is needed. 6267 * Returns 1 if the packet contained a router alert option for this node 6268 * which is verified to be "interesting/known" for our implementation. 6269 * 6270 * XXX Note: In future as more hbh or dest options are defined, 6271 * it may be better to have different routines for hbh and dest 6272 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6273 * may have same value in different namespaces. Or is it same namespace ?? 
6274 * Current code checks for each opt_type (other than pads) if it is in 6275 * the expected nexthdr (hbh or dest) 6276 */ 6277 static int 6278 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6279 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6280 { 6281 uint8_t opt_type; 6282 uint_t optused; 6283 int ret = 0; 6284 mblk_t *first_mp; 6285 const char *errtype; 6286 zoneid_t zoneid; 6287 ill_t *ill = q->q_ptr; 6288 6289 first_mp = mp; 6290 if (mp->b_datap->db_type == M_CTL) { 6291 mp = mp->b_cont; 6292 } 6293 6294 while (optlen != 0) { 6295 opt_type = *optptr; 6296 if (opt_type == IP6OPT_PAD1) { 6297 optused = 1; 6298 } else { 6299 if (optlen < 2) 6300 goto bad_opt; 6301 errtype = "malformed"; 6302 if (opt_type == ip6opt_ls) { 6303 optused = 2 + optptr[1]; 6304 if (optused > optlen) 6305 goto bad_opt; 6306 } else switch (opt_type) { 6307 case IP6OPT_PADN: 6308 /* 6309 * Note:We don't verify that (N-2) pad octets 6310 * are zero as required by spec. Adhere to 6311 * "be liberal in what you accept..." part of 6312 * implementation philosophy (RFC791,RFC1122) 6313 */ 6314 optused = 2 + optptr[1]; 6315 if (optused > optlen) 6316 goto bad_opt; 6317 break; 6318 6319 case IP6OPT_JUMBO: 6320 if (hdr_type != IPPROTO_HOPOPTS) 6321 goto opt_error; 6322 goto opt_error; /* XXX Not implemented! 
*/ 6323 6324 case IP6OPT_ROUTER_ALERT: { 6325 struct ip6_opt_router *or; 6326 6327 if (hdr_type != IPPROTO_HOPOPTS) 6328 goto opt_error; 6329 optused = 2 + optptr[1]; 6330 if (optused > optlen) 6331 goto bad_opt; 6332 or = (struct ip6_opt_router *)optptr; 6333 /* Check total length and alignment */ 6334 if (optused != sizeof (*or) || 6335 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6336 goto opt_error; 6337 /* Check value */ 6338 switch (*((uint16_t *)or->ip6or_value)) { 6339 case IP6_ALERT_MLD: 6340 case IP6_ALERT_RSVP: 6341 ret = 1; 6342 } 6343 break; 6344 } 6345 case IP6OPT_HOME_ADDRESS: { 6346 /* 6347 * Minimal support for the home address option 6348 * (which is required by all IPv6 nodes). 6349 * Implement by just swapping the home address 6350 * and source address. 6351 * XXX Note: this has IPsec implications since 6352 * AH needs to take this into account. 6353 * Also, when IPsec is used we need to ensure 6354 * that this is only processed once 6355 * in the received packet (to avoid swapping 6356 * back and forth). 6357 * NOTE:This option processing is considered 6358 * to be unsafe and prone to a denial of 6359 * service attack. 6360 * The current processing is not safe even with 6361 * IPsec secured IP packets. Since the home 6362 * address option processing requirement still 6363 * is in the IETF draft and in the process of 6364 * being redefined for its usage, it has been 6365 * decided to turn off the option by default. 6366 * If this section of code needs to be executed, 6367 * ndd variable ip6_ignore_home_address_opt 6368 * should be set to 0 at the user's own risk. 6369 */ 6370 struct ip6_opt_home_address *oh; 6371 in6_addr_t tmp; 6372 6373 if (ipst->ips_ipv6_ignore_home_address_opt) 6374 goto opt_error; 6375 6376 if (hdr_type != IPPROTO_DSTOPTS) 6377 goto opt_error; 6378 optused = 2 + optptr[1]; 6379 if (optused > optlen) 6380 goto bad_opt; 6381 6382 /* 6383 * We did this dest. opt the first time 6384 * around (i.e. before AH processing). 
6385 * If we've done AH... stop now. 6386 */ 6387 if (first_mp != mp) { 6388 ipsec_in_t *ii; 6389 6390 ii = (ipsec_in_t *)first_mp->b_rptr; 6391 if (ii->ipsec_in_ah_sa != NULL) 6392 break; 6393 } 6394 6395 oh = (struct ip6_opt_home_address *)optptr; 6396 /* Check total length and alignment */ 6397 if (optused < sizeof (*oh) || 6398 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6399 goto opt_error; 6400 /* Swap ip6_src and the home address */ 6401 tmp = ip6h->ip6_src; 6402 /* XXX Note: only 8 byte alignment option */ 6403 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6404 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6405 break; 6406 } 6407 6408 case IP6OPT_TUNNEL_LIMIT: 6409 if (hdr_type != IPPROTO_DSTOPTS) { 6410 goto opt_error; 6411 } 6412 optused = 2 + optptr[1]; 6413 if (optused > optlen) { 6414 goto bad_opt; 6415 } 6416 if (optused != 3) { 6417 goto opt_error; 6418 } 6419 break; 6420 6421 default: 6422 errtype = "unknown"; 6423 /* FALLTHROUGH */ 6424 opt_error: 6425 /* Determine which zone should send error */ 6426 zoneid = ipif_lookup_addr_zoneid_v6( 6427 &ip6h->ip6_dst, ill, ipst); 6428 switch (IP6OPT_TYPE(opt_type)) { 6429 case IP6OPT_TYPE_SKIP: 6430 optused = 2 + optptr[1]; 6431 if (optused > optlen) 6432 goto bad_opt; 6433 ip1dbg(("ip_process_options_v6: %s " 6434 "opt 0x%x skipped\n", 6435 errtype, opt_type)); 6436 break; 6437 case IP6OPT_TYPE_DISCARD: 6438 ip1dbg(("ip_process_options_v6: %s " 6439 "opt 0x%x; packet dropped\n", 6440 errtype, opt_type)); 6441 freemsg(first_mp); 6442 return (-1); 6443 case IP6OPT_TYPE_ICMP: 6444 if (zoneid == ALL_ZONES) { 6445 freemsg(first_mp); 6446 return (-1); 6447 } 6448 icmp_param_problem_v6(WR(q), first_mp, 6449 ICMP6_PARAMPROB_OPTION, 6450 (uint32_t)(optptr - 6451 (uint8_t *)ip6h), 6452 B_FALSE, B_FALSE, zoneid, ipst); 6453 return (-1); 6454 case IP6OPT_TYPE_FORCEICMP: 6455 if (zoneid == ALL_ZONES) { 6456 freemsg(first_mp); 6457 return (-1); 6458 } 6459 icmp_param_problem_v6(WR(q), first_mp, 6460 ICMP6_PARAMPROB_OPTION, 6461 
(uint32_t)(optptr - 6462 (uint8_t *)ip6h), 6463 B_FALSE, B_TRUE, zoneid, ipst); 6464 return (-1); 6465 default: 6466 ASSERT(0); 6467 } 6468 } 6469 } 6470 optlen -= optused; 6471 optptr += optused; 6472 } 6473 return (ret); 6474 6475 bad_opt: 6476 /* Determine which zone should send error */ 6477 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6478 if (zoneid == ALL_ZONES) { 6479 freemsg(first_mp); 6480 } else { 6481 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6482 (uint32_t)(optptr - (uint8_t *)ip6h), 6483 B_FALSE, B_FALSE, zoneid, ipst); 6484 } 6485 return (-1); 6486 } 6487 6488 /* 6489 * Process a routing header that is not yet empty. 6490 * Only handles type 0 routing headers. 6491 */ 6492 static void 6493 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6494 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6495 { 6496 ip6_rthdr0_t *rthdr; 6497 uint_t ehdrlen; 6498 uint_t numaddr; 6499 in6_addr_t *addrptr; 6500 in6_addr_t tmp; 6501 ip_stack_t *ipst = ill->ill_ipst; 6502 6503 ASSERT(rth->ip6r_segleft != 0); 6504 6505 if (!ipst->ips_ipv6_forward_src_routed) { 6506 /* XXX Check for source routed out same interface? 
*/ 6507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6508 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6509 freemsg(hada_mp); 6510 freemsg(mp); 6511 return; 6512 } 6513 6514 if (rth->ip6r_type != 0) { 6515 if (hada_mp != NULL) 6516 goto hada_drop; 6517 /* Sent by forwarding path, and router is global zone */ 6518 icmp_param_problem_v6(WR(q), mp, 6519 ICMP6_PARAMPROB_HEADER, 6520 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6521 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6522 return; 6523 } 6524 rthdr = (ip6_rthdr0_t *)rth; 6525 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6526 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6527 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6528 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6529 if (rthdr->ip6r0_len & 0x1) { 6530 /* An odd length is impossible */ 6531 if (hada_mp != NULL) 6532 goto hada_drop; 6533 /* Sent by forwarding path, and router is global zone */ 6534 icmp_param_problem_v6(WR(q), mp, 6535 ICMP6_PARAMPROB_HEADER, 6536 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6537 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6538 return; 6539 } 6540 numaddr = rthdr->ip6r0_len / 2; 6541 if (rthdr->ip6r0_segleft > numaddr) { 6542 /* segleft exceeds number of addresses in routing header */ 6543 if (hada_mp != NULL) 6544 goto hada_drop; 6545 /* Sent by forwarding path, and router is global zone */ 6546 icmp_param_problem_v6(WR(q), mp, 6547 ICMP6_PARAMPROB_HEADER, 6548 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6549 (uchar_t *)ip6h), 6550 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6551 return; 6552 } 6553 addrptr += (numaddr - rthdr->ip6r0_segleft); 6554 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6555 IN6_IS_ADDR_MULTICAST(addrptr)) { 6556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6557 freemsg(hada_mp); 6558 freemsg(mp); 6559 return; 6560 } 6561 /* Swap */ 6562 tmp = *addrptr; 6563 *addrptr = ip6h->ip6_dst; 6564 ip6h->ip6_dst = tmp; 6565 rthdr->ip6r0_segleft--; 
6566 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6567 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6568 if (hada_mp != NULL) 6569 goto hada_drop; 6570 /* Sent by forwarding path, and router is global zone */ 6571 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6572 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6573 return; 6574 } 6575 if (ip_check_v6_mblk(mp, ill) == 0) { 6576 ip6h = (ip6_t *)mp->b_rptr; 6577 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6578 } 6579 return; 6580 hada_drop: 6581 /* IPsec kstats: bean counter? */ 6582 freemsg(hada_mp); 6583 freemsg(mp); 6584 } 6585 6586 /* 6587 * Read side put procedure for IPv6 module. 6588 */ 6589 void 6590 ip_rput_v6(queue_t *q, mblk_t *mp) 6591 { 6592 mblk_t *first_mp; 6593 mblk_t *hada_mp = NULL; 6594 ip6_t *ip6h; 6595 boolean_t ll_multicast = B_FALSE; 6596 boolean_t mctl_present = B_FALSE; 6597 ill_t *ill; 6598 struct iocblk *iocp; 6599 uint_t flags = 0; 6600 mblk_t *dl_mp; 6601 ip_stack_t *ipst; 6602 6603 ill = (ill_t *)q->q_ptr; 6604 ipst = ill->ill_ipst; 6605 if (ill->ill_state_flags & ILL_CONDEMNED) { 6606 union DL_primitives *dl; 6607 6608 dl = (union DL_primitives *)mp->b_rptr; 6609 /* 6610 * Things are opening or closing - only accept DLPI 6611 * ack messages. If the stream is closing and ip_wsrv 6612 * has completed, ip_close is out of the qwait, but has 6613 * not yet completed qprocsoff. Don't proceed any further 6614 * because the ill has been cleaned up and things hanging 6615 * off the ill have been freed. 6616 */ 6617 if ((mp->b_datap->db_type != M_PCPROTO) || 6618 (dl->dl_primitive == DL_UNITDATA_IND)) { 6619 inet_freemsg(mp); 6620 return; 6621 } 6622 } 6623 6624 dl_mp = NULL; 6625 switch (mp->b_datap->db_type) { 6626 case M_DATA: { 6627 int hlen; 6628 uchar_t *ucp; 6629 struct ether_header *eh; 6630 dl_unitdata_ind_t *dui; 6631 6632 /* 6633 * This is a work-around for CR 6451644, a bug in Nemo. It 6634 * should be removed when that problem is fixed. 
6635 */ 6636 if (ill->ill_mactype == DL_ETHER && 6637 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6638 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6639 ucp[-2] == (IP6_DL_SAP >> 8)) { 6640 if (hlen >= sizeof (struct ether_vlan_header) && 6641 ucp[-5] == 0 && ucp[-6] == 0x81) 6642 ucp -= sizeof (struct ether_vlan_header); 6643 else 6644 ucp -= sizeof (struct ether_header); 6645 /* 6646 * If it's a group address, then fabricate a 6647 * DL_UNITDATA_IND message. 6648 */ 6649 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6650 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6651 BPRI_HI)) != NULL) { 6652 eh = (struct ether_header *)ucp; 6653 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6654 DB_TYPE(dl_mp) = M_PROTO; 6655 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6656 dui->dl_primitive = DL_UNITDATA_IND; 6657 dui->dl_dest_addr_length = 8; 6658 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6659 dui->dl_src_addr_length = 8; 6660 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6661 8; 6662 dui->dl_group_address = 1; 6663 ucp = (uchar_t *)(dui + 1); 6664 if (ill->ill_sap_length > 0) 6665 ucp += ill->ill_sap_length; 6666 bcopy(&eh->ether_dhost, ucp, 6); 6667 bcopy(&eh->ether_shost, ucp + 8, 6); 6668 ucp = (uchar_t *)(dui + 1); 6669 if (ill->ill_sap_length < 0) 6670 ucp += 8 + ill->ill_sap_length; 6671 bcopy(&eh->ether_type, ucp, 2); 6672 bcopy(&eh->ether_type, ucp + 8, 2); 6673 } 6674 } 6675 break; 6676 } 6677 6678 case M_PROTO: 6679 case M_PCPROTO: 6680 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6681 DL_UNITDATA_IND) { 6682 /* Go handle anything other than data elsewhere. */ 6683 ip_rput_dlpi(q, mp); 6684 return; 6685 } 6686 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6687 6688 /* Save the DLPI header. 
*/ 6689 dl_mp = mp; 6690 mp = mp->b_cont; 6691 dl_mp->b_cont = NULL; 6692 break; 6693 case M_BREAK: 6694 panic("ip_rput_v6: got an M_BREAK"); 6695 /*NOTREACHED*/ 6696 case M_IOCACK: 6697 iocp = (struct iocblk *)mp->b_rptr; 6698 switch (iocp->ioc_cmd) { 6699 case DL_IOC_HDR_INFO: 6700 ill = (ill_t *)q->q_ptr; 6701 ill_fastpath_ack(ill, mp); 6702 return; 6703 6704 case SIOCGTUNPARAM: 6705 case OSIOCGTUNPARAM: 6706 ip_rput_other(NULL, q, mp, NULL); 6707 return; 6708 6709 case SIOCSTUNPARAM: 6710 case OSIOCSTUNPARAM: 6711 /* Go through qwriter */ 6712 break; 6713 default: 6714 putnext(q, mp); 6715 return; 6716 } 6717 /* FALLTHRU */ 6718 case M_ERROR: 6719 case M_HANGUP: 6720 mutex_enter(&ill->ill_lock); 6721 if (ill->ill_state_flags & ILL_CONDEMNED) { 6722 mutex_exit(&ill->ill_lock); 6723 freemsg(mp); 6724 return; 6725 } 6726 ill_refhold_locked(ill); 6727 mutex_exit(&ill->ill_lock); 6728 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6729 return; 6730 case M_CTL: 6731 if ((MBLKL(mp) > sizeof (int)) && 6732 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6733 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6734 mctl_present = B_TRUE; 6735 break; 6736 } 6737 putnext(q, mp); 6738 return; 6739 case M_IOCNAK: 6740 iocp = (struct iocblk *)mp->b_rptr; 6741 switch (iocp->ioc_cmd) { 6742 case DL_IOC_HDR_INFO: 6743 case SIOCGTUNPARAM: 6744 case OSIOCGTUNPARAM: 6745 ip_rput_other(NULL, q, mp, NULL); 6746 return; 6747 6748 case SIOCSTUNPARAM: 6749 case OSIOCSTUNPARAM: 6750 mutex_enter(&ill->ill_lock); 6751 if (ill->ill_state_flags & ILL_CONDEMNED) { 6752 mutex_exit(&ill->ill_lock); 6753 freemsg(mp); 6754 return; 6755 } 6756 ill_refhold_locked(ill); 6757 mutex_exit(&ill->ill_lock); 6758 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6759 return; 6760 default: 6761 break; 6762 } 6763 /* FALLTHRU */ 6764 default: 6765 putnext(q, mp); 6766 return; 6767 } 6768 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6769 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6770 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6771 /* 6772 * if db_ref > 1 then copymsg and free original. Packet may be 6773 * changed and do not want other entity who has a reference to this 6774 * message to trip over the changes. This is a blind change because 6775 * trying to catch all places that might change packet is too 6776 * difficult (since it may be a module above this one). 6777 */ 6778 if (mp->b_datap->db_ref > 1) { 6779 mblk_t *mp1; 6780 6781 mp1 = copymsg(mp); 6782 freemsg(mp); 6783 if (mp1 == NULL) { 6784 first_mp = NULL; 6785 goto discard; 6786 } 6787 mp = mp1; 6788 } 6789 first_mp = mp; 6790 if (mctl_present) { 6791 hada_mp = first_mp; 6792 mp = first_mp->b_cont; 6793 } 6794 6795 if (ip_check_v6_mblk(mp, ill) == -1) 6796 return; 6797 6798 ip6h = (ip6_t *)mp->b_rptr; 6799 6800 DTRACE_PROBE4(ip6__physical__in__start, 6801 ill_t *, ill, ill_t *, NULL, 6802 ip6_t *, ip6h, mblk_t *, first_mp); 6803 6804 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6805 ipst->ips_ipv6firewall_physical_in, 6806 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6807 6808 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6809 6810 if (first_mp == NULL) 6811 return; 6812 6813 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6814 IPV6_DEFAULT_VERS_AND_FLOW) { 6815 /* 6816 * It may be a bit too expensive to do this mapped address 6817 * check here, but in the interest of robustness, it seems 6818 * like the correct place. 6819 * TODO: Avoid this check for e.g. connected TCP sockets 6820 */ 6821 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6822 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6823 goto discard; 6824 } 6825 6826 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6827 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6828 goto discard; 6829 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6830 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6831 goto discard; 6832 } 6833 6834 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6835 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6836 } else { 6837 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6838 goto discard; 6839 } 6840 freemsg(dl_mp); 6841 return; 6842 6843 discard: 6844 if (dl_mp != NULL) 6845 freeb(dl_mp); 6846 freemsg(first_mp); 6847 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6848 } 6849 6850 /* 6851 * Walk through the IPv6 packet in mp and see if there's an AH header 6852 * in it. See if the AH header needs to get done before other headers in 6853 * the packet. (Worker function for ipsec_early_ah_v6().) 6854 */ 6855 #define IPSEC_HDR_DONT_PROCESS 0 6856 #define IPSEC_HDR_PROCESS 1 6857 #define IPSEC_MEMORY_ERROR 2 6858 static int 6859 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6860 { 6861 uint_t length; 6862 uint_t ehdrlen; 6863 uint8_t *whereptr; 6864 uint8_t *endptr; 6865 uint8_t *nexthdrp; 6866 ip6_dest_t *desthdr; 6867 ip6_rthdr_t *rthdr; 6868 ip6_t *ip6h; 6869 6870 /* 6871 * For now just pullup everything. In general, the less pullups, 6872 * the better, but there's so much squirrelling through anyway, 6873 * it's just easier this way. 6874 */ 6875 if (!pullupmsg(mp, -1)) { 6876 return (IPSEC_MEMORY_ERROR); 6877 } 6878 6879 ip6h = (ip6_t *)mp->b_rptr; 6880 length = IPV6_HDR_LEN; 6881 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6882 endptr = mp->b_wptr; 6883 6884 /* 6885 * We can't just use the argument nexthdr in the place 6886 * of nexthdrp becaue we don't dereference nexthdrp 6887 * till we confirm whether it is a valid address. 6888 */ 6889 nexthdrp = &ip6h->ip6_nxt; 6890 while (whereptr < endptr) { 6891 /* Is there enough left for len + nexthdr? 
*/ 6892 if (whereptr + MIN_EHDR_LEN > endptr) 6893 return (IPSEC_MEMORY_ERROR); 6894 6895 switch (*nexthdrp) { 6896 case IPPROTO_HOPOPTS: 6897 case IPPROTO_DSTOPTS: 6898 /* Assumes the headers are identical for hbh and dst */ 6899 desthdr = (ip6_dest_t *)whereptr; 6900 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6901 if ((uchar_t *)desthdr + ehdrlen > endptr) 6902 return (IPSEC_MEMORY_ERROR); 6903 /* 6904 * Return DONT_PROCESS because the destination 6905 * options header may be for each hop in a 6906 * routing-header, and we only want AH if we're 6907 * finished with routing headers. 6908 */ 6909 if (*nexthdrp == IPPROTO_DSTOPTS) 6910 return (IPSEC_HDR_DONT_PROCESS); 6911 nexthdrp = &desthdr->ip6d_nxt; 6912 break; 6913 case IPPROTO_ROUTING: 6914 rthdr = (ip6_rthdr_t *)whereptr; 6915 6916 /* 6917 * If there's more hops left on the routing header, 6918 * return now with DON'T PROCESS. 6919 */ 6920 if (rthdr->ip6r_segleft > 0) 6921 return (IPSEC_HDR_DONT_PROCESS); 6922 6923 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6924 if ((uchar_t *)rthdr + ehdrlen > endptr) 6925 return (IPSEC_MEMORY_ERROR); 6926 nexthdrp = &rthdr->ip6r_nxt; 6927 break; 6928 case IPPROTO_FRAGMENT: 6929 /* Wait for reassembly */ 6930 return (IPSEC_HDR_DONT_PROCESS); 6931 case IPPROTO_AH: 6932 *nexthdr = IPPROTO_AH; 6933 return (IPSEC_HDR_PROCESS); 6934 case IPPROTO_NONE: 6935 /* No next header means we're finished */ 6936 default: 6937 return (IPSEC_HDR_DONT_PROCESS); 6938 } 6939 length += ehdrlen; 6940 whereptr += ehdrlen; 6941 } 6942 panic("ipsec_needs_processing_v6"); 6943 /*NOTREACHED*/ 6944 } 6945 6946 /* 6947 * Path for AH if options are present. If this is the first time we are 6948 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6949 * Otherwise, just fanout. Return value answers the boolean question: 6950 * "Did I consume the mblk you sent me?" 6951 * 6952 * Sometimes AH needs to be done before other IPv6 headers for security 6953 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 6954 * indicates if that is so, and fans out to the appropriate IPsec protocol 6955 * for the datagram passed in. 6956 */ 6957 static boolean_t 6958 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6959 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 6960 { 6961 mblk_t *mp; 6962 uint8_t nexthdr; 6963 ipsec_in_t *ii = NULL; 6964 ah_t *ah; 6965 ipsec_status_t ipsec_rc; 6966 ip_stack_t *ipst = ill->ill_ipst; 6967 netstack_t *ns = ipst->ips_netstack; 6968 ipsec_stack_t *ipss = ns->netstack_ipsec; 6969 6970 ASSERT((hada_mp == NULL) || (!mctl_present)); 6971 6972 switch (ipsec_needs_processing_v6( 6973 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6974 case IPSEC_MEMORY_ERROR: 6975 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6976 freemsg(hada_mp); 6977 freemsg(first_mp); 6978 return (B_TRUE); 6979 case IPSEC_HDR_DONT_PROCESS: 6980 return (B_FALSE); 6981 } 6982 6983 /* Default means send it to AH! */ 6984 ASSERT(nexthdr == IPPROTO_AH); 6985 if (!mctl_present) { 6986 mp = first_mp; 6987 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6988 if (first_mp == NULL) { 6989 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6990 "allocation failure.\n")); 6991 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6992 freemsg(hada_mp); 6993 freemsg(mp); 6994 return (B_TRUE); 6995 } 6996 /* 6997 * Store the ill_index so that when we come back 6998 * from IPSEC we ride on the same queue. 6999 */ 7000 ii = (ipsec_in_t *)first_mp->b_rptr; 7001 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7002 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7003 first_mp->b_cont = mp; 7004 } 7005 /* 7006 * Cache hardware acceleration info. 
7007 */ 7008 if (hada_mp != NULL) { 7009 ASSERT(ii != NULL); 7010 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7011 "caching data attr.\n")); 7012 ii->ipsec_in_accelerated = B_TRUE; 7013 ii->ipsec_in_da = hada_mp; 7014 } 7015 7016 if (!ipsec_loaded(ipss)) { 7017 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7018 return (B_TRUE); 7019 } 7020 7021 ah = ipsec_inbound_ah_sa(first_mp, ns); 7022 if (ah == NULL) 7023 return (B_TRUE); 7024 ASSERT(ii->ipsec_in_ah_sa != NULL); 7025 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7026 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7027 7028 switch (ipsec_rc) { 7029 case IPSEC_STATUS_SUCCESS: 7030 /* we're done with IPsec processing, send it up */ 7031 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7032 break; 7033 case IPSEC_STATUS_FAILED: 7034 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7035 break; 7036 case IPSEC_STATUS_PENDING: 7037 /* no action needed */ 7038 break; 7039 } 7040 return (B_TRUE); 7041 } 7042 7043 /* 7044 * Validate the IPv6 mblk for alignment. 7045 */ 7046 int 7047 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7048 { 7049 int pkt_len, ip6_len; 7050 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7051 7052 /* check for alignment and full IPv6 header */ 7053 if (!OK_32PTR((uchar_t *)ip6h) || 7054 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7055 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7056 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7057 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7058 freemsg(mp); 7059 return (-1); 7060 } 7061 ip6h = (ip6_t *)mp->b_rptr; 7062 } 7063 7064 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7065 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7066 7067 if (mp->b_cont == NULL) 7068 pkt_len = mp->b_wptr - mp->b_rptr; 7069 else 7070 pkt_len = msgdsize(mp); 7071 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7072 7073 /* 7074 * Check for bogus (too short packet) and packet which 7075 * was padded by the link layer. 
7076 */ 7077 if (ip6_len != pkt_len) { 7078 ssize_t diff; 7079 7080 if (ip6_len > pkt_len) { 7081 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7082 ip6_len, pkt_len)); 7083 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7084 freemsg(mp); 7085 return (-1); 7086 } 7087 diff = (ssize_t)(pkt_len - ip6_len); 7088 7089 if (!adjmsg(mp, -diff)) { 7090 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7091 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7092 freemsg(mp); 7093 return (-1); 7094 } 7095 } 7096 return (0); 7097 } 7098 7099 /* 7100 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7101 * ip_rput_v6 has already verified alignment, the min length, the version, 7102 * and db_ref = 1. 7103 * 7104 * The ill passed in (the arg named inill) is the ill that the packet 7105 * actually arrived on. We need to remember this when saving the 7106 * input interface index into potential IPV6_PKTINFO data in 7107 * ip_add_info_v6(). 7108 * 7109 * This routine doesn't free dl_mp; that's the caller's responsibility on 7110 * return. (Note that the callers are complex enough that there's no tail 7111 * recursion here anyway.) 
7112 */ 7113 void 7114 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7115 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7116 { 7117 ire_t *ire = NULL; 7118 ill_t *ill = inill; 7119 ill_t *outill; 7120 ipif_t *ipif; 7121 uint8_t *whereptr; 7122 uint8_t nexthdr; 7123 uint16_t remlen; 7124 uint_t prev_nexthdr_offset; 7125 uint_t used; 7126 size_t old_pkt_len; 7127 size_t pkt_len; 7128 uint16_t ip6_len; 7129 uint_t hdr_len; 7130 boolean_t mctl_present; 7131 mblk_t *first_mp; 7132 mblk_t *first_mp1; 7133 boolean_t no_forward; 7134 ip6_hbh_t *hbhhdr; 7135 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7136 conn_t *connp; 7137 ilm_t *ilm; 7138 uint32_t ports; 7139 zoneid_t zoneid = GLOBAL_ZONEID; 7140 uint16_t hck_flags, reass_hck_flags; 7141 uint32_t reass_sum; 7142 boolean_t cksum_err; 7143 mblk_t *mp1; 7144 ip_stack_t *ipst = inill->ill_ipst; 7145 7146 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7147 7148 if (hada_mp != NULL) { 7149 /* 7150 * It's an IPsec accelerated packet. 7151 * Keep a pointer to the data attributes around until 7152 * we allocate the ipsecinfo structure. 7153 */ 7154 IPSECHW_DEBUG(IPSECHW_PKT, 7155 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7156 hada_mp->b_cont = NULL; 7157 /* 7158 * Since it is accelerated, it came directly from 7159 * the ill. 
7160 */ 7161 ASSERT(mctl_present == B_FALSE); 7162 ASSERT(mp->b_datap->db_type != M_CTL); 7163 } 7164 7165 ip6h = (ip6_t *)mp->b_rptr; 7166 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7167 old_pkt_len = pkt_len = ip6_len; 7168 7169 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7170 hck_flags = DB_CKSUMFLAGS(mp); 7171 else 7172 hck_flags = 0; 7173 7174 /* Clear checksum flags in case we need to forward */ 7175 DB_CKSUMFLAGS(mp) = 0; 7176 reass_sum = reass_hck_flags = 0; 7177 7178 nexthdr = ip6h->ip6_nxt; 7179 7180 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7181 (uchar_t *)ip6h); 7182 whereptr = (uint8_t *)&ip6h[1]; 7183 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7184 7185 /* Process hop by hop header options */ 7186 if (nexthdr == IPPROTO_HOPOPTS) { 7187 uint_t ehdrlen; 7188 uint8_t *optptr; 7189 7190 if (remlen < MIN_EHDR_LEN) 7191 goto pkt_too_short; 7192 if (mp->b_cont != NULL && 7193 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7194 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7195 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7196 freemsg(hada_mp); 7197 freemsg(first_mp); 7198 return; 7199 } 7200 ip6h = (ip6_t *)mp->b_rptr; 7201 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7202 } 7203 hbhhdr = (ip6_hbh_t *)whereptr; 7204 nexthdr = hbhhdr->ip6h_nxt; 7205 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7206 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7207 7208 if (remlen < ehdrlen) 7209 goto pkt_too_short; 7210 if (mp->b_cont != NULL && 7211 whereptr + ehdrlen > mp->b_wptr) { 7212 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7214 freemsg(hada_mp); 7215 freemsg(first_mp); 7216 return; 7217 } 7218 ip6h = (ip6_t *)mp->b_rptr; 7219 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7220 hbhhdr = (ip6_hbh_t *)whereptr; 7221 } 7222 7223 optptr = whereptr + 2; 7224 whereptr += ehdrlen; 7225 remlen -= ehdrlen; 7226 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7227 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7228 case -1: 7229 /* 7230 * Packet has been consumed and any 7231 * needed ICMP messages sent. 7232 */ 7233 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7234 freemsg(hada_mp); 7235 return; 7236 case 0: 7237 /* no action needed */ 7238 break; 7239 case 1: 7240 /* Known router alert */ 7241 goto ipv6forus; 7242 } 7243 } 7244 7245 /* 7246 * Attach any necessary label information to this packet. 7247 */ 7248 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7249 if (ip6opt_ls != 0) 7250 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7251 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7252 freemsg(hada_mp); 7253 freemsg(first_mp); 7254 return; 7255 } 7256 7257 /* 7258 * On incoming v6 multicast packets we will bypass the ire table, 7259 * and assume that the read queue corresponds to the targetted 7260 * interface. 7261 * 7262 * The effect of this is the same as the IPv4 original code, but is 7263 * much cleaner I think. See ip_rput for how that was done. 7264 */ 7265 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7266 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7267 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7268 /* 7269 * XXX TODO Give to mrouted to for multicast forwarding. 
7270 */ 7271 ILM_WALKER_HOLD(ill); 7272 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7273 ILM_WALKER_RELE(ill); 7274 if (ilm == NULL) { 7275 if (ip_debug > 3) { 7276 /* ip2dbg */ 7277 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7278 " which is not for us: %s\n", AF_INET6, 7279 &ip6h->ip6_dst); 7280 } 7281 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7282 freemsg(hada_mp); 7283 freemsg(first_mp); 7284 return; 7285 } 7286 if (ip_debug > 3) { 7287 /* ip2dbg */ 7288 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7289 AF_INET6, &ip6h->ip6_dst); 7290 } 7291 zoneid = GLOBAL_ZONEID; 7292 goto ipv6forus; 7293 } 7294 7295 ipif = ill->ill_ipif; 7296 7297 /* 7298 * If a packet was received on an interface that is a 6to4 tunnel, 7299 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7300 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7301 * the 6to4 prefix of the address configured on the receiving interface. 7302 * Otherwise, the packet was delivered to this interface in error and 7303 * the packet must be dropped. 7304 */ 7305 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7306 7307 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7308 &ip6h->ip6_dst)) { 7309 if (ip_debug > 2) { 7310 /* ip1dbg */ 7311 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7312 "addressed packet which is not for us: " 7313 "%s\n", AF_INET6, &ip6h->ip6_dst); 7314 } 7315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7316 freemsg(first_mp); 7317 return; 7318 } 7319 } 7320 7321 /* 7322 * Find an ire that matches destination. For link-local addresses 7323 * we have to match the ill. 7324 * TBD for site local addresses. 
7325 */ 7326 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7327 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7328 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7329 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7330 } else { 7331 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7332 MBLK_GETLABEL(mp), ipst); 7333 7334 if (ire != NULL && ire->ire_stq != NULL && 7335 ire->ire_zoneid != GLOBAL_ZONEID && 7336 ire->ire_zoneid != ALL_ZONES) { 7337 /* 7338 * Should only use IREs that are visible from the 7339 * global zone for forwarding. 7340 */ 7341 ire_refrele(ire); 7342 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7343 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7344 } 7345 } 7346 7347 if (ire == NULL) { 7348 /* 7349 * No matching IRE found. Mark this packet as having 7350 * originated externally. 7351 */ 7352 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7353 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7354 if (!(ill->ill_flags & ILLF_ROUTER)) { 7355 BUMP_MIB(ill->ill_ip_mib, 7356 ipIfStatsInAddrErrors); 7357 } 7358 freemsg(hada_mp); 7359 freemsg(first_mp); 7360 return; 7361 } 7362 if (ip6h->ip6_hops <= 1) { 7363 if (hada_mp != NULL) 7364 goto hada_drop; 7365 /* Sent by forwarding path, and router is global zone */ 7366 icmp_time_exceeded_v6(WR(q), first_mp, 7367 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7368 GLOBAL_ZONEID, ipst); 7369 return; 7370 } 7371 /* 7372 * Per RFC 3513 section 2.5.2, we must not forward packets with 7373 * an unspecified source address. 7374 */ 7375 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7377 freemsg(hada_mp); 7378 freemsg(first_mp); 7379 return; 7380 } 7381 mp->b_prev = (mblk_t *)(uintptr_t) 7382 ill->ill_phyint->phyint_ifindex; 7383 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7384 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7385 GLOBAL_ZONEID, ipst); 7386 return; 7387 } 7388 /* we have a matching IRE */ 7389 if (ire->ire_stq != NULL) { 7390 ill_group_t *ill_group; 7391 ill_group_t *ire_group; 7392 7393 /* 7394 * To be quicker, we may wish not to chase pointers 7395 * (ire->ire_ipif->ipif_ill...) and instead store the 7396 * forwarding policy in the ire. An unfortunate side- 7397 * effect of this would be requiring an ire flush whenever 7398 * the ILLF_ROUTER flag changes. For now, chase pointers 7399 * once and store in the boolean no_forward. 7400 * 7401 * This appears twice to keep it out of the non-forwarding, 7402 * yes-it's-for-us-on-the-right-interface case. 7403 */ 7404 no_forward = ((ill->ill_flags & 7405 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7406 7407 7408 ASSERT(first_mp == mp); 7409 /* 7410 * This ire has a send-to queue - forward the packet. 7411 */ 7412 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7413 freemsg(hada_mp); 7414 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7415 if (no_forward) { 7416 BUMP_MIB(ill->ill_ip_mib, 7417 ipIfStatsInAddrErrors); 7418 } 7419 freemsg(mp); 7420 ire_refrele(ire); 7421 return; 7422 } 7423 /* 7424 * ipIfStatsHCInForwDatagrams should only be increment if there 7425 * will be an attempt to forward the packet, which is why we 7426 * increment after the above condition has been checked. 7427 */ 7428 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7429 if (ip6h->ip6_hops <= 1) { 7430 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7431 /* Sent by forwarding path, and router is global zone */ 7432 icmp_time_exceeded_v6(WR(q), mp, 7433 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7434 GLOBAL_ZONEID, ipst); 7435 ire_refrele(ire); 7436 return; 7437 } 7438 /* 7439 * Per RFC 3513 section 2.5.2, we must not forward packets with 7440 * an unspecified source address. 
7441 */ 7442 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7443 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7444 freemsg(mp); 7445 ire_refrele(ire); 7446 return; 7447 } 7448 7449 if (is_system_labeled()) { 7450 mblk_t *mp1; 7451 7452 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7453 BUMP_MIB(ill->ill_ip_mib, 7454 ipIfStatsForwProhibits); 7455 freemsg(mp); 7456 ire_refrele(ire); 7457 return; 7458 } 7459 /* Size may have changed */ 7460 mp = mp1; 7461 ip6h = (ip6_t *)mp->b_rptr; 7462 pkt_len = msgdsize(mp); 7463 } 7464 7465 if (pkt_len > ire->ire_max_frag) { 7466 int max_frag = ire->ire_max_frag; 7467 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7468 /* 7469 * Handle labeled packet resizing. 7470 */ 7471 if (is_system_labeled()) { 7472 max_frag = tsol_pmtu_adjust(mp, max_frag, 7473 pkt_len - old_pkt_len, AF_INET6); 7474 } 7475 7476 /* Sent by forwarding path, and router is global zone */ 7477 icmp_pkt2big_v6(WR(q), mp, max_frag, 7478 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7479 ire_refrele(ire); 7480 return; 7481 } 7482 7483 /* 7484 * Check to see if we're forwarding the packet to a 7485 * different link from which it came. If so, check the 7486 * source and destination addresses since routers must not 7487 * forward any packets with link-local source or 7488 * destination addresses to other links. Otherwise (if 7489 * we're forwarding onto the same link), conditionally send 7490 * a redirect message. 7491 */ 7492 ill_group = ill->ill_group; 7493 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7494 if (ire->ire_rfq != q && (ill_group == NULL || 7495 ill_group != ire_group)) { 7496 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7497 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7498 BUMP_MIB(ill->ill_ip_mib, 7499 ipIfStatsInAddrErrors); 7500 freemsg(mp); 7501 ire_refrele(ire); 7502 return; 7503 } 7504 /* TBD add site-local check at site boundary? 
*/ 7505 } else if (ipst->ips_ipv6_send_redirects) { 7506 in6_addr_t *v6targ; 7507 in6_addr_t gw_addr_v6; 7508 ire_t *src_ire_v6 = NULL; 7509 7510 /* 7511 * Don't send a redirect when forwarding a source 7512 * routed packet. 7513 */ 7514 if (ip_source_routed_v6(ip6h, mp, ipst)) 7515 goto forward; 7516 7517 mutex_enter(&ire->ire_lock); 7518 gw_addr_v6 = ire->ire_gateway_addr_v6; 7519 mutex_exit(&ire->ire_lock); 7520 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7521 v6targ = &gw_addr_v6; 7522 /* 7523 * We won't send redirects to a router 7524 * that doesn't have a link local 7525 * address, but will forward. 7526 */ 7527 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7528 BUMP_MIB(ill->ill_ip_mib, 7529 ipIfStatsInAddrErrors); 7530 goto forward; 7531 } 7532 } else { 7533 v6targ = &ip6h->ip6_dst; 7534 } 7535 7536 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7537 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7538 GLOBAL_ZONEID, 0, NULL, 7539 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7540 ipst); 7541 7542 if (src_ire_v6 != NULL) { 7543 /* 7544 * The source is directly connected. 
7545 */ 7546 mp1 = copymsg(mp); 7547 if (mp1 != NULL) { 7548 icmp_send_redirect_v6(WR(q), 7549 mp1, v6targ, &ip6h->ip6_dst, 7550 ill, B_FALSE); 7551 } 7552 ire_refrele(src_ire_v6); 7553 } 7554 } 7555 7556 forward: 7557 /* Hoplimit verified above */ 7558 ip6h->ip6_hops--; 7559 7560 outill = ire->ire_ipif->ipif_ill; 7561 7562 DTRACE_PROBE4(ip6__forwarding__start, 7563 ill_t *, inill, ill_t *, outill, 7564 ip6_t *, ip6h, mblk_t *, mp); 7565 7566 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7567 ipst->ips_ipv6firewall_forwarding, 7568 inill, outill, ip6h, mp, mp, 0, ipst); 7569 7570 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7571 7572 if (mp != NULL) { 7573 UPDATE_IB_PKT_COUNT(ire); 7574 ire->ire_last_used_time = lbolt; 7575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7576 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7577 } 7578 IRE_REFRELE(ire); 7579 return; 7580 } 7581 7582 /* 7583 * Need to put on correct queue for reassembly to find it. 7584 * No need to use put() since reassembly has its own locks. 7585 * Note: multicast packets and packets destined to addresses 7586 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7587 * the arriving ill. Unlike the IPv4 case, enabling strict 7588 * destination multihoming will prevent accepting packets 7589 * addressed to an IRE_LOCAL on lo0. 7590 */ 7591 if (ire->ire_rfq != q) { 7592 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7593 == NULL) { 7594 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7595 freemsg(hada_mp); 7596 freemsg(first_mp); 7597 return; 7598 } 7599 if (ire->ire_rfq != NULL) { 7600 q = ire->ire_rfq; 7601 ill = (ill_t *)q->q_ptr; 7602 ASSERT(ill != NULL); 7603 } 7604 } 7605 7606 zoneid = ire->ire_zoneid; 7607 UPDATE_IB_PKT_COUNT(ire); 7608 ire->ire_last_used_time = lbolt; 7609 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7610 ire_refrele(ire); 7611 ire = NULL; 7612 ipv6forus: 7613 /* 7614 * Looks like this packet is for us one way or another. 7615 * This is where we'll process destination headers etc. 7616 */ 7617 for (; ; ) { 7618 switch (nexthdr) { 7619 case IPPROTO_TCP: { 7620 uint16_t *up; 7621 uint32_t sum; 7622 int offset; 7623 7624 hdr_len = pkt_len - remlen; 7625 7626 if (hada_mp != NULL) { 7627 ip0dbg(("tcp hada drop\n")); 7628 goto hada_drop; 7629 } 7630 7631 7632 /* TCP needs all of the TCP header */ 7633 if (remlen < TCP_MIN_HEADER_LENGTH) 7634 goto pkt_too_short; 7635 if (mp->b_cont != NULL && 7636 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7637 if (!pullupmsg(mp, 7638 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7639 BUMP_MIB(ill->ill_ip_mib, 7640 ipIfStatsInDiscards); 7641 freemsg(first_mp); 7642 return; 7643 } 7644 hck_flags = 0; 7645 ip6h = (ip6_t *)mp->b_rptr; 7646 whereptr = (uint8_t *)ip6h + hdr_len; 7647 } 7648 /* 7649 * Extract the offset field from the TCP header. 7650 */ 7651 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7652 if (offset != 5) { 7653 if (offset < 5) { 7654 ip1dbg(("ip_rput_data_v6: short " 7655 "TCP data offset")); 7656 BUMP_MIB(ill->ill_ip_mib, 7657 ipIfStatsInDiscards); 7658 freemsg(first_mp); 7659 return; 7660 } 7661 /* 7662 * There must be TCP options. 7663 * Make sure we can grab them. 7664 */ 7665 offset <<= 2; 7666 if (remlen < offset) 7667 goto pkt_too_short; 7668 if (mp->b_cont != NULL && 7669 whereptr + offset > mp->b_wptr) { 7670 if (!pullupmsg(mp, 7671 hdr_len + offset)) { 7672 BUMP_MIB(ill->ill_ip_mib, 7673 ipIfStatsInDiscards); 7674 freemsg(first_mp); 7675 return; 7676 } 7677 hck_flags = 0; 7678 ip6h = (ip6_t *)mp->b_rptr; 7679 whereptr = (uint8_t *)ip6h + hdr_len; 7680 } 7681 } 7682 7683 up = (uint16_t *)&ip6h->ip6_src; 7684 /* 7685 * TCP checksum calculation. 
First sum up the 7686 * pseudo-header fields: 7687 * - Source IPv6 address 7688 * - Destination IPv6 address 7689 * - TCP payload length 7690 * - TCP protocol ID 7691 */ 7692 sum = htons(IPPROTO_TCP + remlen) + 7693 up[0] + up[1] + up[2] + up[3] + 7694 up[4] + up[5] + up[6] + up[7] + 7695 up[8] + up[9] + up[10] + up[11] + 7696 up[12] + up[13] + up[14] + up[15]; 7697 7698 /* Fold initial sum */ 7699 sum = (sum & 0xffff) + (sum >> 16); 7700 7701 mp1 = mp->b_cont; 7702 7703 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7704 IP6_STAT(ipst, ip6_in_sw_cksum); 7705 7706 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7707 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7708 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7709 mp, mp1, cksum_err); 7710 7711 if (cksum_err) { 7712 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7713 7714 if (hck_flags & HCK_FULLCKSUM) { 7715 IP6_STAT(ipst, 7716 ip6_tcp_in_full_hw_cksum_err); 7717 } else if (hck_flags & HCK_PARTIALCKSUM) { 7718 IP6_STAT(ipst, 7719 ip6_tcp_in_part_hw_cksum_err); 7720 } else { 7721 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7722 } 7723 freemsg(first_mp); 7724 return; 7725 } 7726 tcp_fanout: 7727 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7728 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7729 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7730 return; 7731 } 7732 case IPPROTO_SCTP: 7733 { 7734 sctp_hdr_t *sctph; 7735 uint32_t calcsum, pktsum; 7736 uint_t hdr_len = pkt_len - remlen; 7737 sctp_stack_t *sctps; 7738 7739 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7740 7741 /* SCTP needs all of the SCTP header */ 7742 if (remlen < sizeof (*sctph)) { 7743 goto pkt_too_short; 7744 } 7745 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7746 ASSERT(mp->b_cont != NULL); 7747 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7748 BUMP_MIB(ill->ill_ip_mib, 7749 ipIfStatsInDiscards); 7750 freemsg(mp); 7751 return; 7752 } 7753 ip6h = (ip6_t *)mp->b_rptr; 7754 whereptr = (uint8_t *)ip6h + hdr_len; 7755 } 7756 7757 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7758 /* checksum */ 7759 pktsum = sctph->sh_chksum; 7760 sctph->sh_chksum = 0; 7761 calcsum = sctp_cksum(mp, hdr_len); 7762 if (calcsum != pktsum) { 7763 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7764 freemsg(mp); 7765 return; 7766 } 7767 sctph->sh_chksum = pktsum; 7768 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7769 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7770 ports, zoneid, mp, sctps)) == NULL) { 7771 ip_fanout_sctp_raw(first_mp, ill, 7772 (ipha_t *)ip6h, B_FALSE, ports, 7773 mctl_present, 7774 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7775 B_TRUE, zoneid); 7776 return; 7777 } 7778 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7779 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7780 B_FALSE, mctl_present); 7781 return; 7782 } 7783 case IPPROTO_UDP: { 7784 uint16_t *up; 7785 uint32_t sum; 7786 7787 hdr_len = pkt_len - remlen; 7788 7789 if (hada_mp != NULL) { 7790 ip0dbg(("udp hada drop\n")); 7791 goto hada_drop; 7792 } 7793 7794 /* Verify that at least the ports are present */ 7795 if (remlen < UDPH_SIZE) 7796 goto pkt_too_short; 7797 if (mp->b_cont != NULL && 7798 whereptr + UDPH_SIZE > mp->b_wptr) { 7799 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7800 BUMP_MIB(ill->ill_ip_mib, 7801 ipIfStatsInDiscards); 7802 freemsg(first_mp); 7803 return; 7804 } 7805 hck_flags = 0; 7806 ip6h = (ip6_t *)mp->b_rptr; 7807 whereptr = (uint8_t *)ip6h + hdr_len; 7808 } 7809 7810 /* 7811 * Before going through the regular checksum 7812 * calculation, make sure the received checksum 7813 * is non-zero. RFC 2460 says, a 0x0000 checksum 7814 * in a UDP packet (within IPv6 packet) is invalid 7815 * and should be replaced by 0xffff. This makes 7816 * sense as regular checksum calculation will 7817 * pass for both the cases i.e. 0x0000 and 0xffff. 7818 * Removing one of the case makes error detection 7819 * stronger. 
7820 */ 7821 7822 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7823 /* 0x0000 checksum is invalid */ 7824 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7825 "checksum value 0x0000\n")); 7826 BUMP_MIB(ill->ill_ip_mib, 7827 udpIfStatsInCksumErrs); 7828 freemsg(first_mp); 7829 return; 7830 } 7831 7832 up = (uint16_t *)&ip6h->ip6_src; 7833 7834 /* 7835 * UDP checksum calculation. First sum up the 7836 * pseudo-header fields: 7837 * - Source IPv6 address 7838 * - Destination IPv6 address 7839 * - UDP payload length 7840 * - UDP protocol ID 7841 */ 7842 7843 sum = htons(IPPROTO_UDP + remlen) + 7844 up[0] + up[1] + up[2] + up[3] + 7845 up[4] + up[5] + up[6] + up[7] + 7846 up[8] + up[9] + up[10] + up[11] + 7847 up[12] + up[13] + up[14] + up[15]; 7848 7849 /* Fold initial sum */ 7850 sum = (sum & 0xffff) + (sum >> 16); 7851 7852 if (reass_hck_flags != 0) { 7853 hck_flags = reass_hck_flags; 7854 7855 IP_CKSUM_RECV_REASS(hck_flags, 7856 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7857 sum, reass_sum, cksum_err); 7858 } else { 7859 mp1 = mp->b_cont; 7860 7861 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7862 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7863 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7864 mp, mp1, cksum_err); 7865 } 7866 7867 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7868 IP6_STAT(ipst, ip6_in_sw_cksum); 7869 7870 if (cksum_err) { 7871 BUMP_MIB(ill->ill_ip_mib, 7872 udpIfStatsInCksumErrs); 7873 7874 if (hck_flags & HCK_FULLCKSUM) 7875 IP6_STAT(ipst, 7876 ip6_udp_in_full_hw_cksum_err); 7877 else if (hck_flags & HCK_PARTIALCKSUM) 7878 IP6_STAT(ipst, 7879 ip6_udp_in_part_hw_cksum_err); 7880 else 7881 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7882 7883 freemsg(first_mp); 7884 return; 7885 } 7886 goto udp_fanout; 7887 } 7888 case IPPROTO_ICMPV6: { 7889 uint16_t *up; 7890 uint32_t sum; 7891 uint_t hdr_len = pkt_len - remlen; 7892 7893 if (hada_mp != NULL) { 7894 ip0dbg(("icmp hada drop\n")); 7895 goto hada_drop; 7896 } 7897 7898 up = (uint16_t 
*)&ip6h->ip6_src; 7899 sum = htons(IPPROTO_ICMPV6 + remlen) + 7900 up[0] + up[1] + up[2] + up[3] + 7901 up[4] + up[5] + up[6] + up[7] + 7902 up[8] + up[9] + up[10] + up[11] + 7903 up[12] + up[13] + up[14] + up[15]; 7904 sum = (sum & 0xffff) + (sum >> 16); 7905 sum = IP_CSUM(mp, hdr_len, sum); 7906 if (sum != 0) { 7907 /* IPv6 ICMP checksum failed */ 7908 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7909 "failed %x\n", 7910 sum)); 7911 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7912 BUMP_MIB(ill->ill_icmp6_mib, 7913 ipv6IfIcmpInErrors); 7914 freemsg(first_mp); 7915 return; 7916 } 7917 7918 icmp_fanout: 7919 /* Check variable for testing applications */ 7920 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7921 freemsg(first_mp); 7922 return; 7923 } 7924 /* 7925 * Assume that there is always at least one conn for 7926 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7927 * where there is no conn. 7928 */ 7929 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7930 ASSERT(!IS_LOOPBACK((ill))); 7931 /* 7932 * In the multicast case, applications may have 7933 * joined the group from different zones, so we 7934 * need to deliver the packet to each of them. 7935 * Loop through the multicast memberships 7936 * structures (ilm) on the receive ill and send 7937 * a copy of the packet up each matching one. 
7938 */ 7939 ILM_WALKER_HOLD(ill); 7940 for (ilm = ill->ill_ilm; ilm != NULL; 7941 ilm = ilm->ilm_next) { 7942 if (ilm->ilm_flags & ILM_DELETED) 7943 continue; 7944 if (!IN6_ARE_ADDR_EQUAL( 7945 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7946 continue; 7947 if (!ipif_lookup_zoneid(ill, 7948 ilm->ilm_zoneid, IPIF_UP, NULL)) 7949 continue; 7950 7951 first_mp1 = ip_copymsg(first_mp); 7952 if (first_mp1 == NULL) 7953 continue; 7954 icmp_inbound_v6(q, first_mp1, ill, 7955 hdr_len, mctl_present, 0, 7956 ilm->ilm_zoneid, dl_mp); 7957 } 7958 ILM_WALKER_RELE(ill); 7959 } else { 7960 first_mp1 = ip_copymsg(first_mp); 7961 if (first_mp1 != NULL) 7962 icmp_inbound_v6(q, first_mp1, ill, 7963 hdr_len, mctl_present, 0, zoneid, 7964 dl_mp); 7965 } 7966 } 7967 /* FALLTHRU */ 7968 default: { 7969 /* 7970 * Handle protocols with which IPv6 is less intimate. 7971 */ 7972 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7973 7974 if (hada_mp != NULL) { 7975 ip0dbg(("default hada drop\n")); 7976 goto hada_drop; 7977 } 7978 7979 /* 7980 * Enable sending ICMP for "Unknown" nexthdr 7981 * case. i.e. where we did not FALLTHRU from 7982 * IPPROTO_ICMPV6 processing case above. 7983 * If we did FALLTHRU, then the packet has already been 7984 * processed for IPPF, don't process it again in 7985 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7986 * flags 7987 */ 7988 if (nexthdr != IPPROTO_ICMPV6) 7989 proto_flags |= IP_FF_SEND_ICMP; 7990 else 7991 proto_flags |= IP6_NO_IPPOLICY; 7992 7993 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7994 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7995 mctl_present, zoneid); 7996 return; 7997 } 7998 7999 case IPPROTO_DSTOPTS: { 8000 uint_t ehdrlen; 8001 uint8_t *optptr; 8002 ip6_dest_t *desthdr; 8003 8004 /* Check if AH is present. 
*/ 8005 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8006 hada_mp, zoneid)) { 8007 ip0dbg(("dst early hada drop\n")); 8008 return; 8009 } 8010 8011 /* 8012 * Reinitialize pointers, as ipsec_early_ah_v6() does 8013 * complete pullups. We don't have to do more pullups 8014 * as a result. 8015 */ 8016 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8017 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8018 ip6h = (ip6_t *)mp->b_rptr; 8019 8020 if (remlen < MIN_EHDR_LEN) 8021 goto pkt_too_short; 8022 8023 desthdr = (ip6_dest_t *)whereptr; 8024 nexthdr = desthdr->ip6d_nxt; 8025 prev_nexthdr_offset = (uint_t)(whereptr - 8026 (uint8_t *)ip6h); 8027 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8028 if (remlen < ehdrlen) 8029 goto pkt_too_short; 8030 optptr = whereptr + 2; 8031 /* 8032 * Note: XXX This code does not seem to make 8033 * distinction between Destination Options Header 8034 * being before/after Routing Header which can 8035 * happen if we are at the end of source route. 8036 * This may become significant in future. 8037 * (No real significant Destination Options are 8038 * defined/implemented yet ). 8039 */ 8040 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8041 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8042 case -1: 8043 /* 8044 * Packet has been consumed and any needed 8045 * ICMP errors sent. 
8046 */ 8047 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8048 freemsg(hada_mp); 8049 return; 8050 case 0: 8051 /* No action needed continue */ 8052 break; 8053 case 1: 8054 /* 8055 * Unnexpected return value 8056 * (Router alert is a Hop-by-Hop option) 8057 */ 8058 #ifdef DEBUG 8059 panic("ip_rput_data_v6: router " 8060 "alert hbh opt indication in dest opt"); 8061 /*NOTREACHED*/ 8062 #else 8063 freemsg(hada_mp); 8064 freemsg(first_mp); 8065 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8066 return; 8067 #endif 8068 } 8069 used = ehdrlen; 8070 break; 8071 } 8072 case IPPROTO_FRAGMENT: { 8073 ip6_frag_t *fraghdr; 8074 size_t no_frag_hdr_len; 8075 8076 if (hada_mp != NULL) { 8077 ip0dbg(("frag hada drop\n")); 8078 goto hada_drop; 8079 } 8080 8081 ASSERT(first_mp == mp); 8082 if (remlen < sizeof (ip6_frag_t)) 8083 goto pkt_too_short; 8084 8085 if (mp->b_cont != NULL && 8086 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8087 if (!pullupmsg(mp, 8088 pkt_len - remlen + sizeof (ip6_frag_t))) { 8089 BUMP_MIB(ill->ill_ip_mib, 8090 ipIfStatsInDiscards); 8091 freemsg(mp); 8092 return; 8093 } 8094 hck_flags = 0; 8095 ip6h = (ip6_t *)mp->b_rptr; 8096 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8097 } 8098 8099 fraghdr = (ip6_frag_t *)whereptr; 8100 used = (uint_t)sizeof (ip6_frag_t); 8101 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8102 8103 /* 8104 * Invoke the CGTP (multirouting) filtering module to 8105 * process the incoming packet. Packets identified as 8106 * duplicates must be discarded. Filtering is active 8107 * only if the the ip_cgtp_filter ndd variable is 8108 * non-zero. 
8109 */ 8110 if (ipst->ips_ip_cgtp_filter && 8111 ipst->ips_ip_cgtp_filter_ops != NULL) { 8112 int cgtp_flt_pkt; 8113 netstackid_t stackid; 8114 8115 stackid = ipst->ips_netstack->netstack_stackid; 8116 8117 cgtp_flt_pkt = 8118 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8119 stackid, inill->ill_phyint->phyint_ifindex, 8120 ip6h, fraghdr); 8121 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8122 freemsg(mp); 8123 return; 8124 } 8125 } 8126 8127 /* Restore the flags */ 8128 DB_CKSUMFLAGS(mp) = hck_flags; 8129 8130 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8131 remlen - used, &prev_nexthdr_offset, 8132 &reass_sum, &reass_hck_flags); 8133 if (mp == NULL) { 8134 /* Reassembly is still pending */ 8135 return; 8136 } 8137 /* The first mblk are the headers before the frag hdr */ 8138 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8139 8140 first_mp = mp; /* mp has most likely changed! */ 8141 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8142 ip6h = (ip6_t *)mp->b_rptr; 8143 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8144 whereptr = mp->b_rptr + no_frag_hdr_len; 8145 remlen = ntohs(ip6h->ip6_plen) + 8146 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8147 pkt_len = msgdsize(mp); 8148 used = 0; 8149 break; 8150 } 8151 case IPPROTO_HOPOPTS: { 8152 if (hada_mp != NULL) { 8153 ip0dbg(("hop hada drop\n")); 8154 goto hada_drop; 8155 } 8156 /* 8157 * Illegal header sequence. 8158 * (Hop-by-hop headers are processed above 8159 * and required to immediately follow IPv6 header) 8160 */ 8161 icmp_param_problem_v6(WR(q), first_mp, 8162 ICMP6_PARAMPROB_NEXTHEADER, 8163 prev_nexthdr_offset, 8164 B_FALSE, B_FALSE, zoneid, ipst); 8165 return; 8166 } 8167 case IPPROTO_ROUTING: { 8168 uint_t ehdrlen; 8169 ip6_rthdr_t *rthdr; 8170 8171 /* Check if AH is present. 
*/ 8172 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8173 hada_mp, zoneid)) { 8174 ip0dbg(("routing hada drop\n")); 8175 return; 8176 } 8177 8178 /* 8179 * Reinitialize pointers, as ipsec_early_ah_v6() does 8180 * complete pullups. We don't have to do more pullups 8181 * as a result. 8182 */ 8183 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8184 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8185 ip6h = (ip6_t *)mp->b_rptr; 8186 8187 if (remlen < MIN_EHDR_LEN) 8188 goto pkt_too_short; 8189 rthdr = (ip6_rthdr_t *)whereptr; 8190 nexthdr = rthdr->ip6r_nxt; 8191 prev_nexthdr_offset = (uint_t)(whereptr - 8192 (uint8_t *)ip6h); 8193 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8194 if (remlen < ehdrlen) 8195 goto pkt_too_short; 8196 if (rthdr->ip6r_segleft != 0) { 8197 /* Not end of source route */ 8198 if (ll_multicast) { 8199 BUMP_MIB(ill->ill_ip_mib, 8200 ipIfStatsForwProhibits); 8201 freemsg(hada_mp); 8202 freemsg(mp); 8203 return; 8204 } 8205 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8206 flags, hada_mp, dl_mp); 8207 return; 8208 } 8209 used = ehdrlen; 8210 break; 8211 } 8212 case IPPROTO_AH: 8213 case IPPROTO_ESP: { 8214 /* 8215 * Fast path for AH/ESP. If this is the first time 8216 * we are sending a datagram to AH/ESP, allocate 8217 * a IPSEC_IN message and prepend it. Otherwise, 8218 * just fanout. 8219 */ 8220 8221 ipsec_in_t *ii; 8222 int ipsec_rc; 8223 ipsec_stack_t *ipss; 8224 8225 ipss = ipst->ips_netstack->netstack_ipsec; 8226 if (!mctl_present) { 8227 ASSERT(first_mp == mp); 8228 first_mp = ipsec_in_alloc(B_FALSE, 8229 ipst->ips_netstack); 8230 if (first_mp == NULL) { 8231 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8232 "allocation failure.\n")); 8233 BUMP_MIB(ill->ill_ip_mib, 8234 ipIfStatsInDiscards); 8235 freemsg(mp); 8236 return; 8237 } 8238 /* 8239 * Store the ill_index so that when we come back 8240 * from IPSEC we ride on the same queue. 
8241 */ 8242 ii = (ipsec_in_t *)first_mp->b_rptr; 8243 ii->ipsec_in_ill_index = 8244 ill->ill_phyint->phyint_ifindex; 8245 ii->ipsec_in_rill_index = 8246 ii->ipsec_in_ill_index; 8247 first_mp->b_cont = mp; 8248 /* 8249 * Cache hardware acceleration info. 8250 */ 8251 if (hada_mp != NULL) { 8252 IPSECHW_DEBUG(IPSECHW_PKT, 8253 ("ip_rput_data_v6: " 8254 "caching data attr.\n")); 8255 ii->ipsec_in_accelerated = B_TRUE; 8256 ii->ipsec_in_da = hada_mp; 8257 hada_mp = NULL; 8258 } 8259 } else { 8260 ii = (ipsec_in_t *)first_mp->b_rptr; 8261 } 8262 8263 if (!ipsec_loaded(ipss)) { 8264 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8265 zoneid, ipst); 8266 return; 8267 } 8268 8269 /* select inbound SA and have IPsec process the pkt */ 8270 if (nexthdr == IPPROTO_ESP) { 8271 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8272 ipst->ips_netstack); 8273 if (esph == NULL) 8274 return; 8275 ASSERT(ii->ipsec_in_esp_sa != NULL); 8276 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8277 NULL); 8278 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8279 first_mp, esph); 8280 } else { 8281 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8282 ipst->ips_netstack); 8283 if (ah == NULL) 8284 return; 8285 ASSERT(ii->ipsec_in_ah_sa != NULL); 8286 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8287 NULL); 8288 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8289 first_mp, ah); 8290 } 8291 8292 switch (ipsec_rc) { 8293 case IPSEC_STATUS_SUCCESS: 8294 break; 8295 case IPSEC_STATUS_FAILED: 8296 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8297 /* FALLTHRU */ 8298 case IPSEC_STATUS_PENDING: 8299 return; 8300 } 8301 /* we're done with IPsec processing, send it up */ 8302 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8303 return; 8304 } 8305 case IPPROTO_NONE: 8306 /* All processing is done. Count as "delivered". 
*/ 8307 freemsg(hada_mp); 8308 freemsg(first_mp); 8309 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8310 return; 8311 } 8312 whereptr += used; 8313 ASSERT(remlen >= used); 8314 remlen -= used; 8315 } 8316 /* NOTREACHED */ 8317 8318 pkt_too_short: 8319 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8320 ip6_len, pkt_len, remlen)); 8321 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8322 freemsg(hada_mp); 8323 freemsg(first_mp); 8324 return; 8325 udp_fanout: 8326 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8327 connp = NULL; 8328 } else { 8329 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8330 ipst); 8331 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8332 CONN_DEC_REF(connp); 8333 connp = NULL; 8334 } 8335 } 8336 8337 if (connp == NULL) { 8338 uint32_t ports; 8339 8340 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8341 UDP_PORTS_OFFSET); 8342 IP6_STAT(ipst, ip6_udp_slow_path); 8343 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8344 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8345 zoneid); 8346 return; 8347 } 8348 8349 if (CONN_UDP_FLOWCTLD(connp)) { 8350 freemsg(first_mp); 8351 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8352 CONN_DEC_REF(connp); 8353 return; 8354 } 8355 8356 /* Initiate IPPF processing */ 8357 if (IP6_IN_IPP(flags, ipst)) { 8358 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8359 if (mp == NULL) { 8360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8361 CONN_DEC_REF(connp); 8362 return; 8363 } 8364 } 8365 8366 if (connp->conn_ip_recvpktinfo || 8367 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8368 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8369 if (mp == NULL) { 8370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8371 CONN_DEC_REF(connp); 8372 return; 8373 } 8374 } 8375 8376 IP6_STAT(ipst, ip6_udp_fast_path); 8377 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8378 8379 /* Send it upstream */ 8380 (connp->conn_recv)(connp, mp, NULL); 8381 8382 
CONN_DEC_REF(connp); 8383 freemsg(hada_mp); 8384 return; 8385 8386 hada_drop: 8387 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8388 /* IPsec kstats: bump counter here */ 8389 freemsg(hada_mp); 8390 freemsg(first_mp); 8391 } 8392 8393 /* 8394 * Reassemble fragment. 8395 * When it returns a completed message the first mblk will only contain 8396 * the headers prior to the fragment header. 8397 * 8398 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8399 * of the preceding header. This is needed to patch the previous header's 8400 * nexthdr field when reassembly completes. 8401 */ 8402 static mblk_t * 8403 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8404 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8405 uint32_t *cksum_val, uint16_t *cksum_flags) 8406 { 8407 ill_t *ill = (ill_t *)q->q_ptr; 8408 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8409 uint16_t offset; 8410 boolean_t more_frags; 8411 uint8_t nexthdr = fraghdr->ip6f_nxt; 8412 in6_addr_t *v6dst_ptr; 8413 in6_addr_t *v6src_ptr; 8414 uint_t end; 8415 uint_t hdr_length; 8416 size_t count; 8417 ipf_t *ipf; 8418 ipf_t **ipfp; 8419 ipfb_t *ipfb; 8420 mblk_t *mp1; 8421 uint8_t ecn_info = 0; 8422 size_t msg_len; 8423 mblk_t *tail_mp; 8424 mblk_t *t_mp; 8425 boolean_t pruned = B_FALSE; 8426 uint32_t sum_val; 8427 uint16_t sum_flags; 8428 ip_stack_t *ipst = ill->ill_ipst; 8429 8430 if (cksum_val != NULL) 8431 *cksum_val = 0; 8432 if (cksum_flags != NULL) 8433 *cksum_flags = 0; 8434 8435 /* 8436 * We utilize hardware computed checksum info only for UDP since 8437 * IP fragmentation is a normal occurence for the protocol. In 8438 * addition, checksum offload support for IP fragments carrying 8439 * UDP payload is commonly implemented across network adapters. 
8440 */ 8441 ASSERT(ill != NULL); 8442 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8443 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8444 mblk_t *mp1 = mp->b_cont; 8445 int32_t len; 8446 8447 /* Record checksum information from the packet */ 8448 sum_val = (uint32_t)DB_CKSUM16(mp); 8449 sum_flags = DB_CKSUMFLAGS(mp); 8450 8451 /* fragmented payload offset from beginning of mblk */ 8452 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8453 8454 if ((sum_flags & HCK_PARTIALCKSUM) && 8455 (mp1 == NULL || mp1->b_cont == NULL) && 8456 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8457 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8458 uint32_t adj; 8459 /* 8460 * Partial checksum has been calculated by hardware 8461 * and attached to the packet; in addition, any 8462 * prepended extraneous data is even byte aligned. 8463 * If any such data exists, we adjust the checksum; 8464 * this would also handle any postpended data. 8465 */ 8466 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8467 mp, mp1, len, adj); 8468 8469 /* One's complement subtract extraneous checksum */ 8470 if (adj >= sum_val) 8471 sum_val = ~(adj - sum_val) & 0xFFFF; 8472 else 8473 sum_val -= adj; 8474 } 8475 } else { 8476 sum_val = 0; 8477 sum_flags = 0; 8478 } 8479 8480 /* Clear hardware checksumming flag */ 8481 DB_CKSUMFLAGS(mp) = 0; 8482 8483 /* 8484 * Note: Fragment offset in header is in 8-octet units. 8485 * Clearing least significant 3 bits not only extracts 8486 * it but also gets it in units of octets. 8487 */ 8488 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8489 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8490 8491 /* 8492 * Is the more frags flag on and the payload length not a multiple 8493 * of eight? 
8494 */ 8495 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8496 zoneid_t zoneid; 8497 8498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8499 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8500 if (zoneid == ALL_ZONES) { 8501 freemsg(mp); 8502 return (NULL); 8503 } 8504 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8505 (uint32_t)((char *)&ip6h->ip6_plen - 8506 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8507 return (NULL); 8508 } 8509 8510 v6src_ptr = &ip6h->ip6_src; 8511 v6dst_ptr = &ip6h->ip6_dst; 8512 end = remlen; 8513 8514 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8515 end += offset; 8516 8517 /* 8518 * Would fragment cause reassembled packet to have a payload length 8519 * greater than IP_MAXPACKET - the max payload size? 8520 */ 8521 if (end > IP_MAXPACKET) { 8522 zoneid_t zoneid; 8523 8524 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8525 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8526 if (zoneid == ALL_ZONES) { 8527 freemsg(mp); 8528 return (NULL); 8529 } 8530 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8531 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8532 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8533 return (NULL); 8534 } 8535 8536 /* 8537 * This packet just has one fragment. Reassembly not 8538 * needed. 8539 */ 8540 if (!more_frags && offset == 0) { 8541 goto reass_done; 8542 } 8543 8544 /* 8545 * Drop the fragmented as early as possible, if 8546 * we don't have resource(s) to re-assemble. 8547 */ 8548 if (ipst->ips_ip_reass_queue_bytes == 0) { 8549 freemsg(mp); 8550 return (NULL); 8551 } 8552 8553 /* Record the ECN field info. */ 8554 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8555 /* 8556 * If this is not the first fragment, dump the unfragmentable 8557 * portion of the packet. 8558 */ 8559 if (offset) 8560 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8561 8562 /* 8563 * Fragmentation reassembly. 
Each ILL has a hash table for 8564 * queueing packets undergoing reassembly for all IPIFs 8565 * associated with the ILL. The hash is based on the packet 8566 * IP ident field. The ILL frag hash table was allocated 8567 * as a timer block at the time the ILL was created. Whenever 8568 * there is anything on the reassembly queue, the timer will 8569 * be running. 8570 */ 8571 msg_len = MBLKSIZE(mp); 8572 tail_mp = mp; 8573 while (tail_mp->b_cont != NULL) { 8574 tail_mp = tail_mp->b_cont; 8575 msg_len += MBLKSIZE(tail_mp); 8576 } 8577 /* 8578 * If the reassembly list for this ILL will get too big 8579 * prune it. 8580 */ 8581 8582 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8583 ipst->ips_ip_reass_queue_bytes) { 8584 ill_frag_prune(ill, 8585 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8586 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8587 pruned = B_TRUE; 8588 } 8589 8590 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8591 mutex_enter(&ipfb->ipfb_lock); 8592 8593 ipfp = &ipfb->ipfb_ipf; 8594 /* Try to find an existing fragment queue for this packet. */ 8595 for (;;) { 8596 ipf = ipfp[0]; 8597 if (ipf) { 8598 /* 8599 * It has to match on ident, source address, and 8600 * dest address. 8601 */ 8602 if (ipf->ipf_ident == ident && 8603 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8604 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8605 8606 /* 8607 * If we have received too many 8608 * duplicate fragments for this packet 8609 * free it. 8610 */ 8611 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8612 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8613 freemsg(mp); 8614 mutex_exit(&ipfb->ipfb_lock); 8615 return (NULL); 8616 } 8617 8618 break; 8619 } 8620 ipfp = &ipf->ipf_hash_next; 8621 continue; 8622 } 8623 8624 8625 /* 8626 * If we pruned the list, do we want to store this new 8627 * fragment?. We apply an optimization here based on the 8628 * fact that most fragments will be received in order. 
8629 * So if the offset of this incoming fragment is zero, 8630 * it is the first fragment of a new packet. We will 8631 * keep it. Otherwise drop the fragment, as we have 8632 * probably pruned the packet already (since the 8633 * packet cannot be found). 8634 */ 8635 8636 if (pruned && offset != 0) { 8637 mutex_exit(&ipfb->ipfb_lock); 8638 freemsg(mp); 8639 return (NULL); 8640 } 8641 8642 /* New guy. Allocate a frag message. */ 8643 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8644 if (!mp1) { 8645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8646 freemsg(mp); 8647 partial_reass_done: 8648 mutex_exit(&ipfb->ipfb_lock); 8649 return (NULL); 8650 } 8651 8652 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8653 /* 8654 * Too many fragmented packets in this hash bucket. 8655 * Free the oldest. 8656 */ 8657 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8658 } 8659 8660 mp1->b_cont = mp; 8661 8662 /* Initialize the fragment header. */ 8663 ipf = (ipf_t *)mp1->b_rptr; 8664 ipf->ipf_mp = mp1; 8665 ipf->ipf_ptphn = ipfp; 8666 ipfp[0] = ipf; 8667 ipf->ipf_hash_next = NULL; 8668 ipf->ipf_ident = ident; 8669 ipf->ipf_v6src = *v6src_ptr; 8670 ipf->ipf_v6dst = *v6dst_ptr; 8671 /* Record reassembly start time. 
*/ 8672 ipf->ipf_timestamp = gethrestime_sec(); 8673 /* Record ipf generation and account for frag header */ 8674 ipf->ipf_gen = ill->ill_ipf_gen++; 8675 ipf->ipf_count = MBLKSIZE(mp1); 8676 ipf->ipf_protocol = nexthdr; 8677 ipf->ipf_nf_hdr_len = 0; 8678 ipf->ipf_prev_nexthdr_offset = 0; 8679 ipf->ipf_last_frag_seen = B_FALSE; 8680 ipf->ipf_ecn = ecn_info; 8681 ipf->ipf_num_dups = 0; 8682 ipfb->ipfb_frag_pkts++; 8683 ipf->ipf_checksum = 0; 8684 ipf->ipf_checksum_flags = 0; 8685 8686 /* Store checksum value in fragment header */ 8687 if (sum_flags != 0) { 8688 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8689 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8690 ipf->ipf_checksum = sum_val; 8691 ipf->ipf_checksum_flags = sum_flags; 8692 } 8693 8694 /* 8695 * We handle reassembly two ways. In the easy case, 8696 * where all the fragments show up in order, we do 8697 * minimal bookkeeping, and just clip new pieces on 8698 * the end. If we ever see a hole, then we go off 8699 * to ip_reassemble which has to mark the pieces and 8700 * keep track of the number of holes, etc. Obviously, 8701 * the point of having both mechanisms is so we can 8702 * handle the easy case as efficiently as possible. 8703 */ 8704 if (offset == 0) { 8705 /* Easy case, in-order reassembly so far. */ 8706 /* Update the byte count */ 8707 ipf->ipf_count += msg_len; 8708 ipf->ipf_tail_mp = tail_mp; 8709 /* 8710 * Keep track of next expected offset in 8711 * ipf_end. 8712 */ 8713 ipf->ipf_end = end; 8714 ipf->ipf_nf_hdr_len = hdr_length; 8715 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8716 } else { 8717 /* Hard case, hole at the beginning. */ 8718 ipf->ipf_tail_mp = NULL; 8719 /* 8720 * ipf_end == 0 means that we have given up 8721 * on easy reassembly. 8722 */ 8723 ipf->ipf_end = 0; 8724 8725 /* Forget checksum offload from now on */ 8726 ipf->ipf_checksum_flags = 0; 8727 8728 /* 8729 * ipf_hole_cnt is set by ip_reassemble. 8730 * ipf_count is updated by ip_reassemble. 
8731 * No need to check for return value here 8732 * as we don't expect reassembly to complete or 8733 * fail for the first fragment itself. 8734 */ 8735 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8736 msg_len); 8737 } 8738 /* Update per ipfb and ill byte counts */ 8739 ipfb->ipfb_count += ipf->ipf_count; 8740 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8741 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8742 /* If the frag timer wasn't already going, start it. */ 8743 mutex_enter(&ill->ill_lock); 8744 ill_frag_timer_start(ill); 8745 mutex_exit(&ill->ill_lock); 8746 goto partial_reass_done; 8747 } 8748 8749 /* 8750 * If the packet's flag has changed (it could be coming up 8751 * from an interface different than the previous, therefore 8752 * possibly different checksum capability), then forget about 8753 * any stored checksum states. Otherwise add the value to 8754 * the existing one stored in the fragment header. 8755 */ 8756 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8757 sum_val += ipf->ipf_checksum; 8758 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8759 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8760 ipf->ipf_checksum = sum_val; 8761 } else if (ipf->ipf_checksum_flags != 0) { 8762 /* Forget checksum offload from now on */ 8763 ipf->ipf_checksum_flags = 0; 8764 } 8765 8766 /* 8767 * We have a new piece of a datagram which is already being 8768 * reassembled. Update the ECN info if all IP fragments 8769 * are ECN capable. If there is one which is not, clear 8770 * all the info. If there is at least one which has CE 8771 * code point, IP needs to report that up to transport. 
8772 */ 8773 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8774 if (ecn_info == IPH_ECN_CE) 8775 ipf->ipf_ecn = IPH_ECN_CE; 8776 } else { 8777 ipf->ipf_ecn = IPH_ECN_NECT; 8778 } 8779 8780 if (offset && ipf->ipf_end == offset) { 8781 /* The new fragment fits at the end */ 8782 ipf->ipf_tail_mp->b_cont = mp; 8783 /* Update the byte count */ 8784 ipf->ipf_count += msg_len; 8785 /* Update per ipfb and ill byte counts */ 8786 ipfb->ipfb_count += msg_len; 8787 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8788 atomic_add_32(&ill->ill_frag_count, msg_len); 8789 if (more_frags) { 8790 /* More to come. */ 8791 ipf->ipf_end = end; 8792 ipf->ipf_tail_mp = tail_mp; 8793 goto partial_reass_done; 8794 } 8795 } else { 8796 /* 8797 * Go do the hard cases. 8798 * Call ip_reassemble(). 8799 */ 8800 int ret; 8801 8802 if (offset == 0) { 8803 if (ipf->ipf_prev_nexthdr_offset == 0) { 8804 ipf->ipf_nf_hdr_len = hdr_length; 8805 ipf->ipf_prev_nexthdr_offset = 8806 *prev_nexthdr_offset; 8807 } 8808 } 8809 /* Save current byte count */ 8810 count = ipf->ipf_count; 8811 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8812 8813 /* Count of bytes added and subtracted (freeb()ed) */ 8814 count = ipf->ipf_count - count; 8815 if (count) { 8816 /* Update per ipfb and ill byte counts */ 8817 ipfb->ipfb_count += count; 8818 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8819 atomic_add_32(&ill->ill_frag_count, count); 8820 } 8821 if (ret == IP_REASS_PARTIAL) { 8822 goto partial_reass_done; 8823 } else if (ret == IP_REASS_FAILED) { 8824 /* Reassembly failed. Free up all resources */ 8825 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8826 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8827 IP_REASS_SET_START(t_mp, 0); 8828 IP_REASS_SET_END(t_mp, 0); 8829 } 8830 freemsg(mp); 8831 goto partial_reass_done; 8832 } 8833 8834 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8835 } 8836 /* 8837 * We have completed reassembly. 
Unhook the frag header from 8838 * the reassembly list. 8839 * 8840 * Grab the unfragmentable header length next header value out 8841 * of the first fragment 8842 */ 8843 ASSERT(ipf->ipf_nf_hdr_len != 0); 8844 hdr_length = ipf->ipf_nf_hdr_len; 8845 8846 /* 8847 * Before we free the frag header, record the ECN info 8848 * to report back to the transport. 8849 */ 8850 ecn_info = ipf->ipf_ecn; 8851 8852 /* 8853 * Store the nextheader field in the header preceding the fragment 8854 * header 8855 */ 8856 nexthdr = ipf->ipf_protocol; 8857 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8858 ipfp = ipf->ipf_ptphn; 8859 8860 /* We need to supply these to caller */ 8861 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8862 sum_val = ipf->ipf_checksum; 8863 else 8864 sum_val = 0; 8865 8866 mp1 = ipf->ipf_mp; 8867 count = ipf->ipf_count; 8868 ipf = ipf->ipf_hash_next; 8869 if (ipf) 8870 ipf->ipf_ptphn = ipfp; 8871 ipfp[0] = ipf; 8872 atomic_add_32(&ill->ill_frag_count, -count); 8873 ASSERT(ipfb->ipfb_count >= count); 8874 ipfb->ipfb_count -= count; 8875 ipfb->ipfb_frag_pkts--; 8876 mutex_exit(&ipfb->ipfb_lock); 8877 /* Ditch the frag header. */ 8878 mp = mp1->b_cont; 8879 freeb(mp1); 8880 8881 /* 8882 * Make sure the packet is good by doing some sanity 8883 * check. If bad we can silentely drop the packet. 8884 */ 8885 reass_done: 8886 if (hdr_length < sizeof (ip6_frag_t)) { 8887 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8888 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8889 freemsg(mp); 8890 return (NULL); 8891 } 8892 8893 /* 8894 * Remove the fragment header from the initial header by 8895 * splitting the mblk into the non-fragmentable header and 8896 * everthing after the fragment extension header. This has the 8897 * side effect of putting all the headers that need destination 8898 * processing into the b_cont block-- on return this fact is 8899 * used in order to avoid having to look at the extensions 8900 * already processed. 
8901 * 8902 * Note that this code assumes that the unfragmentable portion 8903 * of the header is in the first mblk and increments 8904 * the read pointer past it. If this assumption is broken 8905 * this code fails badly. 8906 */ 8907 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8908 mblk_t *nmp; 8909 8910 if (!(nmp = dupb(mp))) { 8911 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8912 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8913 freemsg(mp); 8914 return (NULL); 8915 } 8916 nmp->b_cont = mp->b_cont; 8917 mp->b_cont = nmp; 8918 nmp->b_rptr += hdr_length; 8919 } 8920 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8921 8922 ip6h = (ip6_t *)mp->b_rptr; 8923 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8924 8925 /* Restore original IP length in header. */ 8926 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8927 /* Record the ECN info. */ 8928 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8929 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8930 8931 /* Reassembly is successful; return checksum information if needed */ 8932 if (cksum_val != NULL) 8933 *cksum_val = sum_val; 8934 if (cksum_flags != NULL) 8935 *cksum_flags = sum_flags; 8936 8937 return (mp); 8938 } 8939 8940 /* 8941 * Walk through the options to see if there is a routing header. 8942 * If present get the destination which is the last address of 8943 * the option. 
8944 */ 8945 in6_addr_t 8946 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8947 { 8948 uint8_t nexthdr; 8949 uint8_t *whereptr; 8950 ip6_hbh_t *hbhhdr; 8951 ip6_dest_t *dsthdr; 8952 ip6_rthdr0_t *rthdr; 8953 ip6_frag_t *fraghdr; 8954 int ehdrlen; 8955 int left; 8956 in6_addr_t *ap, rv; 8957 8958 if (is_fragment != NULL) 8959 *is_fragment = B_FALSE; 8960 8961 rv = ip6h->ip6_dst; 8962 8963 nexthdr = ip6h->ip6_nxt; 8964 whereptr = (uint8_t *)&ip6h[1]; 8965 for (;;) { 8966 8967 ASSERT(nexthdr != IPPROTO_RAW); 8968 switch (nexthdr) { 8969 case IPPROTO_HOPOPTS: 8970 hbhhdr = (ip6_hbh_t *)whereptr; 8971 nexthdr = hbhhdr->ip6h_nxt; 8972 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8973 break; 8974 case IPPROTO_DSTOPTS: 8975 dsthdr = (ip6_dest_t *)whereptr; 8976 nexthdr = dsthdr->ip6d_nxt; 8977 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8978 break; 8979 case IPPROTO_ROUTING: 8980 rthdr = (ip6_rthdr0_t *)whereptr; 8981 nexthdr = rthdr->ip6r0_nxt; 8982 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8983 8984 left = rthdr->ip6r0_segleft; 8985 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8986 rv = *(ap + left - 1); 8987 /* 8988 * If the caller doesn't care whether the packet 8989 * is a fragment or not, we can stop here since 8990 * we have our destination. 8991 */ 8992 if (is_fragment == NULL) 8993 goto done; 8994 break; 8995 case IPPROTO_FRAGMENT: 8996 fraghdr = (ip6_frag_t *)whereptr; 8997 nexthdr = fraghdr->ip6f_nxt; 8998 ehdrlen = sizeof (ip6_frag_t); 8999 if (is_fragment != NULL) 9000 *is_fragment = B_TRUE; 9001 goto done; 9002 default : 9003 goto done; 9004 } 9005 whereptr += ehdrlen; 9006 } 9007 9008 done: 9009 return (rv); 9010 } 9011 9012 /* 9013 * ip_source_routed_v6: 9014 * This function is called by redirect code in ip_rput_data_v6 to 9015 * know whether this packet is source routed through this node i.e 9016 * whether this node (router) is part of the journey. 
This 9017 * function is called under two cases : 9018 * 9019 * case 1 : Routing header was processed by this node and 9020 * ip_process_rthdr replaced ip6_dst with the next hop 9021 * and we are forwarding the packet to the next hop. 9022 * 9023 * case 2 : Routing header was not processed by this node and we 9024 * are just forwarding the packet. 9025 * 9026 * For case (1) we don't want to send redirects. For case(2) we 9027 * want to send redirects. 9028 */ 9029 static boolean_t 9030 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9031 { 9032 uint8_t nexthdr; 9033 in6_addr_t *addrptr; 9034 ip6_rthdr0_t *rthdr; 9035 uint8_t numaddr; 9036 ip6_hbh_t *hbhhdr; 9037 uint_t ehdrlen; 9038 uint8_t *byteptr; 9039 9040 ip2dbg(("ip_source_routed_v6\n")); 9041 nexthdr = ip6h->ip6_nxt; 9042 ehdrlen = IPV6_HDR_LEN; 9043 9044 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9045 while (nexthdr == IPPROTO_HOPOPTS || 9046 nexthdr == IPPROTO_DSTOPTS) { 9047 byteptr = (uint8_t *)ip6h + ehdrlen; 9048 /* 9049 * Check if we have already processed 9050 * packets or we are just a forwarding 9051 * router which only pulled up msgs up 9052 * to IPV6HDR and one HBH ext header 9053 */ 9054 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9055 ip2dbg(("ip_source_routed_v6: Extension" 9056 " headers not processed\n")); 9057 return (B_FALSE); 9058 } 9059 hbhhdr = (ip6_hbh_t *)byteptr; 9060 nexthdr = hbhhdr->ip6h_nxt; 9061 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9062 } 9063 switch (nexthdr) { 9064 case IPPROTO_ROUTING: 9065 byteptr = (uint8_t *)ip6h + ehdrlen; 9066 /* 9067 * If for some reason, we haven't pulled up 9068 * the routing hdr data mblk, then we must 9069 * not have processed it at all. So for sure 9070 * we are not part of the source routed journey. 
9071 */ 9072 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9073 ip2dbg(("ip_source_routed_v6: Routing" 9074 " header not processed\n")); 9075 return (B_FALSE); 9076 } 9077 rthdr = (ip6_rthdr0_t *)byteptr; 9078 /* 9079 * Either we are an intermediate router or the 9080 * last hop before destination and we have 9081 * already processed the routing header. 9082 * If segment_left is greater than or equal to zero, 9083 * then we must be the (numaddr - segleft) entry 9084 * of the routing header. Although ip6r0_segleft 9085 * is a unit8_t variable, we still check for zero 9086 * or greater value, if in case the data type 9087 * is changed someday in future. 9088 */ 9089 if (rthdr->ip6r0_segleft > 0 || 9090 rthdr->ip6r0_segleft == 0) { 9091 ire_t *ire = NULL; 9092 9093 numaddr = rthdr->ip6r0_len / 2; 9094 addrptr = (in6_addr_t *)((char *)rthdr + 9095 sizeof (*rthdr)); 9096 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9097 if (addrptr != NULL) { 9098 ire = ire_ctable_lookup_v6(addrptr, NULL, 9099 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9100 MATCH_IRE_TYPE, 9101 ipst); 9102 if (ire != NULL) { 9103 ire_refrele(ire); 9104 return (B_TRUE); 9105 } 9106 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9107 } 9108 } 9109 /* FALLTHRU */ 9110 default: 9111 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9112 return (B_FALSE); 9113 } 9114 } 9115 9116 /* 9117 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9118 * Assumes that the following set of headers appear in the first 9119 * mblk: 9120 * ip6i_t (if present) CAN also appear as a separate mblk. 9121 * ip6_t 9122 * Any extension headers 9123 * TCP/UDP/SCTP header (if present) 9124 * The routine can handle an ICMPv6 header that is not in the first mblk. 9125 * 9126 * The order to determine the outgoing interface is as follows: 9127 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9128 * 2. If conn_nofailover_ill is set then use that ill. 9129 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9130 * 4. If q is an ill queue and (link local or multicast destination) then 9131 * use that ill. 9132 * 5. If IPV6_BOUND_IF has been set use that ill. 9133 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9134 * look for the best IRE match for the unspecified group to determine 9135 * the ill. 9136 * 7. For unicast: Just do an IRE lookup for the best match. 9137 * 9138 * arg2 is always a queue_t *. 9139 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9140 * the zoneid. 9141 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9142 */ 9143 void 9144 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9145 { 9146 conn_t *connp = NULL; 9147 queue_t *q = (queue_t *)arg2; 9148 ire_t *ire = NULL; 9149 ire_t *sctp_ire = NULL; 9150 ip6_t *ip6h; 9151 in6_addr_t *v6dstp; 9152 ill_t *ill = NULL; 9153 ipif_t *ipif; 9154 ip6i_t *ip6i; 9155 int cksum_request; /* -1 => normal. */ 9156 /* 1 => Skip TCP/UDP/SCTP checksum */ 9157 /* Otherwise contains insert offset for checksum */ 9158 int unspec_src; 9159 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9160 mib2_ipIfStatsEntry_t *mibptr; 9161 int match_flags = MATCH_IRE_ILL_GROUP; 9162 boolean_t attach_if = B_FALSE; 9163 mblk_t *first_mp; 9164 boolean_t mctl_present; 9165 ipsec_out_t *io; 9166 boolean_t drop_if_delayed = B_FALSE; 9167 boolean_t multirt_need_resolve = B_FALSE; 9168 mblk_t *copy_mp = NULL; 9169 int err = 0; 9170 int ip6i_flags = 0; 9171 zoneid_t zoneid; 9172 ill_t *saved_ill = NULL; 9173 boolean_t conn_lock_held; 9174 boolean_t need_decref = B_FALSE; 9175 ip_stack_t *ipst; 9176 9177 if (q->q_next != NULL) { 9178 ill = (ill_t *)q->q_ptr; 9179 ipst = ill->ill_ipst; 9180 } else { 9181 connp = (conn_t *)arg; 9182 ASSERT(connp != NULL); 9183 ipst = connp->conn_netstack->netstack_ip; 9184 } 9185 9186 /* 9187 * Highest bit in version field is Reachability Confirmation bit 9188 * used by NUD in ip_xmit_v6(). 9189 */ 9190 #ifdef _BIG_ENDIAN 9191 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9192 #else 9193 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9194 #endif 9195 9196 /* 9197 * M_CTL comes from 6 places 9198 * 9199 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9200 * both V4 and V6 datagrams. 9201 * 9202 * 2) AH/ESP sends down M_CTL after doing their job with both 9203 * V4 and V6 datagrams. 9204 * 9205 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9206 * attached. 9207 * 9208 * 4) Notifications from an external resolver (for XRESOLV ifs) 9209 * 9210 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9211 * IPsec hardware acceleration support. 9212 * 9213 * 6) TUN_HELLO. 9214 * 9215 * We need to handle (1)'s IPv6 case and (3) here. For the 9216 * IPv4 case in (1), and (2), IPSEC processing has already 9217 * started. The code in ip_wput() already knows how to handle 9218 * continuing IPSEC processing (for IPv4 and IPv6). All other 9219 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9220 * for handling. 
9221 */ 9222 first_mp = mp; 9223 mctl_present = B_FALSE; 9224 io = NULL; 9225 9226 /* Multidata transmit? */ 9227 if (DB_TYPE(mp) == M_MULTIDATA) { 9228 /* 9229 * We should never get here, since all Multidata messages 9230 * originating from tcp should have been directed over to 9231 * tcp_multisend() in the first place. 9232 */ 9233 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9234 freemsg(mp); 9235 return; 9236 } else if (DB_TYPE(mp) == M_CTL) { 9237 uint32_t mctltype = 0; 9238 uint32_t mlen = MBLKL(first_mp); 9239 9240 mp = mp->b_cont; 9241 mctl_present = B_TRUE; 9242 io = (ipsec_out_t *)first_mp->b_rptr; 9243 9244 /* 9245 * Validate this M_CTL message. The only three types of 9246 * M_CTL messages we expect to see in this code path are 9247 * ipsec_out_t or ipsec_in_t structures (allocated as 9248 * ipsec_info_t unions), or ipsec_ctl_t structures. 9249 * The ipsec_out_type and ipsec_in_type overlap in the two 9250 * data structures, and they are either set to IPSEC_OUT 9251 * or IPSEC_IN depending on which data structure it is. 9252 * ipsec_ctl_t is an IPSEC_CTL. 9253 * 9254 * All other M_CTL messages are sent to ip_wput_nondata() 9255 * for handling. 9256 */ 9257 if (mlen >= sizeof (io->ipsec_out_type)) 9258 mctltype = io->ipsec_out_type; 9259 9260 if ((mlen == sizeof (ipsec_ctl_t)) && 9261 (mctltype == IPSEC_CTL)) { 9262 ip_output(arg, first_mp, arg2, caller); 9263 return; 9264 } 9265 9266 if ((mlen < sizeof (ipsec_info_t)) || 9267 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9268 mp == NULL) { 9269 ip_wput_nondata(NULL, q, first_mp, NULL); 9270 return; 9271 } 9272 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9273 if (q->q_next == NULL) { 9274 ip6h = (ip6_t *)mp->b_rptr; 9275 /* 9276 * For a freshly-generated TCP dgram that needs IPV6 9277 * processing, don't call ip_wput immediately. We can 9278 * tell this by the ipsec_out_proc_begin. 
In-progress 9279 * IPSEC_OUT messages have proc_begin set to TRUE, 9280 * and we want to send all IPSEC_IN messages to 9281 * ip_wput() for IPsec processing or finishing. 9282 */ 9283 if (mctltype == IPSEC_IN || 9284 IPVER(ip6h) != IPV6_VERSION || 9285 io->ipsec_out_proc_begin) { 9286 mibptr = &ipst->ips_ip6_mib; 9287 goto notv6; 9288 } 9289 } 9290 } else if (DB_TYPE(mp) != M_DATA) { 9291 ip_wput_nondata(NULL, q, mp, NULL); 9292 return; 9293 } 9294 9295 ip6h = (ip6_t *)mp->b_rptr; 9296 9297 if (IPVER(ip6h) != IPV6_VERSION) { 9298 mibptr = &ipst->ips_ip6_mib; 9299 goto notv6; 9300 } 9301 9302 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9303 (connp == NULL || !connp->conn_ulp_labeled)) { 9304 if (connp != NULL) { 9305 ASSERT(CONN_CRED(connp) != NULL); 9306 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9307 &mp, connp->conn_mac_exempt, ipst); 9308 } else if (DB_CRED(mp) != NULL) { 9309 err = tsol_check_label_v6(DB_CRED(mp), 9310 &mp, B_FALSE, ipst); 9311 } 9312 if (mctl_present) 9313 first_mp->b_cont = mp; 9314 else 9315 first_mp = mp; 9316 if (err != 0) { 9317 DTRACE_PROBE3( 9318 tsol_ip_log_drop_checklabel_ip6, char *, 9319 "conn(1), failed to check/update mp(2)", 9320 conn_t, connp, mblk_t, mp); 9321 freemsg(first_mp); 9322 return; 9323 } 9324 ip6h = (ip6_t *)mp->b_rptr; 9325 } 9326 if (q->q_next != NULL) { 9327 /* 9328 * We don't know if this ill will be used for IPv6 9329 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9330 * ipif_set_values() sets the ill_isv6 flag to true if 9331 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9332 * just drop the packet. 
9333 */ 9334 if (!ill->ill_isv6) { 9335 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9336 "ILLF_IPV6 was set\n")); 9337 freemsg(first_mp); 9338 return; 9339 } 9340 /* For uniformity do a refhold */ 9341 mutex_enter(&ill->ill_lock); 9342 if (!ILL_CAN_LOOKUP(ill)) { 9343 mutex_exit(&ill->ill_lock); 9344 freemsg(first_mp); 9345 return; 9346 } 9347 ill_refhold_locked(ill); 9348 mutex_exit(&ill->ill_lock); 9349 mibptr = ill->ill_ip_mib; 9350 9351 ASSERT(mibptr != NULL); 9352 unspec_src = 0; 9353 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9354 do_outrequests = B_FALSE; 9355 zoneid = (zoneid_t)(uintptr_t)arg; 9356 } else { 9357 ASSERT(connp != NULL); 9358 zoneid = connp->conn_zoneid; 9359 9360 /* is queue flow controlled? */ 9361 if ((q->q_first || connp->conn_draining) && 9362 (caller == IP_WPUT)) { 9363 /* 9364 * 1) TCP sends down M_CTL for detached connections. 9365 * 2) AH/ESP sends down M_CTL. 9366 * 9367 * We don't flow control either of the above. Only 9368 * UDP and others are flow controlled for which we 9369 * can't have a M_CTL. 9370 */ 9371 ASSERT(first_mp == mp); 9372 (void) putq(q, mp); 9373 return; 9374 } 9375 mibptr = &ipst->ips_ip6_mib; 9376 unspec_src = connp->conn_unspec_src; 9377 do_outrequests = B_TRUE; 9378 if (mp->b_flag & MSGHASREF) { 9379 mp->b_flag &= ~MSGHASREF; 9380 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9381 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9382 need_decref = B_TRUE; 9383 } 9384 9385 /* 9386 * If there is a policy, try to attach an ipsec_out in 9387 * the front. At the end, first_mp either points to a 9388 * M_DATA message or IPSEC_OUT message linked to a 9389 * M_DATA message. We have to do it now as we might 9390 * lose the "conn" if we go through ip_newroute. 9391 */ 9392 if (!mctl_present && 9393 (connp->conn_out_enforce_policy || 9394 connp->conn_latch != NULL)) { 9395 ASSERT(first_mp == mp); 9396 /* XXX Any better way to get the protocol fast ? 
*/ 9397 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9398 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9399 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9400 if (need_decref) 9401 CONN_DEC_REF(connp); 9402 return; 9403 } else { 9404 ASSERT(mp->b_datap->db_type == M_CTL); 9405 first_mp = mp; 9406 mp = mp->b_cont; 9407 mctl_present = B_TRUE; 9408 io = (ipsec_out_t *)first_mp->b_rptr; 9409 } 9410 } 9411 } 9412 9413 /* check for alignment and full IPv6 header */ 9414 if (!OK_32PTR((uchar_t *)ip6h) || 9415 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9416 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9417 if (do_outrequests) 9418 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9419 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9420 freemsg(first_mp); 9421 if (ill != NULL) 9422 ill_refrele(ill); 9423 if (need_decref) 9424 CONN_DEC_REF(connp); 9425 return; 9426 } 9427 v6dstp = &ip6h->ip6_dst; 9428 cksum_request = -1; 9429 ip6i = NULL; 9430 9431 /* 9432 * Once neighbor discovery has completed, ndp_process() will provide 9433 * locally generated packets for which processing can be reattempted. 9434 * In these cases, connp is NULL and the original zone is part of a 9435 * prepended ipsec_out_t. 9436 */ 9437 if (io != NULL) { 9438 /* 9439 * When coming from icmp_input_v6, the zoneid might not match 9440 * for the loopback case, because inside icmp_input_v6 the 9441 * queue_t is a conn queue from the sending side. 9442 */ 9443 zoneid = io->ipsec_out_zoneid; 9444 ASSERT(zoneid != ALL_ZONES); 9445 } 9446 9447 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9448 /* 9449 * This is an ip6i_t header followed by an ip6_hdr. 9450 * Check which fields are set. 9451 * 9452 * When the packet comes from a transport we should have 9453 * all needed headers in the first mblk. However, when 9454 * going through ip_newroute*_v6 the ip6i might be in 9455 * a separate mblk when we return here. 
In that case 9456 * we pullup everything to ensure that extension and transport 9457 * headers "stay" in the first mblk. 9458 */ 9459 ip6i = (ip6i_t *)ip6h; 9460 ip6i_flags = ip6i->ip6i_flags; 9461 9462 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9463 ((mp->b_wptr - (uchar_t *)ip6i) >= 9464 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9465 9466 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9467 if (!pullupmsg(mp, -1)) { 9468 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9469 if (do_outrequests) { 9470 BUMP_MIB(mibptr, 9471 ipIfStatsHCOutRequests); 9472 } 9473 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9474 freemsg(first_mp); 9475 if (ill != NULL) 9476 ill_refrele(ill); 9477 if (need_decref) 9478 CONN_DEC_REF(connp); 9479 return; 9480 } 9481 ip6h = (ip6_t *)mp->b_rptr; 9482 v6dstp = &ip6h->ip6_dst; 9483 ip6i = (ip6i_t *)ip6h; 9484 } 9485 ip6h = (ip6_t *)&ip6i[1]; 9486 9487 /* 9488 * Advance rptr past the ip6i_t to get ready for 9489 * transmitting the packet. However, if the packet gets 9490 * passed to ip_newroute*_v6 then rptr is moved back so 9491 * that the ip6i_t header can be inspected when the 9492 * packet comes back here after passing through 9493 * ire_add_then_send. 9494 */ 9495 mp->b_rptr = (uchar_t *)ip6h; 9496 9497 /* 9498 * IP6I_ATTACH_IF is set in this function when we had a 9499 * conn and it was either bound to the IPFF_NOFAILOVER address 9500 * or IPV6_BOUND_PIF was set. These options override other 9501 * options that set the ifindex. We come here with 9502 * IP6I_ATTACH_IF set when we can't find the ire and 9503 * ip_newroute_v6 is feeding the packet for second time. 
9504 */ 9505 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9506 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9507 ASSERT(ip6i->ip6i_ifindex != 0); 9508 if (ill != NULL) 9509 ill_refrele(ill); 9510 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9511 NULL, NULL, NULL, NULL, ipst); 9512 if (ill == NULL) { 9513 if (do_outrequests) { 9514 BUMP_MIB(mibptr, 9515 ipIfStatsHCOutRequests); 9516 } 9517 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9518 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9519 ip6i->ip6i_ifindex)); 9520 if (need_decref) 9521 CONN_DEC_REF(connp); 9522 freemsg(first_mp); 9523 return; 9524 } 9525 mibptr = ill->ill_ip_mib; 9526 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9527 /* 9528 * Preserve the index so that when we return 9529 * from IPSEC processing, we know where to 9530 * send the packet. 9531 */ 9532 if (mctl_present) { 9533 ASSERT(io != NULL); 9534 io->ipsec_out_ill_index = 9535 ip6i->ip6i_ifindex; 9536 } 9537 } 9538 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9539 /* 9540 * This is a multipathing probe packet that has 9541 * been delayed in ND resolution. Drop the 9542 * packet for the reasons mentioned in 9543 * nce_queue_mp() 9544 */ 9545 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9546 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9547 freemsg(first_mp); 9548 ill_refrele(ill); 9549 if (need_decref) 9550 CONN_DEC_REF(connp); 9551 return; 9552 } 9553 } 9554 } 9555 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9556 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9557 9558 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9559 if (secpolicy_net_rawaccess(cr) != 0) { 9560 /* 9561 * Use IPCL_ZONEID to honor SO_ALLZONES. 9562 */ 9563 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9564 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9565 NULL, connp != NULL ? 
9566 IPCL_ZONEID(connp) : zoneid, NULL, 9567 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9568 if (ire == NULL) { 9569 if (do_outrequests) 9570 BUMP_MIB(mibptr, 9571 ipIfStatsHCOutRequests); 9572 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9573 ip1dbg(("ip_wput_v6: bad source " 9574 "addr\n")); 9575 freemsg(first_mp); 9576 if (ill != NULL) 9577 ill_refrele(ill); 9578 if (need_decref) 9579 CONN_DEC_REF(connp); 9580 return; 9581 } 9582 ire_refrele(ire); 9583 } 9584 /* No need to verify again when using ip_newroute */ 9585 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9586 } 9587 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9588 /* 9589 * Make sure they match since ip_newroute*_v6 etc might 9590 * (unknown to them) inspect ip6i_nexthop when 9591 * they think they access ip6_dst. 9592 */ 9593 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9594 } 9595 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9596 cksum_request = 1; 9597 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9598 cksum_request = ip6i->ip6i_checksum_off; 9599 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9600 unspec_src = 1; 9601 9602 if (do_outrequests && ill != NULL) { 9603 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9604 do_outrequests = B_FALSE; 9605 } 9606 /* 9607 * Store ip6i_t info that we need after we come back 9608 * from IPSEC processing. 9609 */ 9610 if (mctl_present) { 9611 ASSERT(io != NULL); 9612 io->ipsec_out_unspec_src = unspec_src; 9613 } 9614 } 9615 if (connp != NULL && connp->conn_dontroute) 9616 ip6h->ip6_hops = 1; 9617 9618 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9619 goto ipv6multicast; 9620 9621 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9622 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9623 ill_t *conn_outgoing_pill; 9624 9625 conn_outgoing_pill = conn_get_held_ill(connp, 9626 &connp->conn_outgoing_pill, &err); 9627 if (err == ILL_LOOKUP_FAILED) { 9628 if (ill != NULL) 9629 ill_refrele(ill); 9630 if (need_decref) 9631 CONN_DEC_REF(connp); 9632 freemsg(first_mp); 9633 return; 9634 } 9635 if (conn_outgoing_pill != NULL) { 9636 if (ill != NULL) 9637 ill_refrele(ill); 9638 ill = conn_outgoing_pill; 9639 attach_if = B_TRUE; 9640 match_flags = MATCH_IRE_ILL; 9641 mibptr = ill->ill_ip_mib; 9642 9643 /* 9644 * Check if we need an ire that will not be 9645 * looked up by anybody else i.e. HIDDEN. 9646 */ 9647 if (ill_is_probeonly(ill)) 9648 match_flags |= MATCH_IRE_MARK_HIDDEN; 9649 goto send_from_ill; 9650 } 9651 } 9652 9653 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9654 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9655 ill_t *conn_nofailover_ill; 9656 9657 conn_nofailover_ill = conn_get_held_ill(connp, 9658 &connp->conn_nofailover_ill, &err); 9659 if (err == ILL_LOOKUP_FAILED) { 9660 if (ill != NULL) 9661 ill_refrele(ill); 9662 if (need_decref) 9663 CONN_DEC_REF(connp); 9664 freemsg(first_mp); 9665 return; 9666 } 9667 if (conn_nofailover_ill != NULL) { 9668 if (ill != NULL) 9669 ill_refrele(ill); 9670 ill = conn_nofailover_ill; 9671 attach_if = B_TRUE; 9672 /* 9673 * Assumes that ipc_nofailover_ill is used only for 9674 * multipathing probe packets. These packets are better 9675 * dropped, if they are delayed in ND resolution, for 9676 * the reasons described in nce_queue_mp(). 9677 * IP6I_DROP_IFDELAYED will be set later on in this 9678 * function for this packet. 9679 */ 9680 drop_if_delayed = B_TRUE; 9681 match_flags = MATCH_IRE_ILL; 9682 mibptr = ill->ill_ip_mib; 9683 9684 /* 9685 * Check if we need an ire that will not be 9686 * looked up by anybody else i.e. HIDDEN. 
9687 */ 9688 if (ill_is_probeonly(ill)) 9689 match_flags |= MATCH_IRE_MARK_HIDDEN; 9690 goto send_from_ill; 9691 } 9692 } 9693 9694 /* 9695 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9696 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9697 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9698 */ 9699 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9700 ASSERT(ip6i->ip6i_ifindex != 0); 9701 attach_if = B_TRUE; 9702 ASSERT(ill != NULL); 9703 match_flags = MATCH_IRE_ILL; 9704 9705 /* 9706 * Check if we need an ire that will not be 9707 * looked up by anybody else i.e. HIDDEN. 9708 */ 9709 if (ill_is_probeonly(ill)) 9710 match_flags |= MATCH_IRE_MARK_HIDDEN; 9711 goto send_from_ill; 9712 } 9713 9714 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9715 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9716 ASSERT(ill != NULL); 9717 goto send_from_ill; 9718 } 9719 9720 /* 9721 * 4. If q is an ill queue and (link local or multicast destination) 9722 * then use that ill. 9723 */ 9724 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9725 goto send_from_ill; 9726 } 9727 9728 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9729 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9730 ill_t *conn_outgoing_ill; 9731 9732 conn_outgoing_ill = conn_get_held_ill(connp, 9733 &connp->conn_outgoing_ill, &err); 9734 if (err == ILL_LOOKUP_FAILED) { 9735 if (ill != NULL) 9736 ill_refrele(ill); 9737 if (need_decref) 9738 CONN_DEC_REF(connp); 9739 freemsg(first_mp); 9740 return; 9741 } 9742 if (ill != NULL) 9743 ill_refrele(ill); 9744 ill = conn_outgoing_ill; 9745 mibptr = ill->ill_ip_mib; 9746 goto send_from_ill; 9747 } 9748 9749 /* 9750 * 6. For unicast: Just do an IRE lookup for the best match. 9751 * If we get here for a link-local address it is rather random 9752 * what interface we pick on a multihomed host. 
9753 * *If* there is an IRE_CACHE (and the link-local address 9754 * isn't duplicated on multi links) this will find the IRE_CACHE. 9755 * Otherwise it will use one of the matching IRE_INTERFACE routes 9756 * for the link-local prefix. Hence, applications 9757 * *should* be encouraged to specify an outgoing interface when sending 9758 * to a link local address. 9759 */ 9760 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9761 !connp->conn_fully_bound)) { 9762 /* 9763 * We cache IRE_CACHEs to avoid lookups. We don't do 9764 * this for the tcp global queue and listen end point 9765 * as it does not really have a real destination to 9766 * talk to. 9767 */ 9768 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9769 ipst); 9770 } else { 9771 /* 9772 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9773 * grab a lock here to check for CONDEMNED as it is okay 9774 * to send a packet or two with the IRE_CACHE that is going 9775 * away. 9776 */ 9777 mutex_enter(&connp->conn_lock); 9778 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9779 if (ire != NULL && 9780 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9781 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9782 9783 IRE_REFHOLD(ire); 9784 mutex_exit(&connp->conn_lock); 9785 9786 } else { 9787 boolean_t cached = B_FALSE; 9788 9789 connp->conn_ire_cache = NULL; 9790 mutex_exit(&connp->conn_lock); 9791 /* Release the old ire */ 9792 if (ire != NULL && sctp_ire == NULL) 9793 IRE_REFRELE_NOTR(ire); 9794 9795 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9796 MBLK_GETLABEL(mp), ipst); 9797 if (ire != NULL) { 9798 IRE_REFHOLD_NOTR(ire); 9799 9800 mutex_enter(&connp->conn_lock); 9801 if (CONN_CACHE_IRE(connp) && 9802 (connp->conn_ire_cache == NULL)) { 9803 rw_enter(&ire->ire_bucket->irb_lock, 9804 RW_READER); 9805 if (!(ire->ire_marks & 9806 IRE_MARK_CONDEMNED)) { 9807 connp->conn_ire_cache = ire; 9808 cached = B_TRUE; 9809 } 9810 rw_exit(&ire->ire_bucket->irb_lock); 9811 } 9812 mutex_exit(&connp->conn_lock); 9813 9814 /* 9815 * We can continue to use the ire but since it 9816 * was not cached, we should drop the extra 9817 * reference. 9818 */ 9819 if (!cached) 9820 IRE_REFRELE_NOTR(ire); 9821 } 9822 } 9823 } 9824 9825 if (ire != NULL) { 9826 if (do_outrequests) { 9827 /* Handle IRE_LOCAL's that might appear here */ 9828 if (ire->ire_type == IRE_CACHE) { 9829 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9830 ill_ip_mib; 9831 } else { 9832 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9833 } 9834 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9835 } 9836 ASSERT(!attach_if); 9837 9838 /* 9839 * Check if the ire has the RTF_MULTIRT flag, inherited 9840 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9841 */ 9842 if (ire->ire_flags & RTF_MULTIRT) { 9843 /* 9844 * Force hop limit of multirouted packets if required. 9845 * The hop limit of such packets is bounded by the 9846 * ip_multirt_ttl ndd variable. 9847 * NDP packets must have a hop limit of 255; don't 9848 * change the hop limit in that case. 
9849 */ 9850 if ((ipst->ips_ip_multirt_ttl > 0) && 9851 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9852 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9853 if (ip_debug > 3) { 9854 ip2dbg(("ip_wput_v6: forcing multirt " 9855 "hop limit to %d (was %d) ", 9856 ipst->ips_ip_multirt_ttl, 9857 ip6h->ip6_hops)); 9858 pr_addr_dbg("v6dst %s\n", AF_INET6, 9859 &ire->ire_addr_v6); 9860 } 9861 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9862 } 9863 9864 /* 9865 * We look at this point if there are pending 9866 * unresolved routes. ire_multirt_need_resolve_v6() 9867 * checks in O(n) that all IRE_OFFSUBNET ire 9868 * entries for the packet's destination and 9869 * flagged RTF_MULTIRT are currently resolved. 9870 * If some remain unresolved, we do a copy 9871 * of the current message. It will be used 9872 * to initiate additional route resolutions. 9873 */ 9874 multirt_need_resolve = 9875 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9876 MBLK_GETLABEL(first_mp), ipst); 9877 ip2dbg(("ip_wput_v6: ire %p, " 9878 "multirt_need_resolve %d, first_mp %p\n", 9879 (void *)ire, multirt_need_resolve, 9880 (void *)first_mp)); 9881 if (multirt_need_resolve) { 9882 copy_mp = copymsg(first_mp); 9883 if (copy_mp != NULL) { 9884 MULTIRT_DEBUG_TAG(copy_mp); 9885 } 9886 } 9887 } 9888 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9889 connp, caller, 0, ip6i_flags, zoneid); 9890 if (need_decref) { 9891 CONN_DEC_REF(connp); 9892 connp = NULL; 9893 } 9894 IRE_REFRELE(ire); 9895 9896 /* 9897 * Try to resolve another multiroute if 9898 * ire_multirt_need_resolve_v6() deemed it necessary. 9899 * copy_mp will be consumed (sent or freed) by 9900 * ip_newroute_v6(). 
9901 */ 9902 if (copy_mp != NULL) { 9903 if (mctl_present) { 9904 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9905 } else { 9906 ip6h = (ip6_t *)copy_mp->b_rptr; 9907 } 9908 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9909 &ip6h->ip6_src, NULL, zoneid, ipst); 9910 } 9911 if (ill != NULL) 9912 ill_refrele(ill); 9913 return; 9914 } 9915 9916 /* 9917 * No full IRE for this destination. Send it to 9918 * ip_newroute_v6 to see if anything else matches. 9919 * Mark this packet as having originated on this 9920 * machine. 9921 * Update rptr if there was an ip6i_t header. 9922 */ 9923 mp->b_prev = NULL; 9924 mp->b_next = NULL; 9925 if (ip6i != NULL) 9926 mp->b_rptr -= sizeof (ip6i_t); 9927 9928 if (unspec_src) { 9929 if (ip6i == NULL) { 9930 /* 9931 * Add ip6i_t header to carry unspec_src 9932 * until the packet comes back in ip_wput_v6. 9933 */ 9934 mp = ip_add_info_v6(mp, NULL, v6dstp); 9935 if (mp == NULL) { 9936 if (do_outrequests) 9937 BUMP_MIB(mibptr, 9938 ipIfStatsHCOutRequests); 9939 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9940 if (mctl_present) 9941 freeb(first_mp); 9942 if (ill != NULL) 9943 ill_refrele(ill); 9944 if (need_decref) 9945 CONN_DEC_REF(connp); 9946 return; 9947 } 9948 ip6i = (ip6i_t *)mp->b_rptr; 9949 9950 if (mctl_present) { 9951 ASSERT(first_mp != mp); 9952 first_mp->b_cont = mp; 9953 } else { 9954 first_mp = mp; 9955 } 9956 9957 if ((mp->b_wptr - (uchar_t *)ip6i) == 9958 sizeof (ip6i_t)) { 9959 /* 9960 * ndp_resolver called from ip_newroute_v6 9961 * expects pulled up message. 
9962 */ 9963 if (!pullupmsg(mp, -1)) { 9964 ip1dbg(("ip_wput_v6: pullupmsg" 9965 " failed\n")); 9966 if (do_outrequests) { 9967 BUMP_MIB(mibptr, 9968 ipIfStatsHCOutRequests); 9969 } 9970 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9971 freemsg(first_mp); 9972 if (ill != NULL) 9973 ill_refrele(ill); 9974 if (need_decref) 9975 CONN_DEC_REF(connp); 9976 return; 9977 } 9978 ip6i = (ip6i_t *)mp->b_rptr; 9979 } 9980 ip6h = (ip6_t *)&ip6i[1]; 9981 v6dstp = &ip6h->ip6_dst; 9982 } 9983 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9984 if (mctl_present) { 9985 ASSERT(io != NULL); 9986 io->ipsec_out_unspec_src = unspec_src; 9987 } 9988 } 9989 if (do_outrequests) 9990 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9991 if (need_decref) 9992 CONN_DEC_REF(connp); 9993 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9994 if (ill != NULL) 9995 ill_refrele(ill); 9996 return; 9997 9998 9999 /* 10000 * Handle multicast packets with or without an conn. 10001 * Assumes that the transports set ip6_hops taking 10002 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10003 * into account. 10004 */ 10005 ipv6multicast: 10006 ip2dbg(("ip_wput_v6: multicast\n")); 10007 10008 /* 10009 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10010 * 2. If conn_nofailover_ill is set then use that ill. 10011 * 10012 * Hold the conn_lock till we refhold the ill of interest that is 10013 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10014 * while holding any locks, postpone the refrele until after the 10015 * conn_lock is dropped. 
10016 */ 10017 if (connp != NULL) { 10018 mutex_enter(&connp->conn_lock); 10019 conn_lock_held = B_TRUE; 10020 } else { 10021 conn_lock_held = B_FALSE; 10022 } 10023 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10024 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10025 if (err == ILL_LOOKUP_FAILED) { 10026 ip1dbg(("ip_output_v6: multicast" 10027 " conn_outgoing_pill no ipif\n")); 10028 multicast_discard: 10029 ASSERT(saved_ill == NULL); 10030 if (conn_lock_held) 10031 mutex_exit(&connp->conn_lock); 10032 if (ill != NULL) 10033 ill_refrele(ill); 10034 freemsg(first_mp); 10035 if (do_outrequests) 10036 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10037 if (need_decref) 10038 CONN_DEC_REF(connp); 10039 return; 10040 } 10041 saved_ill = ill; 10042 ill = connp->conn_outgoing_pill; 10043 attach_if = B_TRUE; 10044 match_flags = MATCH_IRE_ILL; 10045 mibptr = ill->ill_ip_mib; 10046 10047 /* 10048 * Check if we need an ire that will not be 10049 * looked up by anybody else i.e. HIDDEN. 10050 */ 10051 if (ill_is_probeonly(ill)) 10052 match_flags |= MATCH_IRE_MARK_HIDDEN; 10053 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10054 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10055 if (err == ILL_LOOKUP_FAILED) { 10056 ip1dbg(("ip_output_v6: multicast" 10057 " conn_nofailover_ill no ipif\n")); 10058 goto multicast_discard; 10059 } 10060 saved_ill = ill; 10061 ill = connp->conn_nofailover_ill; 10062 attach_if = B_TRUE; 10063 match_flags = MATCH_IRE_ILL; 10064 10065 /* 10066 * Check if we need an ire that will not be 10067 * looked up by anybody else i.e. HIDDEN. 10068 */ 10069 if (ill_is_probeonly(ill)) 10070 match_flags |= MATCH_IRE_MARK_HIDDEN; 10071 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10072 /* 10073 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10074 * we should have an ip6i_t with IP6I_ATTACH_IF if 10075 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10076 * used on this endpoint. 10077 */ 10078 ASSERT(ip6i->ip6i_ifindex != 0); 10079 attach_if = B_TRUE; 10080 ASSERT(ill != NULL); 10081 match_flags = MATCH_IRE_ILL; 10082 10083 /* 10084 * Check if we need an ire that will not be 10085 * looked up by anybody else i.e. HIDDEN. 10086 */ 10087 if (ill_is_probeonly(ill)) 10088 match_flags |= MATCH_IRE_MARK_HIDDEN; 10089 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10090 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10091 10092 ASSERT(ill != NULL); 10093 } else if (ill != NULL) { 10094 /* 10095 * 4. If q is an ill queue and (link local or multicast 10096 * destination) then use that ill. 10097 * We don't need the ipif initialization here. 10098 * This useless assert below is just to prevent lint from 10099 * reporting a null body if statement. 10100 */ 10101 ASSERT(ill != NULL); 10102 } else if (connp != NULL) { 10103 /* 10104 * 5. If IPV6_BOUND_IF has been set use that ill. 10105 * 10106 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10107 * Otherwise look for the best IRE match for the unspecified 10108 * group to determine the ill. 10109 * 10110 * conn_multicast_ill is used for only IPv6 packets. 10111 * conn_multicast_ipif is used for only IPv4 packets. 10112 * Thus a PF_INET6 socket send both IPv4 and IPv6 10113 * multicast packets using different IP*_MULTICAST_IF 10114 * interfaces. 
10115 */ 10116 if (connp->conn_outgoing_ill != NULL) { 10117 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10118 if (err == ILL_LOOKUP_FAILED) { 10119 ip1dbg(("ip_output_v6: multicast" 10120 " conn_outgoing_ill no ipif\n")); 10121 goto multicast_discard; 10122 } 10123 ill = connp->conn_outgoing_ill; 10124 } else if (connp->conn_multicast_ill != NULL) { 10125 err = ill_check_and_refhold(connp->conn_multicast_ill); 10126 if (err == ILL_LOOKUP_FAILED) { 10127 ip1dbg(("ip_output_v6: multicast" 10128 " conn_multicast_ill no ipif\n")); 10129 goto multicast_discard; 10130 } 10131 ill = connp->conn_multicast_ill; 10132 } else { 10133 mutex_exit(&connp->conn_lock); 10134 conn_lock_held = B_FALSE; 10135 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10136 if (ipif == NULL) { 10137 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10138 goto multicast_discard; 10139 } 10140 /* 10141 * We have a ref to this ipif, so we can safely 10142 * access ipif_ill. 10143 */ 10144 ill = ipif->ipif_ill; 10145 mutex_enter(&ill->ill_lock); 10146 if (!ILL_CAN_LOOKUP(ill)) { 10147 mutex_exit(&ill->ill_lock); 10148 ipif_refrele(ipif); 10149 ill = NULL; 10150 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10151 goto multicast_discard; 10152 } 10153 ill_refhold_locked(ill); 10154 mutex_exit(&ill->ill_lock); 10155 ipif_refrele(ipif); 10156 /* 10157 * Save binding until IPV6_MULTICAST_IF 10158 * changes it 10159 */ 10160 mutex_enter(&connp->conn_lock); 10161 connp->conn_multicast_ill = ill; 10162 connp->conn_orig_multicast_ifindex = 10163 ill->ill_phyint->phyint_ifindex; 10164 mutex_exit(&connp->conn_lock); 10165 } 10166 } 10167 if (conn_lock_held) 10168 mutex_exit(&connp->conn_lock); 10169 10170 if (saved_ill != NULL) 10171 ill_refrele(saved_ill); 10172 10173 ASSERT(ill != NULL); 10174 /* 10175 * For multicast loopback interfaces replace the multicast address 10176 * with a unicast address for the ire lookup. 
10177 */ 10178 if (IS_LOOPBACK(ill)) 10179 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10180 10181 mibptr = ill->ill_ip_mib; 10182 if (do_outrequests) { 10183 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10184 do_outrequests = B_FALSE; 10185 } 10186 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10187 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10188 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10189 10190 /* 10191 * As we may lose the conn by the time we reach ip_wput_ire_v6 10192 * we copy conn_multicast_loop and conn_dontroute on to an 10193 * ipsec_out. In case if this datagram goes out secure, 10194 * we need the ill_index also. Copy that also into the 10195 * ipsec_out. 10196 */ 10197 if (mctl_present) { 10198 io = (ipsec_out_t *)first_mp->b_rptr; 10199 ASSERT(first_mp->b_datap->db_type == M_CTL); 10200 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10201 } else { 10202 ASSERT(mp == first_mp); 10203 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10204 NULL) { 10205 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10206 freemsg(mp); 10207 if (ill != NULL) 10208 ill_refrele(ill); 10209 if (need_decref) 10210 CONN_DEC_REF(connp); 10211 return; 10212 } 10213 io = (ipsec_out_t *)first_mp->b_rptr; 10214 /* This is not a secure packet */ 10215 io->ipsec_out_secure = B_FALSE; 10216 io->ipsec_out_use_global_policy = B_TRUE; 10217 io->ipsec_out_zoneid = 10218 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10219 first_mp->b_cont = mp; 10220 mctl_present = B_TRUE; 10221 } 10222 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10223 io->ipsec_out_unspec_src = unspec_src; 10224 if (connp != NULL) 10225 io->ipsec_out_dontroute = connp->conn_dontroute; 10226 10227 send_from_ill: 10228 ASSERT(ill != NULL); 10229 ASSERT(mibptr == ill->ill_ip_mib); 10230 if (do_outrequests) { 10231 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10232 do_outrequests = B_FALSE; 10233 } 10234 10235 if (io != NULL) 10236 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10237 10238 /* 10239 * When a specific ill is specified (using IPV6_PKTINFO, 10240 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10241 * on routing entries (ftable and ctable) that have a matching 10242 * ire->ire_ipif->ipif_ill. Thus this can only be used 10243 * for destinations that are on-link for the specific ill 10244 * and that can appear on multiple links. Thus it is useful 10245 * for multicast destinations, link-local destinations, and 10246 * at some point perhaps for site-local destinations (if the 10247 * node sits at a site boundary). 10248 * We create the cache entries in the regular ctable since 10249 * it can not "confuse" things for other destinations. 10250 * table. 10251 * 10252 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10253 * It is used only when ire_cache_lookup is used above. 10254 */ 10255 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10256 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10257 if (ire != NULL) { 10258 /* 10259 * Check if the ire has the RTF_MULTIRT flag, inherited 10260 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10261 */ 10262 if (ire->ire_flags & RTF_MULTIRT) { 10263 /* 10264 * Force hop limit of multirouted packets if required. 10265 * The hop limit of such packets is bounded by the 10266 * ip_multirt_ttl ndd variable. 
10267 * NDP packets must have a hop limit of 255; don't 10268 * change the hop limit in that case. 10269 */ 10270 if ((ipst->ips_ip_multirt_ttl > 0) && 10271 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10272 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10273 if (ip_debug > 3) { 10274 ip2dbg(("ip_wput_v6: forcing multirt " 10275 "hop limit to %d (was %d) ", 10276 ipst->ips_ip_multirt_ttl, 10277 ip6h->ip6_hops)); 10278 pr_addr_dbg("v6dst %s\n", AF_INET6, 10279 &ire->ire_addr_v6); 10280 } 10281 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10282 } 10283 10284 /* 10285 * We look at this point if there are pending 10286 * unresolved routes. ire_multirt_need_resolve_v6() 10287 * checks in O(n) that all IRE_OFFSUBNET ire 10288 * entries for the packet's destination and 10289 * flagged RTF_MULTIRT are currently resolved. 10290 * If some remain unresolved, we make a copy 10291 * of the current message. It will be used 10292 * to initiate additional route resolutions. 10293 */ 10294 multirt_need_resolve = 10295 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10296 MBLK_GETLABEL(first_mp), ipst); 10297 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10298 "multirt_need_resolve %d, first_mp %p\n", 10299 (void *)ire, multirt_need_resolve, 10300 (void *)first_mp)); 10301 if (multirt_need_resolve) { 10302 copy_mp = copymsg(first_mp); 10303 if (copy_mp != NULL) { 10304 MULTIRT_DEBUG_TAG(copy_mp); 10305 } 10306 } 10307 } 10308 10309 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10310 ill->ill_name, (void *)ire, 10311 ill->ill_phyint->phyint_ifindex)); 10312 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10313 connp, caller, 10314 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10315 ip6i_flags, zoneid); 10316 ire_refrele(ire); 10317 if (need_decref) { 10318 CONN_DEC_REF(connp); 10319 connp = NULL; 10320 } 10321 10322 /* 10323 * Try to resolve another multiroute if 10324 * ire_multirt_need_resolve_v6() deemed it necessary. 
10325 * copy_mp will be consumed (sent or freed) by 10326 * ip_newroute_[ipif_]v6(). 10327 */ 10328 if (copy_mp != NULL) { 10329 if (mctl_present) { 10330 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10331 } else { 10332 ip6h = (ip6_t *)copy_mp->b_rptr; 10333 } 10334 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10335 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10336 zoneid, ipst); 10337 if (ipif == NULL) { 10338 ip1dbg(("ip_wput_v6: No ipif for " 10339 "multicast\n")); 10340 MULTIRT_DEBUG_UNTAG(copy_mp); 10341 freemsg(copy_mp); 10342 return; 10343 } 10344 ip_newroute_ipif_v6(q, copy_mp, ipif, 10345 ip6h->ip6_dst, unspec_src, zoneid); 10346 ipif_refrele(ipif); 10347 } else { 10348 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10349 &ip6h->ip6_src, ill, zoneid, ipst); 10350 } 10351 } 10352 ill_refrele(ill); 10353 return; 10354 } 10355 if (need_decref) { 10356 CONN_DEC_REF(connp); 10357 connp = NULL; 10358 } 10359 10360 /* Update rptr if there was an ip6i_t header. */ 10361 if (ip6i != NULL) 10362 mp->b_rptr -= sizeof (ip6i_t); 10363 if (unspec_src || attach_if) { 10364 if (ip6i == NULL) { 10365 /* 10366 * Add ip6i_t header to carry unspec_src 10367 * or attach_if until the packet comes back in 10368 * ip_wput_v6. 10369 */ 10370 if (mctl_present) { 10371 first_mp->b_cont = 10372 ip_add_info_v6(mp, NULL, v6dstp); 10373 mp = first_mp->b_cont; 10374 if (mp == NULL) 10375 freeb(first_mp); 10376 } else { 10377 first_mp = mp = ip_add_info_v6(mp, NULL, 10378 v6dstp); 10379 } 10380 if (mp == NULL) { 10381 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10382 ill_refrele(ill); 10383 return; 10384 } 10385 ip6i = (ip6i_t *)mp->b_rptr; 10386 if ((mp->b_wptr - (uchar_t *)ip6i) == 10387 sizeof (ip6i_t)) { 10388 /* 10389 * ndp_resolver called from ip_newroute_v6 10390 * expects a pulled up message. 
10391 */ 10392 if (!pullupmsg(mp, -1)) { 10393 ip1dbg(("ip_wput_v6: pullupmsg" 10394 " failed\n")); 10395 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10396 freemsg(first_mp); 10397 return; 10398 } 10399 ip6i = (ip6i_t *)mp->b_rptr; 10400 } 10401 ip6h = (ip6_t *)&ip6i[1]; 10402 v6dstp = &ip6h->ip6_dst; 10403 } 10404 if (unspec_src) 10405 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10406 if (attach_if) { 10407 /* 10408 * Bind to nofailover/BOUND_PIF overrides ifindex. 10409 */ 10410 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10411 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10412 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10413 if (drop_if_delayed) { 10414 /* This is a multipathing probe packet */ 10415 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10416 } 10417 } 10418 if (mctl_present) { 10419 ASSERT(io != NULL); 10420 io->ipsec_out_unspec_src = unspec_src; 10421 } 10422 } 10423 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10424 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10425 unspec_src, zoneid); 10426 } else { 10427 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10428 zoneid, ipst); 10429 } 10430 ill_refrele(ill); 10431 return; 10432 10433 notv6: 10434 /* FIXME?: assume the caller calls the right version of ip_output? */ 10435 if (q->q_next == NULL) { 10436 connp = Q_TO_CONN(q); 10437 10438 /* 10439 * We can change conn_send for all types of conn, even 10440 * though only TCP uses it right now. 10441 * FIXME: sctp could use conn_send but doesn't currently. 10442 */ 10443 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10444 } 10445 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10446 (void) ip_output(arg, first_mp, arg2, caller); 10447 if (ill != NULL) 10448 ill_refrele(ill); 10449 } 10450 10451 /* 10452 * If this is a conn_t queue, then we pass in the conn. This includes the 10453 * zoneid. 10454 * Otherwise, this is a message for an ill_t queue, 10455 * in which case we use the global zoneid since those are all part of 10456 * the global zone. 
10457 */ 10458 void 10459 ip_wput_v6(queue_t *q, mblk_t *mp) 10460 { 10461 if (CONN_Q(q)) 10462 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10463 else 10464 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10465 } 10466 10467 static void 10468 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10469 { 10470 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10471 io->ipsec_out_attach_if = B_TRUE; 10472 io->ipsec_out_ill_index = attach_index; 10473 } 10474 10475 /* 10476 * NULL send-to queue - packet is to be delivered locally. 10477 */ 10478 void 10479 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10480 ire_t *ire, int fanout_flags) 10481 { 10482 uint32_t ports; 10483 mblk_t *mp = first_mp, *first_mp1; 10484 boolean_t mctl_present; 10485 uint8_t nexthdr; 10486 uint16_t hdr_length; 10487 ipsec_out_t *io; 10488 mib2_ipIfStatsEntry_t *mibptr; 10489 ilm_t *ilm; 10490 uint_t nexthdr_offset; 10491 ip_stack_t *ipst = ill->ill_ipst; 10492 10493 if (DB_TYPE(mp) == M_CTL) { 10494 io = (ipsec_out_t *)mp->b_rptr; 10495 if (!io->ipsec_out_secure) { 10496 mp = mp->b_cont; 10497 freeb(first_mp); 10498 first_mp = mp; 10499 mctl_present = B_FALSE; 10500 } else { 10501 mctl_present = B_TRUE; 10502 mp = first_mp->b_cont; 10503 ipsec_out_to_in(first_mp); 10504 } 10505 } else { 10506 mctl_present = B_FALSE; 10507 } 10508 10509 /* 10510 * Remove reachability confirmation bit from version field 10511 * before passing the packet on to any firewall hooks or 10512 * looping back the packet. 
10513 */ 10514 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10515 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10516 10517 DTRACE_PROBE4(ip6__loopback__in__start, 10518 ill_t *, ill, ill_t *, NULL, 10519 ip6_t *, ip6h, mblk_t *, first_mp); 10520 10521 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10522 ipst->ips_ipv6firewall_loopback_in, 10523 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10524 10525 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10526 10527 if (first_mp == NULL) 10528 return; 10529 10530 nexthdr = ip6h->ip6_nxt; 10531 mibptr = ill->ill_ip_mib; 10532 10533 /* Fastpath */ 10534 switch (nexthdr) { 10535 case IPPROTO_TCP: 10536 case IPPROTO_UDP: 10537 case IPPROTO_ICMPV6: 10538 case IPPROTO_SCTP: 10539 hdr_length = IPV6_HDR_LEN; 10540 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10541 (uchar_t *)ip6h); 10542 break; 10543 default: { 10544 uint8_t *nexthdrp; 10545 10546 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10547 &hdr_length, &nexthdrp)) { 10548 /* Malformed packet */ 10549 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10550 freemsg(first_mp); 10551 return; 10552 } 10553 nexthdr = *nexthdrp; 10554 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10555 break; 10556 } 10557 } 10558 10559 UPDATE_OB_PKT_COUNT(ire); 10560 ire->ire_last_used_time = lbolt; 10561 10562 switch (nexthdr) { 10563 case IPPROTO_TCP: 10564 if (DB_TYPE(mp) == M_DATA) { 10565 /* 10566 * M_DATA mblk, so init mblk (chain) for 10567 * no struio(). 
10568 */ 10569 mblk_t *mp1 = mp; 10570 10571 do { 10572 mp1->b_datap->db_struioflag = 0; 10573 } while ((mp1 = mp1->b_cont) != NULL); 10574 } 10575 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10576 TCP_PORTS_OFFSET); 10577 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10578 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10579 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10580 hdr_length, mctl_present, ire->ire_zoneid); 10581 return; 10582 10583 case IPPROTO_UDP: 10584 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10585 UDP_PORTS_OFFSET); 10586 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10587 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10588 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10589 return; 10590 10591 case IPPROTO_SCTP: 10592 { 10593 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10594 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10595 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10596 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10597 return; 10598 } 10599 case IPPROTO_ICMPV6: { 10600 icmp6_t *icmp6; 10601 10602 /* check for full IPv6+ICMPv6 header */ 10603 if ((mp->b_wptr - mp->b_rptr) < 10604 (hdr_length + ICMP6_MINLEN)) { 10605 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10606 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10607 " failed\n")); 10608 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10609 freemsg(first_mp); 10610 return; 10611 } 10612 ip6h = (ip6_t *)mp->b_rptr; 10613 } 10614 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10615 10616 /* Update output mib stats */ 10617 icmp_update_out_mib_v6(ill, icmp6); 10618 10619 /* Check variable for testing applications */ 10620 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10621 freemsg(first_mp); 10622 return; 10623 } 10624 /* 10625 * Assume that there is always at least one conn for 10626 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10627 * where there is no conn. 
10628 */ 10629 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10630 !IS_LOOPBACK(ill)) { 10631 /* 10632 * In the multicast case, applications may have 10633 * joined the group from different zones, so we 10634 * need to deliver the packet to each of them. 10635 * Loop through the multicast memberships 10636 * structures (ilm) on the receive ill and send 10637 * a copy of the packet up each matching one. 10638 * However, we don't do this for multicasts sent 10639 * on the loopback interface (PHYI_LOOPBACK flag 10640 * set) as they must stay in the sender's zone. 10641 */ 10642 ILM_WALKER_HOLD(ill); 10643 for (ilm = ill->ill_ilm; ilm != NULL; 10644 ilm = ilm->ilm_next) { 10645 if (ilm->ilm_flags & ILM_DELETED) 10646 continue; 10647 if (!IN6_ARE_ADDR_EQUAL( 10648 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10649 continue; 10650 if ((fanout_flags & 10651 IP_FF_NO_MCAST_LOOP) && 10652 ilm->ilm_zoneid == ire->ire_zoneid) 10653 continue; 10654 if (!ipif_lookup_zoneid(ill, 10655 ilm->ilm_zoneid, IPIF_UP, NULL)) 10656 continue; 10657 10658 first_mp1 = ip_copymsg(first_mp); 10659 if (first_mp1 == NULL) 10660 continue; 10661 icmp_inbound_v6(q, first_mp1, ill, 10662 hdr_length, mctl_present, 10663 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10664 NULL); 10665 } 10666 ILM_WALKER_RELE(ill); 10667 } else { 10668 first_mp1 = ip_copymsg(first_mp); 10669 if (first_mp1 != NULL) 10670 icmp_inbound_v6(q, first_mp1, ill, 10671 hdr_length, mctl_present, 10672 IP6_NO_IPPOLICY, ire->ire_zoneid, 10673 NULL); 10674 } 10675 } 10676 /* FALLTHRU */ 10677 default: { 10678 /* 10679 * Handle protocols with which IPv6 is less intimate. 10680 */ 10681 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10682 10683 /* 10684 * Enable sending ICMP for "Unknown" nexthdr 10685 * case. i.e. where we did not FALLTHRU from 10686 * IPPROTO_ICMPV6 processing case above. 
10687 */ 10688 if (nexthdr != IPPROTO_ICMPV6) 10689 fanout_flags |= IP_FF_SEND_ICMP; 10690 /* 10691 * Note: There can be more than one stream bound 10692 * to a particular protocol. When this is the case, 10693 * each one gets a copy of any incoming packets. 10694 */ 10695 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10696 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10697 mctl_present, ire->ire_zoneid); 10698 return; 10699 } 10700 } 10701 } 10702 10703 /* 10704 * Send packet using IRE. 10705 * Checksumming is controlled by cksum_request: 10706 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10707 * 1 => Skip TCP/UDP/SCTP checksum 10708 * Otherwise => checksum_request contains insert offset for checksum 10709 * 10710 * Assumes that the following set of headers appear in the first 10711 * mblk: 10712 * ip6_t 10713 * Any extension headers 10714 * TCP/UDP/SCTP header (if present) 10715 * The routine can handle an ICMPv6 header that is not in the first mblk. 10716 * 10717 * NOTE : This function does not ire_refrele the ire passed in as the 10718 * argument unlike ip_wput_ire where the REFRELE is done. 10719 * Refer to ip_wput_ire for more on this. 10720 */ 10721 static void 10722 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10723 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10724 zoneid_t zoneid) 10725 { 10726 ip6_t *ip6h; 10727 uint8_t nexthdr; 10728 uint16_t hdr_length; 10729 uint_t reachable = 0x0; 10730 ill_t *ill; 10731 mib2_ipIfStatsEntry_t *mibptr; 10732 mblk_t *first_mp; 10733 boolean_t mctl_present; 10734 ipsec_out_t *io; 10735 boolean_t conn_dontroute; /* conn value for multicast */ 10736 boolean_t conn_multicast_loop; /* conn value for multicast */ 10737 boolean_t multicast_forward; /* Should we forward ? 
*/ 10738 int max_frag; 10739 ip_stack_t *ipst = ire->ire_ipst; 10740 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10741 10742 ill = ire_to_ill(ire); 10743 first_mp = mp; 10744 multicast_forward = B_FALSE; 10745 10746 if (mp->b_datap->db_type != M_CTL) { 10747 ip6h = (ip6_t *)first_mp->b_rptr; 10748 } else { 10749 io = (ipsec_out_t *)first_mp->b_rptr; 10750 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10751 /* 10752 * Grab the zone id now because the M_CTL can be discarded by 10753 * ip_wput_ire_parse_ipsec_out() below. 10754 */ 10755 ASSERT(zoneid == io->ipsec_out_zoneid); 10756 ASSERT(zoneid != ALL_ZONES); 10757 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10758 /* 10759 * For the multicast case, ipsec_out carries conn_dontroute and 10760 * conn_multicast_loop as conn may not be available here. We 10761 * need this for multicast loopback and forwarding which is done 10762 * later in the code. 10763 */ 10764 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10765 conn_dontroute = io->ipsec_out_dontroute; 10766 conn_multicast_loop = io->ipsec_out_multicast_loop; 10767 /* 10768 * If conn_dontroute is not set or conn_multicast_loop 10769 * is set, we need to do forwarding/loopback. For 10770 * datagrams from ip_wput_multicast, conn_dontroute is 10771 * set to B_TRUE and conn_multicast_loop is set to 10772 * B_FALSE so that we neither do forwarding nor 10773 * loopback. 10774 */ 10775 if (!conn_dontroute || conn_multicast_loop) 10776 multicast_forward = B_TRUE; 10777 } 10778 } 10779 10780 /* 10781 * If the sender didn't supply the hop limit and there is a default 10782 * unicast hop limit associated with the output interface, we use 10783 * that if the packet is unicast. Interface specific unicast hop 10784 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10785 */ 10786 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10787 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10788 ip6h->ip6_hops = ill->ill_max_hops; 10789 } 10790 10791 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10792 ire->ire_zoneid != ALL_ZONES) { 10793 /* 10794 * When a zone sends a packet to another zone, we try to deliver 10795 * the packet under the same conditions as if the destination 10796 * was a real node on the network. To do so, we look for a 10797 * matching route in the forwarding table. 10798 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10799 * ip_newroute_v6() does. 10800 * Note that IRE_LOCAL are special, since they are used 10801 * when the zoneid doesn't match in some cases. This means that 10802 * we need to handle ipha_src differently since ire_src_addr 10803 * belongs to the receiving zone instead of the sending zone. 10804 * When ip_restrict_interzone_loopback is set, then 10805 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10806 * for loopback between zones when the logical "Ethernet" would 10807 * have looped them back. 
10808 */ 10809 ire_t *src_ire; 10810 10811 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10812 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10813 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10814 if (src_ire != NULL && 10815 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10816 (!ipst->ips_ip_restrict_interzone_loopback || 10817 ire_local_same_ill_group(ire, src_ire))) { 10818 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10819 !unspec_src) { 10820 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10821 } 10822 ire_refrele(src_ire); 10823 } else { 10824 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10825 if (src_ire != NULL) { 10826 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10827 ire_refrele(src_ire); 10828 freemsg(first_mp); 10829 return; 10830 } 10831 ire_refrele(src_ire); 10832 } 10833 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10834 /* Failed */ 10835 freemsg(first_mp); 10836 return; 10837 } 10838 icmp_unreachable_v6(q, first_mp, 10839 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10840 zoneid, ipst); 10841 return; 10842 } 10843 } 10844 10845 if (mp->b_datap->db_type == M_CTL || 10846 ipss->ipsec_outbound_v6_policy_present) { 10847 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10848 connp, unspec_src, zoneid); 10849 if (mp == NULL) { 10850 return; 10851 } 10852 } 10853 10854 first_mp = mp; 10855 if (mp->b_datap->db_type == M_CTL) { 10856 io = (ipsec_out_t *)mp->b_rptr; 10857 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10858 mp = mp->b_cont; 10859 mctl_present = B_TRUE; 10860 } else { 10861 mctl_present = B_FALSE; 10862 } 10863 10864 ip6h = (ip6_t *)mp->b_rptr; 10865 nexthdr = ip6h->ip6_nxt; 10866 mibptr = ill->ill_ip_mib; 10867 10868 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10869 ipif_t *ipif; 10870 10871 /* 10872 * Select the source address using ipif_select_source_v6. 
10873 */ 10874 if (attach_index != 0) { 10875 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10876 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10877 } else { 10878 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10879 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10880 } 10881 if (ipif == NULL) { 10882 if (ip_debug > 2) { 10883 /* ip1dbg */ 10884 pr_addr_dbg("ip_wput_ire_v6: no src for " 10885 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10886 printf("ip_wput_ire_v6: interface name %s\n", 10887 ill->ill_name); 10888 } 10889 freemsg(first_mp); 10890 return; 10891 } 10892 ip6h->ip6_src = ipif->ipif_v6src_addr; 10893 ipif_refrele(ipif); 10894 } 10895 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10896 if ((connp != NULL && connp->conn_multicast_loop) || 10897 !IS_LOOPBACK(ill)) { 10898 ilm_t *ilm; 10899 10900 ILM_WALKER_HOLD(ill); 10901 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10902 ILM_WALKER_RELE(ill); 10903 if (ilm != NULL) { 10904 mblk_t *nmp; 10905 int fanout_flags = 0; 10906 10907 if (connp != NULL && 10908 !connp->conn_multicast_loop) { 10909 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10910 } 10911 ip1dbg(("ip_wput_ire_v6: " 10912 "Loopback multicast\n")); 10913 nmp = ip_copymsg(first_mp); 10914 if (nmp != NULL) { 10915 ip6_t *nip6h; 10916 mblk_t *mp_ip6h; 10917 10918 if (mctl_present) { 10919 nip6h = (ip6_t *) 10920 nmp->b_cont->b_rptr; 10921 mp_ip6h = nmp->b_cont; 10922 } else { 10923 nip6h = (ip6_t *)nmp->b_rptr; 10924 mp_ip6h = nmp; 10925 } 10926 10927 DTRACE_PROBE4( 10928 ip6__loopback__out__start, 10929 ill_t *, NULL, 10930 ill_t *, ill, 10931 ip6_t *, nip6h, 10932 mblk_t *, nmp); 10933 10934 FW_HOOKS6( 10935 ipst->ips_ip6_loopback_out_event, 10936 ipst->ips_ipv6firewall_loopback_out, 10937 NULL, ill, nip6h, nmp, mp_ip6h, 10938 0, ipst); 10939 10940 DTRACE_PROBE1( 10941 ip6__loopback__out__end, 10942 mblk_t *, nmp); 10943 10944 if (nmp != NULL) { 10945 /* 10946 * Deliver locally and to 10947 * every local zone, except 10948 * the 
sending zone when 10949 * IPV6_MULTICAST_LOOP is 10950 * disabled. 10951 */ 10952 ip_wput_local_v6(RD(q), ill, 10953 nip6h, nmp, 10954 ire, fanout_flags); 10955 } 10956 } else { 10957 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10958 ip1dbg(("ip_wput_ire_v6: " 10959 "copymsg failed\n")); 10960 } 10961 } 10962 } 10963 if (ip6h->ip6_hops == 0 || 10964 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10965 IS_LOOPBACK(ill)) { 10966 /* 10967 * Local multicast or just loopback on loopback 10968 * interface. 10969 */ 10970 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10971 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10972 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10973 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10974 freemsg(first_mp); 10975 return; 10976 } 10977 } 10978 10979 if (ire->ire_stq != NULL) { 10980 uint32_t sum; 10981 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10982 ill_phyint->phyint_ifindex; 10983 queue_t *dev_q = ire->ire_stq->q_next; 10984 10985 /* 10986 * non-NULL send-to queue - packet is to be sent 10987 * out an interface. 10988 */ 10989 10990 /* Driver is flow-controlling? */ 10991 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10992 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10993 /* 10994 * Queue packet if we have an conn to give back 10995 * pressure. We can't queue packets intended for 10996 * hardware acceleration since we've tossed that 10997 * state already. If the packet is being fed back 10998 * from ire_send_v6, we don't know the position in 10999 * the queue to enqueue the packet and we discard 11000 * the packet. 11001 */ 11002 if (ipst->ips_ip_output_queue && connp != NULL && 11003 !mctl_present && caller != IRE_SEND) { 11004 if (caller == IP_WSRV) { 11005 connp->conn_did_putbq = 1; 11006 (void) putbq(connp->conn_wq, mp); 11007 conn_drain_insert(connp); 11008 /* 11009 * caller == IP_WSRV implies we are 11010 * the service thread, and the 11011 * queue is already noenabled. 
11012 * The check for canput and 11013 * the putbq is not atomic. 11014 * So we need to check again. 11015 */ 11016 if (canput(dev_q)) 11017 connp->conn_did_putbq = 0; 11018 } else { 11019 (void) putq(connp->conn_wq, mp); 11020 } 11021 return; 11022 } 11023 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11024 freemsg(first_mp); 11025 return; 11026 } 11027 11028 /* 11029 * Look for reachability confirmations from the transport. 11030 */ 11031 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11032 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11033 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11034 if (mctl_present) 11035 io->ipsec_out_reachable = B_TRUE; 11036 } 11037 /* Fastpath */ 11038 switch (nexthdr) { 11039 case IPPROTO_TCP: 11040 case IPPROTO_UDP: 11041 case IPPROTO_ICMPV6: 11042 case IPPROTO_SCTP: 11043 hdr_length = IPV6_HDR_LEN; 11044 break; 11045 default: { 11046 uint8_t *nexthdrp; 11047 11048 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11049 &hdr_length, &nexthdrp)) { 11050 /* Malformed packet */ 11051 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11052 freemsg(first_mp); 11053 return; 11054 } 11055 nexthdr = *nexthdrp; 11056 break; 11057 } 11058 } 11059 11060 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11061 uint16_t *up; 11062 uint16_t *insp; 11063 11064 /* 11065 * The packet header is processed once for all, even 11066 * in the multirouting case. We disable hardware 11067 * checksum if the packet is multirouted, as it will be 11068 * replicated via several interfaces, and not all of 11069 * them may have this capability. 11070 */ 11071 if (cksum_request == 1 && 11072 !(ire->ire_flags & RTF_MULTIRT)) { 11073 /* Skip the transport checksum */ 11074 goto cksum_done; 11075 } 11076 /* 11077 * Do user-configured raw checksum. 
11078 * Compute checksum and insert at offset "cksum_request" 11079 */ 11080 11081 /* check for enough headers for checksum */ 11082 cksum_request += hdr_length; /* offset from rptr */ 11083 if ((mp->b_wptr - mp->b_rptr) < 11084 (cksum_request + sizeof (int16_t))) { 11085 if (!pullupmsg(mp, 11086 cksum_request + sizeof (int16_t))) { 11087 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11088 " failed\n")); 11089 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11090 freemsg(first_mp); 11091 return; 11092 } 11093 ip6h = (ip6_t *)mp->b_rptr; 11094 } 11095 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11096 ASSERT(((uintptr_t)insp & 0x1) == 0); 11097 up = (uint16_t *)&ip6h->ip6_src; 11098 /* 11099 * icmp has placed length and routing 11100 * header adjustment in *insp. 11101 */ 11102 sum = htons(nexthdr) + 11103 up[0] + up[1] + up[2] + up[3] + 11104 up[4] + up[5] + up[6] + up[7] + 11105 up[8] + up[9] + up[10] + up[11] + 11106 up[12] + up[13] + up[14] + up[15]; 11107 sum = (sum & 0xffff) + (sum >> 16); 11108 *insp = IP_CSUM(mp, hdr_length, sum); 11109 } else if (nexthdr == IPPROTO_TCP) { 11110 uint16_t *up; 11111 11112 /* 11113 * Check for full IPv6 header + enough TCP header 11114 * to get at the checksum field. 11115 */ 11116 if ((mp->b_wptr - mp->b_rptr) < 11117 (hdr_length + TCP_CHECKSUM_OFFSET + 11118 TCP_CHECKSUM_SIZE)) { 11119 if (!pullupmsg(mp, hdr_length + 11120 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11121 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11122 " failed\n")); 11123 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11124 freemsg(first_mp); 11125 return; 11126 } 11127 ip6h = (ip6_t *)mp->b_rptr; 11128 } 11129 11130 up = (uint16_t *)&ip6h->ip6_src; 11131 /* 11132 * Note: The TCP module has stored the length value 11133 * into the tcp checksum field, so we don't 11134 * need to explicitly sum it in here. 
11135 */ 11136 sum = up[0] + up[1] + up[2] + up[3] + 11137 up[4] + up[5] + up[6] + up[7] + 11138 up[8] + up[9] + up[10] + up[11] + 11139 up[12] + up[13] + up[14] + up[15]; 11140 11141 /* Fold the initial sum */ 11142 sum = (sum & 0xffff) + (sum >> 16); 11143 11144 up = (uint16_t *)(((uchar_t *)ip6h) + 11145 hdr_length + TCP_CHECKSUM_OFFSET); 11146 11147 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11148 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11149 ire->ire_max_frag, mctl_present, sum); 11150 11151 /* Software checksum? */ 11152 if (DB_CKSUMFLAGS(mp) == 0) { 11153 IP6_STAT(ipst, ip6_out_sw_cksum); 11154 IP6_STAT_UPDATE(ipst, 11155 ip6_tcp_out_sw_cksum_bytes, 11156 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11157 hdr_length); 11158 } 11159 } else if (nexthdr == IPPROTO_UDP) { 11160 uint16_t *up; 11161 11162 /* 11163 * check for full IPv6 header + enough UDP header 11164 * to get at the UDP checksum field 11165 */ 11166 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11167 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11168 if (!pullupmsg(mp, hdr_length + 11169 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11170 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11171 " failed\n")); 11172 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11173 freemsg(first_mp); 11174 return; 11175 } 11176 ip6h = (ip6_t *)mp->b_rptr; 11177 } 11178 up = (uint16_t *)&ip6h->ip6_src; 11179 /* 11180 * Note: The UDP module has stored the length value 11181 * into the udp checksum field, so we don't 11182 * need to explicitly sum it in here. 
11183 */ 11184 sum = up[0] + up[1] + up[2] + up[3] + 11185 up[4] + up[5] + up[6] + up[7] + 11186 up[8] + up[9] + up[10] + up[11] + 11187 up[12] + up[13] + up[14] + up[15]; 11188 11189 /* Fold the initial sum */ 11190 sum = (sum & 0xffff) + (sum >> 16); 11191 11192 up = (uint16_t *)(((uchar_t *)ip6h) + 11193 hdr_length + UDP_CHECKSUM_OFFSET); 11194 11195 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11196 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11197 ire->ire_max_frag, mctl_present, sum); 11198 11199 /* Software checksum? */ 11200 if (DB_CKSUMFLAGS(mp) == 0) { 11201 IP6_STAT(ipst, ip6_out_sw_cksum); 11202 IP6_STAT_UPDATE(ipst, 11203 ip6_udp_out_sw_cksum_bytes, 11204 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11205 hdr_length); 11206 } 11207 } else if (nexthdr == IPPROTO_ICMPV6) { 11208 uint16_t *up; 11209 icmp6_t *icmp6; 11210 11211 /* check for full IPv6+ICMPv6 header */ 11212 if ((mp->b_wptr - mp->b_rptr) < 11213 (hdr_length + ICMP6_MINLEN)) { 11214 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11215 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11216 " failed\n")); 11217 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11218 freemsg(first_mp); 11219 return; 11220 } 11221 ip6h = (ip6_t *)mp->b_rptr; 11222 } 11223 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11224 up = (uint16_t *)&ip6h->ip6_src; 11225 /* 11226 * icmp has placed length and routing 11227 * header adjustment in icmp6_cksum. 
11228 */ 11229 sum = htons(IPPROTO_ICMPV6) + 11230 up[0] + up[1] + up[2] + up[3] + 11231 up[4] + up[5] + up[6] + up[7] + 11232 up[8] + up[9] + up[10] + up[11] + 11233 up[12] + up[13] + up[14] + up[15]; 11234 sum = (sum & 0xffff) + (sum >> 16); 11235 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11236 11237 /* Update output mib stats */ 11238 icmp_update_out_mib_v6(ill, icmp6); 11239 } else if (nexthdr == IPPROTO_SCTP) { 11240 sctp_hdr_t *sctph; 11241 11242 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11243 if (!pullupmsg(mp, hdr_length + 11244 sizeof (*sctph))) { 11245 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11246 " failed\n")); 11247 BUMP_MIB(ill->ill_ip_mib, 11248 ipIfStatsOutDiscards); 11249 freemsg(mp); 11250 return; 11251 } 11252 ip6h = (ip6_t *)mp->b_rptr; 11253 } 11254 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11255 sctph->sh_chksum = 0; 11256 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11257 } 11258 11259 cksum_done: 11260 /* 11261 * We force the insertion of a fragment header using the 11262 * IPH_FRAG_HDR flag in two cases: 11263 * - after reception of an ICMPv6 "packet too big" message 11264 * with a MTU < 1280 (cf. RFC 2460 section 5) 11265 * - for multirouted IPv6 packets, so that the receiver can 11266 * discard duplicates according to their fragment identifier 11267 * 11268 * Two flags modifed from the API can modify this behavior. 11269 * The first is IPV6_USE_MIN_MTU. With this API the user 11270 * can specify how to manage PMTUD for unicast and multicast. 11271 * 11272 * IPV6_DONTFRAG disallows fragmentation. 
11273 */ 11274 max_frag = ire->ire_max_frag; 11275 switch (IP6I_USE_MIN_MTU_API(flags)) { 11276 case IPV6_USE_MIN_MTU_DEFAULT: 11277 case IPV6_USE_MIN_MTU_UNICAST: 11278 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11279 max_frag = IPV6_MIN_MTU; 11280 } 11281 break; 11282 11283 case IPV6_USE_MIN_MTU_NEVER: 11284 max_frag = IPV6_MIN_MTU; 11285 break; 11286 } 11287 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11288 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11289 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11290 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11291 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11292 return; 11293 } 11294 11295 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11296 (mp->b_cont ? msgdsize(mp) : 11297 mp->b_wptr - (uchar_t *)ip6h)) { 11298 ip0dbg(("Packet length mismatch: %d, %ld\n", 11299 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11300 msgdsize(mp))); 11301 freemsg(first_mp); 11302 return; 11303 } 11304 /* Do IPSEC processing first */ 11305 if (mctl_present) { 11306 if (attach_index != 0) 11307 ipsec_out_attach_if(io, attach_index); 11308 ipsec_out_process(q, first_mp, ire, ill_index); 11309 return; 11310 } 11311 ASSERT(mp->b_prev == NULL); 11312 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11313 ntohs(ip6h->ip6_plen) + 11314 IPV6_HDR_LEN, max_frag)); 11315 ASSERT(mp == first_mp); 11316 /* Initiate IPPF processing */ 11317 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11318 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11319 if (mp == NULL) { 11320 return; 11321 } 11322 } 11323 ip_wput_frag_v6(mp, ire, reachable, connp, 11324 caller, max_frag); 11325 return; 11326 } 11327 /* Do IPSEC processing first */ 11328 if (mctl_present) { 11329 int extra_len = ipsec_out_extra_length(first_mp); 11330 11331 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11332 max_frag) { 11333 /* 11334 * IPsec headers will push the packet over the 11335 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11336 * message for this packet if the upper-layer 11337 * that issued this packet will be able to 11338 * react to the icmp_pkt2big_v6() that we'll 11339 * generate. 11340 */ 11341 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11342 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11343 return; 11344 } 11345 if (attach_index != 0) 11346 ipsec_out_attach_if(io, attach_index); 11347 ipsec_out_process(q, first_mp, ire, ill_index); 11348 return; 11349 } 11350 /* 11351 * XXX multicast: add ip_mforward_v6() here. 11352 * Check conn_dontroute 11353 */ 11354 #ifdef lint 11355 /* 11356 * XXX The only purpose of this statement is to avoid lint 11357 * errors. See the above "XXX multicast". When that gets 11358 * fixed, remove this whole #ifdef lint section. 11359 */ 11360 ip3dbg(("multicast forward is %s.\n", 11361 (multicast_forward ? "TRUE" : "FALSE"))); 11362 #endif 11363 11364 UPDATE_OB_PKT_COUNT(ire); 11365 ire->ire_last_used_time = lbolt; 11366 ASSERT(mp == first_mp); 11367 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11368 } else { 11369 DTRACE_PROBE4(ip6__loopback__out__start, 11370 ill_t *, NULL, ill_t *, ill, 11371 ip6_t *, ip6h, mblk_t *, first_mp); 11372 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11373 ipst->ips_ipv6firewall_loopback_out, 11374 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11375 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11376 if (first_mp != NULL) 11377 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11378 } 11379 } 11380 11381 /* 11382 * Outbound IPv6 fragmentation routine using MDT. 
11383 */ 11384 static void 11385 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11386 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11387 { 11388 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11389 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11390 mblk_t *hdr_mp, *md_mp = NULL; 11391 int i1; 11392 multidata_t *mmd; 11393 unsigned char *hdr_ptr, *pld_ptr; 11394 ip_pdescinfo_t pdi; 11395 uint32_t ident; 11396 size_t len; 11397 uint16_t offset; 11398 queue_t *stq = ire->ire_stq; 11399 ill_t *ill = (ill_t *)stq->q_ptr; 11400 ip_stack_t *ipst = ill->ill_ipst; 11401 11402 ASSERT(DB_TYPE(mp) == M_DATA); 11403 ASSERT(MBLKL(mp) > unfragmentable_len); 11404 11405 /* 11406 * Move read ptr past unfragmentable portion, we don't want this part 11407 * of the data in our fragments. 11408 */ 11409 mp->b_rptr += unfragmentable_len; 11410 11411 /* Calculate how many packets we will send out */ 11412 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11413 pkts = (i1 + max_chunk - 1) / max_chunk; 11414 ASSERT(pkts > 1); 11415 11416 /* Allocate a message block which will hold all the IP Headers. */ 11417 wroff = ipst->ips_ip_wroff_extra; 11418 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11419 11420 i1 = pkts * hdr_chunk_len; 11421 /* 11422 * Create the header buffer, Multidata and destination address 11423 * and SAP attribute that should be associated with it. 
11424 */ 11425 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11426 ((hdr_mp->b_wptr += i1), 11427 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11428 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11429 freemsg(mp); 11430 if (md_mp == NULL) { 11431 freemsg(hdr_mp); 11432 } else { 11433 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11434 freemsg(md_mp); 11435 } 11436 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11437 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11438 return; 11439 } 11440 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11441 11442 /* 11443 * Add a payload buffer to the Multidata; this operation must not 11444 * fail, or otherwise our logic in this routine is broken. There 11445 * is no memory allocation done by the routine, so any returned 11446 * failure simply tells us that we've done something wrong. 11447 * 11448 * A failure tells us that either we're adding the same payload 11449 * buffer more than once, or we're trying to add more buffers than 11450 * allowed. None of the above cases should happen, and we panic 11451 * because either there's horrible heap corruption, and/or 11452 * programming mistake. 11453 */ 11454 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11455 goto pbuf_panic; 11456 } 11457 11458 hdr_ptr = hdr_mp->b_rptr; 11459 pld_ptr = mp->b_rptr; 11460 11461 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11462 11463 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11464 11465 /* 11466 * len is the total length of the fragmentable data in this 11467 * datagram. For each fragment sent, we will decrement len 11468 * by the amount of fragmentable data sent in that fragment 11469 * until len reaches zero. 
11470 */ 11471 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11472 11473 offset = 0; 11474 prev_nexthdr_offset += wroff; 11475 11476 while (len != 0) { 11477 size_t mlen; 11478 ip6_t *fip6h; 11479 ip6_frag_t *fraghdr; 11480 int error; 11481 11482 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11483 mlen = MIN(len, max_chunk); 11484 len -= mlen; 11485 11486 fip6h = (ip6_t *)(hdr_ptr + wroff); 11487 ASSERT(OK_32PTR(fip6h)); 11488 bcopy(ip6h, fip6h, unfragmentable_len); 11489 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11490 11491 fip6h->ip6_plen = htons((uint16_t)(mlen + 11492 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11493 11494 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11495 unfragmentable_len); 11496 fraghdr->ip6f_nxt = nexthdr; 11497 fraghdr->ip6f_reserved = 0; 11498 fraghdr->ip6f_offlg = htons(offset) | 11499 ((len != 0) ? IP6F_MORE_FRAG : 0); 11500 fraghdr->ip6f_ident = ident; 11501 11502 /* 11503 * Record offset and size of header and data of the next packet 11504 * in the multidata message. 11505 */ 11506 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11507 unfragmentable_len + sizeof (ip6_frag_t), 0); 11508 PDESC_PLD_INIT(&pdi); 11509 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11510 ASSERT(i1 > 0); 11511 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11512 if (i1 == mlen) { 11513 pld_ptr += mlen; 11514 } else { 11515 i1 = mlen - i1; 11516 mp = mp->b_cont; 11517 ASSERT(mp != NULL); 11518 ASSERT(MBLKL(mp) >= i1); 11519 /* 11520 * Attach the next payload message block to the 11521 * multidata message. 
11522 */ 11523 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11524 goto pbuf_panic; 11525 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11526 pld_ptr = mp->b_rptr + i1; 11527 } 11528 11529 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11530 KM_NOSLEEP)) == NULL) { 11531 /* 11532 * Any failure other than ENOMEM indicates that we 11533 * have passed in invalid pdesc info or parameters 11534 * to mmd_addpdesc, which must not happen. 11535 * 11536 * EINVAL is a result of failure on boundary checks 11537 * against the pdesc info contents. It should not 11538 * happen, and we panic because either there's 11539 * horrible heap corruption, and/or programming 11540 * mistake. 11541 */ 11542 if (error != ENOMEM) { 11543 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11544 "pdesc logic error detected for " 11545 "mmd %p pinfo %p (%d)\n", 11546 (void *)mmd, (void *)&pdi, error); 11547 /* NOTREACHED */ 11548 } 11549 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11550 /* Free unattached payload message blocks as well */ 11551 md_mp->b_cont = mp->b_cont; 11552 goto free_mmd; 11553 } 11554 11555 /* Advance fragment offset. */ 11556 offset += mlen; 11557 11558 /* Advance to location for next header in the buffer. */ 11559 hdr_ptr += hdr_chunk_len; 11560 11561 /* Did we reach the next payload message block? */ 11562 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11563 mp = mp->b_cont; 11564 /* 11565 * Attach the next message block with payload 11566 * data to the multidata message. 
11567 */ 11568 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11569 goto pbuf_panic; 11570 pld_ptr = mp->b_rptr; 11571 } 11572 } 11573 11574 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11575 ASSERT(mp->b_wptr == pld_ptr); 11576 11577 /* Update IP statistics */ 11578 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11579 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11580 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11581 /* 11582 * The ipv6 header len is accounted for in unfragmentable_len so 11583 * when calculating the fragmentation overhead just add the frag 11584 * header len. 11585 */ 11586 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11587 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11588 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11589 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11590 11591 ire->ire_ob_pkt_count += pkts; 11592 if (ire->ire_ipif != NULL) 11593 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11594 11595 ire->ire_last_used_time = lbolt; 11596 /* Send it down */ 11597 putnext(stq, md_mp); 11598 return; 11599 11600 pbuf_panic: 11601 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11602 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11603 pbuf_idx); 11604 /* NOTREACHED */ 11605 } 11606 11607 /* 11608 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11609 * We have not optimized this in terms of number of mblks 11610 * allocated. For instance, for each fragment sent we always allocate a 11611 * mblk to hold the IPv6 header and fragment header. 11612 * 11613 * Assumes that all the extension headers are contained in the first mblk. 11614 * 11615 * The fragment header is inserted after an hop-by-hop options header 11616 * and after [an optional destinations header followed by] a routing header. 11617 * 11618 * NOTE : This function does not ire_refrele the ire passed in as 11619 * the argument. 
11620 */ 11621 void 11622 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11623 int caller, int max_frag) 11624 { 11625 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11626 ip6_t *fip6h; 11627 mblk_t *hmp; 11628 mblk_t *hmp0; 11629 mblk_t *dmp; 11630 ip6_frag_t *fraghdr; 11631 size_t unfragmentable_len; 11632 size_t len; 11633 size_t mlen; 11634 size_t max_chunk; 11635 uint32_t ident; 11636 uint16_t off_flags; 11637 uint16_t offset = 0; 11638 ill_t *ill; 11639 uint8_t nexthdr; 11640 uint_t prev_nexthdr_offset; 11641 uint8_t *ptr; 11642 ip_stack_t *ipst = ire->ire_ipst; 11643 11644 ASSERT(ire->ire_type == IRE_CACHE); 11645 ill = (ill_t *)ire->ire_stq->q_ptr; 11646 11647 if (max_frag <= 0) { 11648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11649 freemsg(mp); 11650 return; 11651 } 11652 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11653 11654 /* 11655 * Determine the length of the unfragmentable portion of this 11656 * datagram. This consists of the IPv6 header, a potential 11657 * hop-by-hop options header, a potential pre-routing-header 11658 * destination options header, and a potential routing header. 
11659 */ 11660 nexthdr = ip6h->ip6_nxt; 11661 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11662 ptr = (uint8_t *)&ip6h[1]; 11663 11664 if (nexthdr == IPPROTO_HOPOPTS) { 11665 ip6_hbh_t *hbh_hdr; 11666 uint_t hdr_len; 11667 11668 hbh_hdr = (ip6_hbh_t *)ptr; 11669 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11670 nexthdr = hbh_hdr->ip6h_nxt; 11671 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11672 - (uint8_t *)ip6h; 11673 ptr += hdr_len; 11674 } 11675 if (nexthdr == IPPROTO_DSTOPTS) { 11676 ip6_dest_t *dest_hdr; 11677 uint_t hdr_len; 11678 11679 dest_hdr = (ip6_dest_t *)ptr; 11680 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11681 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11682 nexthdr = dest_hdr->ip6d_nxt; 11683 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11684 - (uint8_t *)ip6h; 11685 ptr += hdr_len; 11686 } 11687 } 11688 if (nexthdr == IPPROTO_ROUTING) { 11689 ip6_rthdr_t *rthdr; 11690 uint_t hdr_len; 11691 11692 rthdr = (ip6_rthdr_t *)ptr; 11693 nexthdr = rthdr->ip6r_nxt; 11694 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11695 - (uint8_t *)ip6h; 11696 hdr_len = 8 * (rthdr->ip6r_len + 1); 11697 ptr += hdr_len; 11698 } 11699 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11700 11701 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11702 sizeof (ip6_frag_t)) & ~7; 11703 11704 /* Check if we can use MDT to send out the frags. */ 11705 ASSERT(!IRE_IS_LOCAL(ire)); 11706 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11707 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11708 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11709 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11710 nexthdr, prev_nexthdr_offset); 11711 return; 11712 } 11713 11714 /* 11715 * Allocate an mblk with enough room for the link-layer 11716 * header, the unfragmentable part of the datagram, and the 11717 * fragment header. 
This (or a copy) will be used as the 11718 * first mblk for each fragment we send. 11719 */ 11720 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11721 ipst->ips_ip_wroff_extra, BPRI_HI); 11722 if (hmp == NULL) { 11723 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11724 freemsg(mp); 11725 return; 11726 } 11727 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11728 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11729 11730 fip6h = (ip6_t *)hmp->b_rptr; 11731 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11732 11733 bcopy(ip6h, fip6h, unfragmentable_len); 11734 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11735 11736 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11737 11738 fraghdr->ip6f_nxt = nexthdr; 11739 fraghdr->ip6f_reserved = 0; 11740 fraghdr->ip6f_offlg = 0; 11741 fraghdr->ip6f_ident = htonl(ident); 11742 11743 /* 11744 * len is the total length of the fragmentable data in this 11745 * datagram. For each fragment sent, we will decrement len 11746 * by the amount of fragmentable data sent in that fragment 11747 * until len reaches zero. 11748 */ 11749 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11750 11751 /* 11752 * Move read ptr past unfragmentable portion, we don't want this part 11753 * of the data in our fragments. 
11754 */ 11755 mp->b_rptr += unfragmentable_len; 11756 11757 while (len != 0) { 11758 mlen = MIN(len, max_chunk); 11759 len -= mlen; 11760 if (len != 0) { 11761 /* Not last */ 11762 hmp0 = copyb(hmp); 11763 if (hmp0 == NULL) { 11764 freeb(hmp); 11765 freemsg(mp); 11766 BUMP_MIB(ill->ill_ip_mib, 11767 ipIfStatsOutFragFails); 11768 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11769 return; 11770 } 11771 off_flags = IP6F_MORE_FRAG; 11772 } else { 11773 /* Last fragment */ 11774 hmp0 = hmp; 11775 hmp = NULL; 11776 off_flags = 0; 11777 } 11778 fip6h = (ip6_t *)(hmp0->b_rptr); 11779 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11780 11781 fip6h->ip6_plen = htons((uint16_t)(mlen + 11782 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11783 /* 11784 * Note: Optimization alert. 11785 * In IPv6 (and IPv4) protocol header, Fragment Offset 11786 * ("offset") is 13 bits wide and in 8-octet units. 11787 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11788 * it occupies the most significant 13 bits. 11789 * (least significant 13 bits in IPv4). 11790 * We do not do any shifts here. Not shifting is same effect 11791 * as taking offset value in octet units, dividing by 8 and 11792 * then shifting 3 bits left to line it up in place in proper 11793 * place protocol header. 
11794 */ 11795 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11796 11797 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11798 /* mp has already been freed by ip_carve_mp() */ 11799 if (hmp != NULL) 11800 freeb(hmp); 11801 freeb(hmp0); 11802 ip1dbg(("ip_carve_mp: failed\n")); 11803 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11804 return; 11805 } 11806 hmp0->b_cont = dmp; 11807 /* Get the priority marking, if any */ 11808 hmp0->b_band = dmp->b_band; 11809 UPDATE_OB_PKT_COUNT(ire); 11810 ire->ire_last_used_time = lbolt; 11811 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11812 caller, NULL); 11813 reachable = 0; /* No need to redo state machine in loop */ 11814 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11815 offset += mlen; 11816 } 11817 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11818 } 11819 11820 /* 11821 * Determine if the ill and multicast aspects of that packets 11822 * "matches" the conn. 11823 */ 11824 boolean_t 11825 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11826 zoneid_t zoneid) 11827 { 11828 ill_t *in_ill; 11829 boolean_t wantpacket = B_TRUE; 11830 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11831 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11832 11833 /* 11834 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11835 * unicast and multicast reception to conn_incoming_ill. 11836 * conn_wantpacket_v6 is called both for unicast and 11837 * multicast. 11838 * 11839 * 1) The unicast copy of the packet can come anywhere in 11840 * the ill group if it is part of the group. Thus, we 11841 * need to check to see whether the ill group matches 11842 * if in_ill is part of a group. 11843 * 11844 * 2) ip_rput does not suppress duplicate multicast packets. 
11845 * If there are two interfaces in a ill group and we have 11846 * 2 applications (conns) joined a multicast group G on 11847 * both the interfaces, ilm_lookup_ill filter in ip_rput 11848 * will give us two packets because we join G on both the 11849 * interfaces rather than nominating just one interface 11850 * for receiving multicast like broadcast above. So, 11851 * we have to call ilg_lookup_ill to filter out duplicate 11852 * copies, if ill is part of a group, to supress duplicates. 11853 */ 11854 in_ill = connp->conn_incoming_ill; 11855 if (in_ill != NULL) { 11856 mutex_enter(&connp->conn_lock); 11857 in_ill = connp->conn_incoming_ill; 11858 mutex_enter(&ill->ill_lock); 11859 /* 11860 * No IPMP, and the packet did not arrive on conn_incoming_ill 11861 * OR, IPMP in use and the packet arrived on an IPMP group 11862 * different from the conn_incoming_ill's IPMP group. 11863 * Reject the packet. 11864 */ 11865 if ((in_ill->ill_group == NULL && in_ill != ill) || 11866 (in_ill->ill_group != NULL && 11867 in_ill->ill_group != ill->ill_group)) { 11868 wantpacket = B_FALSE; 11869 } 11870 mutex_exit(&ill->ill_lock); 11871 mutex_exit(&connp->conn_lock); 11872 if (!wantpacket) 11873 return (B_FALSE); 11874 } 11875 11876 if (connp->conn_multi_router) 11877 return (B_TRUE); 11878 11879 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11880 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11881 /* 11882 * Unicast case: we match the conn only if it's in the specified 11883 * zone. 11884 */ 11885 return (IPCL_ZONE_MATCH(connp, zoneid)); 11886 } 11887 11888 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11889 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11890 /* 11891 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11892 * disabled, therefore we don't dispatch the multicast packet to 11893 * the sending zone. 
11894 */ 11895 return (B_FALSE); 11896 } 11897 11898 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11899 zoneid != ALL_ZONES) { 11900 /* 11901 * Multicast packet on the loopback interface: we only match 11902 * conns who joined the group in the specified zone. 11903 */ 11904 return (B_FALSE); 11905 } 11906 11907 mutex_enter(&connp->conn_lock); 11908 wantpacket = 11909 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11910 mutex_exit(&connp->conn_lock); 11911 11912 return (wantpacket); 11913 } 11914 11915 11916 /* 11917 * Transmit a packet and update any NUD state based on the flags 11918 * XXX need to "recover" any ip6i_t when doing putq! 11919 * 11920 * NOTE : This function does not ire_refrele the ire passed in as the 11921 * argument. 11922 */ 11923 void 11924 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11925 int caller, ipsec_out_t *io) 11926 { 11927 mblk_t *mp1; 11928 nce_t *nce = ire->ire_nce; 11929 ill_t *ill; 11930 ill_t *out_ill; 11931 uint64_t delta; 11932 ip6_t *ip6h; 11933 queue_t *stq = ire->ire_stq; 11934 ire_t *ire1 = NULL; 11935 ire_t *save_ire = ire; 11936 boolean_t multirt_send = B_FALSE; 11937 mblk_t *next_mp = NULL; 11938 ip_stack_t *ipst = ire->ire_ipst; 11939 11940 ip6h = (ip6_t *)mp->b_rptr; 11941 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11942 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11943 ASSERT(nce != NULL); 11944 ASSERT(mp->b_datap->db_type == M_DATA); 11945 ASSERT(stq != NULL); 11946 11947 ill = ire_to_ill(ire); 11948 if (!ill) { 11949 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11950 freemsg(mp); 11951 return; 11952 } 11953 11954 /* 11955 * If a packet is to be sent out an interface that is a 6to4 11956 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11957 * destination, must be checked to have a 6to4 prefix 11958 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11959 * address configured on the sending interface. 
Otherwise, 11960 * the packet was delivered to this interface in error and the 11961 * packet must be dropped. 11962 */ 11963 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11964 ipif_t *ipif = ill->ill_ipif; 11965 11966 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11967 &ip6h->ip6_dst)) { 11968 if (ip_debug > 2) { 11969 /* ip1dbg */ 11970 pr_addr_dbg("ip_xmit_v6: attempting to " 11971 "send 6to4 addressed IPv6 " 11972 "destination (%s) out the wrong " 11973 "interface.\n", AF_INET6, 11974 &ip6h->ip6_dst); 11975 } 11976 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11977 freemsg(mp); 11978 return; 11979 } 11980 } 11981 11982 /* Flow-control check has been done in ip_wput_ire_v6 */ 11983 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11984 caller == IP_WSRV || canput(stq->q_next)) { 11985 uint32_t ill_index; 11986 11987 /* 11988 * In most cases, the emission loop below is entered only 11989 * once. Only in the case where the ire holds the 11990 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11991 * flagged ires in the bucket, and send the packet 11992 * through all crossed RTF_MULTIRT routes. 11993 */ 11994 if (ire->ire_flags & RTF_MULTIRT) { 11995 /* 11996 * Multirouting case. The bucket where ire is stored 11997 * probably holds other RTF_MULTIRT flagged ires 11998 * to the destination. In this call to ip_xmit_v6, 11999 * we attempt to send the packet through all 12000 * those ires. Thus, we first ensure that ire is the 12001 * first RTF_MULTIRT ire in the bucket, 12002 * before walking the ire list. 12003 */ 12004 ire_t *first_ire; 12005 irb_t *irb = ire->ire_bucket; 12006 ASSERT(irb != NULL); 12007 multirt_send = B_TRUE; 12008 12009 /* Make sure we do not omit any multiroute ire. 
*/ 12010 IRB_REFHOLD(irb); 12011 for (first_ire = irb->irb_ire; 12012 first_ire != NULL; 12013 first_ire = first_ire->ire_next) { 12014 if ((first_ire->ire_flags & RTF_MULTIRT) && 12015 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12016 &ire->ire_addr_v6)) && 12017 !(first_ire->ire_marks & 12018 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12019 break; 12020 } 12021 12022 if ((first_ire != NULL) && (first_ire != ire)) { 12023 IRE_REFHOLD(first_ire); 12024 /* ire will be released by the caller */ 12025 ire = first_ire; 12026 nce = ire->ire_nce; 12027 stq = ire->ire_stq; 12028 ill = ire_to_ill(ire); 12029 } 12030 IRB_REFRELE(irb); 12031 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12032 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12033 ILL_MDT_USABLE(ill)) { 12034 /* 12035 * This tcp connection was marked as MDT-capable, but 12036 * it has been turned off due changes in the interface. 12037 * Now that the interface support is back, turn it on 12038 * by notifying tcp. We don't directly modify tcp_mdt, 12039 * since we leave all the details to the tcp code that 12040 * knows better. 12041 */ 12042 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12043 12044 if (mdimp == NULL) { 12045 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12046 "connp %p (ENOMEM)\n", (void *)connp)); 12047 } else { 12048 CONN_INC_REF(connp); 12049 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12050 connp, SQTAG_TCP_INPUT_MCTL); 12051 } 12052 } 12053 12054 do { 12055 mblk_t *mp_ip6h; 12056 12057 if (multirt_send) { 12058 irb_t *irb; 12059 /* 12060 * We are in a multiple send case, need to get 12061 * the next ire and make a duplicate of the 12062 * packet. ire1 holds here the next ire to 12063 * process in the bucket. If multirouting is 12064 * expected, any non-RTF_MULTIRT ire that has 12065 * the right destination address is ignored. 
12066 */ 12067 irb = ire->ire_bucket; 12068 ASSERT(irb != NULL); 12069 12070 IRB_REFHOLD(irb); 12071 for (ire1 = ire->ire_next; 12072 ire1 != NULL; 12073 ire1 = ire1->ire_next) { 12074 if (!(ire1->ire_flags & RTF_MULTIRT)) 12075 continue; 12076 if (!IN6_ARE_ADDR_EQUAL( 12077 &ire1->ire_addr_v6, 12078 &ire->ire_addr_v6)) 12079 continue; 12080 if (ire1->ire_marks & 12081 (IRE_MARK_CONDEMNED| 12082 IRE_MARK_HIDDEN)) 12083 continue; 12084 12085 /* Got one */ 12086 if (ire1 != save_ire) { 12087 IRE_REFHOLD(ire1); 12088 } 12089 break; 12090 } 12091 IRB_REFRELE(irb); 12092 12093 if (ire1 != NULL) { 12094 next_mp = copyb(mp); 12095 if ((next_mp == NULL) || 12096 ((mp->b_cont != NULL) && 12097 ((next_mp->b_cont = 12098 dupmsg(mp->b_cont)) == NULL))) { 12099 freemsg(next_mp); 12100 next_mp = NULL; 12101 ire_refrele(ire1); 12102 ire1 = NULL; 12103 } 12104 } 12105 12106 /* Last multiroute ire; don't loop anymore. */ 12107 if (ire1 == NULL) { 12108 multirt_send = B_FALSE; 12109 } 12110 } 12111 12112 ill_index = 12113 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12114 12115 /* Initiate IPPF processing */ 12116 if (IP6_OUT_IPP(flags, ipst)) { 12117 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12118 if (mp == NULL) { 12119 BUMP_MIB(ill->ill_ip_mib, 12120 ipIfStatsOutDiscards); 12121 if (next_mp != NULL) 12122 freemsg(next_mp); 12123 if (ire != save_ire) { 12124 ire_refrele(ire); 12125 } 12126 return; 12127 } 12128 ip6h = (ip6_t *)mp->b_rptr; 12129 } 12130 mp_ip6h = mp; 12131 12132 /* 12133 * Check for fastpath, we need to hold nce_lock to 12134 * prevent fastpath update from chaining nce_fp_mp. 
12135 */ 12136 12137 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12138 mutex_enter(&nce->nce_lock); 12139 if ((mp1 = nce->nce_fp_mp) != NULL) { 12140 uint32_t hlen; 12141 uchar_t *rptr; 12142 12143 hlen = MBLKL(mp1); 12144 rptr = mp->b_rptr - hlen; 12145 /* 12146 * make sure there is room for the fastpath 12147 * datalink header 12148 */ 12149 if (rptr < mp->b_datap->db_base) { 12150 mp1 = copyb(mp1); 12151 mutex_exit(&nce->nce_lock); 12152 if (mp1 == NULL) { 12153 BUMP_MIB(ill->ill_ip_mib, 12154 ipIfStatsOutDiscards); 12155 freemsg(mp); 12156 if (next_mp != NULL) 12157 freemsg(next_mp); 12158 if (ire != save_ire) { 12159 ire_refrele(ire); 12160 } 12161 return; 12162 } 12163 mp1->b_cont = mp; 12164 12165 /* Get the priority marking, if any */ 12166 mp1->b_band = mp->b_band; 12167 mp = mp1; 12168 } else { 12169 mp->b_rptr = rptr; 12170 /* 12171 * fastpath - pre-pend datalink 12172 * header 12173 */ 12174 bcopy(mp1->b_rptr, rptr, hlen); 12175 mutex_exit(&nce->nce_lock); 12176 } 12177 } else { 12178 /* 12179 * Get the DL_UNITDATA_REQ. 12180 */ 12181 mp1 = nce->nce_res_mp; 12182 if (mp1 == NULL) { 12183 mutex_exit(&nce->nce_lock); 12184 ip1dbg(("ip_xmit_v6: No resolution " 12185 "block ire = %p\n", (void *)ire)); 12186 freemsg(mp); 12187 if (next_mp != NULL) 12188 freemsg(next_mp); 12189 if (ire != save_ire) { 12190 ire_refrele(ire); 12191 } 12192 return; 12193 } 12194 /* 12195 * Prepend the DL_UNITDATA_REQ. 
12196 */ 12197 mp1 = copyb(mp1); 12198 mutex_exit(&nce->nce_lock); 12199 if (mp1 == NULL) { 12200 BUMP_MIB(ill->ill_ip_mib, 12201 ipIfStatsOutDiscards); 12202 freemsg(mp); 12203 if (next_mp != NULL) 12204 freemsg(next_mp); 12205 if (ire != save_ire) { 12206 ire_refrele(ire); 12207 } 12208 return; 12209 } 12210 mp1->b_cont = mp; 12211 12212 /* Get the priority marking, if any */ 12213 mp1->b_band = mp->b_band; 12214 mp = mp1; 12215 } 12216 12217 out_ill = (ill_t *)stq->q_ptr; 12218 12219 DTRACE_PROBE4(ip6__physical__out__start, 12220 ill_t *, NULL, ill_t *, out_ill, 12221 ip6_t *, ip6h, mblk_t *, mp); 12222 12223 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12224 ipst->ips_ipv6firewall_physical_out, 12225 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12226 12227 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12228 12229 if (mp == NULL) { 12230 if (multirt_send) { 12231 ASSERT(ire1 != NULL); 12232 if (ire != save_ire) { 12233 ire_refrele(ire); 12234 } 12235 /* 12236 * Proceed with the next RTF_MULTIRT 12237 * ire, also set up the send-to queue 12238 * accordingly. 12239 */ 12240 ire = ire1; 12241 ire1 = NULL; 12242 stq = ire->ire_stq; 12243 nce = ire->ire_nce; 12244 ill = ire_to_ill(ire); 12245 mp = next_mp; 12246 next_mp = NULL; 12247 continue; 12248 } else { 12249 ASSERT(next_mp == NULL); 12250 ASSERT(ire1 == NULL); 12251 break; 12252 } 12253 } 12254 12255 /* 12256 * Update ire and MIB counters; for save_ire, this has 12257 * been done by the caller. 12258 */ 12259 if (ire != save_ire) { 12260 UPDATE_OB_PKT_COUNT(ire); 12261 ire->ire_last_used_time = lbolt; 12262 12263 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12264 BUMP_MIB(ill->ill_ip_mib, 12265 ipIfStatsHCOutMcastPkts); 12266 UPDATE_MIB(ill->ill_ip_mib, 12267 ipIfStatsHCOutMcastOctets, 12268 ntohs(ip6h->ip6_plen) + 12269 IPV6_HDR_LEN); 12270 } 12271 } 12272 12273 /* 12274 * Send it down. XXX Do we want to flow control AH/ESP 12275 * packets that carry TCP payloads? 
We don't flow 12276 * control TCP packets, but we should also not 12277 * flow-control TCP packets that have been protected. 12278 * We don't have an easy way to find out if an AH/ESP 12279 * packet was originally TCP or not currently. 12280 */ 12281 if (io == NULL) { 12282 BUMP_MIB(ill->ill_ip_mib, 12283 ipIfStatsHCOutTransmits); 12284 UPDATE_MIB(ill->ill_ip_mib, 12285 ipIfStatsHCOutOctets, 12286 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12287 putnext(stq, mp); 12288 } else { 12289 /* 12290 * Safety Pup says: make sure this is 12291 * going to the right interface! 12292 */ 12293 if (io->ipsec_out_capab_ill_index != 12294 ill_index) { 12295 /* IPsec kstats: bump lose counter */ 12296 freemsg(mp1); 12297 } else { 12298 BUMP_MIB(ill->ill_ip_mib, 12299 ipIfStatsHCOutTransmits); 12300 UPDATE_MIB(ill->ill_ip_mib, 12301 ipIfStatsHCOutOctets, 12302 ntohs(ip6h->ip6_plen) + 12303 IPV6_HDR_LEN); 12304 ipsec_hw_putnext(stq, mp); 12305 } 12306 } 12307 12308 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12309 if (ire != save_ire) { 12310 ire_refrele(ire); 12311 } 12312 if (multirt_send) { 12313 ASSERT(ire1 != NULL); 12314 /* 12315 * Proceed with the next RTF_MULTIRT 12316 * ire, also set up the send-to queue 12317 * accordingly. 12318 */ 12319 ire = ire1; 12320 ire1 = NULL; 12321 stq = ire->ire_stq; 12322 nce = ire->ire_nce; 12323 ill = ire_to_ill(ire); 12324 mp = next_mp; 12325 next_mp = NULL; 12326 continue; 12327 } 12328 ASSERT(next_mp == NULL); 12329 ASSERT(ire1 == NULL); 12330 return; 12331 } 12332 12333 ASSERT(nce->nce_state != ND_INCOMPLETE); 12334 12335 /* 12336 * Check for upper layer advice 12337 */ 12338 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12339 /* 12340 * It should be o.k. to check the state without 12341 * a lock here, at most we lose an advice. 
12342 */ 12343 nce->nce_last = TICK_TO_MSEC(lbolt64); 12344 if (nce->nce_state != ND_REACHABLE) { 12345 12346 mutex_enter(&nce->nce_lock); 12347 nce->nce_state = ND_REACHABLE; 12348 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12349 mutex_exit(&nce->nce_lock); 12350 (void) untimeout(nce->nce_timeout_id); 12351 if (ip_debug > 2) { 12352 /* ip1dbg */ 12353 pr_addr_dbg("ip_xmit_v6: state" 12354 " for %s changed to" 12355 " REACHABLE\n", AF_INET6, 12356 &ire->ire_addr_v6); 12357 } 12358 } 12359 if (ire != save_ire) { 12360 ire_refrele(ire); 12361 } 12362 if (multirt_send) { 12363 ASSERT(ire1 != NULL); 12364 /* 12365 * Proceed with the next RTF_MULTIRT 12366 * ire, also set up the send-to queue 12367 * accordingly. 12368 */ 12369 ire = ire1; 12370 ire1 = NULL; 12371 stq = ire->ire_stq; 12372 nce = ire->ire_nce; 12373 ill = ire_to_ill(ire); 12374 mp = next_mp; 12375 next_mp = NULL; 12376 continue; 12377 } 12378 ASSERT(next_mp == NULL); 12379 ASSERT(ire1 == NULL); 12380 return; 12381 } 12382 12383 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12384 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12385 " ill_reachable_time = %d \n", delta, 12386 ill->ill_reachable_time)); 12387 if (delta > (uint64_t)ill->ill_reachable_time) { 12388 nce = ire->ire_nce; 12389 mutex_enter(&nce->nce_lock); 12390 switch (nce->nce_state) { 12391 case ND_REACHABLE: 12392 case ND_STALE: 12393 /* 12394 * ND_REACHABLE is identical to 12395 * ND_STALE in this specific case. If 12396 * reachable time has expired for this 12397 * neighbor (delta is greater than 12398 * reachable time), conceptually, the 12399 * neighbor cache is no longer in 12400 * REACHABLE state, but already in 12401 * STALE state. So the correct 12402 * transition here is to ND_DELAY. 
12403 */ 12404 nce->nce_state = ND_DELAY; 12405 mutex_exit(&nce->nce_lock); 12406 NDP_RESTART_TIMER(nce, 12407 ipst->ips_delay_first_probe_time); 12408 if (ip_debug > 3) { 12409 /* ip2dbg */ 12410 pr_addr_dbg("ip_xmit_v6: state" 12411 " for %s changed to" 12412 " DELAY\n", AF_INET6, 12413 &ire->ire_addr_v6); 12414 } 12415 break; 12416 case ND_DELAY: 12417 case ND_PROBE: 12418 mutex_exit(&nce->nce_lock); 12419 /* Timers have already started */ 12420 break; 12421 case ND_UNREACHABLE: 12422 /* 12423 * ndp timer has detected that this nce 12424 * is unreachable and initiated deleting 12425 * this nce and all its associated IREs. 12426 * This is a race where we found the 12427 * ire before it was deleted and have 12428 * just sent out a packet using this 12429 * unreachable nce. 12430 */ 12431 mutex_exit(&nce->nce_lock); 12432 break; 12433 default: 12434 ASSERT(0); 12435 } 12436 } 12437 12438 if (multirt_send) { 12439 ASSERT(ire1 != NULL); 12440 /* 12441 * Proceed with the next RTF_MULTIRT ire, 12442 * Also set up the send-to queue accordingly. 12443 */ 12444 if (ire != save_ire) { 12445 ire_refrele(ire); 12446 } 12447 ire = ire1; 12448 ire1 = NULL; 12449 stq = ire->ire_stq; 12450 nce = ire->ire_nce; 12451 ill = ire_to_ill(ire); 12452 mp = next_mp; 12453 next_mp = NULL; 12454 } 12455 } while (multirt_send); 12456 /* 12457 * In the multirouting case, release the last ire used for 12458 * emission. save_ire will be released by the caller. 12459 */ 12460 if (ire != save_ire) { 12461 ire_refrele(ire); 12462 } 12463 } else { 12464 /* 12465 * Queue packet if we have an conn to give back pressure. 12466 * We can't queue packets intended for hardware acceleration 12467 * since we've tossed that state already. If the packet is 12468 * being fed back from ire_send_v6, we don't know the 12469 * position in the queue to enqueue the packet and we discard 12470 * the packet. 
12471 */ 12472 if (ipst->ips_ip_output_queue && (connp != NULL) && 12473 (io == NULL) && (caller != IRE_SEND)) { 12474 if (caller == IP_WSRV) { 12475 connp->conn_did_putbq = 1; 12476 (void) putbq(connp->conn_wq, mp); 12477 conn_drain_insert(connp); 12478 /* 12479 * caller == IP_WSRV implies we are 12480 * the service thread, and the 12481 * queue is already noenabled. 12482 * The check for canput and 12483 * the putbq is not atomic. 12484 * So we need to check again. 12485 */ 12486 if (canput(stq->q_next)) 12487 connp->conn_did_putbq = 0; 12488 } else { 12489 (void) putq(connp->conn_wq, mp); 12490 } 12491 return; 12492 } 12493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12494 freemsg(mp); 12495 return; 12496 } 12497 } 12498 12499 /* 12500 * pr_addr_dbg function provides the needed buffer space to call 12501 * inet_ntop() function's 3rd argument. This function should be 12502 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12503 * stack buffer space in it's own stack frame. This function uses 12504 * a buffer from it's own stack and prints the information. 12505 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12506 * 12507 * Note: This function can call inet_ntop() once. 12508 */ 12509 void 12510 pr_addr_dbg(char *fmt1, int af, const void *addr) 12511 { 12512 char buf[INET6_ADDRSTRLEN]; 12513 12514 if (fmt1 == NULL) { 12515 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12516 return; 12517 } 12518 12519 /* 12520 * This does not compare debug level and just prints 12521 * out. Thus it is the responsibility of the caller 12522 * to check the appropriate debug-level before calling 12523 * this function. 
12524 */ 12525 if (ip_debug > 0) { 12526 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12527 } 12528 12529 12530 } 12531 12532 12533 /* 12534 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12535 * if needed and extension headers) that will be needed based on the 12536 * ip6_pkt_t structure passed by the caller. 12537 * 12538 * The returned length does not include the length of the upper level 12539 * protocol (ULP) header. 12540 */ 12541 int 12542 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12543 { 12544 int len; 12545 12546 len = IPV6_HDR_LEN; 12547 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12548 len += sizeof (ip6i_t); 12549 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12550 ASSERT(ipp->ipp_hopoptslen != 0); 12551 len += ipp->ipp_hopoptslen; 12552 } 12553 if (ipp->ipp_fields & IPPF_RTHDR) { 12554 ASSERT(ipp->ipp_rthdrlen != 0); 12555 len += ipp->ipp_rthdrlen; 12556 } 12557 /* 12558 * En-route destination options 12559 * Only do them if there's a routing header as well 12560 */ 12561 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12562 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12563 ASSERT(ipp->ipp_rtdstoptslen != 0); 12564 len += ipp->ipp_rtdstoptslen; 12565 } 12566 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12567 ASSERT(ipp->ipp_dstoptslen != 0); 12568 len += ipp->ipp_dstoptslen; 12569 } 12570 return (len); 12571 } 12572 12573 /* 12574 * All-purpose routine to build a header chain of an IPv6 header 12575 * followed by any required extension headers and a proto header, 12576 * preceeded (where necessary) by an ip6i_t private header. 12577 * 12578 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12579 * will be filled in appropriately. 12580 * Thus the caller must fill in the rest of the IPv6 header, such as 12581 * traffic class/flowid, source address (if not set here), hoplimit (if not 12582 * set here) and destination address. 12583 * 12584 * The extension headers and ip6i_t header will all be fully filled in. 
12585 */ 12586 void 12587 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12588 ip6_pkt_t *ipp, uint8_t protocol) 12589 { 12590 uint8_t *nxthdr_ptr; 12591 uint8_t *cp; 12592 ip6i_t *ip6i; 12593 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12594 12595 /* 12596 * If sending private ip6i_t header down (checksum info, nexthop, 12597 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12598 * then fill it in. (The checksum info will be filled in by icmp). 12599 */ 12600 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12601 ip6i = (ip6i_t *)ip6h; 12602 ip6h = (ip6_t *)&ip6i[1]; 12603 12604 ip6i->ip6i_flags = 0; 12605 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12606 if (ipp->ipp_fields & IPPF_IFINDEX || 12607 ipp->ipp_fields & IPPF_SCOPE_ID) { 12608 ASSERT(ipp->ipp_ifindex != 0); 12609 ip6i->ip6i_flags |= IP6I_IFINDEX; 12610 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12611 } 12612 if (ipp->ipp_fields & IPPF_ADDR) { 12613 /* 12614 * Enable per-packet source address verification if 12615 * IPV6_PKTINFO specified the source address. 12616 * ip6_src is set in the transport's _wput function. 12617 */ 12618 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12619 &ipp->ipp_addr)); 12620 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12621 } 12622 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12623 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12624 /* 12625 * We need to set this flag so that IP doesn't 12626 * rewrite the IPv6 header's hoplimit with the 12627 * current default value. 
12628 */ 12629 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12630 } 12631 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12632 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12633 &ipp->ipp_nexthop)); 12634 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12635 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12636 } 12637 /* 12638 * tell IP this is an ip6i_t private header 12639 */ 12640 ip6i->ip6i_nxt = IPPROTO_RAW; 12641 } 12642 /* Initialize IPv6 header */ 12643 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12644 if (ipp->ipp_fields & IPPF_TCLASS) { 12645 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12646 (ipp->ipp_tclass << 20); 12647 } 12648 if (ipp->ipp_fields & IPPF_ADDR) 12649 ip6h->ip6_src = ipp->ipp_addr; 12650 12651 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12652 cp = (uint8_t *)&ip6h[1]; 12653 /* 12654 * Here's where we have to start stringing together 12655 * any extension headers in the right order: 12656 * Hop-by-hop, destination, routing, and final destination opts. 12657 */ 12658 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12659 /* Hop-by-hop options */ 12660 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12661 12662 *nxthdr_ptr = IPPROTO_HOPOPTS; 12663 nxthdr_ptr = &hbh->ip6h_nxt; 12664 12665 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12666 cp += ipp->ipp_hopoptslen; 12667 } 12668 /* 12669 * En-route destination options 12670 * Only do them if there's a routing header as well 12671 */ 12672 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12673 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12674 ip6_dest_t *dst = (ip6_dest_t *)cp; 12675 12676 *nxthdr_ptr = IPPROTO_DSTOPTS; 12677 nxthdr_ptr = &dst->ip6d_nxt; 12678 12679 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12680 cp += ipp->ipp_rtdstoptslen; 12681 } 12682 /* 12683 * Routing header next 12684 */ 12685 if (ipp->ipp_fields & IPPF_RTHDR) { 12686 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12687 12688 *nxthdr_ptr = IPPROTO_ROUTING; 12689 nxthdr_ptr = &rt->ip6r_nxt; 12690 12691 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12692 cp += ipp->ipp_rthdrlen; 12693 } 
12694 /* 12695 * Do ultimate destination options 12696 */ 12697 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12698 ip6_dest_t *dest = (ip6_dest_t *)cp; 12699 12700 *nxthdr_ptr = IPPROTO_DSTOPTS; 12701 nxthdr_ptr = &dest->ip6d_nxt; 12702 12703 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12704 cp += ipp->ipp_dstoptslen; 12705 } 12706 /* 12707 * Now set the last header pointer to the proto passed in 12708 */ 12709 *nxthdr_ptr = protocol; 12710 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12711 } 12712 12713 /* 12714 * Return a pointer to the routing header extension header 12715 * in the IPv6 header(s) chain passed in. 12716 * If none found, return NULL 12717 * Assumes that all extension headers are in same mblk as the v6 header 12718 */ 12719 ip6_rthdr_t * 12720 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12721 { 12722 ip6_dest_t *desthdr; 12723 ip6_frag_t *fraghdr; 12724 uint_t hdrlen; 12725 uint8_t nexthdr; 12726 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12727 12728 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12729 return ((ip6_rthdr_t *)ptr); 12730 12731 /* 12732 * The routing header will precede all extension headers 12733 * other than the hop-by-hop and destination options 12734 * extension headers, so if we see anything other than those, 12735 * we're done and didn't find it. 12736 * We could see a destination options header alone but no 12737 * routing header, in which case we'll return NULL as soon as 12738 * we see anything after that. 12739 * Hop-by-hop and destination option headers are identical, 12740 * so we can use either one we want as a template. 12741 */ 12742 nexthdr = ip6h->ip6_nxt; 12743 while (ptr < endptr) { 12744 /* Is there enough left for len + nexthdr? 
*/ 12745 if (ptr + MIN_EHDR_LEN > endptr) 12746 return (NULL); 12747 12748 switch (nexthdr) { 12749 case IPPROTO_HOPOPTS: 12750 case IPPROTO_DSTOPTS: 12751 /* Assumes the headers are identical for hbh and dst */ 12752 desthdr = (ip6_dest_t *)ptr; 12753 hdrlen = 8 * (desthdr->ip6d_len + 1); 12754 nexthdr = desthdr->ip6d_nxt; 12755 break; 12756 12757 case IPPROTO_ROUTING: 12758 return ((ip6_rthdr_t *)ptr); 12759 12760 case IPPROTO_FRAGMENT: 12761 fraghdr = (ip6_frag_t *)ptr; 12762 hdrlen = sizeof (ip6_frag_t); 12763 nexthdr = fraghdr->ip6f_nxt; 12764 break; 12765 12766 default: 12767 return (NULL); 12768 } 12769 ptr += hdrlen; 12770 } 12771 return (NULL); 12772 } 12773 12774 /* 12775 * Called for source-routed packets originating on this node. 12776 * Manipulates the original routing header by moving every entry up 12777 * one slot, placing the first entry in the v6 header's v6_dst field, 12778 * and placing the ultimate destination in the routing header's last 12779 * slot. 12780 * 12781 * Returns the checksum diference between the ultimate destination 12782 * (last hop in the routing header when the packet is sent) and 12783 * the first hop (ip6_dst when the packet is sent) 12784 */ 12785 /* ARGSUSED2 */ 12786 uint32_t 12787 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12788 { 12789 uint_t numaddr; 12790 uint_t i; 12791 in6_addr_t *addrptr; 12792 in6_addr_t tmp; 12793 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12794 uint32_t cksm; 12795 uint32_t addrsum = 0; 12796 uint16_t *ptr; 12797 12798 /* 12799 * Perform any processing needed for source routing. 12800 * We know that all extension headers will be in the same mblk 12801 * as the IPv6 header. 12802 */ 12803 12804 /* 12805 * If no segments left in header, or the header length field is zero, 12806 * don't move hop addresses around; 12807 * Checksum difference is zero. 
12808 */ 12809 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12810 return (0); 12811 12812 ptr = (uint16_t *)&ip6h->ip6_dst; 12813 cksm = 0; 12814 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12815 cksm += ptr[i]; 12816 } 12817 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12818 12819 /* 12820 * Here's where the fun begins - we have to 12821 * move all addresses up one spot, take the 12822 * first hop and make it our first ip6_dst, 12823 * and place the ultimate destination in the 12824 * newly-opened last slot. 12825 */ 12826 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12827 numaddr = rthdr->ip6r0_len / 2; 12828 tmp = *addrptr; 12829 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12830 *addrptr = addrptr[1]; 12831 } 12832 *addrptr = ip6h->ip6_dst; 12833 ip6h->ip6_dst = tmp; 12834 12835 /* 12836 * From the checksummed ultimate destination subtract the checksummed 12837 * current ip6_dst (the first hop address). Return that number. 12838 * (In the v4 case, the second part of this is done in each routine 12839 * that calls ip_massage_options(). We do it all in this one place 12840 * for v6). 12841 */ 12842 ptr = (uint16_t *)&ip6h->ip6_dst; 12843 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12844 addrsum += ptr[i]; 12845 } 12846 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12847 if ((int)cksm < 0) 12848 cksm--; 12849 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12850 12851 return (cksm); 12852 } 12853 12854 /* 12855 * Propagate a multicast group membership operation (join/leave) (*fn) on 12856 * all interfaces crossed by the related multirt routes. 12857 * The call is considered successful if the operation succeeds 12858 * on at least one interface. 12859 * The function is called if the destination address in the packet to send 12860 * is multirouted. 
12861 */ 12862 int 12863 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12864 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12865 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12866 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12867 { 12868 ire_t *ire_gw; 12869 irb_t *irb; 12870 int index, error = 0; 12871 opt_restart_t *or; 12872 ip_stack_t *ipst = ire->ire_ipst; 12873 12874 irb = ire->ire_bucket; 12875 ASSERT(irb != NULL); 12876 12877 ASSERT(DB_TYPE(first_mp) == M_CTL); 12878 or = (opt_restart_t *)first_mp->b_rptr; 12879 12880 IRB_REFHOLD(irb); 12881 for (; ire != NULL; ire = ire->ire_next) { 12882 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12883 continue; 12884 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12885 continue; 12886 12887 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12888 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12889 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12890 /* No resolver exists for the gateway; skip this ire. */ 12891 if (ire_gw == NULL) 12892 continue; 12893 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12894 /* 12895 * A resolver exists: we can get the interface on which we have 12896 * to apply the operation. 12897 */ 12898 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12899 first_mp); 12900 if (error == 0) 12901 or->or_private = CGTP_MCAST_SUCCESS; 12902 12903 if (ip_debug > 0) { 12904 ulong_t off; 12905 char *ksym; 12906 12907 ksym = kobj_getsymname((uintptr_t)fn, &off); 12908 ip2dbg(("ip_multirt_apply_membership_v6: " 12909 "called %s, multirt group 0x%08x via itf 0x%08x, " 12910 "error %d [success %u]\n", 12911 ksym ? 
ksym : "?", 12912 ntohl(V4_PART_OF_V6((*v6grp))), 12913 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12914 error, or->or_private)); 12915 } 12916 12917 ire_refrele(ire_gw); 12918 if (error == EINPROGRESS) { 12919 IRB_REFRELE(irb); 12920 return (error); 12921 } 12922 } 12923 IRB_REFRELE(irb); 12924 /* 12925 * Consider the call as successful if we succeeded on at least 12926 * one interface. Otherwise, return the last encountered error. 12927 */ 12928 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12929 } 12930 12931 void 12932 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12933 { 12934 kstat_t *ksp; 12935 12936 ip6_stat_t template = { 12937 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12938 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12939 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12940 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12941 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12942 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12943 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12944 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12945 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12946 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12947 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12948 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12949 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12950 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12951 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12952 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12953 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12954 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12955 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12956 }; 12957 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12958 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12959 KSTAT_FLAG_VIRTUAL, stackid); 12960 12961 if (ksp == NULL) 12962 return (NULL); 12963 12964 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12965 ksp->ks_data = (void *)ip6_statisticsp; 12966 ksp->ks_private = (void *)(uintptr_t)stackid; 12967 12968 kstat_install(ksp); 12969 return (ksp); 12970 } 12971 12972 void 12973 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12974 { 12975 if (ksp != NULL) { 12976 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12977 kstat_delete_netstack(ksp, stackid); 12978 } 12979 } 12980 12981 /* 12982 * The following two functions set and get the value for the 12983 * IPV6_SRC_PREFERENCES socket option. 12984 */ 12985 int 12986 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12987 { 12988 /* 12989 * We only support preferences that are covered by 12990 * IPV6_PREFER_SRC_MASK. 12991 */ 12992 if (prefs & ~IPV6_PREFER_SRC_MASK) 12993 return (EINVAL); 12994 12995 /* 12996 * Look for conflicting preferences or default preferences. If 12997 * both bits of a related pair are clear, the application wants the 12998 * system's default value for that pair. Both bits in a pair can't 12999 * be set. 
13000 */ 13001 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13002 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13003 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13004 IPV6_PREFER_SRC_MIPMASK) { 13005 return (EINVAL); 13006 } 13007 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13008 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13009 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13010 IPV6_PREFER_SRC_TMPMASK) { 13011 return (EINVAL); 13012 } 13013 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13014 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13015 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13016 IPV6_PREFER_SRC_CGAMASK) { 13017 return (EINVAL); 13018 } 13019 13020 connp->conn_src_preferences = prefs; 13021 return (0); 13022 } 13023 13024 size_t 13025 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13026 { 13027 *val = connp->conn_src_preferences; 13028 return (sizeof (connp->conn_src_preferences)); 13029 } 13030 13031 int 13032 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13033 { 13034 ill_t *ill; 13035 ire_t *ire; 13036 int error; 13037 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13038 13039 /* 13040 * Verify the source address and ifindex. Privileged users can use 13041 * any source address. For ancillary data the source address is 13042 * checked in ip_wput_v6. 13043 */ 13044 if (pkti->ipi6_ifindex != 0) { 13045 ASSERT(connp != NULL); 13046 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13047 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13048 if (ill == NULL) { 13049 /* 13050 * We just want to know if the interface exists, we 13051 * don't really care about the ill pointer itself. 
13052 */ 13053 if (error != EINPROGRESS) 13054 return (error); 13055 error = 0; /* Ensure we don't use it below */ 13056 } else { 13057 ill_refrele(ill); 13058 } 13059 } 13060 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13061 secpolicy_net_rawaccess(cr) != 0) { 13062 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13063 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13064 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13065 if (ire != NULL) 13066 ire_refrele(ire); 13067 else 13068 return (ENXIO); 13069 } 13070 return (0); 13071 } 13072 13073 /* 13074 * Get the size of the IP options (including the IP headers size) 13075 * without including the AH header's size. If till_ah is B_FALSE, 13076 * and if AH header is present, dest options beyond AH header will 13077 * also be included in the returned size. 13078 */ 13079 int 13080 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13081 { 13082 ip6_t *ip6h; 13083 uint8_t nexthdr; 13084 uint8_t *whereptr; 13085 ip6_hbh_t *hbhhdr; 13086 ip6_dest_t *dsthdr; 13087 ip6_rthdr_t *rthdr; 13088 int ehdrlen; 13089 int size; 13090 ah_t *ah; 13091 13092 ip6h = (ip6_t *)mp->b_rptr; 13093 size = IPV6_HDR_LEN; 13094 nexthdr = ip6h->ip6_nxt; 13095 whereptr = (uint8_t *)&ip6h[1]; 13096 for (;;) { 13097 /* Assume IP has already stripped it */ 13098 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13099 switch (nexthdr) { 13100 case IPPROTO_HOPOPTS: 13101 hbhhdr = (ip6_hbh_t *)whereptr; 13102 nexthdr = hbhhdr->ip6h_nxt; 13103 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13104 break; 13105 case IPPROTO_DSTOPTS: 13106 dsthdr = (ip6_dest_t *)whereptr; 13107 nexthdr = dsthdr->ip6d_nxt; 13108 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13109 break; 13110 case IPPROTO_ROUTING: 13111 rthdr = (ip6_rthdr_t *)whereptr; 13112 nexthdr = rthdr->ip6r_nxt; 13113 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13114 break; 13115 default : 13116 if (till_ah) { 13117 ASSERT(nexthdr == IPPROTO_AH); 13118 return (size); 13119 } 13120 /* 13121 * If we don't have a 
AH header to traverse, 13122 * return now. This happens normally for 13123 * outbound datagrams where we have not inserted 13124 * the AH header. 13125 */ 13126 if (nexthdr != IPPROTO_AH) { 13127 return (size); 13128 } 13129 13130 /* 13131 * We don't include the AH header's size 13132 * to be symmetrical with other cases where 13133 * we either don't have a AH header (outbound) 13134 * or peek into the AH header yet (inbound and 13135 * not pulled up yet). 13136 */ 13137 ah = (ah_t *)whereptr; 13138 nexthdr = ah->ah_nexthdr; 13139 ehdrlen = (ah->ah_length << 2) + 8; 13140 13141 if (nexthdr == IPPROTO_DSTOPTS) { 13142 if (whereptr + ehdrlen >= mp->b_wptr) { 13143 /* 13144 * The destination options header 13145 * is not part of the first mblk. 13146 */ 13147 whereptr = mp->b_cont->b_rptr; 13148 } else { 13149 whereptr += ehdrlen; 13150 } 13151 13152 dsthdr = (ip6_dest_t *)whereptr; 13153 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13154 size += ehdrlen; 13155 } 13156 return (size); 13157 } 13158 whereptr += ehdrlen; 13159 size += ehdrlen; 13160 } 13161 } 13162