1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 1990 Mentat Inc. 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/dlpi.h> 35 #include <sys/stropts.h> 36 #include <sys/sysmacros.h> 37 #include <sys/strsun.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/kobj.h> 48 #include <sys/zone.h> 49 50 #include <sys/kmem.h> 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <sys/vtrace.h> 55 #include <sys/isa_defs.h> 56 #include <sys/atomic.h> 57 #include <sys/iphada.h> 58 #include <sys/policy.h> 59 #include <net/if.h> 60 #include <net/if_arp.h> 61 #include <net/route.h> 62 #include <net/if_dl.h> 63 #include <sys/sockio.h> 64 #include <netinet/in.h> 65 #include <netinet/ip6.h> 66 #include <netinet/icmp6.h> 67 #include <netinet/sctp.h> 68 69 #include <inet/common.h> 70 #include <inet/mi.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 #include <inet/snmpcom.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/multidata.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/udp_impl.h> 99 #include <sys/squeue.h> 100 101 extern squeue_func_t ip_input_proc; 102 103 /* 104 * IP statistics. 
*/
#define	IP6_STAT(x)	(ip6_statistics.x.value.ui64++)

/* Named kstat counters; each one is incremented through IP6_STAT(). */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
} ip6_stat_t;

/* Counter names and types, in the same order as the ip6_stat_t members. */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",	KSTAT_DATA_UINT64 },
};

static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 * IPv6 functions will end with _v6 in the ip module.
 * IPv6 functions will end with _ipv6 in the transport modules.
 * IPv6 macros:
 *	Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *	Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *	And then there are ..V4_PART_OF_V6.
 *	The intent is that macros in the ip module end with _V6.
 * IPv6 global variables will start with ipv6_
 * IPv6 structures will start with ipv6
 * IPv6 defined constants should start with IPV6_
 *	(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipv6IfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known address constants.  Each initializer is written as four
 * 32-bit words; the little-endian variants below are the byte-swapped
 * forms of the big-endian ones, so both spell the same byte sequence.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/* Leave room for ip_newroute to tack on the src and target addresses */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations for the file-local routines defined further down. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static boolean_t	icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int);
static boolean_t ip_ulp_cando_pkt2big(int);

static void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/*
 * Queue initializer that installs ip_rput_v6; it shares ip_open, ip_close
 * and ip_mod_info with winit_ipv6 below.
 * NOTE(review): slot meanings follow the STREAMS qinit layout - confirm
 * against qinit(9S).
 */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,
	NULL,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/* Queue initializer that installs ip_wput_v6 and ip_wsrv. */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,
	(pfi_t)ip_wsrv,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
 * The message has already been checksummed and if needed,
 * a copy has been made to be sent any interested ICMP client (conn)
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
283 */ 284 static void 285 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 286 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 287 { 288 icmp6_t *icmp6; 289 ip6_t *ip6h; 290 boolean_t interested; 291 ip6i_t *ip6i; 292 in6_addr_t origsrc; 293 ire_t *ire; 294 mblk_t *first_mp; 295 ipsec_in_t *ii; 296 297 ASSERT(ill != NULL); 298 first_mp = mp; 299 if (mctl_present) { 300 mp = first_mp->b_cont; 301 ASSERT(mp != NULL); 302 303 ii = (ipsec_in_t *)first_mp->b_rptr; 304 ASSERT(ii->ipsec_in_type == IPSEC_IN); 305 } 306 307 ip6h = (ip6_t *)mp->b_rptr; 308 309 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 310 311 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 312 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 313 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 314 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 315 freemsg(first_mp); 316 return; 317 } 318 ip6h = (ip6_t *)mp->b_rptr; 319 } 320 if (icmp_accept_clear_messages == 0) { 321 first_mp = ipsec_check_global_policy(first_mp, NULL, 322 NULL, ip6h, mctl_present); 323 if (first_mp == NULL) 324 return; 325 } 326 327 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 328 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 329 icmp6->icmp6_code)); 330 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 331 332 /* Initiate IPPF processing here */ 333 if (IP6_IN_IPP(flags)) { 334 335 /* 336 * If the ifindex changes due to SIOCSLIFINDEX 337 * packet may return to IP on the wrong ill. 
338 */ 339 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 340 if (mp == NULL) { 341 if (mctl_present) { 342 freeb(first_mp); 343 } 344 return; 345 } 346 } 347 348 switch (icmp6->icmp6_type) { 349 case ICMP6_DST_UNREACH: 350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 351 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 353 break; 354 355 case ICMP6_TIME_EXCEEDED: 356 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 357 break; 358 359 case ICMP6_PARAM_PROB: 360 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 361 break; 362 363 case ICMP6_PACKET_TOO_BIG: 364 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 365 zoneid); 366 return; 367 case ICMP6_ECHO_REQUEST: 368 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 369 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 370 !ipv6_resp_echo_mcast) 371 break; 372 373 /* 374 * We must have exclusive use of the mblk to convert it to 375 * a response. 376 * If not, we copy it. 377 */ 378 if (mp->b_datap->db_ref > 1) { 379 mblk_t *mp1; 380 381 mp1 = copymsg(mp); 382 freemsg(mp); 383 if (mp1 == NULL) { 384 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 385 if (mctl_present) 386 freeb(first_mp); 387 return; 388 } 389 mp = mp1; 390 ip6h = (ip6_t *)mp->b_rptr; 391 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 392 if (mctl_present) 393 first_mp->b_cont = mp; 394 else 395 first_mp = mp; 396 } 397 398 /* 399 * Turn the echo into an echo reply. 400 * Remove any extension headers (do not reverse a source route) 401 * and clear the flow id (keep traffic class for now). 
402 */ 403 if (hdr_length != IPV6_HDR_LEN) { 404 int i; 405 406 for (i = 0; i < IPV6_HDR_LEN; i++) 407 mp->b_rptr[hdr_length - i - 1] = 408 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 409 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 410 ip6h = (ip6_t *)mp->b_rptr; 411 ip6h->ip6_nxt = IPPROTO_ICMPV6; 412 hdr_length = IPV6_HDR_LEN; 413 } 414 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 415 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 416 417 ip6h->ip6_plen = 418 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 419 origsrc = ip6h->ip6_src; 420 /* 421 * Reverse the source and destination addresses. 422 * If the return address is a multicast, zero out the source 423 * (ip_wput_v6 will set an address). 424 */ 425 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 426 ip6h->ip6_src = ipv6_all_zeros; 427 ip6h->ip6_dst = origsrc; 428 } else { 429 ip6h->ip6_src = ip6h->ip6_dst; 430 ip6h->ip6_dst = origsrc; 431 } 432 433 /* set the hop limit */ 434 ip6h->ip6_hops = ipv6_def_hops; 435 436 /* 437 * Prepare for checksum by putting icmp length in the icmp 438 * checksum field. The checksum is calculated in ip_wput_v6. 439 */ 440 icmp6->icmp6_cksum = ip6h->ip6_plen; 441 /* 442 * ICMP echo replies should go out on the same interface 443 * the request came on as probes used by in.mpathd for 444 * detecting NIC failures are ECHO packets. We turn-off load 445 * spreading by allocating a ip6i and setting ip6i_attach_if 446 * to B_TRUE which is handled both by ip_wput_v6 and 447 * ip_newroute_v6. If we don't turnoff load spreading, 448 * the packets might get dropped if there are no 449 * non-FAILED/INACTIVE interfaces for it to go out on and 450 * in.mpathd would wrongly detect a failure or mis-detect 451 * a NIC failure as a link failure. As load spreading can 452 * happen only if ill_group is not NULL, we do only for 453 * that case and this does not affect the normal case. 454 * 455 * We force this only on echo packets that came from on-link 456 * hosts. 
We restrict this to link-local addresses which 457 * is used by in.mpathd for probing. In the IPv6 case, 458 * default routes typically have an ire_ipif pointer and 459 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 460 * might work. As a default route out of this interface 461 * may not be present, enforcing this packet to go out in 462 * this case may not work. 463 */ 464 if (ill->ill_group != NULL && 465 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 466 /* 467 * If we are sending replies to ourselves, don't 468 * set ATTACH_IF as we may not be able to find 469 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 470 * causes ip_wput_v6 to look for an IRE_LOCAL on 471 * "ill" which it may not find and will try to 472 * create an IRE_CACHE for our local address. Once 473 * we do this, we will try to forward all packets 474 * meant to our LOCAL address. 475 */ 476 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 477 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 478 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 479 if (mp == NULL) { 480 BUMP_MIB(ill->ill_icmp6_mib, 481 ipv6IfIcmpInErrors); 482 if (ire != NULL) 483 ire_refrele(ire); 484 if (mctl_present) 485 freeb(first_mp); 486 return; 487 } else if (mctl_present) { 488 first_mp->b_cont = mp; 489 } else { 490 first_mp = mp; 491 } 492 ip6i = (ip6i_t *)mp->b_rptr; 493 ip6i->ip6i_flags = IP6I_ATTACH_IF; 494 ip6i->ip6i_ifindex = 495 ill->ill_phyint->phyint_ifindex; 496 } 497 if (ire != NULL) 498 ire_refrele(ire); 499 } 500 501 if (!mctl_present) { 502 /* 503 * This packet should go out the same way as it 504 * came in i.e in clear. To make sure that global 505 * policy will not be applied to this in ip_wput, 506 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
507 */ 508 ASSERT(first_mp == mp); 509 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 510 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 511 freemsg(mp); 512 return; 513 } 514 ii = (ipsec_in_t *)first_mp->b_rptr; 515 516 /* This is not a secure packet */ 517 ii->ipsec_in_secure = B_FALSE; 518 first_mp->b_cont = mp; 519 } 520 ii->ipsec_in_zoneid = zoneid; 521 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 522 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 523 return; 524 } 525 put(WR(q), first_mp); 526 return; 527 528 case ICMP6_ECHO_REPLY: 529 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 530 break; 531 532 case ND_ROUTER_SOLICIT: 533 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 534 break; 535 536 case ND_ROUTER_ADVERT: 537 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 538 break; 539 540 case ND_NEIGHBOR_SOLICIT: 541 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 542 if (mctl_present) 543 freeb(first_mp); 544 /* XXX may wish to pass first_mp up to ndp_input someday. */ 545 ndp_input(ill, mp); 546 return; 547 548 case ND_NEIGHBOR_ADVERT: 549 BUMP_MIB(ill->ill_icmp6_mib, 550 ipv6IfIcmpInNeighborAdvertisements); 551 if (mctl_present) 552 freeb(first_mp); 553 /* XXX may wish to pass first_mp up to ndp_input someday. */ 554 ndp_input(ill, mp); 555 return; 556 557 case ND_REDIRECT: { 558 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 559 560 if (ipv6_ignore_redirect) 561 break; 562 563 /* 564 * As there is no upper client to deliver, we don't 565 * need the first_mp any more. 566 */ 567 if (mctl_present) 568 freeb(first_mp); 569 if (!pullupmsg(mp, -1) || 570 !icmp_redirect_ok_v6(ill, mp)) { 571 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 572 break; 573 } 574 icmp_redirect_v6(q, mp, ill); 575 return; 576 } 577 578 /* 579 * The next three icmp messages will be handled by MLD. 580 * Pass all valid MLD packets up to any process(es) 581 * listening on a raw ICMP socket. 
MLD messages are 582 * freed by mld_input function. 583 */ 584 case MLD_LISTENER_QUERY: 585 case MLD_LISTENER_REPORT: 586 case MLD_LISTENER_REDUCTION: 587 if (mctl_present) 588 freeb(first_mp); 589 mld_input(q, mp, ill); 590 return; 591 default: 592 break; 593 } 594 if (interested) { 595 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 596 mctl_present, zoneid); 597 } else { 598 freemsg(first_mp); 599 } 600 } 601 602 /* 603 * Process received IPv6 ICMP Packet too big. 604 * After updating any IRE it does the fanout to any matching transport streams. 605 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 606 */ 607 /* ARGSUSED */ 608 static void 609 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 610 boolean_t mctl_present, zoneid_t zoneid) 611 { 612 ip6_t *ip6h; 613 ip6_t *inner_ip6h; 614 icmp6_t *icmp6; 615 uint16_t hdr_length; 616 uint32_t mtu; 617 ire_t *ire, *first_ire; 618 mblk_t *first_mp; 619 620 first_mp = mp; 621 if (mctl_present) 622 mp = first_mp->b_cont; 623 /* 624 * We must have exclusive use of the mblk to update the MTU 625 * in the packet. 626 * If not, we copy it. 627 * 628 * If there's an M_CTL present, we know that allocated first_mp 629 * earlier in this function, so we know first_mp has refcnt of one. 
630 */ 631 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 632 if (mp->b_datap->db_ref > 1) { 633 mblk_t *mp1; 634 635 mp1 = copymsg(mp); 636 freemsg(mp); 637 if (mp1 == NULL) { 638 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 639 if (mctl_present) 640 freeb(first_mp); 641 return; 642 } 643 mp = mp1; 644 if (mctl_present) 645 first_mp->b_cont = mp; 646 else 647 first_mp = mp; 648 } 649 ip6h = (ip6_t *)mp->b_rptr; 650 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 651 hdr_length = ip_hdr_length_v6(mp, ip6h); 652 else 653 hdr_length = IPV6_HDR_LEN; 654 655 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 656 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 657 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 658 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 659 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 660 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 661 freemsg(first_mp); 662 return; 663 } 664 ip6h = (ip6_t *)mp->b_rptr; 665 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 666 inner_ip6h = (ip6_t *)&icmp6[1]; 667 } 668 669 /* 670 * For link local destinations matching simply on IRE type is not 671 * sufficient. Same link local addresses for different ILL's is 672 * possible. 
673 */ 674 675 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 676 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 677 IRE_CACHE, ill->ill_ipif, ALL_ZONES, 678 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 679 680 if (first_ire == NULL) { 681 if (ip_debug > 2) { 682 /* ip1dbg */ 683 pr_addr_dbg("icmp_inbound_too_big_v6:" 684 "no ire for dst %s\n", AF_INET6, 685 &inner_ip6h->ip6_dst); 686 } 687 freemsg(first_mp); 688 return; 689 } 690 691 mtu = ntohl(icmp6->icmp6_mtu); 692 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 693 for (ire = first_ire; ire != NULL && 694 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 695 ire = ire->ire_next) { 696 mutex_enter(&ire->ire_lock); 697 if (mtu < IPV6_MIN_MTU) { 698 ip1dbg(("Received mtu less than IPv6 " 699 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 700 mtu = IPV6_MIN_MTU; 701 /* 702 * If an mtu less than IPv6 min mtu is received, 703 * we must include a fragment header in 704 * subsequent packets. 705 */ 706 ire->ire_frag_flag |= IPH_FRAG_HDR; 707 } 708 ip1dbg(("Received mtu from router: %d\n", mtu)); 709 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 710 /* Record the new max frag size for the ULP. */ 711 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 712 /* 713 * If we need a fragment header in every packet 714 * (above case or multirouting), make sure the 715 * ULP takes it into account when computing the 716 * payload size. 
717 */ 718 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 719 sizeof (ip6_frag_t)); 720 } else { 721 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 722 } 723 mutex_exit(&ire->ire_lock); 724 } 725 rw_exit(&first_ire->ire_bucket->irb_lock); 726 ire_refrele(first_ire); 727 } else { 728 irb_t *irb = NULL; 729 /* 730 * for non-link local destinations we match only on the IRE type 731 */ 732 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 733 IRE_CACHE, ill->ill_ipif, ALL_ZONES, MATCH_IRE_TYPE); 734 if (ire == NULL) { 735 if (ip_debug > 2) { 736 /* ip1dbg */ 737 pr_addr_dbg("icmp_inbound_too_big_v6:" 738 "no ire for dst %s\n", 739 AF_INET6, &inner_ip6h->ip6_dst); 740 } 741 freemsg(first_mp); 742 return; 743 } 744 irb = ire->ire_bucket; 745 ire_refrele(ire); 746 rw_enter(&irb->irb_lock, RW_READER); 747 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 748 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 749 &inner_ip6h->ip6_dst)) { 750 mtu = ntohl(icmp6->icmp6_mtu); 751 mutex_enter(&ire->ire_lock); 752 if (mtu < IPV6_MIN_MTU) { 753 ip1dbg(("Received mtu less than IPv6" 754 "min mtu %d: %d\n", 755 IPV6_MIN_MTU, mtu)); 756 mtu = IPV6_MIN_MTU; 757 /* 758 * If an mtu less than IPv6 min mtu is 759 * received, we must include a fragment 760 * header in subsequent packets. 761 */ 762 ire->ire_frag_flag |= IPH_FRAG_HDR; 763 } 764 765 ip1dbg(("Received mtu from router: %d\n", mtu)); 766 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 767 /* Record the new max frag size for the ULP. */ 768 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 769 /* 770 * If we need a fragment header in 771 * every packet (above case or 772 * multirouting), make sure the ULP 773 * takes it into account when computing 774 * the payload size. 
775 */ 776 icmp6->icmp6_mtu = 777 htonl(ire->ire_max_frag - 778 sizeof (ip6_frag_t)); 779 } else { 780 icmp6->icmp6_mtu = 781 htonl(ire->ire_max_frag); 782 } 783 mutex_exit(&ire->ire_lock); 784 } 785 } 786 rw_exit(&irb->irb_lock); 787 } 788 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 789 mctl_present, zoneid); 790 } 791 792 static void 793 pkt_too_big(conn_t *connp, void *arg) 794 { 795 mblk_t *mp; 796 797 if (!connp->conn_ipv6_recvpathmtu) 798 return; 799 800 /* create message and drop it on this connections read queue */ 801 if ((mp = dupb((mblk_t *)arg)) == NULL) { 802 return; 803 } 804 mp->b_datap->db_type = M_CTL; 805 806 putnext(connp->conn_rq, mp); 807 } 808 809 /* 810 * Fanout received ICMPv6 error packets to the transports. 811 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 812 */ 813 void 814 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 815 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 816 { 817 uint16_t *up; /* Pointer to ports in ULP header */ 818 uint32_t ports; /* reversed ports for fanout */ 819 ip6_t rip6h; /* With reversed addresses */ 820 uint16_t hdr_length; 821 uint8_t *nexthdrp; 822 uint8_t nexthdr; 823 mblk_t *first_mp; 824 ipsec_in_t *ii; 825 tcpha_t *tcpha; 826 conn_t *connp; 827 828 first_mp = mp; 829 if (mctl_present) { 830 mp = first_mp->b_cont; 831 ASSERT(mp != NULL); 832 833 ii = (ipsec_in_t *)first_mp->b_rptr; 834 ASSERT(ii->ipsec_in_type == IPSEC_IN); 835 } else { 836 ii = NULL; 837 } 838 839 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 840 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 841 842 /* 843 * Need to pullup everything in order to use 844 * ip_hdr_length_nexthdr_v6() 845 */ 846 if (mp->b_cont != NULL) { 847 if (!pullupmsg(mp, -1)) { 848 ip1dbg(("icmp_inbound_error_fanout_v6: " 849 "pullupmsg failed\n")); 850 goto drop_pkt; 851 } 852 ip6h = (ip6_t *)mp->b_rptr; 853 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 854 } 855 856 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 857 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 858 goto drop_pkt; 859 860 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 861 goto drop_pkt; 862 nexthdr = *nexthdrp; 863 864 /* Set message type, must be done after pullups */ 865 mp->b_datap->db_type = M_CTL; 866 867 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 868 /* 869 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 870 * sockets. 871 * 872 * Note I don't like walking every connection to deliver 873 * this information to a set of listeners. A separate 874 * list could be kept to keep the cost of this down. 875 */ 876 ipcl_walk(pkt_too_big, (void *)mp); 877 } 878 879 /* Try to pass the ICMP message to clients who need it */ 880 switch (nexthdr) { 881 case IPPROTO_UDP: { 882 /* 883 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 884 * UDP header to get the port information. 885 */ 886 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 887 mp->b_wptr) { 888 break; 889 } 890 /* 891 * Attempt to find a client stream based on port. 892 * Note that we do a reverse lookup since the header is 893 * in the form we sent it out. 894 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 895 * and we only set the src and dst addresses and nexthdr. 896 */ 897 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 898 rip6h.ip6_src = ip6h->ip6_dst; 899 rip6h.ip6_dst = ip6h->ip6_src; 900 rip6h.ip6_nxt = nexthdr; 901 ((uint16_t *)&ports)[0] = up[1]; 902 ((uint16_t *)&ports)[1] = up[0]; 903 904 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 905 IP6_NO_IPPOLICY, mctl_present, zoneid); 906 return; 907 } 908 case IPPROTO_TCP: { 909 /* 910 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 911 * the TCP header to get the port information. 
912 */ 913 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 914 mp->b_wptr) { 915 break; 916 } 917 918 /* 919 * Attempt to find a client stream based on port. 920 * Note that we do a reverse lookup since the header is 921 * in the form we sent it out. 922 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 923 * we only set the src and dst addresses and nexthdr. 924 */ 925 926 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 927 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 928 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 929 if (connp == NULL) { 930 goto drop_pkt; 931 } 932 933 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 934 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 935 return; 936 937 } 938 case IPPROTO_SCTP: 939 /* 940 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 941 * the SCTP header to get the port information. 942 */ 943 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 944 mp->b_wptr) { 945 break; 946 } 947 948 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 949 ((uint16_t *)&ports)[0] = up[1]; 950 ((uint16_t *)&ports)[1] = up[0]; 951 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 952 IP6_NO_IPPOLICY, 0, zoneid); 953 return; 954 case IPPROTO_ESP: 955 case IPPROTO_AH: { 956 int ipsec_rc; 957 958 /* 959 * We need a IPSEC_IN in the front to fanout to AH/ESP. 960 * We will re-use the IPSEC_IN if it is already present as 961 * AH/ESP will not affect any fields in the IPSEC_IN for 962 * ICMP errors. If there is no IPSEC_IN, allocate a new 963 * one and attach it in the front. 964 */ 965 if (ii != NULL) { 966 /* 967 * ip_fanout_proto_again converts the ICMP errors 968 * that come back from AH/ESP to M_DATA so that 969 * if it is non-AH/ESP and we do a pullupmsg in 970 * this function, it would work. Convert it back 971 * to M_CTL before we send up as this is a ICMP 972 * error. This could have been generated locally or 973 * by some router. Validate the inner IPSEC 974 * headers. 
975 * 976 * NOTE : ill_index is used by ip_fanout_proto_again 977 * to locate the ill. 978 */ 979 ASSERT(ill != NULL); 980 ii->ipsec_in_ill_index = 981 ill->ill_phyint->phyint_ifindex; 982 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 983 first_mp->b_cont->b_datap->db_type = M_CTL; 984 } else { 985 /* 986 * IPSEC_IN is not present. We attach a ipsec_in 987 * message and send up to IPSEC for validating 988 * and removing the IPSEC headers. Clear 989 * ipsec_in_secure so that when we return 990 * from IPSEC, we don't mistakenly think that this 991 * is a secure packet came from the network. 992 * 993 * NOTE : ill_index is used by ip_fanout_proto_again 994 * to locate the ill. 995 */ 996 ASSERT(first_mp == mp); 997 first_mp = ipsec_in_alloc(B_FALSE); 998 if (first_mp == NULL) { 999 freemsg(mp); 1000 BUMP_MIB(&ip_mib, ipInDiscards); 1001 return; 1002 } 1003 ii = (ipsec_in_t *)first_mp->b_rptr; 1004 1005 /* This is not a secure packet */ 1006 ii->ipsec_in_secure = B_FALSE; 1007 first_mp->b_cont = mp; 1008 mp->b_datap->db_type = M_CTL; 1009 ASSERT(ill != NULL); 1010 ii->ipsec_in_ill_index = 1011 ill->ill_phyint->phyint_ifindex; 1012 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1013 } 1014 1015 if (!ipsec_loaded()) { 1016 ip_proto_not_sup(q, first_mp, 0, zoneid); 1017 return; 1018 } 1019 1020 if (nexthdr == IPPROTO_ESP) 1021 ipsec_rc = ipsecesp_icmp_error(first_mp); 1022 else 1023 ipsec_rc = ipsecah_icmp_error(first_mp); 1024 if (ipsec_rc == IPSEC_STATUS_FAILED) 1025 return; 1026 1027 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1028 return; 1029 } 1030 case IPPROTO_ENCAP: 1031 case IPPROTO_IPV6: 1032 if ((uint8_t *)ip6h + hdr_length + 1033 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1034 sizeof (ip6_t)) > mp->b_wptr) 1035 goto drop_pkt; 1036 1037 if (nexthdr == IPPROTO_ENCAP || 1038 !IN6_ARE_ADDR_EQUAL( 1039 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1040 &ip6h->ip6_src) || 1041 !IN6_ARE_ADDR_EQUAL( 1042 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1043 &ip6h->ip6_dst)) { 1044 /* 1045 * For tunnels that have used IPsec protection, 1046 * we need to adjust the MTU to take into account 1047 * the IPsec overhead. 1048 */ 1049 if (ii != NULL) 1050 icmp6->icmp6_mtu = htons( 1051 ntohs(icmp6->icmp6_mtu) - 1052 ipsec_in_extra_length(first_mp)); 1053 } else { 1054 /* 1055 * Self-encapsulated case. As in the ipv4 case, 1056 * we need to strip the 2nd IP header. Since mp 1057 * is already pulled-up, we can simply bcopy 1058 * the 3rd header + data over the 2nd header. 1059 */ 1060 uint16_t unused_len; 1061 ip6_t *inner_ip6h = (ip6_t *) 1062 ((uchar_t *)ip6h + hdr_length); 1063 1064 /* 1065 * Make sure we don't do recursion more than once. 1066 */ 1067 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1068 &unused_len, &nexthdrp) || 1069 *nexthdrp == IPPROTO_IPV6) { 1070 goto drop_pkt; 1071 } 1072 1073 /* 1074 * We are about to modify the packet. Make a copy if 1075 * someone else has a reference to it. 1076 */ 1077 if (DB_REF(mp) > 1) { 1078 mblk_t *mp1; 1079 uint16_t icmp6_offset; 1080 1081 mp1 = copymsg(mp); 1082 if (mp1 == NULL) { 1083 goto drop_pkt; 1084 } 1085 icmp6_offset = (uint16_t) 1086 ((uchar_t *)icmp6 - mp->b_rptr); 1087 freemsg(mp); 1088 mp = mp1; 1089 1090 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1091 ip6h = (ip6_t *)&icmp6[1]; 1092 inner_ip6h = (ip6_t *) 1093 ((uchar_t *)ip6h + hdr_length); 1094 1095 if (mctl_present) 1096 first_mp->b_cont = mp; 1097 else 1098 first_mp = mp; 1099 } 1100 1101 /* 1102 * Need to set db_type back to M_DATA before 1103 * refeeding mp into this function. 
1104 */ 1105 DB_TYPE(mp) = M_DATA; 1106 1107 /* 1108 * Copy the 3rd header + remaining data on top 1109 * of the 2nd header. 1110 */ 1111 bcopy(inner_ip6h, ip6h, 1112 mp->b_wptr - (uchar_t *)inner_ip6h); 1113 1114 /* 1115 * Subtract length of the 2nd header. 1116 */ 1117 mp->b_wptr -= hdr_length; 1118 1119 /* 1120 * Now recurse, and see what I _really_ should be 1121 * doing here. 1122 */ 1123 icmp_inbound_error_fanout_v6(q, first_mp, 1124 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1125 zoneid); 1126 return; 1127 } 1128 /* FALLTHRU */ 1129 default: 1130 /* 1131 * The rip6h header is only used for the lookup and we 1132 * only set the src and dst addresses and nexthdr. 1133 */ 1134 rip6h.ip6_src = ip6h->ip6_dst; 1135 rip6h.ip6_dst = ip6h->ip6_src; 1136 rip6h.ip6_nxt = nexthdr; 1137 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1138 IP6_NO_IPPOLICY, mctl_present, zoneid); 1139 return; 1140 } 1141 /* NOTREACHED */ 1142 drop_pkt: 1143 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1144 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1145 freemsg(first_mp); 1146 } 1147 1148 /* 1149 * Validate the incoming redirect message, if valid redirect 1150 * processing is done later. This is separated from the actual 1151 * redirect processing to avoid becoming single threaded when not 1152 * necessary. (i.e invalid packet) 1153 * Assumes that any AH or ESP headers have already been removed. 1154 * The mp has already been pulled up. 
1155 */ 1156 boolean_t 1157 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1158 { 1159 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1160 nd_redirect_t *rd; 1161 ire_t *ire; 1162 uint16_t len; 1163 uint16_t hdr_length; 1164 1165 ASSERT(mp->b_cont == NULL); 1166 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1167 hdr_length = ip_hdr_length_v6(mp, ip6h); 1168 else 1169 hdr_length = IPV6_HDR_LEN; 1170 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1171 len = mp->b_wptr - mp->b_rptr - hdr_length; 1172 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1173 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1174 (rd->nd_rd_code != 0) || 1175 (len < sizeof (nd_redirect_t)) || 1176 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1177 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1178 return (B_FALSE); 1179 } 1180 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1181 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1182 return (B_FALSE); 1183 } 1184 1185 /* 1186 * Verify that the IP source address of the redirect is 1187 * the same as the current first-hop router for the specified 1188 * ICMP destination address. Just to be cautious, this test 1189 * will be done again before we add the redirect, in case 1190 * router goes away between now and then. 1191 */ 1192 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1193 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, 1194 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1195 if (ire == NULL) 1196 return (B_FALSE); 1197 ire_refrele(ire); 1198 if (len > sizeof (nd_redirect_t)) { 1199 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1200 len - sizeof (nd_redirect_t))) 1201 return (B_FALSE); 1202 } 1203 return (B_TRUE); 1204 } 1205 1206 /* 1207 * Process received IPv6 ICMP Redirect messages. 1208 * Assumes that the icmp packet has already been verfied to be 1209 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1210 */ 1211 /* ARGSUSED */ 1212 static void 1213 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1214 { 1215 ip6_t *ip6h; 1216 uint16_t hdr_length; 1217 nd_redirect_t *rd; 1218 ire_t *ire; 1219 ire_t *prev_ire; 1220 ire_t *redir_ire; 1221 in6_addr_t *src, *dst, *gateway; 1222 nd_opt_hdr_t *opt; 1223 nce_t *nce; 1224 int nce_flags = 0; 1225 int err = 0; 1226 boolean_t redirect_to_router = B_FALSE; 1227 int len; 1228 iulp_t ulp_info = { 0 }; 1229 ill_t *prev_ire_ill; 1230 ipif_t *ipif; 1231 1232 ip6h = (ip6_t *)mp->b_rptr; 1233 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1234 hdr_length = ip_hdr_length_v6(mp, ip6h); 1235 else 1236 hdr_length = IPV6_HDR_LEN; 1237 1238 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1239 src = &ip6h->ip6_src; 1240 dst = &rd->nd_rd_dst; 1241 gateway = &rd->nd_rd_target; 1242 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1243 redirect_to_router = B_TRUE; 1244 nce_flags |= NCE_F_ISROUTER; 1245 } 1246 /* 1247 * Make sure we had a route for the dest in question and that 1248 * route was pointing to the old gateway (the source of the 1249 * redirect packet.) 1250 */ 1251 ipif = ipif_get_next_ipif(NULL, ill); 1252 if (ipif == NULL) { 1253 freemsg(mp); 1254 return; 1255 } 1256 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1257 ALL_ZONES, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1258 ipif_refrele(ipif); 1259 /* 1260 * Check that 1261 * the redirect was not from ourselves 1262 * old gateway is still directly reachable 1263 */ 1264 if (prev_ire == NULL || 1265 prev_ire->ire_type == IRE_LOCAL) { 1266 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1267 goto fail_redirect; 1268 } 1269 prev_ire_ill = ire_to_ill(prev_ire); 1270 ASSERT(prev_ire_ill != NULL); 1271 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1272 nce_flags |= NCE_F_NONUD; 1273 1274 /* 1275 * Should we use the old ULP info to create the new gateway? From 1276 * a user's perspective, we should inherit the info so that it 1277 * is a "smooth" transition. 
If we do not do that, then new 1278 * connections going thru the new gateway will have no route metrics, 1279 * which is counter-intuitive to user. From a network point of 1280 * view, this may or may not make sense even though the new gateway 1281 * is still directly connected to us so the route metrics should not 1282 * change much. 1283 * 1284 * But if the old ire_uinfo is not initialized, we do another 1285 * recursive lookup on the dest using the new gateway. There may 1286 * be a route to that. If so, use it to initialize the redirect 1287 * route. 1288 */ 1289 if (prev_ire->ire_uinfo.iulp_set) { 1290 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1291 } else if (redirect_to_router) { 1292 /* 1293 * Only do the following if the redirection is really to 1294 * a router. 1295 */ 1296 ire_t *tmp_ire; 1297 ire_t *sire; 1298 1299 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1300 ALL_ZONES, 0, 1301 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1302 if (sire != NULL) { 1303 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1304 ASSERT(tmp_ire != NULL); 1305 ire_refrele(tmp_ire); 1306 ire_refrele(sire); 1307 } else if (tmp_ire != NULL) { 1308 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1309 sizeof (iulp_t)); 1310 ire_refrele(tmp_ire); 1311 } 1312 } 1313 1314 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1315 opt = (nd_opt_hdr_t *)&rd[1]; 1316 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1317 if (opt != NULL) { 1318 err = ndp_lookup_then_add(ill, 1319 (uchar_t *)&opt[1], /* Link layer address */ 1320 gateway, 1321 &ipv6_all_ones, /* prefix mask */ 1322 &ipv6_all_zeros, /* Mapping mask */ 1323 0, 1324 nce_flags, 1325 ND_STALE, 1326 &nce); 1327 switch (err) { 1328 case 0: 1329 NCE_REFRELE(nce); 1330 break; 1331 case EEXIST: 1332 /* 1333 * Check to see if link layer address has changed and 1334 * process the nce_state accordingly. 
1335 */ 1336 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1337 NCE_REFRELE(nce); 1338 break; 1339 default: 1340 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1341 err)); 1342 goto fail_redirect; 1343 } 1344 } 1345 if (redirect_to_router) { 1346 /* icmp_redirect_ok_v6() must have already verified this */ 1347 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1348 1349 /* 1350 * Create a Route Association. This will allow us to remember 1351 * a router told us to use the particular gateway. 1352 */ 1353 ire = ire_create_v6( 1354 dst, 1355 &ipv6_all_ones, /* mask */ 1356 &prev_ire->ire_src_addr_v6, /* source addr */ 1357 gateway, /* gateway addr */ 1358 &prev_ire->ire_max_frag, /* max frag */ 1359 NULL, /* Fast Path header */ 1360 NULL, /* no rfq */ 1361 NULL, /* no stq */ 1362 IRE_HOST_REDIRECT, 1363 NULL, 1364 prev_ire->ire_ipif, 1365 NULL, 1366 0, 1367 0, 1368 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1369 &ulp_info); 1370 } else { 1371 /* 1372 * Just create an on link entry, may or may not be a router 1373 * If there is no link layer address option ire_add() won't 1374 * add this. 1375 */ 1376 ire = ire_create_v6( 1377 dst, /* gateway == dst */ 1378 &ipv6_all_ones, /* mask */ 1379 &prev_ire->ire_src_addr_v6, /* source addr */ 1380 &ipv6_all_zeros, /* gateway addr */ 1381 &prev_ire->ire_max_frag, /* max frag */ 1382 NULL, /* Fast Path header */ 1383 prev_ire->ire_rfq, /* ire rfq */ 1384 prev_ire->ire_stq, /* ire stq */ 1385 IRE_CACHE, 1386 NULL, 1387 prev_ire->ire_ipif, 1388 &ipv6_all_ones, 1389 0, 1390 0, 1391 0, 1392 &ulp_info); 1393 } 1394 if (ire == NULL) 1395 goto fail_redirect; 1396 1397 /* 1398 * XXX If there is no nce i.e there is no target link layer address 1399 * option with the redirect message, ire_add will fail. In that 1400 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1401 * to fix this. 
1402 */ 1403 if (ire_add(&ire, NULL, NULL, NULL) == 0) { 1404 1405 /* tell routing sockets that we received a redirect */ 1406 ip_rts_change_v6(RTM_REDIRECT, 1407 &rd->nd_rd_dst, 1408 &rd->nd_rd_target, 1409 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1410 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1411 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1412 1413 /* 1414 * Delete any existing IRE_HOST_REDIRECT for this destination. 1415 * This together with the added IRE has the effect of 1416 * modifying an existing redirect. 1417 */ 1418 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1419 ire->ire_ipif, NULL, ALL_ZONES, 0, 1420 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1421 1422 ire_refrele(ire); /* Held in ire_add_v6 */ 1423 1424 if (redir_ire != NULL) { 1425 ire_delete(redir_ire); 1426 ire_refrele(redir_ire); 1427 } 1428 } 1429 1430 if (prev_ire->ire_type == IRE_CACHE) 1431 ire_delete(prev_ire); 1432 ire_refrele(prev_ire); 1433 prev_ire = NULL; 1434 1435 fail_redirect: 1436 if (prev_ire != NULL) 1437 ire_refrele(prev_ire); 1438 freemsg(mp); 1439 } 1440 1441 static ill_t * 1442 ip_queue_to_ill_v6(queue_t *q) 1443 { 1444 ill_t *ill; 1445 1446 ASSERT(WR(q) == q); 1447 1448 if (q->q_next != NULL) { 1449 ill = (ill_t *)q->q_ptr; 1450 if (ILL_CAN_LOOKUP(ill)) 1451 ill_refhold(ill); 1452 else 1453 ill = NULL; 1454 } else { 1455 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1456 NULL, NULL, NULL, NULL, NULL); 1457 } 1458 if (ill == NULL) 1459 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1460 return (ill); 1461 } 1462 1463 /* 1464 * Assigns an appropriate source address to the packet. 1465 * If origdst is one of our IP addresses that use it as the source. 1466 * If the queue is an ill queue then select a source from that ill. 1467 * Otherwise pick a source based on a route lookup back to the origsrc. 1468 * 1469 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1470 */ 1471 static in6_addr_t * 1472 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1473 in6_addr_t *src) 1474 { 1475 ill_t *ill; 1476 ire_t *ire; 1477 ipif_t *ipif; 1478 zoneid_t zoneid; 1479 1480 ASSERT(!(wq->q_flag & QREADR)); 1481 if (wq->q_next != NULL) { 1482 ill = (ill_t *)wq->q_ptr; 1483 zoneid = GLOBAL_ZONEID; 1484 } else { 1485 ill = NULL; 1486 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1487 } 1488 1489 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1490 NULL, NULL, zoneid, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1491 if (ire != NULL) { 1492 /* Destined to one of our addresses */ 1493 *src = *origdst; 1494 ire_refrele(ire); 1495 return (src); 1496 } 1497 if (ire != NULL) { 1498 ire_refrele(ire); 1499 ire = NULL; 1500 } 1501 if (ill == NULL) { 1502 /* What is the route back to the original source? */ 1503 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1504 NULL, NULL, zoneid, 1505 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1506 if (ire == NULL) { 1507 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1508 return (NULL); 1509 } 1510 /* 1511 * Does not matter whether we use ire_stq or ire_ipif here. 1512 * Just pick an ill for ICMP replies. 1513 */ 1514 ASSERT(ire->ire_ipif != NULL); 1515 ill = ire->ire_ipif->ipif_ill; 1516 ire_refrele(ire); 1517 } 1518 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1519 IPV6_PREFER_SRC_DEFAULT, zoneid); 1520 if (ipif != NULL) { 1521 *src = ipif->ipif_v6src_addr; 1522 ipif_refrele(ipif); 1523 return (src); 1524 } 1525 /* 1526 * Unusual case - can't find a usable source address to reach the 1527 * original source. Use what in the route to the source. 
1528 */ 1529 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1530 NULL, NULL, zoneid, (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1531 if (ire == NULL) { 1532 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1533 return (NULL); 1534 } 1535 ASSERT(ire != NULL); 1536 *src = ire->ire_src_addr_v6; 1537 ire_refrele(ire); 1538 return (src); 1539 } 1540 1541 /* 1542 * Build and ship an IPv6 ICMP message using the packet data in mp, 1543 * and the ICMP header pointed to by "stuff". (May be called as 1544 * writer.) 1545 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1546 * verify that an icmp error packet can be sent. 1547 * 1548 * If q is an ill write side queue (which is the case when packets 1549 * arrive from ip_rput) then ip_wput code will ensure that packets to 1550 * link-local destinations are sent out that ill. 1551 * 1552 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1553 * source address (see above function). 1554 */ 1555 static void 1556 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1557 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1558 { 1559 ip6_t *ip6h; 1560 in6_addr_t v6dst; 1561 size_t len_needed; 1562 size_t msg_len; 1563 mblk_t *mp1; 1564 icmp6_t *icmp6; 1565 ill_t *ill; 1566 in6_addr_t v6src; 1567 mblk_t *ipsec_mp; 1568 ipsec_out_t *io; 1569 1570 ill = ip_queue_to_ill_v6(q); 1571 if (ill == NULL) { 1572 freemsg(mp); 1573 return; 1574 } 1575 1576 if (mctl_present) { 1577 /* 1578 * If it is : 1579 * 1580 * 1) a IPSEC_OUT, then this is caused by outbound 1581 * datagram originating on this host. IPSEC processing 1582 * may or may not have been done. Refer to comments above 1583 * icmp_inbound_error_fanout for details. 1584 * 1585 * 2) a IPSEC_IN if we are generating a icmp_message 1586 * for an incoming datagram destined for us i.e called 1587 * from ip_fanout_send_icmp. 
1588 */ 1589 ipsec_info_t *in; 1590 1591 ipsec_mp = mp; 1592 mp = ipsec_mp->b_cont; 1593 1594 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1595 ip6h = (ip6_t *)mp->b_rptr; 1596 1597 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1598 in->ipsec_info_type == IPSEC_IN); 1599 1600 if (in->ipsec_info_type == IPSEC_IN) { 1601 /* 1602 * Convert the IPSEC_IN to IPSEC_OUT. 1603 */ 1604 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1605 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1606 ill_refrele(ill); 1607 return; 1608 } 1609 } else { 1610 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1611 io = (ipsec_out_t *)in; 1612 /* 1613 * Clear out ipsec_out_proc_begin, so we do a fresh 1614 * ire lookup. 1615 */ 1616 io->ipsec_out_proc_begin = B_FALSE; 1617 } 1618 } else { 1619 /* 1620 * This is in clear. The icmp message we are building 1621 * here should go out in clear. 1622 */ 1623 ipsec_in_t *ii; 1624 ASSERT(mp->b_datap->db_type == M_DATA); 1625 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1626 freemsg(mp); 1627 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1628 ill_refrele(ill); 1629 return; 1630 } 1631 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1632 1633 /* This is not a secure packet */ 1634 ii->ipsec_in_secure = B_FALSE; 1635 ipsec_mp->b_cont = mp; 1636 ip6h = (ip6_t *)mp->b_rptr; 1637 /* 1638 * Convert the IPSEC_IN to IPSEC_OUT. 
1639 */ 1640 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1641 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1642 ill_refrele(ill); 1643 return; 1644 } 1645 } 1646 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1647 1648 if (v6src_ptr != NULL) { 1649 v6src = *v6src_ptr; 1650 } else { 1651 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1652 &v6src) == NULL) { 1653 freemsg(ipsec_mp); 1654 ill_refrele(ill); 1655 return; 1656 } 1657 } 1658 v6dst = ip6h->ip6_src; 1659 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1660 msg_len = msgdsize(mp); 1661 if (msg_len > len_needed) { 1662 if (!adjmsg(mp, len_needed - msg_len)) { 1663 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1664 freemsg(ipsec_mp); 1665 ill_refrele(ill); 1666 return; 1667 } 1668 msg_len = len_needed; 1669 } 1670 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1671 if (mp1 == NULL) { 1672 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1673 freemsg(ipsec_mp); 1674 ill_refrele(ill); 1675 return; 1676 } 1677 ill_refrele(ill); 1678 mp1->b_cont = mp; 1679 mp = mp1; 1680 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1681 io->ipsec_out_type == IPSEC_OUT); 1682 ipsec_mp->b_cont = mp; 1683 1684 /* 1685 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1686 * node generates be accepted in peace by all on-host destinations. 1687 * If we do NOT assume that all on-host destinations trust 1688 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1689 * (Look for ipsec_out_icmp_loopback). 
1690 */ 1691 io->ipsec_out_icmp_loopback = B_TRUE; 1692 1693 ip6h = (ip6_t *)mp->b_rptr; 1694 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1695 1696 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1697 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1698 ip6h->ip6_hops = ipv6_def_hops; 1699 ip6h->ip6_dst = v6dst; 1700 ip6h->ip6_src = v6src; 1701 msg_len += IPV6_HDR_LEN + len; 1702 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1703 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1704 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1705 } 1706 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1707 icmp6 = (icmp6_t *)&ip6h[1]; 1708 bcopy(stuff, (char *)icmp6, len); 1709 /* 1710 * Prepare for checksum by putting icmp length in the icmp 1711 * checksum field. The checksum is calculated in ip_wput_v6. 1712 */ 1713 icmp6->icmp6_cksum = ip6h->ip6_plen; 1714 if (icmp6->icmp6_type == ND_REDIRECT) { 1715 ip6h->ip6_hops = IPV6_MAX_HOPS; 1716 } 1717 /* Send to V6 writeside put routine */ 1718 put(q, ipsec_mp); 1719 } 1720 1721 /* 1722 * Update the output mib when ICMPv6 packets are sent. 
1723 */ 1724 static void 1725 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1726 { 1727 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1728 1729 switch (icmp6->icmp6_type) { 1730 case ICMP6_DST_UNREACH: 1731 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1732 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1733 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1734 break; 1735 1736 case ICMP6_TIME_EXCEEDED: 1737 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1738 break; 1739 1740 case ICMP6_PARAM_PROB: 1741 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1742 break; 1743 1744 case ICMP6_PACKET_TOO_BIG: 1745 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1746 break; 1747 1748 case ICMP6_ECHO_REQUEST: 1749 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1750 break; 1751 1752 case ICMP6_ECHO_REPLY: 1753 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1754 break; 1755 1756 case ND_ROUTER_SOLICIT: 1757 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1758 break; 1759 1760 case ND_ROUTER_ADVERT: 1761 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1762 break; 1763 1764 case ND_NEIGHBOR_SOLICIT: 1765 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1766 break; 1767 1768 case ND_NEIGHBOR_ADVERT: 1769 BUMP_MIB(ill->ill_icmp6_mib, 1770 ipv6IfIcmpOutNeighborAdvertisements); 1771 break; 1772 1773 case ND_REDIRECT: 1774 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1775 break; 1776 1777 case MLD_LISTENER_QUERY: 1778 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1779 break; 1780 1781 case MLD_LISTENER_REPORT: 1782 case MLD_V2_LISTENER_REPORT: 1783 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1784 break; 1785 1786 case MLD_LISTENER_REDUCTION: 1787 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1788 break; 1789 } 1790 } 1791 1792 /* 1793 * Check if it is ok to send an ICMPv6 error packet in 1794 * response to the IP packet in 
mp. 1795 * Free the message and return null if no 1796 * ICMP error packet should be sent. 1797 */ 1798 static mblk_t * 1799 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1800 boolean_t llbcast, boolean_t mcast_ok) 1801 { 1802 ip6_t *ip6h; 1803 1804 if (!mp) 1805 return (NULL); 1806 1807 ip6h = (ip6_t *)mp->b_rptr; 1808 1809 /* Check if source address uniquely identifies the host */ 1810 1811 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1812 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1813 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1814 freemsg(mp); 1815 return (NULL); 1816 } 1817 1818 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1819 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1820 icmp6_t *icmp6; 1821 1822 if (mp->b_wptr - mp->b_rptr < len_needed) { 1823 if (!pullupmsg(mp, len_needed)) { 1824 ill_t *ill; 1825 1826 ill = ip_queue_to_ill_v6(q); 1827 if (ill == NULL) { 1828 BUMP_MIB(&icmp6_mib, 1829 ipv6IfIcmpInErrors); 1830 } else { 1831 BUMP_MIB(ill->ill_icmp6_mib, 1832 ipv6IfIcmpInErrors); 1833 ill_refrele(ill); 1834 } 1835 freemsg(mp); 1836 return (NULL); 1837 } 1838 ip6h = (ip6_t *)mp->b_rptr; 1839 } 1840 icmp6 = (icmp6_t *)&ip6h[1]; 1841 /* Explicitly do not generate errors in response to redirects */ 1842 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1843 icmp6->icmp6_type == ND_REDIRECT) { 1844 freemsg(mp); 1845 return (NULL); 1846 } 1847 } 1848 /* 1849 * Check that the destination is not multicast and that the packet 1850 * was not sent on link layer broadcast or multicast. (Exception 1851 * is Packet too big message as per the draft - when mcast_ok is set.) 1852 */ 1853 if (!mcast_ok && 1854 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1855 freemsg(mp); 1856 return (NULL); 1857 } 1858 if (icmp_err_rate_limit()) { 1859 /* 1860 * Only send ICMP error packets every so often. 1861 * This should be done on a per port/source basis, 1862 * but for now this will suffice. 
1863 */ 1864 freemsg(mp); 1865 return (NULL); 1866 } 1867 return (mp); 1868 } 1869 1870 /* 1871 * Generate an ICMPv6 redirect message. 1872 * Include target link layer address option if it exits. 1873 * Always include redirect header. 1874 */ 1875 static void 1876 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1877 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1878 { 1879 nd_redirect_t *rd; 1880 nd_opt_rd_hdr_t *rdh; 1881 uchar_t *buf; 1882 nce_t *nce = NULL; 1883 nd_opt_hdr_t *opt; 1884 int len; 1885 int ll_opt_len = 0; 1886 int max_redir_hdr_data_len; 1887 int pkt_len; 1888 in6_addr_t *srcp; 1889 1890 /* 1891 * We are called from ip_rput where we could 1892 * not have attached an IPSEC_IN. 1893 */ 1894 ASSERT(mp->b_datap->db_type == M_DATA); 1895 1896 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1897 if (mp == NULL) 1898 return; 1899 nce = ndp_lookup(ill, targetp, B_FALSE); 1900 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1901 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1902 ill->ill_phys_addr_length + 7)/8 * 8; 1903 } 1904 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1905 ASSERT(len % 4 == 0); 1906 buf = kmem_alloc(len, KM_NOSLEEP); 1907 if (buf == NULL) { 1908 if (nce != NULL) 1909 NCE_REFRELE(nce); 1910 freemsg(mp); 1911 return; 1912 } 1913 1914 rd = (nd_redirect_t *)buf; 1915 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1916 rd->nd_rd_code = 0; 1917 rd->nd_rd_reserved = 0; 1918 rd->nd_rd_target = *targetp; 1919 rd->nd_rd_dst = *dest; 1920 1921 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1922 if (nce != NULL && ll_opt_len != 0) { 1923 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1924 opt->nd_opt_len = ll_opt_len/8; 1925 bcopy((char *)nce->nce_res_mp->b_rptr + 1926 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1927 ill->ill_phys_addr_length); 1928 } 1929 if (nce != NULL) 1930 NCE_REFRELE(nce); 1931 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1932 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1933 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1934 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1935 pkt_len = msgdsize(mp); 1936 /* Make sure mp is 8 byte aligned */ 1937 if (pkt_len > max_redir_hdr_data_len) { 1938 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1939 sizeof (nd_opt_rd_hdr_t))/8; 1940 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1941 } else { 1942 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1943 (void) adjmsg(mp, -(pkt_len % 8)); 1944 } 1945 rdh->nd_opt_rh_reserved1 = 0; 1946 rdh->nd_opt_rh_reserved2 = 0; 1947 /* ipif_v6src_addr contains the link-local source address */ 1948 rw_enter(&ill_g_lock, RW_READER); 1949 if (ill->ill_group != NULL) { 1950 /* 1951 * The receiver of the redirect will verify whether it 1952 * had a route through us (srcp that we will use in 1953 * the redirect) or not. As we load spread even link-locals, 1954 * we don't know which source address the receiver of 1955 * redirect has in its route for communicating with us. 1956 * Thus we randomly choose a source here and finally we 1957 * should get to the right one and it will eventually 1958 * accept the redirect from us. We can't call 1959 * ip_lookup_scope_v6 because we don't have the right 1960 * link-local address here. Thus we randomly choose one. 1961 */ 1962 int cnt = ill->ill_group->illgrp_ill_count; 1963 1964 ill = ill->ill_group->illgrp_ill; 1965 cnt = ++icmp_redirect_v6_src_index % cnt; 1966 while (cnt--) 1967 ill = ill->ill_group_next; 1968 srcp = &ill->ill_ipif->ipif_v6src_addr; 1969 } else { 1970 srcp = &ill->ill_ipif->ipif_v6src_addr; 1971 } 1972 rw_exit(&ill_g_lock); 1973 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 1974 kmem_free(buf, len); 1975 } 1976 1977 1978 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 1979 void 1980 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1981 boolean_t llbcast, boolean_t mcast_ok) 1982 { 1983 icmp6_t icmp6; 1984 boolean_t mctl_present; 1985 mblk_t *first_mp; 1986 1987 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1988 1989 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 1990 if (mp == NULL) { 1991 if (mctl_present) 1992 freeb(first_mp); 1993 return; 1994 } 1995 bzero(&icmp6, sizeof (icmp6_t)); 1996 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1997 icmp6.icmp6_code = code; 1998 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 1999 } 2000 2001 /* 2002 * Generate an ICMP unreachable message. 2003 */ 2004 void 2005 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2006 boolean_t llbcast, boolean_t mcast_ok) 2007 { 2008 icmp6_t icmp6; 2009 boolean_t mctl_present; 2010 mblk_t *first_mp; 2011 2012 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2013 2014 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2015 if (mp == NULL) { 2016 if (mctl_present) 2017 freeb(first_mp); 2018 return; 2019 } 2020 bzero(&icmp6, sizeof (icmp6_t)); 2021 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2022 icmp6.icmp6_code = code; 2023 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2024 } 2025 2026 /* 2027 * Generate an ICMP pkt too big message. 
2028 */ 2029 static void 2030 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2031 boolean_t llbcast, boolean_t mcast_ok) 2032 { 2033 icmp6_t icmp6; 2034 mblk_t *first_mp; 2035 boolean_t mctl_present; 2036 2037 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2038 2039 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2040 if (mp == NULL) { 2041 if (mctl_present) 2042 freeb(first_mp); 2043 return; 2044 } 2045 bzero(&icmp6, sizeof (icmp6_t)); 2046 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2047 icmp6.icmp6_code = 0; 2048 icmp6.icmp6_mtu = htonl(mtu); 2049 2050 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2051 } 2052 2053 /* 2054 * Generate an ICMP parameter problem message. (May be called as writer.) 2055 * 'offset' is the offset from the beginning of the packet in error. 2056 */ 2057 static void 2058 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2059 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2060 { 2061 icmp6_t icmp6; 2062 boolean_t mctl_present; 2063 mblk_t *first_mp; 2064 2065 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2066 2067 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2068 if (mp == NULL) { 2069 if (mctl_present) 2070 freeb(first_mp); 2071 return; 2072 } 2073 bzero((char *)&icmp6, sizeof (icmp6_t)); 2074 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2075 icmp6.icmp6_code = code; 2076 icmp6.icmp6_pptr = htonl(offset); 2077 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2078 } 2079 2080 /* 2081 * This code will need to take into account the possibility of binding 2082 * to a link local address on a multi-homed host, in which case the 2083 * outgoing interface (from the conn) will need to be used when getting 2084 * an ire for the dst. 
Going through proper outgoing interface and 2085 * choosing the source address corresponding to the outgoing interface 2086 * is necessary when the destination address is a link-local address and 2087 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2088 * This can happen when active connection is setup; thus ipp pointer 2089 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2090 * pointer is passed as ipp pointer. 2091 */ 2092 mblk_t * 2093 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2094 { 2095 ssize_t len; 2096 int protocol; 2097 struct T_bind_req *tbr; 2098 sin6_t *sin6; 2099 ipa6_conn_t *ac6; 2100 in6_addr_t *v6srcp; 2101 in6_addr_t *v6dstp; 2102 uint16_t lport; 2103 uint16_t fport; 2104 uchar_t *ucp; 2105 mblk_t *mp1; 2106 boolean_t ire_requested; 2107 boolean_t ipsec_policy_set; 2108 int error = 0; 2109 boolean_t local_bind; 2110 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2111 ipa6_conn_x_t *acx6; 2112 boolean_t verify_dst; 2113 2114 ASSERT(connp->conn_af_isv6); 2115 len = mp->b_wptr - mp->b_rptr; 2116 if (len < (sizeof (*tbr) + 1)) { 2117 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2118 "ip_bind_v6: bogus msg, len %ld", len); 2119 goto bad_addr; 2120 } 2121 /* Back up and extract the protocol identifier. */ 2122 mp->b_wptr--; 2123 tbr = (struct T_bind_req *)mp->b_rptr; 2124 /* Reset the message type in preparation for shipping it back. */ 2125 mp->b_datap->db_type = M_PCPROTO; 2126 2127 protocol = *mp->b_wptr & 0xFF; 2128 connp->conn_ulp = (uint8_t)protocol; 2129 2130 /* 2131 * Check for a zero length address. This is from a protocol that 2132 * wants to register to receive all packets of its type. 2133 */ 2134 if (tbr->ADDR_length == 0) { 2135 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2136 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2137 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2138 /* 2139 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2140 * Do not allow others to bind to these. 2141 */ 2142 goto bad_addr; 2143 } 2144 2145 connp->conn_srcv6 = ipv6_all_zeros; 2146 ipcl_proto_insert_v6(connp, protocol); 2147 2148 tbr->PRIM_type = T_BIND_ACK; 2149 return (mp); 2150 } 2151 2152 /* Extract the address pointer from the message. */ 2153 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2154 tbr->ADDR_length); 2155 if (ucp == NULL) { 2156 ip1dbg(("ip_bind_v6: no address\n")); 2157 goto bad_addr; 2158 } 2159 if (!OK_32PTR(ucp)) { 2160 ip1dbg(("ip_bind_v6: unaligned address\n")); 2161 goto bad_addr; 2162 } 2163 mp1 = mp->b_cont; /* trailing mp if any */ 2164 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2165 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2166 2167 switch (tbr->ADDR_length) { 2168 default: 2169 ip1dbg(("ip_bind_v6: bad address length %d\n", 2170 (int)tbr->ADDR_length)); 2171 goto bad_addr; 2172 2173 case IPV6_ADDR_LEN: 2174 /* Verification of local address only */ 2175 v6srcp = (in6_addr_t *)ucp; 2176 lport = 0; 2177 local_bind = B_TRUE; 2178 break; 2179 2180 case sizeof (sin6_t): 2181 sin6 = (sin6_t *)ucp; 2182 v6srcp = &sin6->sin6_addr; 2183 lport = sin6->sin6_port; 2184 local_bind = B_TRUE; 2185 break; 2186 2187 case sizeof (ipa6_conn_t): 2188 /* 2189 * Verify that both the source and destination addresses 2190 * are valid. 2191 * Note that we allow connect to broadcast and multicast 2192 * addresses when ire_requested is set. Thus the ULP 2193 * has to check for IRE_BROADCAST and multicast. 2194 */ 2195 ac6 = (ipa6_conn_t *)ucp; 2196 v6srcp = &ac6->ac6_laddr; 2197 v6dstp = &ac6->ac6_faddr; 2198 fport = ac6->ac6_fport; 2199 /* For raw socket, the local port is not set. */ 2200 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2201 connp->conn_lport; 2202 local_bind = B_FALSE; 2203 /* Always verify destination reachability. 
*/ 2204 verify_dst = B_TRUE; 2205 break; 2206 2207 case sizeof (ipa6_conn_x_t): 2208 /* 2209 * Verify that the source address is valid. 2210 * Note that we allow connect to broadcast and multicast 2211 * addresses when ire_requested is set. Thus the ULP 2212 * has to check for IRE_BROADCAST and multicast. 2213 */ 2214 acx6 = (ipa6_conn_x_t *)ucp; 2215 ac6 = &acx6->ac6x_conn; 2216 v6srcp = &ac6->ac6_laddr; 2217 v6dstp = &ac6->ac6_faddr; 2218 fport = ac6->ac6_fport; 2219 lport = ac6->ac6_lport; 2220 local_bind = B_FALSE; 2221 /* 2222 * Client that passed ipa6_conn_x_t to us specifies whether to 2223 * verify destination reachability. 2224 */ 2225 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2226 break; 2227 } 2228 if (local_bind) { 2229 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2230 /* Bind to IPv4 address */ 2231 ipaddr_t v4src; 2232 2233 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2234 2235 error = ip_bind_laddr(connp, mp, v4src, lport, 2236 ire_requested, ipsec_policy_set, 2237 tbr->ADDR_length != IPV6_ADDR_LEN); 2238 if (error != 0) 2239 goto bad_addr; 2240 connp->conn_pkt_isv6 = B_FALSE; 2241 } else { 2242 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2243 error = 0; 2244 goto bad_addr; 2245 } 2246 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2247 ire_requested, ipsec_policy_set, 2248 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2249 if (error != 0) 2250 goto bad_addr; 2251 connp->conn_pkt_isv6 = B_TRUE; 2252 } 2253 if (protocol == IPPROTO_TCP) 2254 connp->conn_recv = tcp_conn_request; 2255 } else { 2256 /* 2257 * Bind to local and remote address. Local might be 2258 * unspecified in which case it will be extracted from 2259 * ire_src_addr_v6 2260 */ 2261 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2262 /* Connect to IPv4 address */ 2263 ipaddr_t v4src; 2264 ipaddr_t v4dst; 2265 2266 /* Is the source unspecified or mapped? 
*/ 2267 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2268 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2269 ip1dbg(("ip_bind_v6: " 2270 "dst is mapped, but not the src\n")); 2271 goto bad_addr; 2272 } 2273 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2274 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2275 2276 /* 2277 * XXX Fix needed. Need to pass ipsec_policy_set 2278 * instead of B_FALSE. 2279 */ 2280 2281 /* Always verify destination reachability. */ 2282 error = ip_bind_connected(connp, mp, &v4src, lport, 2283 v4dst, fport, ire_requested, ipsec_policy_set, 2284 B_TRUE, B_TRUE); 2285 if (error != 0) 2286 goto bad_addr; 2287 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2288 connp->conn_pkt_isv6 = B_FALSE; 2289 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2290 ip1dbg(("ip_bind_v6: " 2291 "src is mapped, but not the dst\n")); 2292 goto bad_addr; 2293 } else { 2294 error = ip_bind_connected_v6(connp, mp, v6srcp, 2295 lport, v6dstp, ipp, fport, ire_requested, 2296 ipsec_policy_set, B_TRUE, verify_dst); 2297 if (error != 0) 2298 goto bad_addr; 2299 connp->conn_pkt_isv6 = B_TRUE; 2300 } 2301 if (protocol == IPPROTO_TCP) 2302 connp->conn_recv = tcp_input; 2303 } 2304 /* Update qinfo if v4/v6 changed */ 2305 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && !IS_TCP_CONN(connp)) { 2306 if (connp->conn_pkt_isv6) 2307 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2308 else 2309 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2310 } 2311 2312 /* 2313 * Pass the IPSEC headers size in ire_ipsec_overhead. 2314 * We can't do this in ip_bind_insert_ire because the policy 2315 * may not have been inherited at that point in time and hence 2316 * conn_out_enforce_policy may not be set. 2317 */ 2318 mp1 = mp->b_cont; 2319 if (ire_requested && connp->conn_out_enforce_policy && 2320 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2321 ire_t *ire = (ire_t *)mp1->b_rptr; 2322 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2323 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2324 } 2325 2326 /* Send it home. 
*/ 2327 mp->b_datap->db_type = M_PCPROTO; 2328 tbr->PRIM_type = T_BIND_ACK; 2329 return (mp); 2330 2331 bad_addr: 2332 if (error == EINPROGRESS) 2333 return (NULL); 2334 if (error > 0) 2335 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2336 else 2337 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2338 return (mp); 2339 } 2340 2341 /* 2342 * Here address is verified to be a valid local address. 2343 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2344 * address is also considered a valid local address. 2345 * In the case of a multicast address, however, the 2346 * upper protocol is expected to reset the src address 2347 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2348 * no packets are emitted with multicast address as 2349 * source address. 2350 * The addresses valid for bind are: 2351 * (1) - in6addr_any 2352 * (2) - IP address of an UP interface 2353 * (3) - IP address of a DOWN interface 2354 * (4) - a multicast address. In this case 2355 * the conn will only receive packets destined to 2356 * the specified multicast address. Note: the 2357 * application still has to issue an 2358 * IPV6_JOIN_GROUP socket option. 2359 * 2360 * In all the above cases, the bound address must be valid in the current zone. 2361 * When the address is loopback or multicast, there might be many matching IREs 2362 * so bind has to look up based on the zone. 2363 */ 2364 static int 2365 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2366 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2367 boolean_t fanout_insert) 2368 { 2369 int error = 0; 2370 ire_t *src_ire = NULL; 2371 ipif_t *ipif = NULL; 2372 mblk_t *policy_mp; 2373 zoneid_t zoneid; 2374 2375 if (ipsec_policy_set) 2376 policy_mp = mp->b_cont; 2377 2378 /* 2379 * If it was previously connected, conn_fully_bound would have 2380 * been set. 
2381 */ 2382 connp->conn_fully_bound = B_FALSE; 2383 2384 zoneid = connp->conn_zoneid; 2385 2386 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2387 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2388 0, NULL, NULL, zoneid, MATCH_IRE_ZONEONLY); 2389 /* 2390 * If an address other than in6addr_any is requested, 2391 * we verify that it is a valid address for bind 2392 * Note: Following code is in if-else-if form for 2393 * readability compared to a condition check. 2394 */ 2395 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2396 /* LINTED - statement has no consequent */ 2397 if (IRE_IS_LOCAL(src_ire)) { 2398 /* 2399 * (2) Bind to address of local UP interface 2400 */ 2401 ipif = src_ire->ire_ipif; 2402 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2403 ipif_t *multi_ipif = NULL; 2404 ire_t *save_ire; 2405 /* 2406 * (4) bind to multicast address. 2407 * Fake out the IRE returned to upper 2408 * layer to be a broadcast IRE in 2409 * ip_bind_insert_ire_v6(). 2410 * Pass other information that matches 2411 * the ipif (e.g. the source address). 
2412 * conn_multicast_ill is only used for 2413 * IPv6 packets 2414 */ 2415 mutex_enter(&connp->conn_lock); 2416 if (connp->conn_multicast_ill != NULL) { 2417 (void) ipif_lookup_zoneid( 2418 connp->conn_multicast_ill, zoneid, 0, 2419 &multi_ipif); 2420 } else { 2421 /* 2422 * Look for default like 2423 * ip_wput_v6 2424 */ 2425 multi_ipif = ipif_lookup_group_v6( 2426 &ipv6_unspecified_group, zoneid); 2427 } 2428 mutex_exit(&connp->conn_lock); 2429 save_ire = src_ire; 2430 src_ire = NULL; 2431 if (multi_ipif == NULL || 2432 !ire_requested || (src_ire = 2433 ipif_to_ire_v6(multi_ipif)) == 2434 NULL) { 2435 src_ire = save_ire; 2436 error = EADDRNOTAVAIL; 2437 } else { 2438 ASSERT(src_ire != NULL); 2439 if (save_ire != NULL) 2440 ire_refrele(save_ire); 2441 } 2442 if (multi_ipif != NULL) 2443 ipif_refrele(multi_ipif); 2444 } else { 2445 *mp->b_wptr++ = (char)connp->conn_ulp; 2446 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2447 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2448 if (ipif == NULL) { 2449 if (error == EINPROGRESS) { 2450 if (src_ire != NULL) 2451 ire_refrele(src_ire); 2452 return (error); 2453 } 2454 /* 2455 * Not a valid address for bind 2456 */ 2457 error = EADDRNOTAVAIL; 2458 } else { 2459 ipif_refrele(ipif); 2460 } 2461 /* 2462 * Just to keep it consistent with the processing in 2463 * ip_bind_v6(). 2464 */ 2465 mp->b_wptr--; 2466 } 2467 2468 if (error != 0) { 2469 /* Red Alert! Attempting to be a bogon! */ 2470 if (ip_debug > 2) { 2471 /* ip1dbg */ 2472 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2473 " address %s\n", AF_INET6, v6src); 2474 } 2475 goto bad_addr; 2476 } 2477 } 2478 2479 /* 2480 * Allow setting new policies. For example, disconnects come 2481 * down as ipa_t bind. As we would have set conn_policy_cached 2482 * to B_TRUE before, we should set it to B_FALSE, so that policy 2483 * can change after the disconnect. 
2484 */ 2485 connp->conn_policy_cached = B_FALSE; 2486 2487 /* If not fanout_insert this was just an address verification */ 2488 if (fanout_insert) { 2489 /* 2490 * The addresses have been verified. Time to insert in 2491 * the correct fanout list. 2492 */ 2493 connp->conn_srcv6 = *v6src; 2494 connp->conn_remv6 = ipv6_all_zeros; 2495 connp->conn_lport = lport; 2496 connp->conn_fport = 0; 2497 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2498 } 2499 if (error == 0) { 2500 if (ire_requested) { 2501 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2502 error = -1; 2503 goto bad_addr; 2504 } 2505 } else if (ipsec_policy_set) { 2506 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2507 error = -1; 2508 goto bad_addr; 2509 } 2510 } 2511 } 2512 bad_addr: 2513 if (src_ire != NULL) 2514 ire_refrele(src_ire); 2515 2516 if (ipsec_policy_set) { 2517 ASSERT(policy_mp != NULL); 2518 freeb(policy_mp); 2519 /* 2520 * As of now assume that nothing else accompanies 2521 * IPSEC_POLICY_SET. 
2522 */ 2523 mp->b_cont = NULL; 2524 } 2525 return (error); 2526 } 2527 2528 /* ARGSUSED */ 2529 static void 2530 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2531 void *dummy_arg) 2532 { 2533 conn_t *connp = NULL; 2534 tcp_t *tcp; 2535 t_scalar_t prim; 2536 2537 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2538 2539 if (CONN_Q(q)) 2540 connp = Q_TO_CONN(q); 2541 ASSERT(connp != NULL); 2542 2543 prim = ((union T_primitives *)mp->b_rptr)->type; 2544 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2545 2546 tcp = connp->conn_tcp; 2547 if (tcp != NULL) { 2548 /* Pass sticky_ipp for scope_id and pktinfo */ 2549 mp = ip_bind_v6(q, mp, connp, &tcp->tcp_sticky_ipp); 2550 } else { 2551 /* For UDP and ICMP */ 2552 mp = ip_bind_v6(q, mp, connp, NULL); 2553 } 2554 if (mp != NULL) { 2555 if (tcp != NULL) { 2556 CONN_INC_REF(connp); 2557 squeue_fill(connp->conn_sqp, mp, 2558 ip_resume_tcp_bind, connp, SQTAG_TCP_RPUTOTHER); 2559 return; 2560 } else { 2561 qreply(q, mp); 2562 } 2563 CONN_OPER_PENDING_DONE(connp); 2564 } 2565 } 2566 2567 /* 2568 * Verify that both the source and destination addresses 2569 * are valid. If verify_dst, then destination address must also be reachable, 2570 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2571 * It takes ip6_pkt_t * as one of the arguments to determine correct 2572 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2573 * destination address. Note that parameter ipp is only useful for TCP connect 2574 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2575 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2576 * 2577 */ 2578 static int 2579 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2580 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2581 boolean_t ire_requested, boolean_t ipsec_policy_set, 2582 boolean_t fanout_insert, boolean_t verify_dst) 2583 { 2584 ire_t *src_ire; 2585 ire_t *dst_ire; 2586 int error = 0; 2587 int protocol; 2588 mblk_t *policy_mp; 2589 ire_t *sire = NULL; 2590 ire_t *md_dst_ire = NULL; 2591 ill_t *md_ill = NULL; 2592 ill_t *dst_ill = NULL; 2593 ipif_t *src_ipif = NULL; 2594 zoneid_t zoneid; 2595 boolean_t ill_held = B_FALSE; 2596 2597 src_ire = dst_ire = NULL; 2598 /* 2599 * NOTE: The protocol is beyond the wptr because that's how 2600 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2601 */ 2602 protocol = *mp->b_wptr & 0xFF; 2603 2604 /* 2605 * If we never got a disconnect before, clear it now. 2606 */ 2607 connp->conn_fully_bound = B_FALSE; 2608 2609 if (ipsec_policy_set) { 2610 policy_mp = mp->b_cont; 2611 } 2612 2613 zoneid = connp->conn_zoneid; 2614 2615 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2616 ipif_t *ipif; 2617 2618 /* 2619 * Use an "emulated" IRE_BROADCAST to tell the transport it 2620 * is a multicast. 2621 * Pass other information that matches 2622 * the ipif (e.g. the source address). 
2623 * 2624 * conn_multicast_ill is only used for IPv6 packets 2625 */ 2626 mutex_enter(&connp->conn_lock); 2627 if (connp->conn_multicast_ill != NULL) { 2628 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2629 zoneid, 0, &ipif); 2630 } else { 2631 /* Look for default like ip_wput_v6 */ 2632 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2633 } 2634 mutex_exit(&connp->conn_lock); 2635 if (ipif == NULL || !ire_requested || 2636 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2637 if (ipif != NULL) 2638 ipif_refrele(ipif); 2639 if (ip_debug > 2) { 2640 /* ip1dbg */ 2641 pr_addr_dbg("ip_bind_connected_v6: bad " 2642 "connected multicast %s\n", AF_INET6, 2643 v6dst); 2644 } 2645 error = ENETUNREACH; 2646 goto bad_addr; 2647 } 2648 if (ipif != NULL) 2649 ipif_refrele(ipif); 2650 } else { 2651 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2652 NULL, &sire, zoneid, 2653 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2654 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE); 2655 /* 2656 * We also prevent ire's with src address INADDR_ANY to 2657 * be used, which are created temporarily for 2658 * sending out packets from endpoints that have 2659 * conn_unspec_src set. 2660 */ 2661 if (dst_ire == NULL || 2662 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2663 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2664 /* 2665 * When verifying destination reachability, we always 2666 * complain. 2667 * 2668 * When not verifying destination reachability but we 2669 * found an IRE, i.e. the destination is reachable, 2670 * then the other tests still apply and we complain. 
2671 */ 2672 if (verify_dst || (dst_ire != NULL)) { 2673 if (ip_debug > 2) { 2674 /* ip1dbg */ 2675 pr_addr_dbg("ip_bind_connected_v6: bad" 2676 " connected dst %s\n", AF_INET6, 2677 v6dst); 2678 } 2679 if (dst_ire == NULL || 2680 !(dst_ire->ire_type & IRE_HOST)) { 2681 error = ENETUNREACH; 2682 } else { 2683 error = EHOSTUNREACH; 2684 } 2685 goto bad_addr; 2686 } 2687 } 2688 } 2689 2690 /* 2691 * If the app does a connect(), it means that it will most likely 2692 * send more than 1 packet to the destination. It makes sense 2693 * to clear the temporary flag. 2694 */ 2695 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2696 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2697 irb_t *irb = dst_ire->ire_bucket; 2698 2699 rw_enter(&irb->irb_lock, RW_WRITER); 2700 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2701 irb->irb_tmp_ire_cnt--; 2702 rw_exit(&irb->irb_lock); 2703 } 2704 2705 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2706 2707 /* 2708 * See if we should notify ULP about MDT; we do this whether or not 2709 * ire_requested is TRUE, in order to handle active connects; MDT 2710 * eligibility tests for passive connects are handled separately 2711 * through tcp_adapt_ire(). We do this before the source address 2712 * selection, because dst_ire may change after a call to 2713 * ipif_select_source_v6(). This is a best-effort check, as the 2714 * packet for this connection may not actually go through 2715 * dst_ire->ire_stq, and the exact IRE can only be known after 2716 * calling ip_newroute_v6(). This is why we further check on the 2717 * IRE during Multidata packet transmission in tcp_multisend(). 
2718 */ 2719 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2720 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2721 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2722 (md_ill->ill_capabilities & ILL_CAPAB_MDT)) { 2723 md_dst_ire = dst_ire; 2724 IRE_REFHOLD(md_dst_ire); 2725 } 2726 2727 if (dst_ire != NULL && 2728 dst_ire->ire_type == IRE_LOCAL && 2729 dst_ire->ire_zoneid != zoneid) { 2730 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2731 zoneid, 0, 2732 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2733 MATCH_IRE_RJ_BHOLE); 2734 if (src_ire == NULL) { 2735 error = EHOSTUNREACH; 2736 goto bad_addr; 2737 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2738 if (!(src_ire->ire_type & IRE_HOST)) 2739 error = ENETUNREACH; 2740 else 2741 error = EHOSTUNREACH; 2742 goto bad_addr; 2743 } 2744 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2745 src_ipif = src_ire->ire_ipif; 2746 ipif_refhold(src_ipif); 2747 *v6src = src_ipif->ipif_v6lcl_addr; 2748 } 2749 ire_refrele(src_ire); 2750 src_ire = NULL; 2751 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2752 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2753 *v6src = sire->ire_src_addr_v6; 2754 ire_refrele(dst_ire); 2755 dst_ire = sire; 2756 sire = NULL; 2757 } else if (dst_ire->ire_type == IRE_CACHE && 2758 (dst_ire->ire_flags & RTF_SETSRC)) { 2759 ASSERT(dst_ire->ire_zoneid == zoneid); 2760 *v6src = dst_ire->ire_src_addr_v6; 2761 } else { 2762 /* 2763 * Pick a source address so that a proper inbound load 2764 * spreading would happen. Use dst_ill specified by the 2765 * app. when socket option or scopeid is set. 
2766 */ 2767 int err; 2768 2769 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2770 uint_t if_index; 2771 2772 /* 2773 * Scope id or IPV6_PKTINFO 2774 */ 2775 2776 if_index = ipp->ipp_ifindex; 2777 dst_ill = ill_lookup_on_ifindex( 2778 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2779 if (dst_ill == NULL) { 2780 ip1dbg(("ip_bind_connected_v6:" 2781 " bad ifindex %d\n", if_index)); 2782 error = EADDRNOTAVAIL; 2783 goto bad_addr; 2784 } 2785 ill_held = B_TRUE; 2786 } else if (connp->conn_outgoing_ill != NULL) { 2787 /* 2788 * For IPV6_BOUND_IF socket option, 2789 * conn_outgoing_ill should be set 2790 * already in TCP or UDP/ICMP. 2791 */ 2792 dst_ill = conn_get_held_ill(connp, 2793 &connp->conn_outgoing_ill, &err); 2794 if (err == ILL_LOOKUP_FAILED) { 2795 ip1dbg(("ip_bind_connected_v6:" 2796 "no ill for bound_if\n")); 2797 error = EADDRNOTAVAIL; 2798 goto bad_addr; 2799 } 2800 ill_held = B_TRUE; 2801 } else if (dst_ire->ire_stq != NULL) { 2802 /* No need to hold ill here */ 2803 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2804 } else { 2805 /* No need to hold ill here */ 2806 dst_ill = dst_ire->ire_ipif->ipif_ill; 2807 } 2808 if (!ip6_asp_can_lookup()) { 2809 *mp->b_wptr++ = (char)protocol; 2810 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2811 ip_bind_connected_resume_v6); 2812 error = EINPROGRESS; 2813 goto refrele_and_quit; 2814 } 2815 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2816 B_FALSE, connp->conn_src_preferences, zoneid); 2817 ip6_asp_table_refrele(); 2818 if (src_ipif == NULL) { 2819 pr_addr_dbg("ip_bind_connected_v6: " 2820 "no usable source address for " 2821 "connection to %s\n", AF_INET6, v6dst); 2822 error = EADDRNOTAVAIL; 2823 goto bad_addr; 2824 } 2825 *v6src = src_ipif->ipif_v6lcl_addr; 2826 } 2827 } 2828 2829 /* 2830 * We do ire_route_lookup_v6() here (and not an interface lookup) 2831 * as we assert that v6src should only come from an 2832 * UP interface for hard binding. 
2833 */ 2834 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2835 NULL, zoneid, MATCH_IRE_ZONEONLY); 2836 2837 /* src_ire must be a local|loopback */ 2838 if (!IRE_IS_LOCAL(src_ire)) { 2839 if (ip_debug > 2) { 2840 /* ip1dbg */ 2841 pr_addr_dbg("ip_bind_connected_v6: bad " 2842 "connected src %s\n", AF_INET6, v6src); 2843 } 2844 error = EADDRNOTAVAIL; 2845 goto bad_addr; 2846 } 2847 2848 /* 2849 * If the source address is a loopback address, the 2850 * destination had best be local or multicast. 2851 * The transports that can't handle multicast will reject 2852 * those addresses. 2853 */ 2854 if (src_ire->ire_type == IRE_LOOPBACK && 2855 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2856 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2857 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2858 error = -1; 2859 goto bad_addr; 2860 } 2861 /* 2862 * Allow setting new policies. For example, disconnects come 2863 * down as ipa_t bind. As we would have set conn_policy_cached 2864 * to B_TRUE before, we should set it to B_FALSE, so that policy 2865 * can change after the disconnect. 2866 */ 2867 connp->conn_policy_cached = B_FALSE; 2868 2869 /* 2870 * The addresses have been verified. Initialize the conn 2871 * before calling the policy as they expect the conns 2872 * initialized. 2873 */ 2874 connp->conn_srcv6 = *v6src; 2875 connp->conn_remv6 = *v6dst; 2876 connp->conn_lport = lport; 2877 connp->conn_fport = fport; 2878 2879 ASSERT(!(ipsec_policy_set && ire_requested)); 2880 if (ire_requested) { 2881 iulp_t *ulp_info = NULL; 2882 2883 /* 2884 * Note that sire will not be NULL if this is an off-link 2885 * connection and there is not cache for that dest yet. 2886 * 2887 * XXX Because of an existing bug, if there are multiple 2888 * default routes, the IRE returned now may not be the actual 2889 * default route used (default routes are chosen in a 2890 * round robin fashion). 
So if the metrics for different 2891 * default routes are different, we may return the wrong 2892 * metrics. This will not be a problem if the existing 2893 * bug is fixed. 2894 */ 2895 if (sire != NULL) 2896 ulp_info = &(sire->ire_uinfo); 2897 2898 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 2899 error = -1; 2900 goto bad_addr; 2901 } 2902 } else if (ipsec_policy_set) { 2903 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2904 error = -1; 2905 goto bad_addr; 2906 } 2907 } 2908 2909 /* 2910 * Cache IPsec policy in this conn. If we have per-socket policy, 2911 * we'll cache that. If we don't, we'll inherit global policy. 2912 * 2913 * We can't insert until the conn reflects the policy. Note that 2914 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2915 * connections where we don't have a policy. This is to prevent 2916 * global policy lookups in the inbound path. 2917 * 2918 * If we insert before we set conn_policy_cached, 2919 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2920 * because global policy cound be non-empty. We normally call 2921 * ipsec_check_policy() for conn_policy_cached connections only if 2922 * conn_in_enforce_policy is set. But in this case, 2923 * conn_policy_cached can get set anytime since we made the 2924 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2925 * is called, which will make the above assumption false. Thus, we 2926 * need to insert after we set conn_policy_cached. 2927 */ 2928 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2929 goto bad_addr; 2930 2931 /* If not fanout_insert this was just an address verification */ 2932 if (fanout_insert) { 2933 /* 2934 * The addresses have been verified. Time to insert in 2935 * the correct fanout list. 2936 */ 2937 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2938 connp->conn_ports, 2939 IS_TCP_CONN(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2940 } 2941 if (error == 0) { 2942 connp->conn_fully_bound = B_TRUE; 2943 /* 2944 * Our initial checks for MDT have passed; the IRE is not 2945 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2946 * be supporting MDT. Pass the IRE, IPC and ILL into 2947 * ip_mdinfo_return(), which performs further checks 2948 * against them and upon success, returns the MDT info 2949 * mblk which we will attach to the bind acknowledgment. 2950 */ 2951 if (md_dst_ire != NULL) { 2952 mblk_t *mdinfo_mp; 2953 2954 ASSERT(md_ill != NULL); 2955 ASSERT(md_ill->ill_mdt_capab != NULL); 2956 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2957 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2958 linkb(mp, mdinfo_mp); 2959 } 2960 } 2961 bad_addr: 2962 if (ipsec_policy_set) { 2963 ASSERT(policy_mp != NULL); 2964 freeb(policy_mp); 2965 /* 2966 * As of now assume that nothing else accompanies 2967 * IPSEC_POLICY_SET. 2968 */ 2969 mp->b_cont = NULL; 2970 } 2971 refrele_and_quit: 2972 if (src_ire != NULL) 2973 IRE_REFRELE(src_ire); 2974 if (dst_ire != NULL) 2975 IRE_REFRELE(dst_ire); 2976 if (sire != NULL) 2977 IRE_REFRELE(sire); 2978 if (src_ipif != NULL) 2979 ipif_refrele(src_ipif); 2980 if (md_dst_ire != NULL) 2981 IRE_REFRELE(md_dst_ire); 2982 if (ill_held && dst_ill != NULL) 2983 ill_refrele(dst_ill); 2984 return (error); 2985 } 2986 2987 /* 2988 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 2989 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2990 */ 2991 static boolean_t 2992 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 2993 iulp_t *ulp_info) 2994 { 2995 mblk_t *mp1; 2996 ire_t *ret_ire; 2997 2998 mp1 = mp->b_cont; 2999 ASSERT(mp1 != NULL); 3000 3001 if (ire != NULL) { 3002 /* 3003 * mp1 initialized above to IRE_DB_REQ_TYPE 3004 * appended mblk. Its <upper protocol>'s 3005 * job to make sure there is room. 
3006 */ 3007 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3008 return (B_FALSE); 3009 3010 mp1->b_datap->db_type = IRE_DB_TYPE; 3011 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3012 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3013 ret_ire = (ire_t *)mp1->b_rptr; 3014 if (IN6_IS_ADDR_MULTICAST(dst) || 3015 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3016 ret_ire->ire_type = IRE_BROADCAST; 3017 ret_ire->ire_addr_v6 = *dst; 3018 } 3019 if (ulp_info != NULL) { 3020 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3021 sizeof (iulp_t)); 3022 } 3023 ret_ire->ire_mp = mp1; 3024 } else { 3025 /* 3026 * No IRE was found. Remove IRE mblk. 3027 */ 3028 mp->b_cont = mp1->b_cont; 3029 freeb(mp1); 3030 } 3031 return (B_TRUE); 3032 } 3033 3034 /* 3035 * Add an ip6i_t header to the front of the mblk. 3036 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3037 * Returns NULL if allocation fails (and frees original message). 3038 * Used in outgoing path when going through ip_newroute_*v6(). 3039 * Used in incoming path to pass ifindex to transports. 
3040 */ 3041 mblk_t * 3042 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3043 { 3044 mblk_t *mp1; 3045 ip6i_t *ip6i; 3046 ip6_t *ip6h; 3047 3048 ip6h = (ip6_t *)mp->b_rptr; 3049 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3050 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3051 mp->b_datap->db_ref > 1) { 3052 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3053 if (mp1 == NULL) { 3054 freemsg(mp); 3055 return (NULL); 3056 } 3057 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3058 mp1->b_cont = mp; 3059 mp = mp1; 3060 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3061 } 3062 mp->b_rptr = (uchar_t *)ip6i; 3063 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3064 ip6i->ip6i_nxt = IPPROTO_RAW; 3065 if (ill != NULL) { 3066 ip6i->ip6i_flags = IP6I_IFINDEX; 3067 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3068 } else { 3069 ip6i->ip6i_flags = 0; 3070 } 3071 ip6i->ip6i_nexthop = *dst; 3072 return (mp); 3073 } 3074 3075 /* 3076 * Handle protocols with which IP is less intimate. There 3077 * can be more than one stream bound to a particular 3078 * protocol. When this is the case, normally each one gets a copy 3079 * of any incoming packets. 3080 * However, if the packet was tunneled and not multicast we only send to it 3081 * the first match. 3082 * 3083 * Zones notes: 3084 * Packets will be distributed to streams in all zones. This is really only 3085 * useful for ICMPv6 as only applications in the global zone can create raw 3086 * sockets for other protocols. 
3087 */ 3088 static void 3089 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3090 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3091 boolean_t mctl_present, zoneid_t zoneid) 3092 { 3093 queue_t *rq; 3094 mblk_t *mp1, *first_mp1; 3095 in6_addr_t dst = ip6h->ip6_dst; 3096 in6_addr_t src = ip6h->ip6_src; 3097 boolean_t one_only; 3098 mblk_t *first_mp = mp; 3099 boolean_t secure; 3100 conn_t *connp, *first_connp, *next_connp; 3101 connf_t *connfp; 3102 3103 if (mctl_present) { 3104 mp = first_mp->b_cont; 3105 secure = ipsec_in_is_secure(first_mp); 3106 ASSERT(mp != NULL); 3107 } else { 3108 secure = B_FALSE; 3109 } 3110 3111 /* 3112 * If the packet was tunneled and not multicast we only send to it 3113 * the first match. 3114 */ 3115 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3116 !IN6_IS_ADDR_MULTICAST(&dst)); 3117 3118 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3119 mutex_enter(&connfp->connf_lock); 3120 connp = connfp->connf_head; 3121 for (connp = connfp->connf_head; connp != NULL; 3122 connp = connp->conn_next) { 3123 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3124 zoneid)) 3125 break; 3126 } 3127 3128 if (connp == NULL || connp->conn_upq == NULL) { 3129 /* 3130 * No one bound to this port. Is 3131 * there a client that wants all 3132 * unclaimed datagrams? 3133 */ 3134 mutex_exit(&connfp->connf_lock); 3135 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3136 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3137 nexthdr_offset, mctl_present, zoneid)) { 3138 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3139 } 3140 3141 return; 3142 } 3143 3144 CONN_INC_REF(connp); 3145 first_connp = connp; 3146 3147 /* 3148 * XXX: Fix the multiple protocol listeners case. We should not 3149 * be walking the conn->next list here. 3150 */ 3151 if (one_only) { 3152 /* 3153 * Only send message to one tunnel driver by immediately 3154 * terminating the loop. 
3155 */ 3156 connp = NULL; 3157 } else { 3158 connp = connp->conn_next; 3159 3160 } 3161 for (;;) { 3162 while (connp != NULL) { 3163 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3164 flags, zoneid)) 3165 break; 3166 connp = connp->conn_next; 3167 } 3168 3169 /* 3170 * Just copy the data part alone. The mctl part is 3171 * needed just for verifying policy and it is never 3172 * sent up. 3173 */ 3174 if (connp == NULL || connp->conn_upq == NULL || 3175 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3176 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3177 /* 3178 * No more intested clients or memory 3179 * allocation failed 3180 */ 3181 connp = first_connp; 3182 break; 3183 } 3184 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3185 CONN_INC_REF(connp); 3186 mutex_exit(&connfp->connf_lock); 3187 rq = connp->conn_rq; 3188 /* 3189 * For link-local always add ifindex so that transport can set 3190 * sin6_scope_id. Avoid it for ICMP error fanout. 3191 */ 3192 if ((connp->conn_ipv6_recvpktinfo || 3193 IN6_IS_ADDR_LINKLOCAL(&src)) && 3194 (flags & IP_FF_IP6INFO)) { 3195 /* Add header */ 3196 mp1 = ip_add_info_v6(mp1, inill, &dst); 3197 } 3198 if (mp1 == NULL) { 3199 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3200 } else if (!canputnext(rq)) { 3201 if (flags & IP_FF_RAWIP) { 3202 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3203 } else { 3204 BUMP_MIB(ill->ill_icmp6_mib, 3205 ipv6IfIcmpInOverflows); 3206 } 3207 3208 freemsg(mp1); 3209 } else { 3210 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3211 first_mp1 = ipsec_check_inbound_policy 3212 (first_mp1, connp, NULL, ip6h, 3213 mctl_present); 3214 } 3215 if (first_mp1 != NULL) { 3216 if (mctl_present) 3217 freeb(first_mp1); 3218 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3219 putnext(rq, mp1); 3220 } 3221 } 3222 mutex_enter(&connfp->connf_lock); 3223 /* Follow the next pointer before releasing the conn. 
*/ 3224 next_connp = connp->conn_next; 3225 CONN_DEC_REF(connp); 3226 connp = next_connp; 3227 } 3228 3229 /* Last one. Send it upstream. */ 3230 mutex_exit(&connfp->connf_lock); 3231 3232 /* Initiate IPPF processing */ 3233 if (IP6_IN_IPP(flags)) { 3234 uint_t ifindex; 3235 3236 mutex_enter(&ill->ill_lock); 3237 ifindex = ill->ill_phyint->phyint_ifindex; 3238 mutex_exit(&ill->ill_lock); 3239 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3240 if (mp == NULL) { 3241 CONN_DEC_REF(connp); 3242 if (mctl_present) 3243 freeb(first_mp); 3244 return; 3245 } 3246 } 3247 3248 /* 3249 * For link-local always add ifindex so that transport can set 3250 * sin6_scope_id. Avoid it for ICMP error fanout. 3251 */ 3252 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3253 (flags & IP_FF_IP6INFO)) { 3254 /* Add header */ 3255 mp = ip_add_info_v6(mp, inill, &dst); 3256 if (mp == NULL) { 3257 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3258 CONN_DEC_REF(connp); 3259 if (mctl_present) 3260 freeb(first_mp); 3261 return; 3262 } else if (mctl_present) { 3263 first_mp->b_cont = mp; 3264 } else { 3265 first_mp = mp; 3266 } 3267 } 3268 3269 rq = connp->conn_rq; 3270 if (!canputnext(rq)) { 3271 if (flags & IP_FF_RAWIP) { 3272 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3273 } else { 3274 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3275 } 3276 3277 freemsg(first_mp); 3278 } else { 3279 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3280 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3281 NULL, ip6h, mctl_present); 3282 if (first_mp == NULL) { 3283 CONN_DEC_REF(connp); 3284 return; 3285 } 3286 } 3287 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3288 putnext(rq, mp); 3289 if (mctl_present) 3290 freeb(first_mp); 3291 } 3292 CONN_DEC_REF(connp); 3293 } 3294 3295 /* 3296 * Send an ICMP error after patching up the packet appropriately. Returns 3297 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3298 */ 3299 int 3300 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3301 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3302 boolean_t mctl_present, zoneid_t zoneid) 3303 { 3304 ip6_t *ip6h; 3305 mblk_t *first_mp; 3306 boolean_t secure; 3307 unsigned char db_type; 3308 3309 first_mp = mp; 3310 if (mctl_present) { 3311 mp = mp->b_cont; 3312 secure = ipsec_in_is_secure(first_mp); 3313 ASSERT(mp != NULL); 3314 } else { 3315 /* 3316 * If this is an ICMP error being reported - which goes 3317 * up as M_CTLs, we need to convert them to M_DATA till 3318 * we finish checking with global policy because 3319 * ipsec_check_global_policy() assumes M_DATA as clear 3320 * and M_CTL as secure. 3321 */ 3322 db_type = mp->b_datap->db_type; 3323 mp->b_datap->db_type = M_DATA; 3324 secure = B_FALSE; 3325 } 3326 /* 3327 * We are generating an icmp error for some inbound packet. 3328 * Called from all ip_fanout_(udp, tcp, proto) functions. 3329 * Before we generate an error, check with global policy 3330 * to see whether this is allowed to enter the system. As 3331 * there is no "conn", we are checking with global policy. 
3332 */ 3333 ip6h = (ip6_t *)mp->b_rptr; 3334 if (secure || ipsec_inbound_v6_policy_present) { 3335 first_mp = ipsec_check_global_policy(first_mp, NULL, 3336 NULL, ip6h, mctl_present); 3337 if (first_mp == NULL) 3338 return (0); 3339 } 3340 3341 if (!mctl_present) 3342 mp->b_datap->db_type = db_type; 3343 3344 if (flags & IP_FF_SEND_ICMP) { 3345 if (flags & IP_FF_HDR_COMPLETE) { 3346 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3347 freemsg(first_mp); 3348 return (1); 3349 } 3350 } 3351 switch (icmp_type) { 3352 case ICMP6_DST_UNREACH: 3353 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3354 B_FALSE, B_FALSE); 3355 break; 3356 case ICMP6_PARAM_PROB: 3357 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3358 nexthdr_offset, B_FALSE, B_FALSE); 3359 break; 3360 default: 3361 #ifdef DEBUG 3362 panic("ip_fanout_send_icmp_v6: wrong type"); 3363 /*NOTREACHED*/ 3364 #else 3365 freemsg(first_mp); 3366 break; 3367 #endif 3368 } 3369 } else { 3370 freemsg(first_mp); 3371 return (0); 3372 } 3373 3374 return (1); 3375 } 3376 3377 3378 /* 3379 * Fanout for TCP packets 3380 * The caller puts <fport, lport> in the ports parameter. 3381 */ 3382 static void 3383 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3384 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3385 { 3386 mblk_t *first_mp; 3387 boolean_t secure; 3388 conn_t *connp; 3389 tcph_t *tcph; 3390 boolean_t syn_present = B_FALSE; 3391 3392 first_mp = mp; 3393 if (mctl_present) { 3394 mp = first_mp->b_cont; 3395 secure = ipsec_in_is_secure(first_mp); 3396 ASSERT(mp != NULL); 3397 } else { 3398 secure = B_FALSE; 3399 } 3400 3401 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3402 3403 if (connp == NULL || 3404 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3405 /* 3406 * No hard-bound match. Send Reset. 
3407 */ 3408 dblk_t *dp = mp->b_datap; 3409 uint32_t ill_index; 3410 3411 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3412 3413 /* Initiate IPPf processing, if needed. */ 3414 if (IPP_ENABLED(IPP_LOCAL_IN) && 3415 (flags & (IP6_NO_IPPOLICY|IP6_IN_NOCKSUM))) { 3416 ill_index = ill->ill_phyint->phyint_ifindex; 3417 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3418 if (first_mp == NULL) { 3419 if (connp != NULL) 3420 CONN_DEC_REF(connp); 3421 return; 3422 } 3423 } 3424 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3425 tcp_xmit_listeners_reset(first_mp, hdr_len); 3426 if (connp != NULL) 3427 CONN_DEC_REF(connp); 3428 return; 3429 } 3430 3431 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3432 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3433 if (connp->conn_flags & IPCL_TCP) { 3434 squeue_t *sqp; 3435 3436 /* 3437 * For fused tcp loopback, assign the eager's 3438 * squeue to be that of the active connect's. 3439 */ 3440 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3441 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3442 !IP6_IN_IPP(flags)) { 3443 ASSERT(Q_TO_CONN(q) != NULL); 3444 sqp = Q_TO_CONN(q)->conn_sqp; 3445 } else { 3446 sqp = IP_SQUEUE_GET(lbolt); 3447 } 3448 3449 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3450 mp->b_datap->db_cksumstart = (intptr_t)sqp; 3451 3452 /* 3453 * db_cksumstuff is unused in the incoming 3454 * path; Thus store the ifindex here. It will 3455 * be cleared in tcp_conn_create_v6(). 
3456 */ 3457 mp->b_datap->db_cksumstuff = 3458 (intptr_t)ill->ill_phyint->phyint_ifindex; 3459 syn_present = B_TRUE; 3460 } 3461 } 3462 3463 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3464 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3465 if ((flags & TH_RST) || (flags & TH_URG)) { 3466 CONN_DEC_REF(connp); 3467 freemsg(first_mp); 3468 return; 3469 } 3470 if (flags & TH_ACK) { 3471 tcp_xmit_listeners_reset(first_mp, hdr_len); 3472 CONN_DEC_REF(connp); 3473 return; 3474 } 3475 3476 CONN_DEC_REF(connp); 3477 freemsg(first_mp); 3478 return; 3479 } 3480 3481 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3482 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3483 NULL, ip6h, mctl_present); 3484 if (first_mp == NULL) { 3485 CONN_DEC_REF(connp); 3486 return; 3487 } 3488 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3489 ASSERT(syn_present); 3490 if (mctl_present) { 3491 ASSERT(first_mp != mp); 3492 first_mp->b_datap->db_struioflag |= 3493 STRUIO_POLICY; 3494 } else { 3495 ASSERT(first_mp == mp); 3496 mp->b_datap->db_struioflag &= 3497 ~STRUIO_EAGER; 3498 mp->b_datap->db_struioflag |= 3499 STRUIO_POLICY; 3500 } 3501 } else { 3502 /* 3503 * Discard first_mp early since we're dealing with a 3504 * fully-connected conn_t and tcp doesn't do policy in 3505 * this case. Also, if someone is bound to IPPROTO_TCP 3506 * over raw IP, they don't expect to see a M_CTL. 
3507 */ 3508 if (mctl_present) { 3509 freeb(first_mp); 3510 mctl_present = B_FALSE; 3511 } 3512 first_mp = mp; 3513 } 3514 } 3515 3516 /* Initiate IPPF processing */ 3517 if (IP6_IN_IPP(flags)) { 3518 uint_t ifindex; 3519 3520 mutex_enter(&ill->ill_lock); 3521 ifindex = ill->ill_phyint->phyint_ifindex; 3522 mutex_exit(&ill->ill_lock); 3523 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3524 if (mp == NULL) { 3525 CONN_DEC_REF(connp); 3526 if (mctl_present) { 3527 freeb(first_mp); 3528 } 3529 return; 3530 } else if (mctl_present) { 3531 /* 3532 * ip_add_info_v6 might return a new mp. 3533 */ 3534 ASSERT(first_mp != mp); 3535 first_mp->b_cont = mp; 3536 } else { 3537 first_mp = mp; 3538 } 3539 } 3540 3541 /* 3542 * For link-local always add ifindex so that TCP can bind to that 3543 * interface. Avoid it for ICMP error fanout. 3544 */ 3545 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3546 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3547 (flags & IP_FF_IP6INFO))) { 3548 /* Add header */ 3549 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3550 if (mp == NULL) { 3551 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3552 CONN_DEC_REF(connp); 3553 if (mctl_present) 3554 freeb(first_mp); 3555 return; 3556 } else if (mctl_present) { 3557 ASSERT(first_mp != mp); 3558 first_mp->b_cont = mp; 3559 } else { 3560 first_mp = mp; 3561 } 3562 } 3563 3564 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3565 if (IPCL_IS_TCP(connp)) { 3566 (*ip_input_proc)(connp->conn_sqp, first_mp, 3567 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3568 } else { 3569 putnext(connp->conn_rq, first_mp); 3570 CONN_DEC_REF(connp); 3571 } 3572 } 3573 3574 /* 3575 * Fanout for UDP packets. 3576 * The caller puts <fport, lport> in the ports parameter. 3577 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3578 * 3579 * If SO_REUSEADDR is set all multicast and broadcast packets 3580 * will be delivered to all streams bound to the same port. 
3581 * 3582 * Zones notes: 3583 * Multicast packets will be distributed to streams in all zones. 3584 */ 3585 static void 3586 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3587 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3588 zoneid_t zoneid) 3589 { 3590 queue_t *rq; 3591 uint32_t dstport, srcport; 3592 in6_addr_t dst; 3593 mblk_t *first_mp; 3594 boolean_t secure; 3595 conn_t *connp; 3596 connf_t *connfp; 3597 conn_t *first_conn; 3598 conn_t *next_conn; 3599 mblk_t *mp1, *first_mp1; 3600 in6_addr_t src; 3601 3602 first_mp = mp; 3603 if (mctl_present) { 3604 mp = first_mp->b_cont; 3605 secure = ipsec_in_is_secure(first_mp); 3606 ASSERT(mp != NULL); 3607 } else { 3608 secure = B_FALSE; 3609 } 3610 3611 /* Extract ports in net byte order */ 3612 dstport = htons(ntohl(ports) & 0xFFFF); 3613 srcport = htons(ntohl(ports) >> 16); 3614 dst = ip6h->ip6_dst; 3615 src = ip6h->ip6_src; 3616 3617 /* Attempt to find a client stream based on destination port. */ 3618 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3619 mutex_enter(&connfp->connf_lock); 3620 connp = connfp->connf_head; 3621 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3622 /* 3623 * Not multicast. Send to the one (first) client we find. 
3624 */ 3625 while (connp != NULL) { 3626 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3627 src) && connp->conn_zoneid == zoneid && 3628 conn_wantpacket_v6(connp, ill, ip6h, 3629 flags, zoneid)) { 3630 break; 3631 } 3632 connp = connp->conn_next; 3633 } 3634 if (connp == NULL || connp->conn_upq == NULL) 3635 goto notfound; 3636 3637 /* Found a client */ 3638 CONN_INC_REF(connp); 3639 mutex_exit(&connfp->connf_lock); 3640 rq = connp->conn_rq; 3641 3642 if (!canputnext(rq)) { 3643 freemsg(first_mp); 3644 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3645 CONN_DEC_REF(connp); 3646 return; 3647 } 3648 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3649 first_mp = ipsec_check_inbound_policy(first_mp, 3650 connp, NULL, ip6h, mctl_present); 3651 if (first_mp == NULL) { 3652 CONN_DEC_REF(connp); 3653 return; 3654 } 3655 } 3656 /* Initiate IPPF processing */ 3657 if (IP6_IN_IPP(flags)) { 3658 uint_t ifindex; 3659 3660 mutex_enter(&ill->ill_lock); 3661 ifindex = ill->ill_phyint->phyint_ifindex; 3662 mutex_exit(&ill->ill_lock); 3663 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3664 if (mp == NULL) { 3665 CONN_DEC_REF(connp); 3666 if (mctl_present) 3667 freeb(first_mp); 3668 return; 3669 } 3670 } 3671 /* 3672 * For link-local always add ifindex so that 3673 * transport can set sin6_scope_id. Avoid it for 3674 * ICMP error fanout. 
3675 */ 3676 if ((connp->conn_ipv6_recvpktinfo || 3677 IN6_IS_ADDR_LINKLOCAL(&src)) && 3678 (flags & IP_FF_IP6INFO)) { 3679 /* Add header */ 3680 mp = ip_add_info_v6(mp, inill, &dst); 3681 if (mp == NULL) { 3682 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3683 CONN_DEC_REF(connp); 3684 if (mctl_present) 3685 freeb(first_mp); 3686 return; 3687 } else if (mctl_present) { 3688 first_mp->b_cont = mp; 3689 } else { 3690 first_mp = mp; 3691 } 3692 } 3693 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3694 putnext(rq, mp); 3695 IP6_STAT(ip6_udp_fannorm); 3696 CONN_DEC_REF(connp); 3697 if (mctl_present) 3698 freeb(first_mp); 3699 return; 3700 } 3701 3702 /* 3703 * The code is fine but we shouldn't be walking the conn_next 3704 * list in IPv6 (its a classifier private data struct). Maybe create 3705 * a classifier API to put a REF_HOLD on all matching conn in the 3706 * list and return an array. 3707 */ 3708 while (connp != NULL) { 3709 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3710 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) 3711 break; 3712 connp = connp->conn_next; 3713 } 3714 3715 if (connp == NULL || connp->conn_upq == NULL) 3716 goto notfound; 3717 3718 first_conn = connp; 3719 3720 CONN_INC_REF(connp); 3721 connp = connp->conn_next; 3722 for (;;) { 3723 while (connp != NULL) { 3724 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3725 src) && conn_wantpacket_v6(connp, ill, ip6h, 3726 flags, zoneid)) 3727 break; 3728 connp = connp->conn_next; 3729 } 3730 /* 3731 * Just copy the data part alone. The mctl part is 3732 * needed just for verifying policy and it is never 3733 * sent up. 3734 */ 3735 if (connp == NULL || 3736 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3737 ((first_mp1 = ip_copymsg(first_mp)) 3738 == NULL))) { 3739 /* 3740 * No more interested clients or memory 3741 * allocation failed 3742 */ 3743 connp = first_conn; 3744 break; 3745 } 3746 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3747 CONN_INC_REF(connp); 3748 mutex_exit(&connfp->connf_lock); 3749 rq = connp->conn_rq; 3750 /* 3751 * For link-local always add ifindex so that transport 3752 * can set sin6_scope_id. Avoid it for ICMP error 3753 * fanout. 3754 */ 3755 if ((connp->conn_ipv6_recvpktinfo || 3756 IN6_IS_ADDR_LINKLOCAL(&src)) && 3757 (flags & IP_FF_IP6INFO)) { 3758 /* Add header */ 3759 mp1 = ip_add_info_v6(mp1, inill, &dst); 3760 } 3761 if (mp1 == NULL) { 3762 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3763 goto next_one; 3764 } 3765 if (!canputnext(rq)) { 3766 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3767 freemsg(mp1); 3768 goto next_one; 3769 } 3770 3771 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3772 secure) { 3773 first_mp1 = ipsec_check_inbound_policy 3774 (first_mp1, connp, NULL, ip6h, 3775 mctl_present); 3776 } 3777 if (first_mp1 != NULL) { 3778 if (mctl_present) 3779 freeb(first_mp1); 3780 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3781 putnext(rq, mp1); 3782 } 3783 next_one: 3784 mutex_enter(&connfp->connf_lock); 3785 /* Follow the next pointer before releasing the conn. */ 3786 next_conn = connp->conn_next; 3787 IP6_STAT(ip6_udp_fanmb); 3788 CONN_DEC_REF(connp); 3789 connp = next_conn; 3790 } 3791 3792 /* Last one. Send it upstream. */ 3793 mutex_exit(&connfp->connf_lock); 3794 rq = connp->conn_rq; 3795 3796 /* Initiate IPPF processing */ 3797 if (IP6_IN_IPP(flags)) { 3798 uint_t ifindex; 3799 3800 mutex_enter(&ill->ill_lock); 3801 ifindex = ill->ill_phyint->phyint_ifindex; 3802 mutex_exit(&ill->ill_lock); 3803 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3804 if (mp == NULL) { 3805 CONN_DEC_REF(connp); 3806 if (mctl_present) { 3807 freeb(first_mp); 3808 } 3809 return; 3810 } 3811 } 3812 3813 /* 3814 * For link-local always add ifindex so that transport can set 3815 * sin6_scope_id. Avoid it for ICMP error fanout. 
3816 */ 3817 if ((connp->conn_ipv6_recvpktinfo || 3818 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3819 /* Add header */ 3820 mp = ip_add_info_v6(mp, inill, &dst); 3821 if (mp == NULL) { 3822 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3823 CONN_DEC_REF(connp); 3824 if (mctl_present) 3825 freeb(first_mp); 3826 return; 3827 } else if (mctl_present) { 3828 first_mp->b_cont = mp; 3829 } else { 3830 first_mp = mp; 3831 } 3832 } 3833 if (!canputnext(rq)) { 3834 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3835 freemsg(mp); 3836 } else { 3837 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3838 first_mp = ipsec_check_inbound_policy(first_mp, 3839 connp, NULL, ip6h, mctl_present); 3840 if (first_mp == NULL) { 3841 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3842 CONN_DEC_REF(connp); 3843 return; 3844 } 3845 } 3846 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3847 putnext(rq, mp); 3848 } 3849 IP6_STAT(ip6_udp_fanmb); 3850 CONN_DEC_REF(connp); 3851 if (mctl_present) 3852 freeb(first_mp); 3853 return; 3854 3855 notfound: 3856 mutex_exit(&connfp->connf_lock); 3857 /* 3858 * No one bound to this port. Is 3859 * there a client that wants all 3860 * unclaimed datagrams? 3861 */ 3862 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3863 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3864 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 3865 zoneid); 3866 } else { 3867 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3868 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3869 mctl_present, zoneid)) { 3870 BUMP_MIB(&ip_mib, udpNoPorts); 3871 } 3872 } 3873 } 3874 3875 /* 3876 * int ip_find_hdr_v6() 3877 * 3878 * This routine is used by the upper layer protocols and the IP tunnel 3879 * module to: 3880 * - Set extension header pointers to appropriate locations 3881 * - Determine IPv6 header length and return it 3882 * - Return a pointer to the last nexthdr value 3883 * 3884 * The caller must initialize ipp_fields. 
3885 * 3886 * NOTE: If multiple extension headers of the same type are present, 3887 * ip_find_hdr_v6() will set the respective extension header pointers 3888 * to the first one that it encounters in the IPv6 header. It also 3889 * skips fragment headers. This routine deals with malformed packets 3890 * of various sorts in which case the returned length is up to the 3891 * malformed part. 3892 */ 3893 int 3894 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3895 { 3896 uint_t length, ehdrlen; 3897 uint8_t nexthdr; 3898 uint8_t *whereptr, *endptr; 3899 ip6_dest_t *tmpdstopts; 3900 ip6_rthdr_t *tmprthdr; 3901 ip6_hbh_t *tmphopopts; 3902 ip6_frag_t *tmpfraghdr; 3903 3904 length = IPV6_HDR_LEN; 3905 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3906 endptr = mp->b_wptr; 3907 3908 nexthdr = ip6h->ip6_nxt; 3909 while (whereptr < endptr) { 3910 /* Is there enough left for len + nexthdr? */ 3911 if (whereptr + MIN_EHDR_LEN > endptr) 3912 goto done; 3913 3914 switch (nexthdr) { 3915 case IPPROTO_HOPOPTS: 3916 tmphopopts = (ip6_hbh_t *)whereptr; 3917 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3918 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3919 goto done; 3920 nexthdr = tmphopopts->ip6h_nxt; 3921 /* return only 1st hbh */ 3922 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3923 ipp->ipp_fields |= IPPF_HOPOPTS; 3924 ipp->ipp_hopopts = tmphopopts; 3925 ipp->ipp_hopoptslen = ehdrlen; 3926 } 3927 break; 3928 case IPPROTO_DSTOPTS: 3929 tmpdstopts = (ip6_dest_t *)whereptr; 3930 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3931 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3932 goto done; 3933 nexthdr = tmpdstopts->ip6d_nxt; 3934 /* 3935 * ipp_dstopts is set to the destination header after a 3936 * routing header. 3937 * Assume it is a post-rthdr destination header 3938 * and adjust when we find an rthdr. 
3939 */ 3940 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3941 ipp->ipp_fields |= IPPF_DSTOPTS; 3942 ipp->ipp_dstopts = tmpdstopts; 3943 ipp->ipp_dstoptslen = ehdrlen; 3944 } 3945 break; 3946 case IPPROTO_ROUTING: 3947 tmprthdr = (ip6_rthdr_t *)whereptr; 3948 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3949 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3950 goto done; 3951 nexthdr = tmprthdr->ip6r_nxt; 3952 /* return only 1st rthdr */ 3953 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3954 ipp->ipp_fields |= IPPF_RTHDR; 3955 ipp->ipp_rthdr = tmprthdr; 3956 ipp->ipp_rthdrlen = ehdrlen; 3957 } 3958 /* 3959 * Make any destination header we've seen be a 3960 * pre-rthdr destination header. 3961 */ 3962 if (ipp->ipp_fields & IPPF_DSTOPTS) { 3963 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3964 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3965 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 3966 ipp->ipp_dstopts = NULL; 3967 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 3968 ipp->ipp_dstoptslen = 0; 3969 } 3970 break; 3971 case IPPROTO_FRAGMENT: 3972 /* 3973 * Fragment headers are skipped. Currently, only 3974 * IP cares for their existence. If anyone other 3975 * than IP ever has the need to know about the 3976 * location of fragment headers, support can be 3977 * added to the ip6_pkt_t at that time. 
3978 */ 3979 tmpfraghdr = (ip6_frag_t *)whereptr; 3980 ehdrlen = sizeof (ip6_frag_t); 3981 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 3982 goto done; 3983 nexthdr = tmpfraghdr->ip6f_nxt; 3984 break; 3985 case IPPROTO_NONE: 3986 default: 3987 goto done; 3988 } 3989 length += ehdrlen; 3990 whereptr += ehdrlen; 3991 } 3992 done: 3993 if (nexthdrp != NULL) 3994 *nexthdrp = nexthdr; 3995 return (length); 3996 } 3997 3998 int 3999 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4000 { 4001 ire_t *ire; 4002 4003 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4004 ire = ire_lookup_local_v6(zoneid); 4005 if (ire == NULL) { 4006 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4007 return (1); 4008 } 4009 ip6h->ip6_src = ire->ire_addr_v6; 4010 ire_refrele(ire); 4011 } 4012 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4013 ip6h->ip6_hops = ipv6_def_hops; 4014 return (0); 4015 } 4016 4017 /* 4018 * Try to determine where and what are the IPv6 header length and 4019 * pointer to nexthdr value for the upper layer protocol (or an 4020 * unknown next hdr). 4021 * 4022 * Parameters returns a pointer to the nexthdr value; 4023 * Must handle malformed packets of various sorts. 4024 * Function returns failure for malformed cases. 4025 */ 4026 boolean_t 4027 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4028 uint8_t **nexthdrpp) 4029 { 4030 uint16_t length; 4031 uint_t ehdrlen; 4032 uint8_t *nexthdrp; 4033 uint8_t *whereptr; 4034 uint8_t *endptr; 4035 ip6_dest_t *desthdr; 4036 ip6_rthdr_t *rthdr; 4037 ip6_frag_t *fraghdr; 4038 4039 length = IPV6_HDR_LEN; 4040 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4041 endptr = mp->b_wptr; 4042 4043 nexthdrp = &ip6h->ip6_nxt; 4044 while (whereptr < endptr) { 4045 /* Is there enough left for len + nexthdr? 
*/ 4046 if (whereptr + MIN_EHDR_LEN > endptr) 4047 break; 4048 4049 switch (*nexthdrp) { 4050 case IPPROTO_HOPOPTS: 4051 case IPPROTO_DSTOPTS: 4052 /* Assumes the headers are identical for hbh and dst */ 4053 desthdr = (ip6_dest_t *)whereptr; 4054 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4055 if ((uchar_t *)desthdr + ehdrlen > endptr) 4056 return (B_FALSE); 4057 nexthdrp = &desthdr->ip6d_nxt; 4058 break; 4059 case IPPROTO_ROUTING: 4060 rthdr = (ip6_rthdr_t *)whereptr; 4061 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4062 if ((uchar_t *)rthdr + ehdrlen > endptr) 4063 return (B_FALSE); 4064 nexthdrp = &rthdr->ip6r_nxt; 4065 break; 4066 case IPPROTO_FRAGMENT: 4067 fraghdr = (ip6_frag_t *)whereptr; 4068 ehdrlen = sizeof (ip6_frag_t); 4069 if ((uchar_t *)&fraghdr[1] > endptr) 4070 return (B_FALSE); 4071 nexthdrp = &fraghdr->ip6f_nxt; 4072 break; 4073 case IPPROTO_NONE: 4074 /* No next header means we're finished */ 4075 default: 4076 *hdr_length_ptr = length; 4077 *nexthdrpp = nexthdrp; 4078 return (B_TRUE); 4079 } 4080 length += ehdrlen; 4081 whereptr += ehdrlen; 4082 *hdr_length_ptr = length; 4083 *nexthdrpp = nexthdrp; 4084 } 4085 switch (*nexthdrp) { 4086 case IPPROTO_HOPOPTS: 4087 case IPPROTO_DSTOPTS: 4088 case IPPROTO_ROUTING: 4089 case IPPROTO_FRAGMENT: 4090 /* 4091 * If any know extension headers are still to be processed, 4092 * the packet's malformed (or at least all the IP header(s) are 4093 * not in the same mblk - and that should never happen. 4094 */ 4095 return (B_FALSE); 4096 4097 default: 4098 /* 4099 * If we get here, we know that all of the IP headers were in 4100 * the same mblk, even if the ULP header is in the next mblk. 4101 */ 4102 *hdr_length_ptr = length; 4103 *nexthdrpp = nexthdrp; 4104 return (B_TRUE); 4105 } 4106 } 4107 4108 /* 4109 * Return the length of the IPv6 related headers (including extension headers) 4110 * Returns a length even if the packet is malformed. 
4111 */ 4112 int 4113 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4114 { 4115 uint16_t hdr_len; 4116 uint8_t *nexthdrp; 4117 4118 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4119 return (hdr_len); 4120 } 4121 4122 /* 4123 * Select an ill for the packet by considering load spreading across 4124 * a different ill in the group if dst_ill is part of some group. 4125 */ 4126 static ill_t * 4127 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4128 { 4129 ill_t *ill; 4130 4131 /* 4132 * We schedule irrespective of whether the source address is 4133 * INADDR_UNSPECIED or not. 4134 */ 4135 ill = illgrp_scheduler(dst_ill); 4136 if (ill == NULL) 4137 return (NULL); 4138 4139 /* 4140 * For groups with names ip_sioctl_groupname ensures that all 4141 * ills are of same type. For groups without names, ifgrp_insert 4142 * ensures this. 4143 */ 4144 ASSERT(dst_ill->ill_type == ill->ill_type); 4145 4146 return (ill); 4147 } 4148 4149 /* 4150 * IPv6 - 4151 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4152 * to send out a packet to a destination address for which we do not have 4153 * specific routing information. 4154 * 4155 * Handle non-multicast packets. If ill is non-NULL the match is done 4156 * for that ill. 4157 * 4158 * When a specific ill is specified (using IPV6_PKTINFO, 4159 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4160 * on routing entries (ftable and ctable) that have a matching 4161 * ire->ire_ipif->ipif_ill. Thus this can only be used 4162 * for destinations that are on-link for the specific ill 4163 * and that can appear on multiple links. Thus it is useful 4164 * for multicast destinations, link-local destinations, and 4165 * at some point perhaps for site-local destinations (if the 4166 * node sits at a site boundary). 4167 * We create the cache entries in the regular ctable since 4168 * it can not "confuse" things for other destinations. 4169 * table. 
4170 * 4171 * When ill is part of an ill group, we subject the packets 4172 * to load spreading even if the ill is specified by the 4173 * means described above. We disable only for IPV6_BOUND_PIF 4174 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4175 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4176 * set. 4177 * 4178 * NOTE : These are the scopes of some of the variables that point at IRE, 4179 * which need to be followed while making any future modifications 4180 * to avoid memory leaks. 4181 * 4182 * - ire and sire are the entries looked up initially by 4183 * ire_ftable_lookup_v6. 4184 * - ipif_ire is used to hold the interface ire associated with 4185 * the new cache ire. But its scope is limited, so we always REFRELE 4186 * it before branching out to error paths. 4187 * - save_ire is initialized before ire_create, so that ire returned 4188 * by ire_create will not over-write the ire. We REFRELE save_ire 4189 * before breaking out of the switch. 4190 * 4191 * Thus on failures, we have to REFRELE only ire and sire, if they 4192 * are not NULL. 4193 * 4194 * v6srcp may be used in the future. Currently unused.
4195 */ 4196 /* ARGSUSED */ 4197 void 4198 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4199 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4200 { 4201 in6_addr_t v6gw; 4202 in6_addr_t dst; 4203 ire_t *ire = NULL; 4204 ipif_t *src_ipif = NULL; 4205 ill_t *dst_ill = NULL; 4206 ire_t *sire = NULL; 4207 ire_t *save_ire; 4208 mblk_t *dlureq_mp; 4209 ip6_t *ip6h; 4210 int err = 0; 4211 mblk_t *first_mp; 4212 ipsec_out_t *io; 4213 ill_t *attach_ill = NULL; 4214 ushort_t ire_marks = 0; 4215 int match_flags; 4216 boolean_t ip6i_present; 4217 ire_t *first_sire = NULL; 4218 mblk_t *copy_mp = NULL; 4219 mblk_t *xmit_mp = NULL; 4220 in6_addr_t save_dst; 4221 uint32_t multirt_flags = 4222 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4223 boolean_t multirt_is_resolvable; 4224 boolean_t multirt_resolve_next; 4225 boolean_t need_rele = B_FALSE; 4226 boolean_t do_attach_ill = B_FALSE; 4227 boolean_t ip6_asp_table_held = B_FALSE; 4228 4229 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4230 4231 first_mp = mp; 4232 if (mp->b_datap->db_type == M_CTL) { 4233 mp = mp->b_cont; 4234 io = (ipsec_out_t *)first_mp->b_rptr; 4235 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4236 } else { 4237 io = NULL; 4238 } 4239 4240 /* 4241 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4242 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4243 * could be NULL. 4244 * 4245 * This information can appear either in an ip6i_t or an IPSEC_OUT 4246 * message. 4247 */ 4248 ip6h = (ip6_t *)mp->b_rptr; 4249 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4250 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4251 if (!ip6i_present || 4252 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4253 attach_ill = ip_grab_attach_ill(ill, first_mp, 4254 (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : 4255 io->ipsec_out_ill_index), B_TRUE); 4256 /* Failure case frees things for us. 
*/ 4257 if (attach_ill == NULL) 4258 return; 4259 4260 /* 4261 * Check if we need an ire that will not be 4262 * looked up by anybody else i.e. HIDDEN. 4263 */ 4264 if (ill_is_probeonly(attach_ill)) 4265 ire_marks = IRE_MARK_HIDDEN; 4266 } 4267 } 4268 4269 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4270 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4271 goto icmp_err_ret; 4272 } else if (IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4273 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4274 goto icmp_err_ret; 4275 } 4276 4277 /* 4278 * If this IRE is created for forwarding or it is not for 4279 * TCP traffic, mark it as temporary. 4280 * 4281 * Is it sufficient just to check the next header?? 4282 */ 4283 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4284 ire_marks |= IRE_MARK_TEMPORARY; 4285 4286 /* 4287 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4288 * chain until it gets the most specific information available. 4289 * For example, we know that there is no IRE_CACHE for this dest, 4290 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4291 * ire_ftable_lookup_v6 will look up the gateway, etc. 4292 */ 4293 4294 if (ill == NULL) { 4295 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4296 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE; 4297 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4298 NULL, &sire, zoneid, 0, match_flags); 4299 /* 4300 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4301 * in a NULL ill, but the packet could be a neighbor 4302 * solicitation/advertisment and could have a valid attach_ill. 4303 */ 4304 if (attach_ill != NULL) 4305 ill_refrele(attach_ill); 4306 } else { 4307 if (attach_ill != NULL) { 4308 /* 4309 * attach_ill is set only for communicating with 4310 * on-link hosts. So, don't look for DEFAULT. 4311 * ip_wput_v6 passes the right ill in this case and 4312 * hence we can assert. 
4313 */ 4314 ASSERT(ill == attach_ill); 4315 ill_refrele(attach_ill); 4316 do_attach_ill = B_TRUE; 4317 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4318 } else { 4319 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4320 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4321 } 4322 match_flags |= MATCH_IRE_PARENT; 4323 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, ill->ill_ipif, 4324 &sire, zoneid, 0, match_flags); 4325 } 4326 4327 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4328 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4329 4330 if (zoneid == ALL_ZONES && ire != NULL) { 4331 /* 4332 * In the forwarding case, we can use a route from any zone 4333 * since we won't change the source address. We can easily 4334 * assert that the source address is already set when there's no 4335 * ip6_info header - otherwise we'd have to call pullupmsg(). 4336 */ 4337 ASSERT(ip6i_present || 4338 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4339 zoneid = ire->ire_zoneid; 4340 } 4341 4342 /* 4343 * We enter a loop that will be run only once in most cases. 4344 * The loop is re-entered in the case where the destination 4345 * can be reached through multiple RTF_MULTIRT-flagged routes. 4346 * The intention is to compute multiple routes to a single 4347 * destination in a single ip_newroute_v6 call. 4348 * The information is contained in sire->ire_flags. 4349 */ 4350 do { 4351 multirt_resolve_next = B_FALSE; 4352 4353 if (dst_ill != NULL) { 4354 ill_refrele(dst_ill); 4355 dst_ill = NULL; 4356 } 4357 if (src_ipif != NULL) { 4358 ipif_refrele(src_ipif); 4359 src_ipif = NULL; 4360 } 4361 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4362 ip3dbg(("ip_newroute_v6: starting new resolution " 4363 "with first_mp %p, tag %d\n", 4364 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4365 4366 /* 4367 * We check if there are trailing unresolved routes for 4368 * the destination contained in sire. 
4369 */ 4370 multirt_is_resolvable = 4371 ire_multirt_lookup_v6(&ire, &sire, multirt_flags); 4372 4373 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4374 "ire %p, sire %p\n", 4375 multirt_is_resolvable, (void *)ire, (void *)sire)); 4376 4377 if (!multirt_is_resolvable) { 4378 /* 4379 * No more multirt routes to resolve; give up 4380 * (all routes resolved or no more resolvable 4381 * routes). 4382 */ 4383 if (ire != NULL) { 4384 ire_refrele(ire); 4385 ire = NULL; 4386 } 4387 } else { 4388 ASSERT(sire != NULL); 4389 ASSERT(ire != NULL); 4390 /* 4391 * We simply use first_sire as a flag that 4392 * indicates if a resolvable multirt route has 4393 * already been found during the preceding 4394 * loops. If it is not the case, we may have 4395 * to send an ICMP error to report that the 4396 * destination is unreachable. We do not 4397 * IRE_REFHOLD first_sire. 4398 */ 4399 if (first_sire == NULL) { 4400 first_sire = sire; 4401 } 4402 } 4403 } 4404 if ((ire == NULL) || (ire == sire)) { 4405 /* 4406 * either ire == NULL (the destination cannot be 4407 * resolved) or ire == sire (the gateway cannot be 4408 * resolved). At this point, there are no more routes 4409 * to resolve for the destination, thus we exit. 4410 */ 4411 if (ip_debug > 3) { 4412 /* ip2dbg */ 4413 pr_addr_dbg("ip_newroute_v6: " 4414 "can't resolve %s\n", AF_INET6, v6dstp); 4415 } 4416 ip3dbg(("ip_newroute_v6: " 4417 "ire %p, sire %p, first_sire %p\n", 4418 (void *)ire, (void *)sire, (void *)first_sire)); 4419 4420 if (sire != NULL) { 4421 ire_refrele(sire); 4422 sire = NULL; 4423 } 4424 4425 if (first_sire != NULL) { 4426 /* 4427 * At least one multirt route has been found 4428 * in the same ip_newroute() call; there is no 4429 * need to report an ICMP error. 4430 * first_sire was not IRE_REFHOLDed. 
4431 */ 4432 MULTIRT_DEBUG_UNTAG(first_mp); 4433 freemsg(first_mp); 4434 return; 4435 } 4436 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4437 RTA_DST); 4438 goto icmp_err_ret; 4439 } 4440 4441 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4442 4443 /* 4444 * Verify that the returned IRE does not have either the 4445 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4446 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4447 */ 4448 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4449 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4450 goto icmp_err_ret; 4451 4452 /* 4453 * Increment the ire_ob_pkt_count field for ire if it is an 4454 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4455 * increment the same for the parent IRE, sire, if it is some 4456 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4457 * and HOST_REDIRECT). 4458 */ 4459 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4460 UPDATE_OB_PKT_COUNT(ire); 4461 ire->ire_last_used_time = lbolt; 4462 } 4463 4464 if (sire != NULL) { 4465 mutex_enter(&sire->ire_lock); 4466 v6gw = sire->ire_gateway_addr_v6; 4467 mutex_exit(&sire->ire_lock); 4468 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4469 IRE_INTERFACE)) == 0); 4470 UPDATE_OB_PKT_COUNT(sire); 4471 sire->ire_last_used_time = lbolt; 4472 } else { 4473 v6gw = ipv6_all_zeros; 4474 } 4475 4476 /* 4477 * We have a route to reach the destination. 4478 * 4479 * 1) If the interface is part of ill group, try to get a new 4480 * ill taking load spreading into account. 4481 * 4482 * 2) After selecting the ill, get a source address that might 4483 * create good inbound load spreading and that matches the 4484 * right scope. ipif_select_source_v6 does this for us. 4485 * 4486 * If the application specified the ill (ifindex), we still 4487 * load spread. Only if the packets needs to go out specifically 4488 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4489 * IPV6_BOUND_PIF we don't try to use a different ill for load 4490 * spreading. 4491 */ 4492 if (!do_attach_ill) { 4493 /* 4494 * If the interface belongs to an interface group, 4495 * make sure the next possible interface in the group 4496 * is used. This encourages load spreading among 4497 * peers in an interface group. However, in the case 4498 * of multirouting, load spreading is not used, as we 4499 * actually want to replicate outgoing packets through 4500 * particular interfaces. 4501 * 4502 * Note: While we pick a dst_ill we are really only 4503 * interested in the ill for load spreading. 4504 * The source ipif is determined by source address 4505 * selection below. 4506 */ 4507 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4508 dst_ill = ire->ire_ipif->ipif_ill; 4509 /* For uniformity do a refhold */ 4510 ill_refhold(dst_ill); 4511 } else { 4512 /* 4513 * If we are here trying to create an IRE_CACHE 4514 * for an offlink destination and have the 4515 * IRE_CACHE for the next hop and the latter is 4516 * using virtual IP source address selection i.e 4517 * it's ire->ire_ipif is pointing to a virtual 4518 * network interface (vni) then 4519 * ip_newroute_get_dst_ll() will return the vni 4520 * interface as the dst_ill. Since the vni is 4521 * virtual i.e not associated with any physical 4522 * interface, it cannot be the dst_ill, hence 4523 * in such a case call ip_newroute_get_dst_ll() 4524 * with the stq_ill instead of the ire_ipif ILL. 4525 * The function returns a refheld ill. 
4526 */ 4527 if ((ire->ire_type == IRE_CACHE) && 4528 IS_VNI(ire->ire_ipif->ipif_ill)) 4529 dst_ill = ip_newroute_get_dst_ill_v6( 4530 ire->ire_stq->q_ptr); 4531 else 4532 dst_ill = ip_newroute_get_dst_ill_v6( 4533 ire->ire_ipif->ipif_ill); 4534 } 4535 if (dst_ill == NULL) { 4536 if (ip_debug > 2) { 4537 pr_addr_dbg("ip_newroute_v6 : no dst " 4538 "ill for dst %s\n", 4539 AF_INET6, v6dstp); 4540 } 4541 goto icmp_err_ret; 4542 } else if (dst_ill->ill_group == NULL && ill != NULL && 4543 dst_ill != ill) { 4544 /* 4545 * If "ill" is not part of any group, we should 4546 * have found a route matching "ill" as we 4547 * called ire_ftable_lookup_v6 with 4548 * MATCH_IRE_ILL_GROUP. 4549 * Rather than asserting when there is a 4550 * mismatch, we just drop the packet. 4551 */ 4552 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4553 "dst_ill %s ill %s\n", 4554 dst_ill->ill_name, 4555 ill->ill_name)); 4556 goto icmp_err_ret; 4557 } 4558 } else { 4559 dst_ill = ire->ire_ipif->ipif_ill; 4560 /* For uniformity do refhold */ 4561 ill_refhold(dst_ill); 4562 /* 4563 * We should have found a route matching ill as we 4564 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4565 * Rather than asserting, while there is a mismatch, 4566 * we just drop the packet. 4567 */ 4568 if (dst_ill != ill) { 4569 ip0dbg(("ip_newroute_v6: Packet dropped as " 4570 "IP6I_ATTACH_IF ill is %s, " 4571 "ire->ire_ipif->ipif_ill is %s\n", 4572 ill->ill_name, 4573 dst_ill->ill_name)); 4574 goto icmp_err_ret; 4575 } 4576 } 4577 /* 4578 * Pick a source address which matches the scope of the 4579 * destination address. 4580 * For RTF_SETSRC routes, the source address is imposed by the 4581 * parent ire (sire). 4582 */ 4583 ASSERT(src_ipif == NULL); 4584 if (ire->ire_type == IRE_IF_RESOLVER && 4585 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4586 ip6_asp_can_lookup()) { 4587 /* 4588 * The ire cache entry we're adding is for the 4589 * gateway itself. 
The source address in this case 4590 * is relative to the gateway's address. 4591 */ 4592 ip6_asp_table_held = B_TRUE; 4593 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4594 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4595 if (src_ipif != NULL) 4596 ire_marks |= IRE_MARK_USESRC_CHECK; 4597 } else { 4598 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4599 /* 4600 * Check that the ipif matching the requested 4601 * source address still exists. 4602 */ 4603 src_ipif = ipif_lookup_addr_v6( 4604 &sire->ire_src_addr_v6, NULL, zoneid, 4605 NULL, NULL, NULL, NULL); 4606 } 4607 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4608 ip6_asp_table_held = B_TRUE; 4609 src_ipif = ipif_select_source_v6(dst_ill, 4610 v6dstp, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 4611 zoneid); 4612 if (src_ipif != NULL) 4613 ire_marks |= IRE_MARK_USESRC_CHECK; 4614 } 4615 } 4616 4617 if (src_ipif == NULL) { 4618 if (ip_debug > 2) { 4619 /* ip1dbg */ 4620 pr_addr_dbg("ip_newroute_v6: no src for " 4621 "dst %s\n, ", AF_INET6, v6dstp); 4622 printf("ip_newroute_v6: interface name %s\n", 4623 dst_ill->ill_name); 4624 } 4625 goto icmp_err_ret; 4626 } 4627 4628 if (ip_debug > 3) { 4629 /* ip2dbg */ 4630 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4631 AF_INET6, &v6gw); 4632 } 4633 ip2dbg(("\tire type %s (%d)\n", 4634 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4635 4636 /* 4637 * At this point in ip_newroute_v6(), ire is either the 4638 * IRE_CACHE of the next-hop gateway for an off-subnet 4639 * destination or an IRE_INTERFACE type that should be used 4640 * to resolve an on-subnet destination or an on-subnet 4641 * next-hop gateway. 4642 * 4643 * In the IRE_CACHE case, we have the following : 4644 * 4645 * 1) src_ipif - used for getting a source address. 4646 * 4647 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4648 * means packets using this IRE_CACHE will go out on dst_ill. 
4649 * 4650 * 3) The IRE sire will point to the prefix that is the longest 4651 * matching route for the destination. These prefix types 4652 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4653 * IRE_HOST_REDIRECT. 4654 * 4655 * The newly created IRE_CACHE entry for the off-subnet 4656 * destination is tied to both the prefix route and the 4657 * interface route used to resolve the next-hop gateway 4658 * via the ire_phandle and ire_ihandle fields, respectively. 4659 * 4660 * In the IRE_INTERFACE case, we have the following : 4661 * 4662 * 1) src_ipif - used for getting a source address. 4663 * 4664 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4665 * means packets using the IRE_CACHE that we will build 4666 * here will go out on dst_ill. 4667 * 4668 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4669 * to be created will only be tied to the IRE_INTERFACE that 4670 * was derived from the ire_ihandle field. 4671 * 4672 * If sire is non-NULL, it means the destination is off-link 4673 * and we will first create the IRE_CACHE for the gateway. 4674 * Next time through ip_newroute_v6, we will create the 4675 * IRE_CACHE for the final destination as described above. 4676 */ 4677 save_ire = ire; 4678 switch (ire->ire_type) { 4679 case IRE_CACHE: { 4680 ire_t *ipif_ire; 4681 4682 ASSERT(sire != NULL); 4683 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4684 mutex_enter(&ire->ire_lock); 4685 v6gw = ire->ire_gateway_addr_v6; 4686 mutex_exit(&ire->ire_lock); 4687 } 4688 /* 4689 * We need 3 ire's to create a new cache ire for an 4690 * off-link destination from the cache ire of the 4691 * gateway. 4692 * 4693 * 1. The prefix ire 'sire' 4694 * 2. The cache ire of the gateway 'ire' 4695 * 3. The interface ire 'ipif_ire' 4696 * 4697 * We have (1) and (2). We lookup (3) below. 4698 * 4699 * If there is no interface route to the gateway, 4700 * it is a race condition, where we found the cache 4701 * but the inteface route has been deleted. 
4702 */ 4703 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4704 if (ipif_ire == NULL) { 4705 ip1dbg(("ip_newroute_v6:" 4706 "ire_ihandle_lookup_offlink_v6 failed\n")); 4707 goto icmp_err_ret; 4708 } 4709 /* 4710 * Assume DL_UNITDATA_REQ is same for all physical 4711 * interfaces in the ifgrp. If it isn't, this code will 4712 * have to be seriously rewhacked to allow the 4713 * fastpath probing (such that I cache the link 4714 * header in the IRE_CACHE) to work over ifgrps. 4715 * We have what we need to build an IRE_CACHE. 4716 */ 4717 /* 4718 * Note: the new ire inherits RTF_SETSRC 4719 * and RTF_MULTIRT to propagate these flags from prefix 4720 * to cache. 4721 */ 4722 ire = ire_create_v6( 4723 v6dstp, /* dest address */ 4724 &ipv6_all_ones, /* mask */ 4725 &src_ipif->ipif_v6src_addr, /* source address */ 4726 &v6gw, /* gateway address */ 4727 &save_ire->ire_max_frag, 4728 NULL, /* Fast Path header */ 4729 dst_ill->ill_rq, /* recv-from queue */ 4730 dst_ill->ill_wq, /* send-to queue */ 4731 IRE_CACHE, 4732 NULL, 4733 src_ipif, 4734 &sire->ire_mask_v6, /* Parent mask */ 4735 sire->ire_phandle, /* Parent handle */ 4736 ipif_ire->ire_ihandle, /* Interface handle */ 4737 sire->ire_flags & /* flags if any */ 4738 (RTF_SETSRC | RTF_MULTIRT), 4739 &(sire->ire_uinfo)); 4740 4741 if (ire == NULL) { 4742 ire_refrele(save_ire); 4743 ire_refrele(ipif_ire); 4744 break; 4745 } 4746 ire->ire_marks |= ire_marks; 4747 4748 /* 4749 * Prevent sire and ipif_ire from getting deleted. The 4750 * newly created ire is tied to both of them via the 4751 * phandle and ihandle respectively. 4752 */ 4753 IRB_REFHOLD(sire->ire_bucket); 4754 /* Has it been removed already ? */ 4755 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4756 IRB_REFRELE(sire->ire_bucket); 4757 ire_refrele(ipif_ire); 4758 ire_refrele(save_ire); 4759 break; 4760 } 4761 4762 IRB_REFHOLD(ipif_ire->ire_bucket); 4763 /* Has it been removed already ? 
*/ 4764 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4765 IRB_REFRELE(ipif_ire->ire_bucket); 4766 IRB_REFRELE(sire->ire_bucket); 4767 ire_refrele(ipif_ire); 4768 ire_refrele(save_ire); 4769 break; 4770 } 4771 4772 xmit_mp = first_mp; 4773 if (ire->ire_flags & RTF_MULTIRT) { 4774 copy_mp = copymsg(first_mp); 4775 if (copy_mp != NULL) { 4776 xmit_mp = copy_mp; 4777 MULTIRT_DEBUG_TAG(first_mp); 4778 } 4779 } 4780 ire_add_then_send(q, ire, xmit_mp); 4781 if (ip6_asp_table_held) { 4782 ip6_asp_table_refrele(); 4783 ip6_asp_table_held = B_FALSE; 4784 } 4785 ire_refrele(save_ire); 4786 4787 /* Assert that sire is not deleted yet. */ 4788 ASSERT(sire->ire_ptpn != NULL); 4789 IRB_REFRELE(sire->ire_bucket); 4790 4791 /* Assert that ipif_ire is not deleted yet. */ 4792 ASSERT(ipif_ire->ire_ptpn != NULL); 4793 IRB_REFRELE(ipif_ire->ire_bucket); 4794 ire_refrele(ipif_ire); 4795 4796 if (copy_mp != NULL) { 4797 /* 4798 * Search for the next unresolved 4799 * multirt route. 4800 */ 4801 copy_mp = NULL; 4802 ipif_ire = NULL; 4803 ire = NULL; 4804 /* re-enter the loop */ 4805 multirt_resolve_next = B_TRUE; 4806 continue; 4807 } 4808 ire_refrele(sire); 4809 ill_refrele(dst_ill); 4810 ipif_refrele(src_ipif); 4811 return; 4812 } 4813 case IRE_IF_NORESOLVER: 4814 /* 4815 * We have what we need to build an IRE_CACHE. 4816 * 4817 * Create a new dlureq_mp with the IPv6 gateway 4818 * address in destination address in the DLPI hdr 4819 * if the physical length is exactly 16 bytes. 
4820 */ 4821 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 4822 const in6_addr_t *addr; 4823 4824 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4825 addr = &v6gw; 4826 else 4827 addr = v6dstp; 4828 4829 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 4830 dst_ill->ill_phys_addr_length, 4831 dst_ill->ill_sap, 4832 dst_ill->ill_sap_length); 4833 } else { 4834 dlureq_mp = ire->ire_dlureq_mp; 4835 } 4836 if (dlureq_mp == NULL) 4837 break; 4838 4839 /* 4840 * Note: the new ire inherits sire flags RTF_SETSRC 4841 * and RTF_MULTIRT to propagate those rules from prefix 4842 * to cache. 4843 */ 4844 ire = ire_create_v6( 4845 v6dstp, /* dest address */ 4846 &ipv6_all_ones, /* mask */ 4847 &src_ipif->ipif_v6src_addr, /* source address */ 4848 &v6gw, /* gateway address */ 4849 &save_ire->ire_max_frag, 4850 NULL, /* Fast Path header */ 4851 dst_ill->ill_rq, /* recv-from queue */ 4852 dst_ill->ill_wq, /* send-to queue */ 4853 IRE_CACHE, 4854 dlureq_mp, 4855 src_ipif, 4856 &save_ire->ire_mask_v6, /* Parent mask */ 4857 (sire != NULL) ? /* Parent handle */ 4858 sire->ire_phandle : 0, 4859 save_ire->ire_ihandle, /* Interface handle */ 4860 (sire != NULL) ? /* flags if any */ 4861 sire->ire_flags & 4862 (RTF_SETSRC | RTF_MULTIRT) : 0, 4863 &(save_ire->ire_uinfo)); 4864 4865 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 4866 freeb(dlureq_mp); 4867 4868 if (ire == NULL) { 4869 ire_refrele(save_ire); 4870 break; 4871 } 4872 4873 ire->ire_marks |= ire_marks; 4874 4875 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4876 dst = v6gw; 4877 else 4878 dst = *v6dstp; 4879 err = ndp_noresolver(dst_ill, &dst); 4880 if (err != 0) { 4881 ire_refrele(save_ire); 4882 break; 4883 } 4884 4885 /* Prevent save_ire from getting deleted */ 4886 IRB_REFHOLD(save_ire->ire_bucket); 4887 /* Has it been removed already ? 
*/ 4888 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4889 IRB_REFRELE(save_ire->ire_bucket); 4890 ire_refrele(save_ire); 4891 break; 4892 } 4893 4894 xmit_mp = first_mp; 4895 /* 4896 * In case of MULTIRT, a copy of the current packet 4897 * to send is made to further re-enter the 4898 * loop and attempt another route resolution 4899 */ 4900 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4901 copy_mp = copymsg(first_mp); 4902 if (copy_mp != NULL) { 4903 xmit_mp = copy_mp; 4904 MULTIRT_DEBUG_TAG(first_mp); 4905 } 4906 } 4907 ire_add_then_send(q, ire, xmit_mp); 4908 if (ip6_asp_table_held) { 4909 ip6_asp_table_refrele(); 4910 ip6_asp_table_held = B_FALSE; 4911 } 4912 4913 /* Assert that it is not deleted yet. */ 4914 ASSERT(save_ire->ire_ptpn != NULL); 4915 IRB_REFRELE(save_ire->ire_bucket); 4916 ire_refrele(save_ire); 4917 4918 if (copy_mp != NULL) { 4919 /* 4920 * If we found a (no)resolver, we ignore any 4921 * trailing top priority IRE_CACHE in 4922 * further loops. This ensures that we do not 4923 * omit any (no)resolver despite the priority 4924 * in this call. 4925 * IRE_CACHE, if any, will be processed 4926 * by another thread entering ip_newroute(), 4927 * (on resolver response, for example). 4928 * We use this to force multiple parallel 4929 * resolution as soon as a packet needs to be 4930 * sent. The result is, after one packet 4931 * emission all reachable routes are generally 4932 * resolved. 4933 * Otherwise, complete resolution of MULTIRT 4934 * routes would require several emissions as 4935 * side effect. 4936 */ 4937 multirt_flags &= ~MULTIRT_CACHEGW; 4938 4939 /* 4940 * Search for the next unresolved multirt 4941 * route. 
4942 */ 4943 copy_mp = NULL; 4944 save_ire = NULL; 4945 ire = NULL; 4946 /* re-enter the loop */ 4947 multirt_resolve_next = B_TRUE; 4948 continue; 4949 } 4950 4951 /* Don't need sire anymore */ 4952 if (sire != NULL) 4953 ire_refrele(sire); 4954 ill_refrele(dst_ill); 4955 ipif_refrele(src_ipif); 4956 return; 4957 4958 case IRE_IF_RESOLVER: 4959 /* 4960 * We can't build an IRE_CACHE yet, but at least we 4961 * found a resolver that can help. 4962 */ 4963 dst = *v6dstp; 4964 /* 4965 * To be at this point in the code with a non-zero gw 4966 * means that dst is reachable through a gateway that 4967 * we have never resolved. By changing dst to the gw 4968 * addr we resolve the gateway first. When 4969 * ire_add_then_send() tries to put the IP dg to dst, 4970 * it will reenter ip_newroute() at which time we will 4971 * find the IRE_CACHE for the gw and create another 4972 * IRE_CACHE above (for dst itself). 4973 */ 4974 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4975 save_dst = dst; 4976 dst = v6gw; 4977 v6gw = ipv6_all_zeros; 4978 } 4979 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4980 /* 4981 * Ask the external resolver to do its thing. 
4982 * Make an mblk chain in the following form: 4983 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4984 */ 4985 mblk_t *ire_mp; 4986 mblk_t *areq_mp; 4987 areq_t *areq; 4988 in6_addr_t *addrp; 4989 4990 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4991 if (ip6_asp_table_held) { 4992 ip6_asp_table_refrele(); 4993 ip6_asp_table_held = B_FALSE; 4994 } 4995 ire = ire_create_mp_v6( 4996 &dst, /* dest address */ 4997 &ipv6_all_ones, /* mask */ 4998 &src_ipif->ipif_v6src_addr, 4999 /* source address */ 5000 &v6gw, /* gateway address */ 5001 NULL, /* Fast Path header */ 5002 dst_ill->ill_rq, /* recv-from queue */ 5003 dst_ill->ill_wq, /* send-to queue */ 5004 IRE_CACHE, 5005 NULL, 5006 src_ipif, 5007 &save_ire->ire_mask_v6, 5008 /* Parent mask */ 5009 0, 5010 save_ire->ire_ihandle, 5011 /* Interface handle */ 5012 0, /* flags if any */ 5013 &(save_ire->ire_uinfo)); 5014 5015 ire_refrele(save_ire); 5016 if (ire == NULL) { 5017 ip1dbg(("ip_newroute_v6:" 5018 "ire is NULL\n")); 5019 break; 5020 } 5021 if ((sire != NULL) && 5022 (sire->ire_flags & RTF_MULTIRT)) { 5023 /* 5024 * processing a copy of the packet to 5025 * send for further resolution loops 5026 */ 5027 copy_mp = copymsg(first_mp); 5028 if (copy_mp != NULL) 5029 MULTIRT_DEBUG_TAG(copy_mp); 5030 } 5031 ire->ire_marks |= ire_marks; 5032 ire_mp = ire->ire_mp; 5033 /* 5034 * Now create or find an nce for this interface. 5035 * The hw addr will need to to be set from 5036 * the reply to the AR_ENTRY_QUERY that 5037 * we're about to send. This will be done in 5038 * ire_add_v6(). 5039 */ 5040 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5041 switch (err) { 5042 case 0: 5043 /* 5044 * New cache entry created. 5045 * Break, then ask the external 5046 * resolver. 5047 */ 5048 break; 5049 case EINPROGRESS: 5050 /* 5051 * Resolution in progress; 5052 * packet has been queued by 5053 * ndp_resolver(). 5054 */ 5055 ire_delete(ire); 5056 ire = NULL; 5057 /* 5058 * Check if another multirt 5059 * route must be resolved. 
5060 */ 5061 if (copy_mp != NULL) { 5062 /* 5063 * If we found a resolver, we 5064 * ignore any trailing top 5065 * priority IRE_CACHE in 5066 * further loops. The reason is 5067 * the same as for noresolver. 5068 */ 5069 multirt_flags &= 5070 ~MULTIRT_CACHEGW; 5071 /* 5072 * Search for the next 5073 * unresolved multirt route. 5074 */ 5075 first_mp = copy_mp; 5076 copy_mp = NULL; 5077 mp = first_mp; 5078 if (mp->b_datap->db_type == 5079 M_CTL) { 5080 mp = mp->b_cont; 5081 } 5082 ASSERT(sire != NULL); 5083 dst = save_dst; 5084 /* 5085 * re-enter the loop 5086 */ 5087 multirt_resolve_next = 5088 B_TRUE; 5089 continue; 5090 } 5091 5092 if (sire != NULL) 5093 ire_refrele(sire); 5094 ill_refrele(dst_ill); 5095 ipif_refrele(src_ipif); 5096 return; 5097 default: 5098 /* 5099 * Transient error; packet will be 5100 * freed. 5101 */ 5102 ire_delete(ire); 5103 ire = NULL; 5104 break; 5105 } 5106 if (err != 0) 5107 break; 5108 /* 5109 * Now set up the AR_ENTRY_QUERY and send it. 5110 */ 5111 areq_mp = ill_arp_alloc(dst_ill, 5112 (uchar_t *)&ipv6_areq_template, 5113 (caddr_t)&dst); 5114 if (areq_mp == NULL) { 5115 ip1dbg(("ip_newroute_v6:" 5116 "areq_mp is NULL\n")); 5117 freemsg(ire_mp); 5118 break; 5119 } 5120 areq = (areq_t *)areq_mp->b_rptr; 5121 addrp = (in6_addr_t *)((char *)areq + 5122 areq->areq_target_addr_offset); 5123 *addrp = dst; 5124 addrp = (in6_addr_t *)((char *)areq + 5125 areq->areq_sender_addr_offset); 5126 *addrp = src_ipif->ipif_v6src_addr; 5127 /* 5128 * link the chain, then send up to the resolver. 5129 */ 5130 linkb(areq_mp, ire_mp); 5131 linkb(areq_mp, mp); 5132 ip1dbg(("ip_newroute_v6:" 5133 "putnext to resolver\n")); 5134 putnext(dst_ill->ill_rq, areq_mp); 5135 /* 5136 * Check if another multirt route 5137 * must be resolved. 5138 */ 5139 ire = NULL; 5140 if (copy_mp != NULL) { 5141 /* 5142 * If we find a resolver, we ignore any 5143 * trailing top priority IRE_CACHE in 5144 * further loops. The reason is the 5145 * same as for noresolver. 
5146 */ 5147 multirt_flags &= ~MULTIRT_CACHEGW; 5148 /* 5149 * Search for the next unresolved 5150 * multirt route. 5151 */ 5152 first_mp = copy_mp; 5153 copy_mp = NULL; 5154 mp = first_mp; 5155 if (mp->b_datap->db_type == M_CTL) { 5156 mp = mp->b_cont; 5157 } 5158 ASSERT(sire != NULL); 5159 dst = save_dst; 5160 /* 5161 * re-enter the loop 5162 */ 5163 multirt_resolve_next = B_TRUE; 5164 continue; 5165 } 5166 5167 if (sire != NULL) 5168 ire_refrele(sire); 5169 ill_refrele(dst_ill); 5170 ipif_refrele(src_ipif); 5171 return; 5172 } 5173 /* 5174 * Non-external resolver case. 5175 */ 5176 ire = ire_create_v6( 5177 &dst, /* dest address */ 5178 &ipv6_all_ones, /* mask */ 5179 &src_ipif->ipif_v6src_addr, /* source address */ 5180 &v6gw, /* gateway address */ 5181 &save_ire->ire_max_frag, 5182 NULL, /* Fast Path header */ 5183 dst_ill->ill_rq, /* recv-from queue */ 5184 dst_ill->ill_wq, /* send-to queue */ 5185 IRE_CACHE, 5186 NULL, 5187 src_ipif, 5188 &save_ire->ire_mask_v6, /* Parent mask */ 5189 0, 5190 save_ire->ire_ihandle, /* Interface handle */ 5191 0, /* flags if any */ 5192 &(save_ire->ire_uinfo)); 5193 5194 if (ire == NULL) { 5195 ire_refrele(save_ire); 5196 break; 5197 } 5198 5199 if ((sire != NULL) && 5200 (sire->ire_flags & RTF_MULTIRT)) { 5201 copy_mp = copymsg(first_mp); 5202 if (copy_mp != NULL) 5203 MULTIRT_DEBUG_TAG(copy_mp); 5204 } 5205 5206 ire->ire_marks |= ire_marks; 5207 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5208 switch (err) { 5209 case 0: 5210 /* Prevent save_ire from getting deleted */ 5211 IRB_REFHOLD(save_ire->ire_bucket); 5212 /* Has it been removed already ? */ 5213 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5214 IRB_REFRELE(save_ire->ire_bucket); 5215 ire_refrele(save_ire); 5216 break; 5217 } 5218 5219 /* 5220 * We have a resolved cache entry, 5221 * add in the IRE. 
5222 */ 5223 ire_add_then_send(q, ire, first_mp); 5224 if (ip6_asp_table_held) { 5225 ip6_asp_table_refrele(); 5226 ip6_asp_table_held = B_FALSE; 5227 } 5228 5229 /* Assert that it is not deleted yet. */ 5230 ASSERT(save_ire->ire_ptpn != NULL); 5231 IRB_REFRELE(save_ire->ire_bucket); 5232 ire_refrele(save_ire); 5233 /* 5234 * Check if another multirt route 5235 * must be resolved. 5236 */ 5237 ire = NULL; 5238 if (copy_mp != NULL) { 5239 /* 5240 * If we find a resolver, we ignore any 5241 * trailing top priority IRE_CACHE in 5242 * further loops. The reason is the 5243 * same as for noresolver. 5244 */ 5245 multirt_flags &= ~MULTIRT_CACHEGW; 5246 /* 5247 * Search for the next unresolved 5248 * multirt route. 5249 */ 5250 first_mp = copy_mp; 5251 copy_mp = NULL; 5252 mp = first_mp; 5253 if (mp->b_datap->db_type == M_CTL) { 5254 mp = mp->b_cont; 5255 } 5256 ASSERT(sire != NULL); 5257 dst = save_dst; 5258 /* 5259 * re-enter the loop 5260 */ 5261 multirt_resolve_next = B_TRUE; 5262 continue; 5263 } 5264 5265 if (sire != NULL) 5266 ire_refrele(sire); 5267 ill_refrele(dst_ill); 5268 ipif_refrele(src_ipif); 5269 return; 5270 5271 case EINPROGRESS: 5272 /* 5273 * mp was consumed - presumably queued. 5274 * No need for ire, presumably resolution is 5275 * in progress, and ire will be added when the 5276 * address is resolved. 5277 */ 5278 if (ip6_asp_table_held) { 5279 ip6_asp_table_refrele(); 5280 ip6_asp_table_held = B_FALSE; 5281 } 5282 ASSERT(ire->ire_nce == NULL); 5283 ire_delete(ire); 5284 ire_refrele(save_ire); 5285 /* 5286 * Check if another multirt route 5287 * must be resolved. 5288 */ 5289 ire = NULL; 5290 if (copy_mp != NULL) { 5291 /* 5292 * If we find a resolver, we ignore any 5293 * trailing top priority IRE_CACHE in 5294 * further loops. The reason is the 5295 * same as for noresolver. 5296 */ 5297 multirt_flags &= ~MULTIRT_CACHEGW; 5298 /* 5299 * Search for the next unresolved 5300 * multirt route. 
5301 */ 5302 first_mp = copy_mp; 5303 copy_mp = NULL; 5304 mp = first_mp; 5305 if (mp->b_datap->db_type == M_CTL) { 5306 mp = mp->b_cont; 5307 } 5308 ASSERT(sire != NULL); 5309 dst = save_dst; 5310 /* 5311 * re-enter the loop 5312 */ 5313 multirt_resolve_next = B_TRUE; 5314 continue; 5315 } 5316 if (sire != NULL) 5317 ire_refrele(sire); 5318 ill_refrele(dst_ill); 5319 ipif_refrele(src_ipif); 5320 return; 5321 default: 5322 /* Some transient error */ 5323 ASSERT(ire->ire_nce == NULL); 5324 ire_refrele(save_ire); 5325 break; 5326 } 5327 break; 5328 default: 5329 break; 5330 } 5331 if (ip6_asp_table_held) { 5332 ip6_asp_table_refrele(); 5333 ip6_asp_table_held = B_FALSE; 5334 } 5335 } while (multirt_resolve_next); 5336 5337 err_ret: 5338 ip1dbg(("ip_newroute_v6: dropped\n")); 5339 if (src_ipif != NULL) 5340 ipif_refrele(src_ipif); 5341 if (dst_ill != NULL) { 5342 need_rele = B_TRUE; 5343 ill = dst_ill; 5344 } 5345 if (ill != NULL) { 5346 if (mp->b_prev != NULL) { 5347 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5348 } else { 5349 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5350 } 5351 5352 if (need_rele) 5353 ill_refrele(ill); 5354 } else { 5355 if (mp->b_prev != NULL) { 5356 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5357 } else { 5358 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5359 } 5360 } 5361 /* Did this packet originate externally? 
*/ 5362 if (mp->b_prev) { 5363 mp->b_next = NULL; 5364 mp->b_prev = NULL; 5365 } 5366 if (copy_mp != NULL) { 5367 MULTIRT_DEBUG_UNTAG(copy_mp); 5368 freemsg(copy_mp); 5369 } 5370 MULTIRT_DEBUG_UNTAG(first_mp); 5371 freemsg(first_mp); 5372 if (ire != NULL) 5373 ire_refrele(ire); 5374 if (sire != NULL) 5375 ire_refrele(sire); 5376 return; 5377 5378 icmp_err_ret: 5379 if (ip6_asp_table_held) 5380 ip6_asp_table_refrele(); 5381 if (src_ipif != NULL) 5382 ipif_refrele(src_ipif); 5383 if (dst_ill != NULL) { 5384 need_rele = B_TRUE; 5385 ill = dst_ill; 5386 } 5387 ip1dbg(("ip_newroute_v6: no route\n")); 5388 if (sire != NULL) 5389 ire_refrele(sire); 5390 /* 5391 * We need to set sire to NULL to avoid double freeing if we 5392 * ever goto err_ret from below. 5393 */ 5394 sire = NULL; 5395 ip6h = (ip6_t *)mp->b_rptr; 5396 /* Skip ip6i_t header if present */ 5397 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5398 /* Make sure the IPv6 header is present */ 5399 if ((mp->b_wptr - (uchar_t *)ip6h) < 5400 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5401 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5402 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5403 goto err_ret; 5404 } 5405 } 5406 mp->b_rptr += sizeof (ip6i_t); 5407 ip6h = (ip6_t *)mp->b_rptr; 5408 } 5409 /* Did this packet originate externally? 
*/ 5410 if (mp->b_prev) { 5411 if (ill != NULL) { 5412 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5413 } else { 5414 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5415 } 5416 mp->b_next = NULL; 5417 mp->b_prev = NULL; 5418 q = WR(q); 5419 } else { 5420 if (ill != NULL) { 5421 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5422 } else { 5423 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5424 } 5425 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5426 /* Failed */ 5427 if (copy_mp != NULL) { 5428 MULTIRT_DEBUG_UNTAG(copy_mp); 5429 freemsg(copy_mp); 5430 } 5431 MULTIRT_DEBUG_UNTAG(first_mp); 5432 freemsg(first_mp); 5433 if (ire != NULL) 5434 ire_refrele(ire); 5435 if (need_rele) 5436 ill_refrele(ill); 5437 return; 5438 } 5439 } 5440 5441 if (need_rele) 5442 ill_refrele(ill); 5443 5444 /* 5445 * At this point we will have ire only if RTF_BLACKHOLE 5446 * or RTF_REJECT flags are set on the IRE. It will not 5447 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5448 */ 5449 if (ire != NULL) { 5450 if (ire->ire_flags & RTF_BLACKHOLE) { 5451 ire_refrele(ire); 5452 if (copy_mp != NULL) { 5453 MULTIRT_DEBUG_UNTAG(copy_mp); 5454 freemsg(copy_mp); 5455 } 5456 MULTIRT_DEBUG_UNTAG(first_mp); 5457 freemsg(first_mp); 5458 return; 5459 } 5460 ire_refrele(ire); 5461 } 5462 if (ip_debug > 3) { 5463 /* ip2dbg */ 5464 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5465 AF_INET6, v6dstp); 5466 } 5467 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5468 B_FALSE, B_FALSE); 5469 } 5470 5471 /* 5472 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5473 * we need to send out a packet to a destination address for which we do not 5474 * have specific routing information. It is only used for multicast packets. 5475 * 5476 * If unspec_src we allow creating an IRE with source address zero. 5477 * ire_send_v6() will delete it after the packet is sent. 
5478 */ 5479 void 5480 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5481 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5482 { 5483 ire_t *ire = NULL; 5484 ipif_t *src_ipif = NULL; 5485 int err = 0; 5486 ill_t *dst_ill = NULL; 5487 ire_t *save_ire; 5488 ushort_t ire_marks = 0; 5489 ipsec_out_t *io; 5490 ill_t *attach_ill = NULL; 5491 ill_t *ill; 5492 ip6_t *ip6h; 5493 mblk_t *first_mp; 5494 boolean_t ip6i_present; 5495 ire_t *fire = NULL; 5496 mblk_t *copy_mp = NULL; 5497 boolean_t multirt_resolve_next; 5498 in6_addr_t *v6dstp = &v6dst; 5499 boolean_t ipif_held = B_FALSE; 5500 boolean_t ill_held = B_FALSE; 5501 boolean_t ip6_asp_table_held = B_FALSE; 5502 5503 /* 5504 * This loop is run only once in most cases. 5505 * We loop to resolve further routes only when the destination 5506 * can be reached through multiple RTF_MULTIRT-flagged ires. 5507 */ 5508 do { 5509 multirt_resolve_next = B_FALSE; 5510 if (dst_ill != NULL) { 5511 ill_refrele(dst_ill); 5512 dst_ill = NULL; 5513 } 5514 5515 if (src_ipif != NULL) { 5516 ipif_refrele(src_ipif); 5517 src_ipif = NULL; 5518 } 5519 ASSERT(ipif != NULL); 5520 ill = ipif->ipif_ill; 5521 5522 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5523 if (ip_debug > 2) { 5524 /* ip1dbg */ 5525 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5526 AF_INET6, v6dstp); 5527 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5528 ill->ill_name, ipif->ipif_isv6); 5529 } 5530 5531 first_mp = mp; 5532 if (mp->b_datap->db_type == M_CTL) { 5533 mp = mp->b_cont; 5534 io = (ipsec_out_t *)first_mp->b_rptr; 5535 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5536 } else { 5537 io = NULL; 5538 } 5539 5540 /* 5541 * If the interface is a pt-pt interface we look for an 5542 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5543 * local_address and the pt-pt destination address. 5544 * Otherwise we just match the local address. 
5545 */ 5546 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5547 goto err_ret; 5548 } 5549 /* 5550 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5551 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5552 * as it could be NULL. 5553 * 5554 * This information can appear either in an ip6i_t or an 5555 * IPSEC_OUT message. 5556 */ 5557 ip6h = (ip6_t *)mp->b_rptr; 5558 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5559 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5560 if (!ip6i_present || 5561 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5562 attach_ill = ip_grab_attach_ill(ill, first_mp, 5563 (ip6i_present ? 5564 ((ip6i_t *)ip6h)->ip6i_ifindex : 5565 io->ipsec_out_ill_index), B_TRUE); 5566 /* Failure case frees things for us. */ 5567 if (attach_ill == NULL) 5568 return; 5569 5570 /* 5571 * Check if we need an ire that will not be 5572 * looked up by anybody else i.e. HIDDEN. 5573 */ 5574 if (ill_is_probeonly(attach_ill)) 5575 ire_marks = IRE_MARK_HIDDEN; 5576 } 5577 } 5578 5579 /* 5580 * We check if an IRE_OFFSUBNET for the addr that goes through 5581 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5582 * RTF_MULTIRT flags must be honored. 5583 */ 5584 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5585 ip2dbg(("ip_newroute_ipif_v6: " 5586 "ipif_lookup_multi_ire_v6(" 5587 "ipif %p, dst %08x) = fire %p\n", 5588 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5589 (void *)fire)); 5590 5591 /* 5592 * If the application specified the ill (ifindex), we still 5593 * load spread. Only if the packets needs to go out specifically 5594 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5595 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5596 * multirouting, then we don't try to use a different ill for 5597 * load spreading. 
5598 */ 5599 if (attach_ill == NULL) { 5600 /* 5601 * If the interface belongs to an interface group, 5602 * make sure the next possible interface in the group 5603 * is used. This encourages load spreading among peers 5604 * in an interface group. 5605 * 5606 * Note: While we pick a dst_ill we are really only 5607 * interested in the ill for load spreading. The source 5608 * ipif is determined by source address selection below. 5609 */ 5610 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5611 dst_ill = ipif->ipif_ill; 5612 /* For uniformity do a refhold */ 5613 ill_refhold(dst_ill); 5614 } else { 5615 /* refheld by ip_newroute_get_dst_ill_v6 */ 5616 dst_ill = 5617 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5618 } 5619 if (dst_ill == NULL) { 5620 if (ip_debug > 2) { 5621 pr_addr_dbg("ip_newroute_ipif_v6: " 5622 "no dst ill for dst %s\n", 5623 AF_INET6, v6dstp); 5624 } 5625 goto err_ret; 5626 } 5627 } else { 5628 dst_ill = ipif->ipif_ill; 5629 /* 5630 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5631 * and IPV6_BOUND_PIF case. 5632 */ 5633 ASSERT(dst_ill == attach_ill); 5634 /* attach_ill is already refheld */ 5635 } 5636 /* 5637 * Pick a source address which matches the scope of the 5638 * destination address. 5639 * For RTF_SETSRC routes, the source address is imposed by the 5640 * parent ire (fire). 5641 */ 5642 ASSERT(src_ipif == NULL); 5643 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5644 /* 5645 * Check that the ipif matching the requested source 5646 * address still exists. 
5647 */ 5648 src_ipif = 5649 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5650 NULL, zoneid, NULL, NULL, NULL, NULL); 5651 } 5652 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5653 ip6_asp_table_held = B_TRUE; 5654 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5655 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5656 } 5657 5658 if (src_ipif == NULL) { 5659 if (!unspec_src) { 5660 if (ip_debug > 2) { 5661 /* ip1dbg */ 5662 pr_addr_dbg("ip_newroute_ipif_v6: " 5663 "no src for dst %s\n,", 5664 AF_INET6, v6dstp); 5665 printf(" through interface %s\n", 5666 dst_ill->ill_name); 5667 } 5668 goto err_ret; 5669 } 5670 /* Use any ipif for source */ 5671 for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; 5672 src_ipif = src_ipif->ipif_next) { 5673 if ((src_ipif->ipif_flags & IPIF_UP) && 5674 IN6_IS_ADDR_UNSPECIFIED( 5675 &src_ipif->ipif_v6src_addr)) 5676 break; 5677 } 5678 if (src_ipif == NULL) { 5679 if (ip_debug > 2) { 5680 /* ip1dbg */ 5681 pr_addr_dbg("ip_newroute_ipif_v6: " 5682 "no src for dst %s\n ", 5683 AF_INET6, v6dstp); 5684 printf("ip_newroute_ipif_v6: if %s" 5685 "(UNSPEC_SRC)\n", 5686 dst_ill->ill_name); 5687 } 5688 goto err_ret; 5689 } 5690 src_ipif = ipif; 5691 ipif_refhold(src_ipif); 5692 } 5693 ire = ipif_to_ire_v6(ipif); 5694 if (ire == NULL) { 5695 if (ip_debug > 2) { 5696 /* ip1dbg */ 5697 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5698 AF_INET6, &ipif->ipif_v6lcl_addr); 5699 printf("ip_newroute_ipif_v6: " 5700 "if %s\n", dst_ill->ill_name); 5701 } 5702 goto err_ret; 5703 } 5704 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5705 goto err_ret; 5706 5707 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5708 5709 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5710 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5711 if (ip_debug > 2) { 5712 /* ip1dbg */ 5713 pr_addr_dbg(" address %s\n", 5714 AF_INET6, &ire->ire_src_addr_v6); 5715 } 5716 save_ire = ire; 5717 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5718 (void *)ire, (void 
*)ipif)); 5719 5720 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5721 /* 5722 * an IRE_OFFSUBET was looked up 5723 * on that interface. 5724 * this ire has RTF_MULTIRT flag, 5725 * so the resolution loop 5726 * will be re-entered to resolve 5727 * additional routes on other 5728 * interfaces. For that purpose, 5729 * a copy of the packet is 5730 * made at this point. 5731 */ 5732 fire->ire_last_used_time = lbolt; 5733 copy_mp = copymsg(first_mp); 5734 if (copy_mp) { 5735 MULTIRT_DEBUG_TAG(copy_mp); 5736 } 5737 } 5738 5739 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5740 switch (ire->ire_type) { 5741 case IRE_IF_NORESOLVER: { 5742 /* We have what we need to build an IRE_CACHE. */ 5743 mblk_t *dlureq_mp; 5744 5745 /* 5746 * Create a new dlureq_mp with the 5747 * IPv6 gateway address in destination address in the 5748 * DLPI hdr if the physical length is exactly 16 bytes. 5749 */ 5750 ASSERT(dst_ill->ill_isv6); 5751 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5752 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5753 dst_ill->ill_phys_addr_length, 5754 dst_ill->ill_sap, 5755 dst_ill->ill_sap_length); 5756 } else { 5757 dlureq_mp = ire->ire_dlureq_mp; 5758 } 5759 5760 if (dlureq_mp == NULL) 5761 break; 5762 /* 5763 * The newly created ire will inherit the flags of the 5764 * parent ire, if any. 5765 */ 5766 ire = ire_create_v6( 5767 v6dstp, /* dest address */ 5768 &ipv6_all_ones, /* mask */ 5769 &src_ipif->ipif_v6src_addr, /* source address */ 5770 NULL, /* gateway address */ 5771 &save_ire->ire_max_frag, 5772 NULL, /* Fast Path header */ 5773 dst_ill->ill_rq, /* recv-from queue */ 5774 dst_ill->ill_wq, /* send-to queue */ 5775 IRE_CACHE, 5776 dlureq_mp, 5777 src_ipif, 5778 NULL, 5779 (fire != NULL) ? /* Parent handle */ 5780 fire->ire_phandle : 0, 5781 save_ire->ire_ihandle, /* Interface handle */ 5782 (fire != NULL) ? 
5783 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5784 0, 5785 &ire_uinfo_null); 5786 5787 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5788 freeb(dlureq_mp); 5789 5790 if (ire == NULL) { 5791 ire_refrele(save_ire); 5792 break; 5793 } 5794 5795 ire->ire_marks |= ire_marks; 5796 5797 err = ndp_noresolver(dst_ill, v6dstp); 5798 if (err != 0) { 5799 ire_refrele(save_ire); 5800 break; 5801 } 5802 5803 /* Prevent save_ire from getting deleted */ 5804 IRB_REFHOLD(save_ire->ire_bucket); 5805 /* Has it been removed already ? */ 5806 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5807 IRB_REFRELE(save_ire->ire_bucket); 5808 ire_refrele(save_ire); 5809 break; 5810 } 5811 5812 ire_add_then_send(q, ire, first_mp); 5813 if (ip6_asp_table_held) { 5814 ip6_asp_table_refrele(); 5815 ip6_asp_table_held = B_FALSE; 5816 } 5817 5818 /* Assert that it is not deleted yet. */ 5819 ASSERT(save_ire->ire_ptpn != NULL); 5820 IRB_REFRELE(save_ire->ire_bucket); 5821 ire_refrele(save_ire); 5822 if (fire != NULL) { 5823 ire_refrele(fire); 5824 fire = NULL; 5825 } 5826 5827 /* 5828 * The resolution loop is re-entered if we 5829 * actually are in a multirouting case. 5830 */ 5831 if (copy_mp != NULL) { 5832 boolean_t need_resolve = 5833 ire_multirt_need_resolve_v6(v6dstp); 5834 if (!need_resolve) { 5835 MULTIRT_DEBUG_UNTAG(copy_mp); 5836 freemsg(copy_mp); 5837 copy_mp = NULL; 5838 } else { 5839 /* 5840 * ipif_lookup_group_v6() calls 5841 * ire_lookup_multi_v6() that uses 5842 * ire_ftable_lookup_v6() to find 5843 * an IRE_INTERFACE for the group. 5844 * In the multirt case, 5845 * ire_lookup_multi_v6() then invokes 5846 * ire_multirt_lookup_v6() to find 5847 * the next resolvable ire. 5848 * As a result, we obtain a new 5849 * interface, derived from the 5850 * next ire. 
5851 */ 5852 if (ipif_held) { 5853 ipif_refrele(ipif); 5854 ipif_held = B_FALSE; 5855 } 5856 ipif = ipif_lookup_group_v6(v6dstp, 5857 zoneid); 5858 ip2dbg(("ip_newroute_ipif: " 5859 "multirt dst %08x, ipif %p\n", 5860 ntohl(V4_PART_OF_V6((*v6dstp))), 5861 (void *)ipif)); 5862 if (ipif != NULL) { 5863 ipif_held = B_TRUE; 5864 mp = copy_mp; 5865 copy_mp = NULL; 5866 multirt_resolve_next = 5867 B_TRUE; 5868 continue; 5869 } else { 5870 freemsg(copy_mp); 5871 } 5872 } 5873 } 5874 ill_refrele(dst_ill); 5875 if (ipif_held) { 5876 ipif_refrele(ipif); 5877 ipif_held = B_FALSE; 5878 } 5879 if (src_ipif != NULL) 5880 ipif_refrele(src_ipif); 5881 return; 5882 } 5883 case IRE_IF_RESOLVER: { 5884 5885 ASSERT(dst_ill->ill_isv6); 5886 5887 /* 5888 * We obtain a partial IRE_CACHE which we will pass 5889 * along with the resolver query. When the response 5890 * comes back it will be there ready for us to add. 5891 */ 5892 /* 5893 * the newly created ire will inherit the flags of the 5894 * parent ire, if any. 5895 */ 5896 ire = ire_create_v6( 5897 v6dstp, /* dest address */ 5898 &ipv6_all_ones, /* mask */ 5899 &src_ipif->ipif_v6src_addr, /* source address */ 5900 NULL, /* gateway address */ 5901 &save_ire->ire_max_frag, 5902 NULL, /* Fast Path header */ 5903 dst_ill->ill_rq, /* recv-from queue */ 5904 dst_ill->ill_wq, /* send-to queue */ 5905 IRE_CACHE, 5906 NULL, 5907 src_ipif, 5908 NULL, 5909 (fire != NULL) ? /* Parent handle */ 5910 fire->ire_phandle : 0, 5911 save_ire->ire_ihandle, /* Interface handle */ 5912 (fire != NULL) ? 
5913 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5914 0, 5915 &ire_uinfo_null); 5916 5917 if (ire == NULL) { 5918 ire_refrele(save_ire); 5919 break; 5920 } 5921 5922 ire->ire_marks |= ire_marks; 5923 5924 /* Resolve and add ire to the ctable */ 5925 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5926 switch (err) { 5927 case 0: 5928 /* Prevent save_ire from getting deleted */ 5929 IRB_REFHOLD(save_ire->ire_bucket); 5930 /* Has it been removed already ? */ 5931 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5932 IRB_REFRELE(save_ire->ire_bucket); 5933 ire_refrele(save_ire); 5934 break; 5935 } 5936 /* 5937 * We have a resolved cache entry, 5938 * add in the IRE. 5939 */ 5940 ire_add_then_send(q, ire, first_mp); 5941 if (ip6_asp_table_held) { 5942 ip6_asp_table_refrele(); 5943 ip6_asp_table_held = B_FALSE; 5944 } 5945 5946 /* Assert that it is not deleted yet. */ 5947 ASSERT(save_ire->ire_ptpn != NULL); 5948 IRB_REFRELE(save_ire->ire_bucket); 5949 ire_refrele(save_ire); 5950 if (fire != NULL) { 5951 ire_refrele(fire); 5952 fire = NULL; 5953 } 5954 5955 /* 5956 * The resolution loop is re-entered if we 5957 * actually are in a multirouting case. 5958 */ 5959 if (copy_mp != NULL) { 5960 boolean_t need_resolve = 5961 ire_multirt_need_resolve_v6(v6dstp); 5962 if (!need_resolve) { 5963 MULTIRT_DEBUG_UNTAG(copy_mp); 5964 freemsg(copy_mp); 5965 copy_mp = NULL; 5966 } else { 5967 /* 5968 * ipif_lookup_group_v6() calls 5969 * ire_lookup_multi_v6() that 5970 * uses ire_ftable_lookup_v6() 5971 * to find an IRE_INTERFACE for 5972 * the group. In the multirt 5973 * case, ire_lookup_multi_v6() 5974 * then invokes 5975 * ire_multirt_lookup_v6() to 5976 * find the next resolvable ire. 5977 * As a result, we obtain a new 5978 * interface, derived from the 5979 * next ire. 
5980 */ 5981 if (ipif_held) { 5982 ipif_refrele(ipif); 5983 ipif_held = B_FALSE; 5984 } 5985 ipif = ipif_lookup_group_v6( 5986 v6dstp, zoneid); 5987 ip2dbg(("ip_newroute_ipif: " 5988 "multirt dst %08x, " 5989 "ipif %p\n", 5990 ntohl(V4_PART_OF_V6( 5991 (*v6dstp))), 5992 (void *)ipif)); 5993 if (ipif != NULL) { 5994 ipif_held = B_TRUE; 5995 mp = copy_mp; 5996 copy_mp = NULL; 5997 multirt_resolve_next = 5998 B_TRUE; 5999 continue; 6000 } else { 6001 freemsg(copy_mp); 6002 } 6003 } 6004 } 6005 ill_refrele(dst_ill); 6006 if (ipif_held) { 6007 ipif_refrele(ipif); 6008 ipif_held = B_FALSE; 6009 } 6010 if (src_ipif != NULL) 6011 ipif_refrele(src_ipif); 6012 return; 6013 6014 case EINPROGRESS: 6015 /* 6016 * mp was consumed - presumably queued. 6017 * No need for ire, presumably resolution is 6018 * in progress, and ire will be added when the 6019 * address is resolved. 6020 */ 6021 if (ip6_asp_table_held) { 6022 ip6_asp_table_refrele(); 6023 ip6_asp_table_held = B_FALSE; 6024 } 6025 ire_delete(ire); 6026 ire_refrele(save_ire); 6027 if (fire != NULL) { 6028 ire_refrele(fire); 6029 fire = NULL; 6030 } 6031 6032 /* 6033 * The resolution loop is re-entered if we 6034 * actually are in a multirouting case. 6035 */ 6036 if (copy_mp != NULL) { 6037 boolean_t need_resolve = 6038 ire_multirt_need_resolve_v6(v6dstp); 6039 if (!need_resolve) { 6040 MULTIRT_DEBUG_UNTAG(copy_mp); 6041 freemsg(copy_mp); 6042 copy_mp = NULL; 6043 } else { 6044 /* 6045 * ipif_lookup_group_v6() calls 6046 * ire_lookup_multi_v6() that 6047 * uses ire_ftable_lookup_v6() 6048 * to find an IRE_INTERFACE for 6049 * the group. In the multirt 6050 * case, ire_lookup_multi_v6() 6051 * then invokes 6052 * ire_multirt_lookup_v6() to 6053 * find the next resolvable ire. 6054 * As a result, we obtain a new 6055 * interface, derived from the 6056 * next ire. 
6057 */ 6058 if (ipif_held) { 6059 ipif_refrele(ipif); 6060 ipif_held = B_FALSE; 6061 } 6062 ipif = ipif_lookup_group_v6( 6063 v6dstp, zoneid); 6064 ip2dbg(("ip_newroute_ipif: " 6065 "multirt dst %08x, " 6066 "ipif %p\n", 6067 ntohl(V4_PART_OF_V6( 6068 (*v6dstp))), 6069 (void *)ipif)); 6070 if (ipif != NULL) { 6071 ipif_held = B_TRUE; 6072 mp = copy_mp; 6073 copy_mp = NULL; 6074 multirt_resolve_next = 6075 B_TRUE; 6076 continue; 6077 } else { 6078 freemsg(copy_mp); 6079 } 6080 } 6081 } 6082 ill_refrele(dst_ill); 6083 if (ipif_held) { 6084 ipif_refrele(ipif); 6085 ipif_held = B_FALSE; 6086 } 6087 if (src_ipif != NULL) 6088 ipif_refrele(src_ipif); 6089 return; 6090 default: 6091 /* Some transient error */ 6092 ire_refrele(save_ire); 6093 break; 6094 } 6095 break; 6096 } 6097 default: 6098 break; 6099 } 6100 if (ip6_asp_table_held) { 6101 ip6_asp_table_refrele(); 6102 ip6_asp_table_held = B_FALSE; 6103 } 6104 } while (multirt_resolve_next); 6105 6106 err_ret: 6107 if (ip6_asp_table_held) 6108 ip6_asp_table_refrele(); 6109 if (ire != NULL) 6110 ire_refrele(ire); 6111 if (fire != NULL) 6112 ire_refrele(fire); 6113 if (ipif != NULL && ipif_held) 6114 ipif_refrele(ipif); 6115 if (src_ipif != NULL) 6116 ipif_refrele(src_ipif); 6117 /* Multicast - no point in trying to generate ICMP error */ 6118 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6119 if (dst_ill != NULL) { 6120 ill = dst_ill; 6121 ill_held = B_TRUE; 6122 } 6123 if (mp->b_prev || mp->b_next) { 6124 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6125 } else { 6126 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6127 } 6128 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6129 mp->b_next = NULL; 6130 mp->b_prev = NULL; 6131 freemsg(first_mp); 6132 if (ill_held) 6133 ill_refrele(ill); 6134 } 6135 6136 /* 6137 * Parse and process any hop-by-hop or destination options. 6138 * 6139 * Assumes that q is an ill read queue so that ICMP errors for link-local 6140 * destinations are sent out the correct interface. 
6141 * 6142 * Returns -1 if there was an error and mp has been consumed. 6143 * Returns 0 if no special action is needed. 6144 * Returns 1 if the packet contained a router alert option for this node 6145 * which is verified to be "interesting/known" for our implementation. 6146 * 6147 * XXX Note: In future as more hbh or dest options are defined, 6148 * it may be better to have different routines for hbh and dest 6149 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6150 * may have same value in different namespaces. Or is it same namespace ?? 6151 * Current code checks for each opt_type (other than pads) if it is in 6152 * the expected nexthdr (hbh or dest) 6153 */ 6154 static int 6155 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6156 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6157 { 6158 uint8_t opt_type; 6159 uint_t optused; 6160 int ret = 0; 6161 mblk_t *first_mp; 6162 6163 first_mp = mp; 6164 if (mp->b_datap->db_type == M_CTL) { 6165 mp = mp->b_cont; 6166 } 6167 6168 while (optlen != 0) { 6169 opt_type = *optptr; 6170 if (opt_type == IP6OPT_PAD1) { 6171 optused = 1; 6172 } else { 6173 if (optlen < 2) 6174 goto bad_opt; 6175 switch (opt_type) { 6176 case IP6OPT_PADN: 6177 /* 6178 * Note:We don't verify that (N-2) pad octets 6179 * are zero as required by spec. Adhere to 6180 * "be liberal in what you accept..." part of 6181 * implementation philosophy (RFC791,RFC1122) 6182 */ 6183 optused = 2 + optptr[1]; 6184 if (optused > optlen) 6185 goto bad_opt; 6186 break; 6187 6188 case IP6OPT_JUMBO: 6189 if (hdr_type != IPPROTO_HOPOPTS) 6190 goto opt_error; 6191 goto opt_error; /* XXX Not implemented! 
*/ 6192 6193 case IP6OPT_ROUTER_ALERT: { 6194 struct ip6_opt_router *or; 6195 6196 if (hdr_type != IPPROTO_HOPOPTS) 6197 goto opt_error; 6198 optused = 2 + optptr[1]; 6199 if (optused > optlen) 6200 goto bad_opt; 6201 or = (struct ip6_opt_router *)optptr; 6202 /* Check total length and alignment */ 6203 if (optused != sizeof (*or) || 6204 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6205 goto opt_error; 6206 /* Check value */ 6207 switch (*((uint16_t *)or->ip6or_value)) { 6208 case IP6_ALERT_MLD: 6209 case IP6_ALERT_RSVP: 6210 ret = 1; 6211 } 6212 break; 6213 } 6214 case IP6OPT_HOME_ADDRESS: { 6215 /* 6216 * Minimal support for the home address option 6217 * (which is required by all IPv6 nodes). 6218 * Implement by just swapping the home address 6219 * and source address. 6220 * XXX Note: this has IPsec implications since 6221 * AH needs to take this into account. 6222 * Also, when IPsec is used we need to ensure 6223 * that this is only processed once 6224 * in the received packet (to avoid swapping 6225 * back and forth). 6226 * NOTE:This option processing is considered 6227 * to be unsafe and prone to a denial of 6228 * service attack. 6229 * The current processing is not safe even with 6230 * IPsec secured IP packets. Since the home 6231 * address option processing requirement still 6232 * is in the IETF draft and in the process of 6233 * being redefined for its usage, it has been 6234 * decided to turn off the option by default. 6235 * If this section of code needs to be executed, 6236 * ndd variable ip6_ignore_home_address_opt 6237 * should be set to 0 at the user's own risk. 6238 */ 6239 struct ip6_opt_home_address *oh; 6240 in6_addr_t tmp; 6241 6242 if (ipv6_ignore_home_address_opt) 6243 goto opt_error; 6244 6245 if (hdr_type != IPPROTO_DSTOPTS) 6246 goto opt_error; 6247 optused = 2 + optptr[1]; 6248 if (optused > optlen) 6249 goto bad_opt; 6250 6251 /* 6252 * We did this dest. opt the first time 6253 * around (i.e. before AH processing). 
6254 * If we've done AH... stop now. 6255 */ 6256 if (first_mp != mp) { 6257 ipsec_in_t *ii; 6258 6259 ii = (ipsec_in_t *)first_mp->b_rptr; 6260 if (ii->ipsec_in_ah_sa != NULL) 6261 break; 6262 } 6263 6264 oh = (struct ip6_opt_home_address *)optptr; 6265 /* Check total length and alignment */ 6266 if (optused < sizeof (*oh) || 6267 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6268 goto opt_error; 6269 /* Swap ip6_src and the home address */ 6270 tmp = ip6h->ip6_src; 6271 /* XXX Note: only 8 byte alignment option */ 6272 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6273 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6274 break; 6275 } 6276 6277 case IP6OPT_TUNNEL_LIMIT: 6278 if (hdr_type != IPPROTO_DSTOPTS) { 6279 goto opt_error; 6280 } 6281 optused = 2 + optptr[1]; 6282 if (optused > optlen) { 6283 goto bad_opt; 6284 } 6285 if (optused != 3) { 6286 goto opt_error; 6287 } 6288 break; 6289 6290 default: 6291 opt_error: 6292 ip1dbg(("ip_process_options_v6: bad opt 0x%x\n", 6293 opt_type)); 6294 switch (IP6OPT_TYPE(opt_type)) { 6295 case IP6OPT_TYPE_SKIP: 6296 optused = 2 + optptr[1]; 6297 if (optused > optlen) 6298 goto bad_opt; 6299 break; 6300 case IP6OPT_TYPE_DISCARD: 6301 freemsg(first_mp); 6302 return (-1); 6303 case IP6OPT_TYPE_ICMP: 6304 icmp_param_problem_v6(WR(q), first_mp, 6305 ICMP6_PARAMPROB_OPTION, 6306 (uint32_t)(optptr - 6307 (uint8_t *)ip6h), 6308 B_FALSE, B_FALSE); 6309 return (-1); 6310 case IP6OPT_TYPE_FORCEICMP: 6311 icmp_param_problem_v6(WR(q), first_mp, 6312 ICMP6_PARAMPROB_OPTION, 6313 (uint32_t)(optptr - 6314 (uint8_t *)ip6h), 6315 B_FALSE, B_TRUE); 6316 return (-1); 6317 } 6318 } 6319 } 6320 optlen -= optused; 6321 optptr += optused; 6322 } 6323 return (ret); 6324 6325 bad_opt: 6326 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6327 (uint32_t)(optptr - (uint8_t *)ip6h), 6328 B_FALSE, B_FALSE); 6329 return (-1); 6330 } 6331 6332 /* 6333 * Process a routing header that is not yet empty. 6334 * Only handles type 0 routing headers. 
6335 */ 6336 static void 6337 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6338 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6339 { 6340 ip6_rthdr0_t *rthdr; 6341 uint_t ehdrlen; 6342 uint_t numaddr; 6343 in6_addr_t *addrptr; 6344 in6_addr_t tmp; 6345 6346 ASSERT(rth->ip6r_segleft != 0); 6347 6348 if (!ipv6_forward_src_routed) { 6349 /* XXX Check for source routed out same interface? */ 6350 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6351 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6352 freemsg(hada_mp); 6353 freemsg(mp); 6354 return; 6355 } 6356 6357 if (rth->ip6r_type != 0) { 6358 if (hada_mp != NULL) 6359 goto hada_drop; 6360 icmp_param_problem_v6(WR(q), mp, 6361 ICMP6_PARAMPROB_HEADER, 6362 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6363 B_FALSE, B_FALSE); 6364 return; 6365 } 6366 rthdr = (ip6_rthdr0_t *)rth; 6367 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6368 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6369 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6370 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6371 if (rthdr->ip6r0_len & 0x1) { 6372 /* An odd length is impossible */ 6373 if (hada_mp != NULL) 6374 goto hada_drop; 6375 icmp_param_problem_v6(WR(q), mp, 6376 ICMP6_PARAMPROB_HEADER, 6377 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6378 B_FALSE, B_FALSE); 6379 return; 6380 } 6381 numaddr = rthdr->ip6r0_len / 2; 6382 if (rthdr->ip6r0_segleft > numaddr) { 6383 /* segleft exceeds number of addresses in routing header */ 6384 if (hada_mp != NULL) 6385 goto hada_drop; 6386 icmp_param_problem_v6(WR(q), mp, 6387 ICMP6_PARAMPROB_HEADER, 6388 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6389 (uchar_t *)ip6h), 6390 B_FALSE, B_FALSE); 6391 return; 6392 } 6393 addrptr += (numaddr - rthdr->ip6r0_segleft); 6394 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6395 IN6_IS_ADDR_MULTICAST(addrptr)) { 6396 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6397 freemsg(hada_mp); 6398 
freemsg(mp); 6399 return; 6400 } 6401 /* Swap */ 6402 tmp = *addrptr; 6403 *addrptr = ip6h->ip6_dst; 6404 ip6h->ip6_dst = tmp; 6405 rthdr->ip6r0_segleft--; 6406 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6407 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6408 if (hada_mp != NULL) 6409 goto hada_drop; 6410 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6411 B_FALSE, B_FALSE); 6412 return; 6413 } 6414 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6415 return; 6416 hada_drop: 6417 /* IPsec kstats: bean counter? */ 6418 freemsg(hada_mp); 6419 freemsg(mp); 6420 } 6421 6422 /* 6423 * Read side put procedure for IPv6 module. 6424 */ 6425 static void 6426 ip_rput_v6(queue_t *q, mblk_t *mp) 6427 { 6428 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6429 ip6_t *ip6h; 6430 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6431 ill_t *ill; 6432 struct iocblk *iocp; 6433 uint_t flags = 0; 6434 6435 ill = (ill_t *)q->q_ptr; 6436 if (ill->ill_state_flags & ILL_CONDEMNED) { 6437 union DL_primitives *dl; 6438 6439 dl = (union DL_primitives *)mp->b_rptr; 6440 /* 6441 * Things are opening or closing - only accept DLPI 6442 * ack messages. If the stream is closing and ip_wsrv 6443 * has completed, ip_close is out of the qwait, but has 6444 * not yet completed qprocsoff. Don't proceed any further 6445 * because the ill has been cleaned up and things hanging 6446 * off the ill have been freed. 6447 */ 6448 if ((mp->b_datap->db_type != M_PCPROTO) || 6449 (dl->dl_primitive == DL_UNITDATA_IND)) { 6450 ip_ioctl_freemsg(mp); 6451 return; 6452 } 6453 } 6454 6455 switch (mp->b_datap->db_type) { 6456 case M_DATA: 6457 break; 6458 6459 case M_PROTO: 6460 case M_PCPROTO: 6461 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6462 DL_UNITDATA_IND) { 6463 /* Go handle anything other than data elsewhere. 
*/ 6464 ip_rput_dlpi(q, mp); 6465 return; 6466 } 6467 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6468 ll_multicast = dlur->dl_group_address; 6469 #undef dlur 6470 /* Ditch the DLPI header. */ 6471 mp1 = mp; 6472 mp = mp->b_cont; 6473 freeb(mp1); 6474 break; 6475 case M_BREAK: 6476 panic("ip_rput_v6: got an M_BREAK"); 6477 /*NOTREACHED*/ 6478 case M_IOCACK: 6479 iocp = (struct iocblk *)mp->b_rptr; 6480 switch (iocp->ioc_cmd) { 6481 case DL_IOC_HDR_INFO: 6482 ill = (ill_t *)q->q_ptr; 6483 ill_fastpath_ack(ill, mp); 6484 return; 6485 case SIOCSTUNPARAM: 6486 case SIOCGTUNPARAM: 6487 case OSIOCSTUNPARAM: 6488 case OSIOCGTUNPARAM: 6489 /* Go through qwriter */ 6490 break; 6491 default: 6492 putnext(q, mp); 6493 return; 6494 } 6495 /* FALLTHRU */ 6496 case M_ERROR: 6497 case M_HANGUP: 6498 mutex_enter(&ill->ill_lock); 6499 if (ill->ill_state_flags & ILL_CONDEMNED) { 6500 mutex_exit(&ill->ill_lock); 6501 freemsg(mp); 6502 return; 6503 } 6504 ill_refhold_locked(ill); 6505 mutex_exit(&ill->ill_lock); 6506 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6507 return; 6508 case M_CTL: { 6509 if ((MBLKL(mp) > sizeof (int)) && 6510 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6511 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6512 mctl_present = B_TRUE; 6513 break; 6514 } 6515 putnext(q, mp); 6516 return; 6517 } 6518 case M_IOCNAK: 6519 iocp = (struct iocblk *)mp->b_rptr; 6520 switch (iocp->ioc_cmd) { 6521 case DL_IOC_HDR_INFO: 6522 case SIOCSTUNPARAM: 6523 case SIOCGTUNPARAM: 6524 case OSIOCSTUNPARAM: 6525 case OSIOCGTUNPARAM: 6526 mutex_enter(&ill->ill_lock); 6527 if (ill->ill_state_flags & ILL_CONDEMNED) { 6528 mutex_exit(&ill->ill_lock); 6529 freemsg(mp); 6530 return; 6531 } 6532 ill_refhold_locked(ill); 6533 mutex_exit(&ill->ill_lock); 6534 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6535 B_FALSE); 6536 return; 6537 default: 6538 break; 6539 } 6540 /* FALLTHRU */ 6541 default: 6542 putnext(q, mp); 6543 return; 6544 } 6545 6546 
BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6547 /* 6548 * if db_ref > 1 then copymsg and free original. Packet may be 6549 * changed and do not want other entity who has a reference to this 6550 * message to trip over the changes. This is a blind change because 6551 * trying to catch all places that might change packet is too 6552 * difficult (since it may be a module above this one). 6553 */ 6554 if (mp->b_datap->db_ref > 1) { 6555 mblk_t *mp1; 6556 6557 mp1 = copymsg(mp); 6558 freemsg(mp); 6559 if (mp1 == NULL) { 6560 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6561 return; 6562 } 6563 mp = mp1; 6564 } 6565 first_mp = mp; 6566 if (mctl_present) { 6567 hada_mp = first_mp; 6568 mp = first_mp->b_cont; 6569 } 6570 6571 ip6h = (ip6_t *)mp->b_rptr; 6572 6573 /* check for alignment and full IPv6 header */ 6574 if (!OK_32PTR((uchar_t *)ip6h) || 6575 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6576 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6577 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6578 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6579 freemsg(first_mp); 6580 return; 6581 } 6582 ip6h = (ip6_t *)mp->b_rptr; 6583 } 6584 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6585 IPV6_DEFAULT_VERS_AND_FLOW) { 6586 /* 6587 * It may be a bit too expensive to do this mapped address 6588 * check here, but in the interest of robustness, it seems 6589 * like the correct place. 6590 * TODO: Avoid this check for e.g. 
connected TCP sockets 6591 */ 6592 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6593 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6594 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6595 freemsg(first_mp); 6596 return; 6597 } 6598 6599 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6600 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6601 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6602 freemsg(first_mp); 6603 return; 6604 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6605 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6606 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6607 freemsg(first_mp); 6608 return; 6609 } 6610 6611 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6612 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6613 } else { 6614 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6615 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6616 freemsg(first_mp); 6617 } 6618 } 6619 6620 /* 6621 * Walk through the IPv6 packet in mp and see if there's an AH header 6622 * in it. See if the AH header needs to get done before other headers in 6623 * the packet. (Worker function for ipsec_early_ah_v6().) 6624 */ 6625 #define IPSEC_HDR_DONT_PROCESS 0 6626 #define IPSEC_HDR_PROCESS 1 6627 #define IPSEC_MEMORY_ERROR 2 6628 static int 6629 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6630 { 6631 uint_t length; 6632 uint_t ehdrlen; 6633 uint8_t *whereptr; 6634 uint8_t *endptr; 6635 uint8_t *nexthdrp; 6636 ip6_dest_t *desthdr; 6637 ip6_rthdr_t *rthdr; 6638 ip6_t *ip6h; 6639 6640 /* 6641 * For now just pullup everything. In general, the less pullups, 6642 * the better, but there's so much squirrelling through anyway, 6643 * it's just easier this way. 
6644 */ 6645 if (!pullupmsg(mp, -1)) { 6646 return (IPSEC_MEMORY_ERROR); 6647 } 6648 6649 ip6h = (ip6_t *)mp->b_rptr; 6650 length = IPV6_HDR_LEN; 6651 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6652 endptr = mp->b_wptr; 6653 6654 /* 6655 * We can't just use the argument nexthdr in the place 6656 * of nexthdrp becaue we don't dereference nexthdrp 6657 * till we confirm whether it is a valid address. 6658 */ 6659 nexthdrp = &ip6h->ip6_nxt; 6660 while (whereptr < endptr) { 6661 /* Is there enough left for len + nexthdr? */ 6662 if (whereptr + MIN_EHDR_LEN > endptr) 6663 return (IPSEC_MEMORY_ERROR); 6664 6665 switch (*nexthdrp) { 6666 case IPPROTO_HOPOPTS: 6667 case IPPROTO_DSTOPTS: 6668 /* Assumes the headers are identical for hbh and dst */ 6669 desthdr = (ip6_dest_t *)whereptr; 6670 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6671 if ((uchar_t *)desthdr + ehdrlen > endptr) 6672 return (IPSEC_MEMORY_ERROR); 6673 /* 6674 * Return DONT_PROCESS because of potential Mobile IPv6 6675 * cruft for destination options. 6676 */ 6677 if (*nexthdrp == IPPROTO_DSTOPTS) 6678 return (IPSEC_HDR_DONT_PROCESS); 6679 nexthdrp = &desthdr->ip6d_nxt; 6680 break; 6681 case IPPROTO_ROUTING: 6682 rthdr = (ip6_rthdr_t *)whereptr; 6683 6684 /* 6685 * If there's more hops left on the routing header, 6686 * return now with DON'T PROCESS. 
6687 */ 6688 if (rthdr->ip6r_segleft > 0) 6689 return (IPSEC_HDR_DONT_PROCESS); 6690 6691 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6692 if ((uchar_t *)rthdr + ehdrlen > endptr) 6693 return (IPSEC_MEMORY_ERROR); 6694 nexthdrp = &rthdr->ip6r_nxt; 6695 break; 6696 case IPPROTO_FRAGMENT: 6697 /* Wait for reassembly */ 6698 return (IPSEC_HDR_DONT_PROCESS); 6699 case IPPROTO_AH: 6700 *nexthdr = IPPROTO_AH; 6701 return (IPSEC_HDR_PROCESS); 6702 case IPPROTO_NONE: 6703 /* No next header means we're finished */ 6704 default: 6705 return (IPSEC_HDR_DONT_PROCESS); 6706 } 6707 length += ehdrlen; 6708 whereptr += ehdrlen; 6709 } 6710 panic("ipsec_needs_processing_v6"); 6711 /*NOTREACHED*/ 6712 } 6713 6714 /* 6715 * Path for AH if options are present. If this is the first time we are 6716 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6717 * Otherwise, just fanout. Return value answers the boolean question: 6718 * "Did I consume the mblk you sent me?" 6719 * 6720 * Sometimes AH needs to be done before other IPv6 headers for security 6721 * reasons. This function (and its ipsec_needs_processing_v6() above) 6722 * indicates if that is so, and fans out to the appropriate IPsec protocol 6723 * for the datagram passed in. 6724 */ 6725 static boolean_t 6726 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6727 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6728 { 6729 mblk_t *mp; 6730 uint8_t nexthdr; 6731 ipsec_in_t *ii = NULL; 6732 ah_t *ah; 6733 ipsec_status_t ipsec_rc; 6734 6735 ASSERT((hada_mp == NULL) || (!mctl_present)); 6736 6737 switch (ipsec_needs_processing_v6( 6738 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6739 case IPSEC_MEMORY_ERROR: 6740 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6741 freemsg(hada_mp); 6742 freemsg(first_mp); 6743 return (B_TRUE); 6744 case IPSEC_HDR_DONT_PROCESS: 6745 return (B_FALSE); 6746 } 6747 6748 /* Default means send it to AH! 
*/ 6749 ASSERT(nexthdr == IPPROTO_AH); 6750 if (!mctl_present) { 6751 mp = first_mp; 6752 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 6753 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6754 "allocation failure.\n")); 6755 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6756 freemsg(hada_mp); 6757 freemsg(mp); 6758 return (B_TRUE); 6759 } 6760 /* 6761 * Store the ill_index so that when we come back 6762 * from IPSEC we ride on the same queue. 6763 */ 6764 ii = (ipsec_in_t *)first_mp->b_rptr; 6765 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6766 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 6767 first_mp->b_cont = mp; 6768 } 6769 /* 6770 * Cache hardware acceleration info. 6771 */ 6772 if (hada_mp != NULL) { 6773 ASSERT(ii != NULL); 6774 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6775 "caching data attr.\n")); 6776 ii->ipsec_in_accelerated = B_TRUE; 6777 ii->ipsec_in_da = hada_mp; 6778 } 6779 6780 if (!ipsec_loaded()) { 6781 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 6782 return (B_TRUE); 6783 } 6784 6785 ah = ipsec_inbound_ah_sa(first_mp); 6786 if (ah == NULL) 6787 return (B_TRUE); 6788 ASSERT(ii->ipsec_in_ah_sa != NULL); 6789 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6790 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6791 6792 switch (ipsec_rc) { 6793 case IPSEC_STATUS_SUCCESS: 6794 /* we're done with IPsec processing, send it up */ 6795 ip_fanout_proto_again(first_mp, ill, ill, ire); 6796 break; 6797 case IPSEC_STATUS_FAILED: 6798 BUMP_MIB(&ip6_mib, ipv6InDiscards); 6799 break; 6800 case IPSEC_STATUS_PENDING: 6801 /* no action needed */ 6802 break; 6803 } 6804 return (B_TRUE); 6805 } 6806 6807 /* 6808 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6809 * ip_rput_v6 has already verified alignment, the min length, the version, 6810 * and db_ref = 1. 6811 * 6812 * The ill passed in (the arg named inill) is the ill that the packet 6813 * actually arrived on. 
We need to remember this when saving the 6814 * input interface index into potential IPV6_PKTINFO data in 6815 * ip_add_info_v6(). 6816 */ 6817 void 6818 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6819 uint_t flags, mblk_t *hada_mp) 6820 { 6821 ire_t *ire = NULL; 6822 queue_t *rq; 6823 ill_t *ill = inill; 6824 ipif_t *ipif; 6825 uint8_t *whereptr; 6826 uint8_t nexthdr; 6827 uint16_t remlen; 6828 uint_t prev_nexthdr_offset; 6829 uint_t used; 6830 size_t pkt_len; 6831 uint16_t ip6_len; 6832 uint_t hdr_len; 6833 boolean_t mctl_present; 6834 mblk_t *first_mp; 6835 mblk_t *first_mp1; 6836 boolean_t no_forward; 6837 ip6_hbh_t *hbhhdr; 6838 boolean_t no_cksum = (flags & IP6_IN_NOCKSUM); 6839 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6840 conn_t *connp; 6841 int off; 6842 ilm_t *ilm; 6843 uint32_t ports; 6844 uint_t ipif_id = 0; 6845 zoneid_t zoneid = GLOBAL_ZONEID; 6846 6847 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6848 6849 if (hada_mp != NULL) { 6850 /* 6851 * It's an IPsec accelerated packet. 6852 * Keep a pointer to the data attributes around until 6853 * we allocate the ipsecinfo structure. 6854 */ 6855 IPSECHW_DEBUG(IPSECHW_PKT, 6856 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6857 hada_mp->b_cont = NULL; 6858 /* 6859 * Since it is accelerated, it came directly from 6860 * the ill. 6861 */ 6862 ASSERT(mctl_present == B_FALSE); 6863 ASSERT(mp->b_datap->db_type != M_CTL); 6864 } 6865 6866 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6867 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6868 6869 if (mp->b_cont == NULL) 6870 pkt_len = mp->b_wptr - mp->b_rptr; 6871 else 6872 pkt_len = msgdsize(mp); 6873 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6874 6875 /* 6876 * Check for bogus (too short packet) and packet which 6877 * was padded by the link layer. 
6878 */ 6879 if (ip6_len != pkt_len) { 6880 ssize_t diff; 6881 6882 if (ip6_len > pkt_len) { 6883 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 6884 ip6_len, pkt_len)); 6885 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 6886 freemsg(hada_mp); 6887 freemsg(first_mp); 6888 return; 6889 } 6890 diff = (ssize_t)(pkt_len - ip6_len); 6891 6892 if (!adjmsg(mp, -diff)) { 6893 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6894 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6895 freemsg(hada_mp); 6896 freemsg(first_mp); 6897 return; 6898 } 6899 pkt_len -= diff; 6900 } 6901 6902 /* 6903 * XXX When zero-copy support is added, this turning off of 6904 * checksum flag will need to be done more selectively. 6905 */ 6906 mp->b_datap->db_struioun.cksum.flags &= ~HCK_PARTIALCKSUM; 6907 6908 nexthdr = ip6h->ip6_nxt; 6909 6910 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6911 (uchar_t *)ip6h); 6912 whereptr = (uint8_t *)&ip6h[1]; 6913 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6914 6915 /* Process hop by hop header options */ 6916 if (nexthdr == IPPROTO_HOPOPTS) { 6917 uint_t ehdrlen; 6918 uint8_t *optptr; 6919 6920 if (remlen < MIN_EHDR_LEN) 6921 goto pkt_too_short; 6922 if (mp->b_cont != NULL && 6923 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6924 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6925 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6926 freemsg(hada_mp); 6927 freemsg(first_mp); 6928 return; 6929 } 6930 ip6h = (ip6_t *)mp->b_rptr; 6931 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6932 } 6933 hbhhdr = (ip6_hbh_t *)whereptr; 6934 nexthdr = hbhhdr->ip6h_nxt; 6935 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6936 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6937 6938 if (remlen < ehdrlen) 6939 goto pkt_too_short; 6940 if (mp->b_cont != NULL && 6941 whereptr + ehdrlen > mp->b_wptr) { 6942 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6943 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6944 freemsg(hada_mp); 6945 freemsg(first_mp); 
6946 return; 6947 } 6948 ip6h = (ip6_t *)mp->b_rptr; 6949 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6950 hbhhdr = (ip6_hbh_t *)whereptr; 6951 } 6952 6953 optptr = whereptr + 2; 6954 whereptr += ehdrlen; 6955 remlen -= ehdrlen; 6956 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 6957 ehdrlen - 2, IPPROTO_HOPOPTS)) { 6958 case -1: 6959 /* 6960 * Packet has been consumed and any 6961 * needed ICMP messages sent. 6962 */ 6963 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 6964 freemsg(hada_mp); 6965 return; 6966 case 0: 6967 /* no action needed */ 6968 break; 6969 case 1: 6970 /* Known router alert */ 6971 goto ipv6forus; 6972 } 6973 } 6974 6975 /* 6976 * On incoming v6 multicast packets we will bypass the ire table, 6977 * and assume that the read queue corresponds to the targetted 6978 * interface. 6979 * 6980 * The effect of this is the same as the IPv4 original code, but is 6981 * much cleaner I think. See ip_rput for how that was done. 6982 */ 6983 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 6984 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 6985 /* 6986 * XXX TODO Give to mrouted to for multicast forwarding. 
6987 */ 6988 ILM_WALKER_HOLD(ill); 6989 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 6990 ILM_WALKER_RELE(ill); 6991 if (ilm == NULL) { 6992 if (ip_debug > 3) { 6993 /* ip2dbg */ 6994 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 6995 " which is not for us: %s\n", AF_INET6, 6996 &ip6h->ip6_dst); 6997 } 6998 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6999 freemsg(hada_mp); 7000 freemsg(first_mp); 7001 return; 7002 } 7003 if (ip_debug > 3) { 7004 /* ip2dbg */ 7005 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7006 AF_INET6, &ip6h->ip6_dst); 7007 } 7008 rq = ill->ill_rq; 7009 zoneid = GLOBAL_ZONEID; 7010 goto ipv6forus; 7011 } 7012 7013 ipif = ill->ill_ipif; 7014 7015 /* 7016 * If a packet was received on an interface that is a 6to4 tunnel, 7017 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7018 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7019 * the 6to4 prefix of the address configured on the receiving interface. 7020 * Otherwise, the packet was delivered to this interface in error and 7021 * the packet must be dropped. 7022 */ 7023 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7024 7025 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7026 &ip6h->ip6_dst)) { 7027 if (ip_debug > 2) { 7028 /* ip1dbg */ 7029 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7030 "addressed packet which is not for us: " 7031 "%s\n", AF_INET6, &ip6h->ip6_dst); 7032 } 7033 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7034 freemsg(first_mp); 7035 return; 7036 } 7037 } 7038 7039 /* 7040 * Find an ire that matches destination. For link-local addresses 7041 * we have to match the ill. 7042 * TBD for site local addresses. 
7043 */ 7044 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7045 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7046 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, 7047 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7048 } else { 7049 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 7050 } 7051 if (ire == NULL) { 7052 /* 7053 * No matching IRE found. Mark this packet as having 7054 * originated externally. 7055 */ 7056 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7057 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7058 if (!(ill->ill_flags & ILLF_ROUTER)) 7059 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7060 freemsg(hada_mp); 7061 freemsg(first_mp); 7062 return; 7063 } 7064 if (ip6h->ip6_hops <= 1) { 7065 if (hada_mp != NULL) 7066 goto hada_drop; 7067 icmp_time_exceeded_v6(WR(q), first_mp, 7068 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7069 return; 7070 } 7071 /* 7072 * Per RFC 3513 section 2.5.2, we must not forward packets with 7073 * an unspecified source address. 7074 */ 7075 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7076 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7077 freemsg(hada_mp); 7078 freemsg(first_mp); 7079 return; 7080 } 7081 mp->b_prev = (mblk_t *)(uintptr_t) 7082 ill->ill_phyint->phyint_ifindex; 7083 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7084 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7085 ALL_ZONES); 7086 return; 7087 } 7088 ipif_id = ire->ire_ipif->ipif_seqid; 7089 /* we have a matching IRE */ 7090 if (ire->ire_stq != NULL) { 7091 ill_group_t *ill_group; 7092 ill_group_t *ire_group; 7093 7094 /* 7095 * To be quicker, we may wish not to chase pointers 7096 * (ire->ire_ipif->ipif_ill...) and instead store the 7097 * forwarding policy in the ire. An unfortunate side- 7098 * effect of this would be requiring an ire flush whenever 7099 * the ILLF_ROUTER flag changes. For now, chase pointers 7100 * once and store in the boolean no_forward. 
7101 * 7102 * This appears twice to keep it out of the non-forwarding, 7103 * yes-it's-for-us-on-the-right-interface case. 7104 */ 7105 no_forward = ((ill->ill_flags & 7106 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7107 7108 7109 ASSERT(first_mp == mp); 7110 /* 7111 * This ire has a send-to queue - forward the packet. 7112 */ 7113 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7114 freemsg(hada_mp); 7115 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7116 if (no_forward) 7117 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7118 freemsg(mp); 7119 ire_refrele(ire); 7120 return; 7121 } 7122 if (ip6h->ip6_hops <= 1) { 7123 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7124 icmp_time_exceeded_v6(WR(q), mp, 7125 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7126 ire_refrele(ire); 7127 return; 7128 } 7129 /* 7130 * Per RFC 3513 section 2.5.2, we must not forward packets with 7131 * an unspecified source address. 7132 */ 7133 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7134 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7135 freemsg(hada_mp); 7136 freemsg(mp); 7137 ire_refrele(ire); 7138 return; 7139 } 7140 if (pkt_len > ire->ire_max_frag) { 7141 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7142 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7143 ll_multicast, B_TRUE); 7144 ire_refrele(ire); 7145 return; 7146 } 7147 7148 /* 7149 * Check to see if we're forwarding the packet to a 7150 * different link from which it came. If so, check the 7151 * source and destination addresses since routers must not 7152 * forward any packets with link-local source or 7153 * destination addresses to other links. Otherwise (if 7154 * we're forwarding onto the same link), conditionally send 7155 * a redirect message. 
7156 */ 7157 ill_group = ill->ill_group; 7158 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7159 if (ire->ire_rfq != q && (ill_group == NULL || 7160 ill_group != ire_group)) { 7161 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7162 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7163 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7164 freemsg(mp); 7165 ire_refrele(ire); 7166 return; 7167 } 7168 /* TBD add site-local check at site boundary? */ 7169 } else if (ipv6_send_redirects) { 7170 in6_addr_t *v6targ; 7171 mblk_t *mp1; 7172 in6_addr_t gw_addr_v6; 7173 ire_t *src_ire_v6 = NULL; 7174 7175 /* 7176 * Don't send a redirect when forwarding a source 7177 * routed packet. 7178 */ 7179 if (ip_source_routed_v6(ip6h, mp)) 7180 goto forward; 7181 7182 mutex_enter(&ire->ire_lock); 7183 gw_addr_v6 = ire->ire_gateway_addr_v6; 7184 mutex_exit(&ire->ire_lock); 7185 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7186 v6targ = &gw_addr_v6; 7187 /* 7188 * We won't send redirects to a router 7189 * that doesn't have a link local 7190 * address, but will forward. 7191 */ 7192 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7193 BUMP_MIB(ill->ill_ip6_mib, 7194 ipv6InAddrErrors); 7195 goto forward; 7196 } 7197 } else { 7198 v6targ = &ip6h->ip6_dst; 7199 } 7200 7201 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7202 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7203 ALL_ZONES, 0, MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7204 7205 if (src_ire_v6 != NULL) { 7206 /* 7207 * The source is directly connected. 
7208 */ 7209 mp1 = copymsg(mp); 7210 if (mp1 != NULL) { 7211 icmp_send_redirect_v6(WR(q), 7212 mp1, v6targ, &ip6h->ip6_dst, 7213 ill, B_FALSE); 7214 } 7215 ire_refrele(src_ire_v6); 7216 } 7217 } 7218 7219 forward: 7220 /* Hoplimit verified above */ 7221 ip6h->ip6_hops--; 7222 UPDATE_IB_PKT_COUNT(ire); 7223 ire->ire_last_used_time = lbolt; 7224 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7225 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7226 IRE_REFRELE(ire); 7227 return; 7228 } 7229 rq = ire->ire_rfq; 7230 7231 /* 7232 * Need to put on correct queue for reassembly to find it. 7233 * No need to use put() since reassembly has its own locks. 7234 * Note: multicast packets and packets destined to addresses 7235 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7236 * the arriving ill. 7237 */ 7238 if (rq != q) { 7239 boolean_t check_multi = B_TRUE; 7240 ill_group_t *ill_group = NULL; 7241 ill_group_t *ire_group = NULL; 7242 ill_t *ire_ill = NULL; 7243 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7244 7245 /* 7246 * To be quicker, we may wish not to chase pointers 7247 * (ire->ire_ipif->ipif_ill...) and instead store the 7248 * forwarding policy in the ire. An unfortunate side- 7249 * effect of this would be requiring an ire flush whenever 7250 * the ILLF_ROUTER flag changes. For now, chase pointers 7251 * once and store in the boolean no_forward. 7252 */ 7253 no_forward = ((ill->ill_flags & 7254 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7255 7256 ill_group = ill->ill_group; 7257 if (rq != NULL) { 7258 ire_ill = (ill_t *)(rq->q_ptr); 7259 ire_group = ire_ill->ill_group; 7260 } 7261 7262 /* 7263 * If it's part of the same IPMP group, or if it's a legal 7264 * address on the 'usesrc' interface, then bypass strict 7265 * checks. 
7266 */ 7267 if (ill_group != NULL && ill_group == ire_group) { 7268 check_multi = B_FALSE; 7269 } else if (ill_ifindex != 0 && ire_ill != NULL && 7270 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7271 check_multi = B_FALSE; 7272 } 7273 7274 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7275 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7276 /* 7277 * This packet came in on an interface other than the 7278 * one associated with the destination address 7279 * and we are strict about matches. 7280 * 7281 * As long as the ills belong to the same group, 7282 * we don't consider them to arriving on the wrong 7283 * interface. Thus, when the switch is doing inbound 7284 * load spreading, we won't drop packets when we 7285 * are doing strict multihoming checks. 7286 */ 7287 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7288 freemsg(hada_mp); 7289 freemsg(first_mp); 7290 ire_refrele(ire); 7291 return; 7292 } 7293 7294 if (rq != NULL) 7295 q = rq; 7296 7297 ill = (ill_t *)q->q_ptr; 7298 ASSERT(ill); 7299 } 7300 7301 zoneid = ire->ire_zoneid; 7302 UPDATE_IB_PKT_COUNT(ire); 7303 ire->ire_last_used_time = lbolt; 7304 /* Don't use the ire after this point. */ 7305 ire_refrele(ire); 7306 ipv6forus: 7307 /* 7308 * Looks like this packet is for us one way or another. 7309 * This is where we'll process destination headers etc. 
7310 */ 7311 for (; ; ) { 7312 switch (nexthdr) { 7313 case IPPROTO_TCP: { 7314 uint16_t *up; 7315 uint32_t sum; 7316 dblk_t *dp; 7317 int offset; 7318 7319 hdr_len = pkt_len - remlen; 7320 7321 if (hada_mp != NULL) { 7322 ip0dbg(("tcp hada drop\n")); 7323 goto hada_drop; 7324 } 7325 7326 7327 /* TCP needs all of the TCP header */ 7328 if (remlen < TCP_MIN_HEADER_LENGTH) 7329 goto pkt_too_short; 7330 if (mp->b_cont != NULL && 7331 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7332 if (!pullupmsg(mp, 7333 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7334 BUMP_MIB(ill->ill_ip6_mib, 7335 ipv6InDiscards); 7336 freemsg(first_mp); 7337 return; 7338 } 7339 ip6h = (ip6_t *)mp->b_rptr; 7340 whereptr = (uint8_t *)ip6h + hdr_len; 7341 } 7342 /* 7343 * Extract the offset field from the TCP header. 7344 */ 7345 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7346 if (offset != 5) { 7347 if (offset < 5) { 7348 ip1dbg(("ip_rput_data_v6: short " 7349 "TCP data offset")); 7350 BUMP_MIB(ill->ill_ip6_mib, 7351 ipv6InDiscards); 7352 freemsg(first_mp); 7353 return; 7354 } 7355 /* 7356 * There must be TCP options. 7357 * Make sure we can grab them. 7358 */ 7359 offset <<= 2; 7360 if (remlen < offset) 7361 goto pkt_too_short; 7362 if (mp->b_cont != NULL && 7363 whereptr + offset > mp->b_wptr) { 7364 if (!pullupmsg(mp, 7365 hdr_len + offset)) { 7366 BUMP_MIB(ill->ill_ip6_mib, 7367 ipv6InDiscards); 7368 freemsg(first_mp); 7369 return; 7370 } 7371 ip6h = (ip6_t *)mp->b_rptr; 7372 whereptr = (uint8_t *)ip6h + hdr_len; 7373 } 7374 } 7375 7376 /* 7377 * If packet is being looped back locally checksums 7378 * aren't used 7379 */ 7380 if (no_cksum) { 7381 if (mp->b_datap->db_type == M_DATA) { 7382 /* 7383 * M_DATA mblk, so init mblk (chain) 7384 * for no struio(). 
7385 */ 7386 mblk_t *mp1 = mp; 7387 7388 do { 7389 mp1->b_datap->db_struioflag = 0; 7390 } while ((mp1 = mp1->b_cont) != NULL); 7391 } 7392 goto tcp_fanout; 7393 } 7394 7395 up = (uint16_t *)&ip6h->ip6_src; 7396 /* 7397 * TCP checksum calculation. First sum up the 7398 * pseudo-header fields: 7399 * - Source IPv6 address 7400 * - Destination IPv6 address 7401 * - TCP payload length 7402 * - TCP protocol ID 7403 * XXX need zero-copy support here 7404 */ 7405 sum = htons(IPPROTO_TCP + remlen) + 7406 up[0] + up[1] + up[2] + up[3] + 7407 up[4] + up[5] + up[6] + up[7] + 7408 up[8] + up[9] + up[10] + up[11] + 7409 up[12] + up[13] + up[14] + up[15]; 7410 sum = (sum & 0xffff) + (sum >> 16); 7411 dp = mp->b_datap; 7412 if (dp->db_type != M_DATA || dp->db_ref > 1) { 7413 /* 7414 * Not M_DATA mblk or its a dup, so do the 7415 * checksum now. 7416 */ 7417 sum = IP_CSUM(mp, hdr_len, sum); 7418 if (sum) { 7419 /* checksum failed */ 7420 ip1dbg(("ip_rput_data_v6: TCP checksum" 7421 " failed %x off %d\n", 7422 sum, hdr_len)); 7423 BUMP_MIB(&ip_mib, tcpInErrs); 7424 freemsg(first_mp); 7425 return; 7426 } 7427 } else { 7428 /* 7429 * M_DATA mblk and not a dup 7430 * compute checksum here 7431 */ 7432 off = (int)(whereptr - mp->b_rptr); 7433 7434 if (IP_CSUM(mp, off, sum)) { 7435 BUMP_MIB(&ip_mib, tcpInErrs); 7436 ipcsumdbg("ip_rput_data_v6 " 7437 "swcksumerr\n", mp); 7438 freemsg(first_mp); 7439 return; 7440 } 7441 } 7442 tcp_fanout: 7443 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7444 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7445 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7446 return; 7447 } 7448 case IPPROTO_SCTP: 7449 { 7450 sctp_hdr_t *sctph; 7451 uint32_t calcsum, pktsum; 7452 uint_t hdr_len = pkt_len - remlen; 7453 7454 /* SCTP needs all of the SCTP header */ 7455 if (remlen < sizeof (*sctph)) { 7456 goto pkt_too_short; 7457 } 7458 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7459 ASSERT(mp->b_cont != NULL); 7460 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 
7461 BUMP_MIB(ill->ill_ip6_mib, 7462 ipv6InDiscards); 7463 freemsg(mp); 7464 return; 7465 } 7466 ip6h = (ip6_t *)mp->b_rptr; 7467 whereptr = (uint8_t *)ip6h + hdr_len; 7468 } 7469 7470 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7471 if (!no_cksum) { 7472 /* checksum */ 7473 pktsum = sctph->sh_chksum; 7474 sctph->sh_chksum = 0; 7475 calcsum = sctp_cksum(mp, hdr_len); 7476 if (calcsum != pktsum) { 7477 BUMP_MIB(&sctp_mib, sctpChecksumError); 7478 freemsg(mp); 7479 return; 7480 } 7481 sctph->sh_chksum = pktsum; 7482 } 7483 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7484 if ((connp = sctp_find_conn(&ip6h->ip6_src, 7485 &ip6h->ip6_dst, ports, ipif_id, zoneid)) == NULL) { 7486 ip_fanout_sctp_raw(first_mp, ill, 7487 (ipha_t *)ip6h, B_FALSE, ports, 7488 mctl_present, 7489 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7490 B_TRUE, ipif_id, zoneid); 7491 return; 7492 } 7493 BUMP_MIB(&ip_mib, ipInDelivers); 7494 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7495 B_FALSE, mctl_present); 7496 return; 7497 } 7498 case IPPROTO_UDP: { 7499 uint16_t *up; 7500 uint32_t sum; 7501 7502 hdr_len = pkt_len - remlen; 7503 7504 #define UDPH_SIZE 8 7505 7506 if (hada_mp != NULL) { 7507 ip0dbg(("udp hada drop\n")); 7508 goto hada_drop; 7509 } 7510 7511 /* Verify that at least the ports are present */ 7512 if (remlen < UDPH_SIZE) 7513 goto pkt_too_short; 7514 if (mp->b_cont != NULL && 7515 whereptr + UDPH_SIZE > mp->b_wptr) { 7516 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7517 BUMP_MIB(ill->ill_ip6_mib, 7518 ipv6InDiscards); 7519 freemsg(first_mp); 7520 return; 7521 } 7522 ip6h = (ip6_t *)mp->b_rptr; 7523 whereptr = (uint8_t *)ip6h + hdr_len; 7524 } 7525 #undef UDPH_SIZE 7526 /* 7527 * If packet is being looped back locally checksums 7528 * aren't used 7529 */ 7530 if (no_cksum) 7531 goto udp_fanout; 7532 7533 /* 7534 * Before going through the regular checksum 7535 * calculation, make sure the received checksum 7536 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7537 * in a UDP packet (within IPv6 packet) is invalid 7538 * and should be replaced by 0xffff. This makes 7539 * sense as regular checksum calculation will 7540 * pass for both the cases i.e. 0x0000 and 0xffff. 7541 * Removing one of the case makes error detection 7542 * stronger. 7543 */ 7544 7545 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7546 /* 0x0000 checksum is invalid */ 7547 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7548 "checksum value 0x0000\n")); 7549 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7550 freemsg(first_mp); 7551 return; 7552 } 7553 7554 up = (uint16_t *)&ip6h->ip6_src; 7555 7556 /* 7557 * UDP checksum calculation. First sum up the 7558 * pseudo-header fields: 7559 * - Source IPv6 address 7560 * - Destination IPv6 address 7561 * - UDP payload length 7562 * - UDP protocol ID 7563 */ 7564 7565 sum = htons(IPPROTO_UDP + remlen) + 7566 up[0] + up[1] + up[2] + up[3] + 7567 up[4] + up[5] + up[6] + up[7] + 7568 up[8] + up[9] + up[10] + up[11] + 7569 up[12] + up[13] + up[14] + up[15]; 7570 7571 sum = (sum & 0xffff) + (sum >> 16); 7572 /* Next sum in the UDP packet */ 7573 sum = IP_CSUM(mp, hdr_len, sum); 7574 if (sum) { 7575 /* UDP checksum failed */ 7576 ip1dbg(("ip_rput_data_v6: UDP checksum " 7577 "failed %x\n", 7578 sum)); 7579 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7580 freemsg(first_mp); 7581 return; 7582 } 7583 goto udp_fanout; 7584 } 7585 case IPPROTO_ICMPV6: { 7586 uint16_t *up; 7587 uint32_t sum; 7588 uint_t hdr_len = pkt_len - remlen; 7589 7590 if (hada_mp != NULL) { 7591 ip0dbg(("icmp hada drop\n")); 7592 goto hada_drop; 7593 } 7594 7595 /* 7596 * If packet is being looped back locally checksums 7597 * aren't used 7598 */ 7599 if (no_cksum) 7600 goto icmp_fanout; 7601 7602 up = (uint16_t *)&ip6h->ip6_src; 7603 sum = htons(IPPROTO_ICMPV6 + remlen) + 7604 up[0] + up[1] + up[2] + up[3] + 7605 up[4] + up[5] + up[6] + up[7] + 7606 up[8] + up[9] + up[10] + up[11] + 7607 up[12] + up[13] + up[14] + up[15]; 
7608 sum = (sum & 0xffff) + (sum >> 16); 7609 sum = IP_CSUM(mp, hdr_len, sum); 7610 if (sum) { 7611 /* IPv6 ICMP checksum failed */ 7612 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7613 "failed %x\n", 7614 sum)); 7615 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7616 BUMP_MIB(ill->ill_icmp6_mib, 7617 ipv6IfIcmpInErrors); 7618 freemsg(first_mp); 7619 return; 7620 } 7621 7622 icmp_fanout: 7623 /* Check variable for testing applications */ 7624 if (ipv6_drop_inbound_icmpv6) { 7625 freemsg(first_mp); 7626 return; 7627 } 7628 /* 7629 * Assume that there is always at least one conn for 7630 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7631 * where there is no conn. 7632 */ 7633 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7634 ASSERT(!(ill->ill_phyint->phyint_flags & 7635 PHYI_LOOPBACK)); 7636 /* 7637 * In the multicast case, applications may have 7638 * joined the group from different zones, so we 7639 * need to deliver the packet to each of them. 7640 * Loop through the multicast memberships 7641 * structures (ilm) on the receive ill and send 7642 * a copy of the packet up each matching one. 7643 */ 7644 ILM_WALKER_HOLD(ill); 7645 for (ilm = ill->ill_ilm; ilm != NULL; 7646 ilm = ilm->ilm_next) { 7647 if (ilm->ilm_flags & ILM_DELETED) 7648 continue; 7649 if (!IN6_ARE_ADDR_EQUAL( 7650 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7651 continue; 7652 if (!ipif_lookup_zoneid(ill, 7653 ilm->ilm_zoneid, IPIF_UP, NULL)) 7654 continue; 7655 7656 first_mp1 = ip_copymsg(first_mp); 7657 if (first_mp1 == NULL) 7658 continue; 7659 icmp_inbound_v6(q, first_mp1, ill, 7660 hdr_len, mctl_present, 0, 7661 ilm->ilm_zoneid); 7662 } 7663 ILM_WALKER_RELE(ill); 7664 } else { 7665 first_mp1 = ip_copymsg(first_mp); 7666 if (first_mp1 != NULL) 7667 icmp_inbound_v6(q, first_mp1, ill, 7668 hdr_len, mctl_present, 0, zoneid); 7669 } 7670 } 7671 /* FALLTHRU */ 7672 default: { 7673 /* 7674 * Handle protocols with which IPv6 is less intimate. 
7675 */ 7676 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7677 7678 if (hada_mp != NULL) { 7679 ip0dbg(("default hada drop\n")); 7680 goto hada_drop; 7681 } 7682 7683 /* 7684 * Enable sending ICMP for "Unknown" nexthdr 7685 * case. i.e. where we did not FALLTHRU from 7686 * IPPROTO_ICMPV6 processing case above. 7687 * If we did FALLTHRU, then the packet has already been 7688 * processed for IPPF, don't process it again in 7689 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7690 * flags 7691 */ 7692 if (nexthdr != IPPROTO_ICMPV6) 7693 proto_flags |= IP_FF_SEND_ICMP; 7694 else 7695 proto_flags |= IP6_NO_IPPOLICY; 7696 7697 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7698 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7699 mctl_present, zoneid); 7700 return; 7701 } 7702 7703 case IPPROTO_DSTOPTS: { 7704 uint_t ehdrlen; 7705 uint8_t *optptr; 7706 ip6_dest_t *desthdr; 7707 7708 /* Check if AH is present. */ 7709 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7710 ire, hada_mp, zoneid)) { 7711 ip0dbg(("dst early hada drop\n")); 7712 return; 7713 } 7714 7715 /* 7716 * Reinitialize pointers, as ipsec_early_ah_v6() does 7717 * complete pullups. We don't have to do more pullups 7718 * as a result. 7719 */ 7720 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7721 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7722 ip6h = (ip6_t *)mp->b_rptr; 7723 7724 if (remlen < MIN_EHDR_LEN) 7725 goto pkt_too_short; 7726 7727 desthdr = (ip6_dest_t *)whereptr; 7728 nexthdr = desthdr->ip6d_nxt; 7729 prev_nexthdr_offset = (uint_t)(whereptr - 7730 (uint8_t *)ip6h); 7731 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7732 if (remlen < ehdrlen) 7733 goto pkt_too_short; 7734 optptr = whereptr + 2; 7735 /* 7736 * Note: XXX This code does not seem to make 7737 * distinction between Destination Options Header 7738 * being before/after Routing Header which can 7739 * happen if we are at the end of source route. 7740 * This may become significant in future. 
7741 * (No real significant Destination Options are 7742 * defined/implemented yet ). 7743 */ 7744 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7745 ehdrlen - 2, IPPROTO_DSTOPTS)) { 7746 case -1: 7747 /* 7748 * Packet has been consumed and any needed 7749 * ICMP errors sent. 7750 */ 7751 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7752 freemsg(hada_mp); 7753 return; 7754 case 0: 7755 /* No action needed continue */ 7756 break; 7757 case 1: 7758 /* 7759 * Unnexpected return value 7760 * (Router alert is a Hop-by-Hop option) 7761 */ 7762 #ifdef DEBUG 7763 panic("ip_rput_data_v6: router " 7764 "alert hbh opt indication in dest opt"); 7765 /*NOTREACHED*/ 7766 #else 7767 freemsg(hada_mp); 7768 freemsg(first_mp); 7769 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7770 return; 7771 #endif 7772 } 7773 used = ehdrlen; 7774 break; 7775 } 7776 case IPPROTO_FRAGMENT: { 7777 ip6_frag_t *fraghdr; 7778 size_t no_frag_hdr_len; 7779 7780 if (hada_mp != NULL) { 7781 ip0dbg(("frag hada drop\n")); 7782 goto hada_drop; 7783 } 7784 7785 ASSERT(first_mp == mp); 7786 if (remlen < sizeof (ip6_frag_t)) 7787 goto pkt_too_short; 7788 7789 if (mp->b_cont != NULL && 7790 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7791 if (!pullupmsg(mp, 7792 pkt_len - remlen + sizeof (ip6_frag_t))) { 7793 BUMP_MIB(ill->ill_ip6_mib, 7794 ipv6InDiscards); 7795 freemsg(mp); 7796 return; 7797 } 7798 ip6h = (ip6_t *)mp->b_rptr; 7799 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7800 } 7801 7802 fraghdr = (ip6_frag_t *)whereptr; 7803 used = (uint_t)sizeof (ip6_frag_t); 7804 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 7805 7806 /* 7807 * Invoke the CGTP (multirouting) filtering module to 7808 * process the incoming packet. Packets identified as 7809 * duplicates must be discarded. Filtering is active 7810 * only if the the ip_cgtp_filter ndd variable is 7811 * non-zero. 
7812 */ 7813 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 7814 int cgtp_flt_pkt = 7815 ip_cgtp_filter_ops->cfo_filter_v6( 7816 inill->ill_rq, ip6h, fraghdr); 7817 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7818 freemsg(mp); 7819 return; 7820 } 7821 } 7822 7823 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 7824 remlen - used, &prev_nexthdr_offset); 7825 if (mp == NULL) { 7826 /* Reassembly is still pending */ 7827 return; 7828 } 7829 /* The first mblk are the headers before the frag hdr */ 7830 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 7831 7832 first_mp = mp; /* mp has most likely changed! */ 7833 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7834 ip6h = (ip6_t *)mp->b_rptr; 7835 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7836 whereptr = mp->b_rptr + no_frag_hdr_len; 7837 remlen = ntohs(ip6h->ip6_plen) + 7838 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7839 pkt_len = msgdsize(mp); 7840 used = 0; 7841 break; 7842 } 7843 case IPPROTO_HOPOPTS: 7844 if (hada_mp != NULL) { 7845 ip0dbg(("hop hada drop\n")); 7846 goto hada_drop; 7847 } 7848 /* 7849 * Illegal header sequence. 7850 * (Hop-by-hop headers are processed above 7851 * and required to immediately follow IPv6 header) 7852 */ 7853 icmp_param_problem_v6(WR(q), first_mp, 7854 ICMP6_PARAMPROB_NEXTHEADER, 7855 prev_nexthdr_offset, 7856 B_FALSE, B_FALSE); 7857 return; 7858 7859 case IPPROTO_ROUTING: { 7860 uint_t ehdrlen; 7861 ip6_rthdr_t *rthdr; 7862 7863 /* Check if AH is present. */ 7864 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7865 ire, hada_mp, zoneid)) { 7866 ip0dbg(("routing hada drop\n")); 7867 return; 7868 } 7869 7870 /* 7871 * Reinitialize pointers, as ipsec_early_ah_v6() does 7872 * complete pullups. We don't have to do more pullups 7873 * as a result. 
7874 */ 7875 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7876 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7877 ip6h = (ip6_t *)mp->b_rptr; 7878 7879 if (remlen < MIN_EHDR_LEN) 7880 goto pkt_too_short; 7881 rthdr = (ip6_rthdr_t *)whereptr; 7882 nexthdr = rthdr->ip6r_nxt; 7883 prev_nexthdr_offset = (uint_t)(whereptr - 7884 (uint8_t *)ip6h); 7885 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7886 if (remlen < ehdrlen) 7887 goto pkt_too_short; 7888 if (rthdr->ip6r_segleft != 0) { 7889 /* Not end of source route */ 7890 if (ll_multicast) { 7891 BUMP_MIB(ill->ill_ip6_mib, 7892 ipv6ForwProhibits); 7893 freemsg(hada_mp); 7894 freemsg(mp); 7895 return; 7896 } 7897 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7898 flags, hada_mp); 7899 return; 7900 } 7901 used = ehdrlen; 7902 break; 7903 } 7904 case IPPROTO_AH: 7905 case IPPROTO_ESP: { 7906 /* 7907 * Fast path for AH/ESP. If this is the first time 7908 * we are sending a datagram to AH/ESP, allocate 7909 * a IPSEC_IN message and prepend it. Otherwise, 7910 * just fanout. 7911 */ 7912 7913 ipsec_in_t *ii; 7914 int ipsec_rc; 7915 7916 if (!mctl_present) { 7917 ASSERT(first_mp == mp); 7918 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 7919 NULL) { 7920 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 7921 "allocation failure.\n")); 7922 BUMP_MIB(ill->ill_ip6_mib, 7923 ipv6InDiscards); 7924 freemsg(mp); 7925 return; 7926 } 7927 /* 7928 * Store the ill_index so that when we come back 7929 * from IPSEC we ride on the same queue. 7930 */ 7931 ii = (ipsec_in_t *)first_mp->b_rptr; 7932 ii->ipsec_in_ill_index = 7933 ill->ill_phyint->phyint_ifindex; 7934 ii->ipsec_in_rill_index = 7935 ii->ipsec_in_ill_index; 7936 first_mp->b_cont = mp; 7937 /* 7938 * Cache hardware acceleration info. 
7939 */ 7940 if (hada_mp != NULL) { 7941 IPSECHW_DEBUG(IPSECHW_PKT, 7942 ("ip_rput_data_v6: " 7943 "caching data attr.\n")); 7944 ii->ipsec_in_accelerated = B_TRUE; 7945 ii->ipsec_in_da = hada_mp; 7946 hada_mp = NULL; 7947 } 7948 } else { 7949 ii = (ipsec_in_t *)first_mp->b_rptr; 7950 } 7951 7952 if (!ipsec_loaded()) { 7953 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 7954 ire->ire_zoneid); 7955 return; 7956 } 7957 7958 /* select inbound SA and have IPsec process the pkt */ 7959 if (nexthdr == IPPROTO_ESP) { 7960 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 7961 if (esph == NULL) 7962 return; 7963 ASSERT(ii->ipsec_in_esp_sa != NULL); 7964 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 7965 NULL); 7966 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 7967 first_mp, esph); 7968 } else { 7969 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 7970 if (ah == NULL) 7971 return; 7972 ASSERT(ii->ipsec_in_ah_sa != NULL); 7973 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 7974 NULL); 7975 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 7976 first_mp, ah); 7977 } 7978 7979 switch (ipsec_rc) { 7980 case IPSEC_STATUS_SUCCESS: 7981 break; 7982 case IPSEC_STATUS_FAILED: 7983 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7984 /* FALLTHRU */ 7985 case IPSEC_STATUS_PENDING: 7986 return; 7987 } 7988 /* we're done with IPsec processing, send it up */ 7989 ip_fanout_proto_again(first_mp, ill, inill, ire); 7990 return; 7991 } 7992 case IPPROTO_NONE: 7993 /* All processing is done. Count as "delivered". 
*/ 7994 freemsg(hada_mp); 7995 freemsg(first_mp); 7996 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 7997 return; 7998 } 7999 whereptr += used; 8000 ASSERT(remlen >= used); 8001 remlen -= used; 8002 } 8003 /* NOTREACHED */ 8004 8005 pkt_too_short: 8006 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8007 ip6_len, pkt_len, remlen)); 8008 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8009 freemsg(hada_mp); 8010 freemsg(first_mp); 8011 return; 8012 udp_fanout: 8013 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8014 connp = NULL; 8015 } else { 8016 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8017 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8018 CONN_DEC_REF(connp); 8019 connp = NULL; 8020 } 8021 } 8022 8023 if (connp == NULL) { 8024 uint32_t ports; 8025 8026 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8027 UDP_PORTS_OFFSET); 8028 IP6_STAT(ip6_udp_slow_path); 8029 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8030 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8031 zoneid); 8032 return; 8033 } 8034 8035 if (!canputnext(connp->conn_upq)) { 8036 freemsg(first_mp); 8037 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8038 CONN_DEC_REF(connp); 8039 return; 8040 } 8041 8042 /* Initiate IPPF processing */ 8043 if (IP6_IN_IPP(flags)) { 8044 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8045 if (mp == NULL) { 8046 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8047 CONN_DEC_REF(connp); 8048 return; 8049 } 8050 } 8051 8052 if (connp->conn_ipv6_recvpktinfo || 8053 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8054 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8055 if (mp == NULL) { 8056 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8057 CONN_DEC_REF(connp); 8058 return; 8059 } 8060 } 8061 8062 IP6_STAT(ip6_udp_fast_path); 8063 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8064 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8065 putnext(connp->conn_upq, mp); 8066 8067 CONN_DEC_REF(connp); 8068 freemsg(hada_mp); 8069 
return; 8070 8071 hada_drop: 8072 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8073 /* IPsec kstats: bump counter here */ 8074 freemsg(hada_mp); 8075 freemsg(first_mp); 8076 } 8077 8078 /* 8079 * Reassemble fragment. 8080 * When it returns a completed message the first mblk will only contain 8081 * the headers prior to the fragment header. 8082 * 8083 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8084 * of the preceding header. This is needed to patch the previous header's 8085 * nexthdr field when reassembly completes. 8086 */ 8087 static mblk_t * 8088 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8089 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset) 8090 { 8091 ill_t *ill = (ill_t *)q->q_ptr; 8092 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8093 uint16_t offset; 8094 boolean_t more_frags; 8095 uint8_t nexthdr = fraghdr->ip6f_nxt; 8096 in6_addr_t *v6dst_ptr; 8097 in6_addr_t *v6src_ptr; 8098 uint_t end; 8099 uint_t hdr_length; 8100 size_t count; 8101 ipf_t *ipf; 8102 ipf_t **ipfp; 8103 ipfb_t *ipfb; 8104 mblk_t *mp1; 8105 uint8_t ecn_info = 0; 8106 size_t msg_len; 8107 mblk_t *tail_mp; 8108 mblk_t *t_mp; 8109 boolean_t pruned = B_FALSE; 8110 8111 /* 8112 * Note: Fragment offset in header is in 8-octet units. 8113 * Clearing least significant 3 bits not only extracts 8114 * it but also gets it in units of octets. 8115 */ 8116 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8117 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8118 8119 /* 8120 * Is the more frags flag on and the payload length not a multiple 8121 * of eight? 
8122 */ 8123 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8124 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8125 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8126 (uint32_t)((char *)&ip6h->ip6_plen - 8127 (char *)ip6h), B_FALSE, B_FALSE); 8128 return (NULL); 8129 } 8130 8131 v6src_ptr = &ip6h->ip6_src; 8132 v6dst_ptr = &ip6h->ip6_dst; 8133 end = remlen; 8134 8135 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8136 end += offset; 8137 8138 /* 8139 * Would fragment cause reassembled packet to have a payload length 8140 * greater than IP_MAXPACKET - the max payload size? 8141 */ 8142 if (end > IP_MAXPACKET) { 8143 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8144 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8145 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8146 (char *)ip6h), B_FALSE, B_FALSE); 8147 return (NULL); 8148 } 8149 8150 /* 8151 * This packet just has one fragment. Reassembly not 8152 * needed. 8153 */ 8154 if (!more_frags && offset == 0) { 8155 goto reass_done; 8156 } 8157 8158 /* 8159 * Drop the fragmented as early as possible, if 8160 * we don't have resource(s) to re-assemble. 8161 */ 8162 8163 if (ip_reass_queue_bytes == 0) { 8164 freemsg(mp); 8165 return (NULL); 8166 } 8167 8168 /* Record the ECN field info. */ 8169 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8170 /* 8171 * If this is not the first fragment, dump the unfragmentable 8172 * portion of the packet. 8173 */ 8174 if (offset) 8175 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8176 8177 /* 8178 * Fragmentation reassembly. Each ILL has a hash table for 8179 * queueing packets undergoing reassembly for all IPIFs 8180 * associated with the ILL. The hash is based on the packet 8181 * IP ident field. The ILL frag hash table was allocated 8182 * as a timer block at the time the ILL was created. Whenever 8183 * there is anything on the reassembly queue, the timer will 8184 * be running. 
8185 */ 8186 msg_len = mp->b_datap->db_lim - mp->b_datap->db_base; 8187 tail_mp = mp; 8188 while (tail_mp->b_cont != NULL) { 8189 tail_mp = tail_mp->b_cont; 8190 msg_len += tail_mp->b_datap->db_lim - 8191 tail_mp->b_datap->db_base; 8192 } 8193 /* 8194 * If the reassembly list for this ILL will get too big 8195 * prune it. 8196 */ 8197 8198 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8199 ip_reass_queue_bytes) { 8200 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8201 : (ip_reass_queue_bytes - msg_len)); 8202 pruned = B_TRUE; 8203 } 8204 8205 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8206 mutex_enter(&ipfb->ipfb_lock); 8207 8208 ipfp = &ipfb->ipfb_ipf; 8209 /* Try to find an existing fragment queue for this packet. */ 8210 for (;;) { 8211 ipf = ipfp[0]; 8212 if (ipf) { 8213 /* 8214 * It has to match on ident, source address, and 8215 * dest address. 8216 */ 8217 if (ipf->ipf_ident == ident && 8218 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8219 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8220 8221 /* 8222 * If we have received too many 8223 * duplicate fragments for this packet 8224 * free it. 8225 */ 8226 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8227 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8228 freemsg(mp); 8229 mutex_exit(&ipfb->ipfb_lock); 8230 return (NULL); 8231 } 8232 8233 break; 8234 } 8235 ipfp = &ipf->ipf_hash_next; 8236 continue; 8237 } 8238 8239 8240 /* 8241 * If we pruned the list, do we want to store this new 8242 * fragment?. We apply an optimization here based on the 8243 * fact that most fragments will be received in order. 8244 * So if the offset of this incoming fragment is zero, 8245 * it is the first fragment of a new packet. We will 8246 * keep it. Otherwise drop the fragment, as we have 8247 * probably pruned the packet already (since the 8248 * packet cannot be found). 
8249 */ 8250 8251 if (pruned && offset != 0) { 8252 mutex_exit(&ipfb->ipfb_lock); 8253 freemsg(mp); 8254 return (NULL); 8255 } 8256 8257 /* New guy. Allocate a frag message. */ 8258 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8259 if (!mp1) { 8260 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8261 freemsg(mp); 8262 partial_reass_done: 8263 mutex_exit(&ipfb->ipfb_lock); 8264 return (NULL); 8265 } 8266 8267 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8268 /* 8269 * Too many fragmented packets in this hash bucket. 8270 * Free the oldest. 8271 */ 8272 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8273 } 8274 8275 mp1->b_cont = mp; 8276 8277 /* Initialize the fragment header. */ 8278 ipf = (ipf_t *)mp1->b_rptr; 8279 ipf->ipf_mp = mp1; 8280 ipf->ipf_ptphn = ipfp; 8281 ipfp[0] = ipf; 8282 ipf->ipf_hash_next = NULL; 8283 ipf->ipf_ident = ident; 8284 ipf->ipf_v6src = *v6src_ptr; 8285 ipf->ipf_v6dst = *v6dst_ptr; 8286 /* Record reassembly start time. */ 8287 ipf->ipf_timestamp = gethrestime_sec(); 8288 /* Record ipf generation and account for frag header */ 8289 ipf->ipf_gen = ill->ill_ipf_gen++; 8290 ipf->ipf_count = mp1->b_datap->db_lim - mp1->b_datap->db_base; 8291 ipf->ipf_protocol = nexthdr; 8292 ipf->ipf_nf_hdr_len = 0; 8293 ipf->ipf_prev_nexthdr_offset = 0; 8294 ipf->ipf_last_frag_seen = B_FALSE; 8295 ipf->ipf_ecn = ecn_info; 8296 ipf->ipf_num_dups = 0; 8297 ipfb->ipfb_frag_pkts++; 8298 8299 /* 8300 * We handle reassembly two ways. In the easy case, 8301 * where all the fragments show up in order, we do 8302 * minimal bookkeeping, and just clip new pieces on 8303 * the end. If we ever see a hole, then we go off 8304 * to ip_reassemble which has to mark the pieces and 8305 * keep track of the number of holes, etc. Obviously, 8306 * the point of having both mechanisms is so we can 8307 * handle the easy case as efficiently as possible. 8308 */ 8309 if (offset == 0) { 8310 /* Easy case, in-order reassembly so far. 
*/ 8311 /* Update the byte count */ 8312 ipf->ipf_count += msg_len; 8313 ipf->ipf_tail_mp = tail_mp; 8314 /* 8315 * Keep track of next expected offset in 8316 * ipf_end. 8317 */ 8318 ipf->ipf_end = end; 8319 ipf->ipf_nf_hdr_len = hdr_length; 8320 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8321 } else { 8322 /* Hard case, hole at the beginning. */ 8323 ipf->ipf_tail_mp = NULL; 8324 /* 8325 * ipf_end == 0 means that we have given up 8326 * on easy reassembly. 8327 */ 8328 ipf->ipf_end = 0; 8329 /* 8330 * ipf_hole_cnt is set by ip_reassemble. 8331 * ipf_count is updated by ip_reassemble. 8332 * No need to check for return value here 8333 * as we don't expect reassembly to complete or 8334 * fail for the first fragment itself. 8335 */ 8336 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8337 msg_len); 8338 } 8339 /* Update per ipfb and ill byte counts */ 8340 ipfb->ipfb_count += ipf->ipf_count; 8341 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8342 ill->ill_frag_count += ipf->ipf_count; 8343 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8344 /* If the frag timer wasn't already going, start it. */ 8345 mutex_enter(&ill->ill_lock); 8346 ill_frag_timer_start(ill); 8347 mutex_exit(&ill->ill_lock); 8348 goto partial_reass_done; 8349 } 8350 8351 /* 8352 * We have a new piece of a datagram which is already being 8353 * reassembled. Update the ECN info if all IP fragments 8354 * are ECN capable. If there is one which is not, clear 8355 * all the info. If there is at least one which has CE 8356 * code point, IP needs to report that up to transport. 
8357 */ 8358 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8359 if (ecn_info == IPH_ECN_CE) 8360 ipf->ipf_ecn = IPH_ECN_CE; 8361 } else { 8362 ipf->ipf_ecn = IPH_ECN_NECT; 8363 } 8364 8365 if (offset && ipf->ipf_end == offset) { 8366 /* The new fragment fits at the end */ 8367 ipf->ipf_tail_mp->b_cont = mp; 8368 /* Update the byte count */ 8369 ipf->ipf_count += msg_len; 8370 /* Update per ipfb and ill byte counts */ 8371 ipfb->ipfb_count += msg_len; 8372 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8373 ill->ill_frag_count += msg_len; 8374 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8375 if (more_frags) { 8376 /* More to come. */ 8377 ipf->ipf_end = end; 8378 ipf->ipf_tail_mp = tail_mp; 8379 goto partial_reass_done; 8380 } 8381 } else { 8382 /* 8383 * Go do the hard cases. 8384 * Call ip_reassemble(). 8385 */ 8386 int ret; 8387 8388 if (offset == 0) { 8389 if (ipf->ipf_prev_nexthdr_offset == 0) { 8390 ipf->ipf_nf_hdr_len = hdr_length; 8391 ipf->ipf_prev_nexthdr_offset = 8392 *prev_nexthdr_offset; 8393 } 8394 } 8395 /* Save current byte count */ 8396 count = ipf->ipf_count; 8397 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8398 8399 /* Count of bytes added and subtracted (freeb()ed) */ 8400 count = ipf->ipf_count - count; 8401 if (count) { 8402 /* Update per ipfb and ill byte counts */ 8403 ipfb->ipfb_count += count; 8404 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8405 ill->ill_frag_count += count; 8406 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8407 } 8408 if (ret == IP_REASS_PARTIAL) { 8409 goto partial_reass_done; 8410 } else if (ret == IP_REASS_FAILED) { 8411 /* Reassembly failed. 
Free up all resources */ 8412 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8413 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8414 IP_REASS_SET_START(t_mp, 0); 8415 IP_REASS_SET_END(t_mp, 0); 8416 } 8417 freemsg(mp); 8418 goto partial_reass_done; 8419 } 8420 8421 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8422 } 8423 /* 8424 * We have completed reassembly. Unhook the frag header from 8425 * the reassembly list. 8426 * 8427 * Grab the unfragmentable header length next header value out 8428 * of the first fragment 8429 */ 8430 ASSERT(ipf->ipf_nf_hdr_len != 0); 8431 hdr_length = ipf->ipf_nf_hdr_len; 8432 8433 /* 8434 * Before we free the frag header, record the ECN info 8435 * to report back to the transport. 8436 */ 8437 ecn_info = ipf->ipf_ecn; 8438 8439 /* 8440 * Store the nextheader field in the header preceding the fragment 8441 * header 8442 */ 8443 nexthdr = ipf->ipf_protocol; 8444 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8445 ipfp = ipf->ipf_ptphn; 8446 mp1 = ipf->ipf_mp; 8447 count = ipf->ipf_count; 8448 ipf = ipf->ipf_hash_next; 8449 if (ipf) 8450 ipf->ipf_ptphn = ipfp; 8451 ipfp[0] = ipf; 8452 ill->ill_frag_count -= count; 8453 ASSERT(ipfb->ipfb_count >= count); 8454 ipfb->ipfb_count -= count; 8455 ipfb->ipfb_frag_pkts--; 8456 mutex_exit(&ipfb->ipfb_lock); 8457 /* Ditch the frag header. */ 8458 mp = mp1->b_cont; 8459 freeb(mp1); 8460 8461 /* 8462 * Make sure the packet is good by doing some sanity 8463 * check. If bad we can silentely drop the packet. 8464 */ 8465 reass_done: 8466 if (hdr_length < sizeof (ip6_frag_t)) { 8467 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8468 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8469 freemsg(mp); 8470 return (NULL); 8471 } 8472 8473 /* 8474 * Remove the fragment header from the initial header by 8475 * splitting the mblk into the non-fragmentable header and 8476 * everthing after the fragment extension header. 
This has the 8477 * side effect of putting all the headers that need destination 8478 * processing into the b_cont block-- on return this fact is 8479 * used in order to avoid having to look at the extensions 8480 * already processed. 8481 * 8482 * Note that this code assumes that the unfragmentable portion 8483 * of the header is in the first mblk and increments 8484 * the read pointer past it. If this assumption is broken 8485 * this code fails badly. 8486 */ 8487 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8488 mblk_t *nmp; 8489 8490 if (!(nmp = dupb(mp))) { 8491 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8492 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8493 freemsg(mp); 8494 return (NULL); 8495 } 8496 nmp->b_cont = mp->b_cont; 8497 mp->b_cont = nmp; 8498 nmp->b_rptr += hdr_length; 8499 } 8500 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8501 8502 ip6h = (ip6_t *)mp->b_rptr; 8503 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8504 8505 /* Restore original IP length in header. */ 8506 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8507 /* Record the ECN info. */ 8508 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8509 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8510 8511 return (mp); 8512 } 8513 8514 /* 8515 * Walk through the options to see if there is a routing header. 8516 * If present get the destination which is the last address of 8517 * the option. 
8518 */ 8519 in6_addr_t 8520 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8521 { 8522 uint8_t nexthdr; 8523 uint8_t *whereptr; 8524 ip6_hbh_t *hbhhdr; 8525 ip6_dest_t *dsthdr; 8526 ip6_rthdr0_t *rthdr; 8527 ip6_frag_t *fraghdr; 8528 int ehdrlen; 8529 int left; 8530 in6_addr_t *ap, rv; 8531 8532 if (is_fragment != NULL) 8533 *is_fragment = B_FALSE; 8534 8535 rv = ip6h->ip6_dst; 8536 8537 nexthdr = ip6h->ip6_nxt; 8538 whereptr = (uint8_t *)&ip6h[1]; 8539 for (;;) { 8540 8541 ASSERT(nexthdr != IPPROTO_RAW); 8542 switch (nexthdr) { 8543 case IPPROTO_HOPOPTS: 8544 hbhhdr = (ip6_hbh_t *)whereptr; 8545 nexthdr = hbhhdr->ip6h_nxt; 8546 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8547 break; 8548 case IPPROTO_DSTOPTS: 8549 dsthdr = (ip6_dest_t *)whereptr; 8550 nexthdr = dsthdr->ip6d_nxt; 8551 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8552 break; 8553 case IPPROTO_ROUTING: 8554 rthdr = (ip6_rthdr0_t *)whereptr; 8555 nexthdr = rthdr->ip6r0_nxt; 8556 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8557 8558 left = rthdr->ip6r0_segleft; 8559 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8560 rv = *(ap + left - 1); 8561 /* 8562 * If the caller doesn't care whether the packet 8563 * is a fragment or not, we can stop here since 8564 * we have our destination. 8565 */ 8566 if (is_fragment == NULL) 8567 goto done; 8568 break; 8569 case IPPROTO_FRAGMENT: 8570 fraghdr = (ip6_frag_t *)whereptr; 8571 nexthdr = fraghdr->ip6f_nxt; 8572 ehdrlen = sizeof (ip6_frag_t); 8573 if (is_fragment != NULL) 8574 *is_fragment = B_TRUE; 8575 goto done; 8576 default : 8577 goto done; 8578 } 8579 whereptr += ehdrlen; 8580 } 8581 8582 done: 8583 return (rv); 8584 } 8585 8586 /* 8587 * ip_source_routed_v6: 8588 * This function is called by redirect code in ip_rput_data_v6 to 8589 * know whether this packet is source routed through this node i.e 8590 * whether this node (router) is part of the journey. 
This 8591 * function is called under two cases : 8592 * 8593 * case 1 : Routing header was processed by this node and 8594 * ip_process_rthdr replaced ip6_dst with the next hop 8595 * and we are forwarding the packet to the next hop. 8596 * 8597 * case 2 : Routing header was not processed by this node and we 8598 * are just forwarding the packet. 8599 * 8600 * For case (1) we don't want to send redirects. For case(2) we 8601 * want to send redirects. 8602 */ 8603 static boolean_t 8604 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8605 { 8606 uint8_t nexthdr; 8607 in6_addr_t *addrptr; 8608 ip6_rthdr0_t *rthdr; 8609 uint8_t numaddr; 8610 ip6_hbh_t *hbhhdr; 8611 uint_t ehdrlen; 8612 uint8_t *byteptr; 8613 8614 ip2dbg(("ip_source_routed_v6\n")); 8615 nexthdr = ip6h->ip6_nxt; 8616 ehdrlen = IPV6_HDR_LEN; 8617 8618 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8619 while (nexthdr == IPPROTO_HOPOPTS || 8620 nexthdr == IPPROTO_DSTOPTS) { 8621 byteptr = (uint8_t *)ip6h + ehdrlen; 8622 /* 8623 * Check if we have already processed 8624 * packets or we are just a forwarding 8625 * router which only pulled up msgs up 8626 * to IPV6HDR and one HBH ext header 8627 */ 8628 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8629 ip2dbg(("ip_source_routed_v6: Extension" 8630 " headers not processed\n")); 8631 return (B_FALSE); 8632 } 8633 hbhhdr = (ip6_hbh_t *)byteptr; 8634 nexthdr = hbhhdr->ip6h_nxt; 8635 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8636 } 8637 switch (nexthdr) { 8638 case IPPROTO_ROUTING: 8639 byteptr = (uint8_t *)ip6h + ehdrlen; 8640 /* 8641 * If for some reason, we haven't pulled up 8642 * the routing hdr data mblk, then we must 8643 * not have processed it at all. So for sure 8644 * we are not part of the source routed journey. 
8645 */ 8646 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8647 ip2dbg(("ip_source_routed_v6: Routing" 8648 " header not processed\n")); 8649 return (B_FALSE); 8650 } 8651 rthdr = (ip6_rthdr0_t *)byteptr; 8652 /* 8653 * Either we are an intermediate router or the 8654 * last hop before destination and we have 8655 * already processed the routing header. 8656 * If segment_left is greater than or equal to zero, 8657 * then we must be the (numaddr - segleft) entry 8658 * of the routing header. Although ip6r0_segleft 8659 * is a unit8_t variable, we still check for zero 8660 * or greater value, if in case the data type 8661 * is changed someday in future. 8662 */ 8663 if (rthdr->ip6r0_segleft > 0 || 8664 rthdr->ip6r0_segleft == 0) { 8665 ire_t *ire = NULL; 8666 8667 numaddr = rthdr->ip6r0_len / 2; 8668 addrptr = (in6_addr_t *)((char *)rthdr + 8669 sizeof (*rthdr)); 8670 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8671 if (addrptr != NULL) { 8672 ire = ire_ctable_lookup_v6(addrptr, NULL, 8673 IRE_LOCAL, NULL, ALL_ZONES, MATCH_IRE_TYPE); 8674 if (ire != NULL) { 8675 ire_refrele(ire); 8676 return (B_TRUE); 8677 } 8678 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8679 } 8680 } 8681 /* FALLTHRU */ 8682 default: 8683 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8684 return (B_FALSE); 8685 } 8686 } 8687 8688 /* 8689 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8690 * Assumes that the following set of headers appear in the first 8691 * mblk: 8692 * ip6i_t (if present) CAN also appear as a separate mblk. 8693 * ip6_t 8694 * Any extension headers 8695 * TCP/UDP/SCTP header (if present) 8696 * The routine can handle an ICMPv6 header that is not in the first mblk. 8697 * 8698 * The order to determine the outgoing interface is as follows: 8699 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 8700 * 2. If conn_nofailover_ill is set then use that ill. 8701 * 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8702 * 4. 
If q is an ill queue and (link local or multicast destination) then 8703 * use that ill. 8704 * 5. If IPV6_BOUND_IF has been set use that ill. 8705 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8706 * look for the best IRE match for the unspecified group to determine 8707 * the ill. 8708 * 7. For unicast: Just do an IRE lookup for the best match. 8709 */ 8710 void 8711 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8712 { 8713 conn_t *connp = NULL; 8714 queue_t *q = (queue_t *)arg2; 8715 ire_t *ire = NULL; 8716 ire_t *sctp_ire = NULL; 8717 ip6_t *ip6h; 8718 in6_addr_t *v6dstp; 8719 ill_t *ill = NULL; 8720 ipif_t *ipif; 8721 ip6i_t *ip6i; 8722 int cksum_request; /* -1 => normal. */ 8723 /* 1 => Skip TCP/UDP/SCTP checksum */ 8724 /* Otherwise contains insert offset for checksum */ 8725 int unspec_src; 8726 boolean_t do_outrequests; /* Increment OutRequests? */ 8727 mib2_ipv6IfStatsEntry_t *mibptr; 8728 int match_flags = MATCH_IRE_ILL_GROUP; 8729 boolean_t attach_if = B_FALSE; 8730 mblk_t *first_mp; 8731 boolean_t mctl_present; 8732 ipsec_out_t *io; 8733 boolean_t drop_if_delayed = B_FALSE; 8734 boolean_t multirt_need_resolve = B_FALSE; 8735 mblk_t *copy_mp = NULL; 8736 int err; 8737 int ip6i_flags = 0; 8738 zoneid_t zoneid; 8739 ill_t *saved_ill = NULL; 8740 boolean_t conn_lock_held; 8741 boolean_t need_decref = B_FALSE; 8742 8743 /* 8744 * Highest bit in version field is Reachability Confirmation bit 8745 * used by NUD in ip_xmit_v6(). 8746 */ 8747 #ifdef _BIG_ENDIAN 8748 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8749 #else 8750 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 8751 #endif 8752 8753 /* 8754 * M_CTL comes from 5 places 8755 * 8756 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 8757 * both V4 and V6 datagrams. 8758 * 8759 * 2) AH/ESP sends down M_CTL after doing their job with both 8760 * V4 and V6 datagrams. 
8761 * 8762 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 8763 * attached. 8764 * 8765 * 4) Notifications from an external resolver (for XRESOLV ifs) 8766 * 8767 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 8768 * IPsec hardware acceleration support. 8769 * 8770 * We need to handle (1)'s IPv6 case and (3) here. For the 8771 * IPv4 case in (1), and (2), IPSEC processing has already 8772 * started. The code in ip_wput() already knows how to handle 8773 * continuing IPSEC processing (for IPv4 and IPv6). All other 8774 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 8775 * for handling. 8776 */ 8777 first_mp = mp; 8778 mctl_present = B_FALSE; 8779 io = NULL; 8780 8781 /* Multidata transmit? */ 8782 if (DB_TYPE(mp) == M_MULTIDATA) { 8783 /* 8784 * We should never get here, since all Multidata messages 8785 * originating from tcp should have been directed over to 8786 * tcp_multisend() in the first place. 8787 */ 8788 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 8789 freemsg(mp); 8790 return; 8791 } else if (DB_TYPE(mp) == M_CTL) { 8792 uint32_t mctltype = 0; 8793 uint32_t mlen = MBLKL(first_mp); 8794 8795 mp = mp->b_cont; 8796 mctl_present = B_TRUE; 8797 io = (ipsec_out_t *)first_mp->b_rptr; 8798 8799 /* 8800 * Validate this M_CTL message. The only three types of 8801 * M_CTL messages we expect to see in this code path are 8802 * ipsec_out_t or ipsec_in_t structures (allocated as 8803 * ipsec_info_t unions), or ipsec_ctl_t structures. 8804 * The ipsec_out_type and ipsec_in_type overlap in the two 8805 * data structures, and they are either set to IPSEC_OUT 8806 * or IPSEC_IN depending on which data structure it is. 8807 * ipsec_ctl_t is an IPSEC_CTL. 8808 * 8809 * All other M_CTL messages are sent to ip_wput_nondata() 8810 * for handling. 
8811 */ 8812 if (mlen >= sizeof (io->ipsec_out_type)) 8813 mctltype = io->ipsec_out_type; 8814 8815 if ((mlen == sizeof (ipsec_ctl_t)) && 8816 (mctltype == IPSEC_CTL)) { 8817 ip_output(Q_TO_CONN(q), first_mp, q, caller); 8818 return; 8819 } 8820 8821 if ((mlen < sizeof (ipsec_info_t)) || 8822 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 8823 mp == NULL) { 8824 ip_wput_nondata(NULL, q, first_mp, NULL); 8825 return; 8826 } 8827 /* NDP callbacks have q_next non-NULL. That's case #3. */ 8828 if (q->q_next == NULL) { 8829 ip6h = (ip6_t *)mp->b_rptr; 8830 /* 8831 * For a freshly-generated TCP dgram that needs IPV6 8832 * processing, don't call ip_wput immediately. We can 8833 * tell this by the ipsec_out_proc_begin. In-progress 8834 * IPSEC_OUT messages have proc_begin set to TRUE, 8835 * and we want to send all IPSEC_IN messages to 8836 * ip_wput() for IPsec processing or finishing. 8837 */ 8838 if (mctltype == IPSEC_IN || 8839 IPVER(ip6h) != IPV6_VERSION || 8840 io->ipsec_out_proc_begin) { 8841 mibptr = &ip6_mib; 8842 goto notv6; 8843 } 8844 } 8845 } else if (DB_TYPE(mp) != M_DATA) { 8846 ip_wput_nondata(NULL, q, mp, NULL); 8847 return; 8848 } 8849 8850 ip6h = (ip6_t *)mp->b_rptr; 8851 8852 if (IPVER(ip6h) != IPV6_VERSION) { 8853 mibptr = &ip6_mib; 8854 goto notv6; 8855 } 8856 8857 if (q->q_next != NULL) { 8858 ill = (ill_t *)q->q_ptr; 8859 /* 8860 * We don't know if this ill will be used for IPv6 8861 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 8862 * ipif_set_values() sets the ill_isv6 flag to true if 8863 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 8864 * just drop the packet. 
8865 */ 8866 if (!ill->ill_isv6) { 8867 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 8868 "ILLF_IPV6 was set\n")); 8869 freemsg(first_mp); 8870 return; 8871 } 8872 /* For uniformity do a refhold */ 8873 mutex_enter(&ill->ill_lock); 8874 if (!ILL_CAN_LOOKUP(ill)) { 8875 mutex_exit(&ill->ill_lock); 8876 freemsg(first_mp); 8877 return; 8878 } 8879 ill_refhold_locked(ill); 8880 mutex_exit(&ill->ill_lock); 8881 mibptr = ill->ill_ip6_mib; 8882 /* 8883 * ill_ip6_mib is allocated by ipif_set_values() when 8884 * ill_isv6 is set. Thus if ill_isv6 is true, 8885 * ill_ip6_mib had better not be NULL. 8886 */ 8887 ASSERT(mibptr != NULL); 8888 unspec_src = 0; 8889 BUMP_MIB(mibptr, ipv6OutRequests); 8890 do_outrequests = B_FALSE; 8891 } else { 8892 connp = (conn_t *)arg; 8893 ASSERT(connp != NULL); 8894 8895 /* is queue flow controlled? */ 8896 if ((q->q_first || connp->conn_draining) && 8897 (caller == IP_WPUT)) { 8898 /* 8899 * 1) TCP sends down M_CTL for detached connections. 8900 * 2) AH/ESP sends down M_CTL. 8901 * 8902 * We don't flow control either of the above. Only 8903 * UDP and others are flow controlled for which we 8904 * can't have a M_CTL. 8905 */ 8906 ASSERT(first_mp == mp); 8907 (void) putq(q, mp); 8908 return; 8909 } 8910 mibptr = &ip6_mib; 8911 unspec_src = connp->conn_unspec_src; 8912 do_outrequests = B_TRUE; 8913 if (mp->b_flag & MSGHASREF) { 8914 mp->b_flag &= ~MSGHASREF; 8915 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 8916 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 8917 need_decref = B_TRUE; 8918 } 8919 8920 /* 8921 * If there is a policy, try to attach an ipsec_out in 8922 * the front. At the end, first_mp either points to a 8923 * M_DATA message or IPSEC_OUT message linked to a 8924 * M_DATA message. We have to do it now as we might 8925 * lose the "conn" if we go through ip_newroute. 
8926 */ 8927 if (!mctl_present && 8928 (connp->conn_out_enforce_policy || 8929 connp->conn_latch != NULL)) { 8930 ASSERT(first_mp == mp); 8931 /* XXX Any better way to get the protocol fast ? */ 8932 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 8933 connp->conn_ulp)) == NULL)) { 8934 if (need_decref) 8935 CONN_DEC_REF(connp); 8936 return; 8937 } else { 8938 ASSERT(mp->b_datap->db_type == M_CTL); 8939 first_mp = mp; 8940 mp = mp->b_cont; 8941 mctl_present = B_TRUE; 8942 io = (ipsec_out_t *)first_mp->b_rptr; 8943 } 8944 } 8945 } 8946 8947 /* check for alignment and full IPv6 header */ 8948 if (!OK_32PTR((uchar_t *)ip6h) || 8949 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 8950 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 8951 if (do_outrequests) 8952 BUMP_MIB(mibptr, ipv6OutRequests); 8953 BUMP_MIB(mibptr, ipv6OutDiscards); 8954 freemsg(first_mp); 8955 if (ill != NULL) 8956 ill_refrele(ill); 8957 if (need_decref) 8958 CONN_DEC_REF(connp); 8959 return; 8960 } 8961 v6dstp = &ip6h->ip6_dst; 8962 cksum_request = -1; 8963 ip6i = NULL; 8964 8965 /* 8966 * Once neighbor discovery has completed, ndp_process() will provide 8967 * locally generated packets for which processing can be reattempted. 8968 * In these cases, connp is NULL and the original zone is part of a 8969 * prepended ipsec_out_t. 8970 */ 8971 if (io != NULL) { 8972 zoneid = io->ipsec_out_zoneid; 8973 ASSERT(zoneid != ALL_ZONES); 8974 } else { 8975 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 8976 } 8977 8978 if (ip6h->ip6_nxt == IPPROTO_RAW) { 8979 /* 8980 * This is an ip6i_t header followed by an ip6_hdr. 8981 * Check which fields are set. 8982 * 8983 * When the packet comes from a transport we should have 8984 * all needed headers in the first mblk. However, when 8985 * going through ip_newroute*_v6 the ip6i might be in 8986 * a separate mblk when we return here. 
In that case 8987 * we pullup everything to ensure that extension and transport 8988 * headers "stay" in the first mblk. 8989 */ 8990 ip6i = (ip6i_t *)ip6h; 8991 ip6i_flags = ip6i->ip6i_flags; 8992 8993 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 8994 ((mp->b_wptr - (uchar_t *)ip6i) >= 8995 sizeof (ip6i_t) + IPV6_HDR_LEN)); 8996 8997 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 8998 if (!pullupmsg(mp, -1)) { 8999 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9000 if (do_outrequests) 9001 BUMP_MIB(mibptr, ipv6OutRequests); 9002 BUMP_MIB(mibptr, ipv6OutDiscards); 9003 freemsg(first_mp); 9004 if (ill != NULL) 9005 ill_refrele(ill); 9006 if (need_decref) 9007 CONN_DEC_REF(connp); 9008 return; 9009 } 9010 ip6h = (ip6_t *)mp->b_rptr; 9011 v6dstp = &ip6h->ip6_dst; 9012 ip6i = (ip6i_t *)ip6h; 9013 } 9014 ip6h = (ip6_t *)&ip6i[1]; 9015 9016 /* 9017 * Advance rptr past the ip6i_t to get ready for 9018 * transmitting the packet. However, if the packet gets 9019 * passed to ip_newroute*_v6 then rptr is moved back so 9020 * that the ip6i_t header can be inspected when the 9021 * packet comes back here after passing through 9022 * ire_add_then_send. 9023 */ 9024 mp->b_rptr = (uchar_t *)ip6h; 9025 9026 /* 9027 * IP6I_ATTACH_IF is set in this function when we had a 9028 * conn and it was either bound to the IPFF_NOFAILOVER address 9029 * or IPV6_BOUND_PIF was set. These options override other 9030 * options that set the ifindex. We come here with 9031 * IP6I_ATTACH_IF set when we can't find the ire and 9032 * ip_newroute_v6 is feeding the packet for second time. 
9033 */ 9034 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9035 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9036 ASSERT(ip6i->ip6i_ifindex != 0); 9037 if (ill != NULL) 9038 ill_refrele(ill); 9039 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9040 NULL, NULL, NULL, NULL); 9041 if (ill == NULL) { 9042 if (do_outrequests) 9043 BUMP_MIB(mibptr, ipv6OutRequests); 9044 BUMP_MIB(mibptr, ipv6OutDiscards); 9045 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9046 ip6i->ip6i_ifindex)); 9047 if (need_decref) 9048 CONN_DEC_REF(connp); 9049 freemsg(first_mp); 9050 return; 9051 } 9052 mibptr = ill->ill_ip6_mib; 9053 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9054 /* 9055 * Preserve the index so that when we return 9056 * from IPSEC processing, we know where to 9057 * send the packet. 9058 */ 9059 if (mctl_present) { 9060 ASSERT(io != NULL); 9061 io->ipsec_out_ill_index = 9062 ip6i->ip6i_ifindex; 9063 } 9064 } 9065 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9066 /* 9067 * This is a multipathing probe packet that has 9068 * been delayed in ND resolution. 
Drop the 9069 * packet for the reasons mentioned in 9070 * nce_queue_mp() 9071 */ 9072 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9073 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9074 freemsg(first_mp); 9075 ill_refrele(ill); 9076 if (need_decref) 9077 CONN_DEC_REF(connp); 9078 return; 9079 } 9080 } 9081 } 9082 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9083 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9084 9085 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9086 if (secpolicy_net_rawaccess(cr) != 0) { 9087 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9088 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9089 NULL, zoneid, 9090 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9091 if (ire == NULL) { 9092 if (do_outrequests) 9093 BUMP_MIB(mibptr, 9094 ipv6OutRequests); 9095 BUMP_MIB(mibptr, ipv6OutDiscards); 9096 ip1dbg(("ip_wput_v6: bad source " 9097 "addr\n")); 9098 freemsg(first_mp); 9099 if (ill != NULL) 9100 ill_refrele(ill); 9101 if (need_decref) 9102 CONN_DEC_REF(connp); 9103 return; 9104 } 9105 ire_refrele(ire); 9106 } 9107 /* No need to verify again when using ip_newroute */ 9108 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9109 } 9110 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9111 /* 9112 * Make sure they match since ip_newroute*_v6 etc might 9113 * (unknown to them) inspect ip6i_nexthop when 9114 * they think they access ip6_dst. 9115 */ 9116 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9117 } 9118 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9119 cksum_request = 1; 9120 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9121 cksum_request = ip6i->ip6i_checksum_off; 9122 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9123 unspec_src = 1; 9124 9125 if (do_outrequests && ill != NULL) { 9126 BUMP_MIB(mibptr, ipv6OutRequests); 9127 do_outrequests = B_FALSE; 9128 } 9129 /* 9130 * Store ip6i_t info that we need after we come back 9131 * from IPSEC processing. 
9132 */ 9133 if (mctl_present) { 9134 ASSERT(io != NULL); 9135 io->ipsec_out_unspec_src = unspec_src; 9136 } 9137 } 9138 if (connp != NULL && connp->conn_dontroute) 9139 ip6h->ip6_hops = 1; 9140 9141 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9142 goto ipv6multicast; 9143 9144 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9145 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9146 ill_t *conn_outgoing_pill; 9147 9148 conn_outgoing_pill = conn_get_held_ill(connp, 9149 &connp->conn_outgoing_pill, &err); 9150 if (err == ILL_LOOKUP_FAILED) { 9151 if (ill != NULL) 9152 ill_refrele(ill); 9153 if (need_decref) 9154 CONN_DEC_REF(connp); 9155 freemsg(first_mp); 9156 return; 9157 } 9158 if (conn_outgoing_pill != NULL) { 9159 if (ill != NULL) 9160 ill_refrele(ill); 9161 ill = conn_outgoing_pill; 9162 attach_if = B_TRUE; 9163 match_flags = MATCH_IRE_ILL; 9164 mibptr = ill->ill_ip6_mib; 9165 9166 /* 9167 * Check if we need an ire that will not be 9168 * looked up by anybody else i.e. HIDDEN. 9169 */ 9170 if (ill_is_probeonly(ill)) 9171 match_flags |= MATCH_IRE_MARK_HIDDEN; 9172 goto send_from_ill; 9173 } 9174 } 9175 9176 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9177 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9178 ill_t *conn_nofailover_ill; 9179 9180 conn_nofailover_ill = conn_get_held_ill(connp, 9181 &connp->conn_nofailover_ill, &err); 9182 if (err == ILL_LOOKUP_FAILED) { 9183 if (ill != NULL) 9184 ill_refrele(ill); 9185 if (need_decref) 9186 CONN_DEC_REF(connp); 9187 freemsg(first_mp); 9188 return; 9189 } 9190 if (conn_nofailover_ill != NULL) { 9191 if (ill != NULL) 9192 ill_refrele(ill); 9193 ill = conn_nofailover_ill; 9194 attach_if = B_TRUE; 9195 /* 9196 * Assumes that ipc_nofailover_ill is used only for 9197 * multipathing probe packets. These packets are better 9198 * dropped, if they are delayed in ND resolution, for 9199 * the reasons described in nce_queue_mp(). 
9200 * IP6I_DROP_IFDELAYED will be set later on in this 9201 * function for this packet. 9202 */ 9203 drop_if_delayed = B_TRUE; 9204 match_flags = MATCH_IRE_ILL; 9205 mibptr = ill->ill_ip6_mib; 9206 9207 /* 9208 * Check if we need an ire that will not be 9209 * looked up by anybody else i.e. HIDDEN. 9210 */ 9211 if (ill_is_probeonly(ill)) 9212 match_flags |= MATCH_IRE_MARK_HIDDEN; 9213 goto send_from_ill; 9214 } 9215 } 9216 9217 /* 9218 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9219 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9220 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9221 */ 9222 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9223 ASSERT(ip6i->ip6i_ifindex != 0); 9224 attach_if = B_TRUE; 9225 ASSERT(ill != NULL); 9226 match_flags = MATCH_IRE_ILL; 9227 9228 /* 9229 * Check if we need an ire that will not be 9230 * looked up by anybody else i.e. HIDDEN. 9231 */ 9232 if (ill_is_probeonly(ill)) 9233 match_flags |= MATCH_IRE_MARK_HIDDEN; 9234 goto send_from_ill; 9235 } 9236 9237 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9238 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9239 ASSERT(ill != NULL); 9240 goto send_from_ill; 9241 } 9242 9243 /* 9244 * 4. If q is an ill queue and (link local or multicast destination) 9245 * then use that ill. 9246 */ 9247 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9248 goto send_from_ill; 9249 } 9250 9251 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9252 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9253 ill_t *conn_outgoing_ill; 9254 9255 conn_outgoing_ill = conn_get_held_ill(connp, 9256 &connp->conn_outgoing_ill, &err); 9257 if (err == ILL_LOOKUP_FAILED) { 9258 if (ill != NULL) 9259 ill_refrele(ill); 9260 if (need_decref) 9261 CONN_DEC_REF(connp); 9262 freemsg(first_mp); 9263 return; 9264 } 9265 if (ill != NULL) 9266 ill_refrele(ill); 9267 ill = conn_outgoing_ill; 9268 mibptr = ill->ill_ip6_mib; 9269 goto send_from_ill; 9270 } 9271 9272 /* 9273 * 6. For unicast: Just do an IRE lookup for the best match. 9274 * If we get here for a link-local address it is rather random 9275 * what interface we pick on a multihomed host. 9276 * *If* there is an IRE_CACHE (and the link-local address 9277 * isn't duplicated on multi links) this will find the IRE_CACHE. 9278 * Otherwise it will use one of the matching IRE_INTERFACE routes 9279 * for the link-local prefix. Hence, applications 9280 * *should* be encouraged to specify an outgoing interface when sending 9281 * to a link local address. 9282 */ 9283 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9284 !connp->conn_fully_bound)) { 9285 /* 9286 * We cache IRE_CACHEs to avoid lookups. We don't do 9287 * this for the tcp global queue and listen end point 9288 * as it does not really have a real destination to 9289 * talk to. 9290 */ 9291 ire = ire_cache_lookup_v6(v6dstp, zoneid); 9292 } else { 9293 /* 9294 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9295 * grab a lock here to check for CONDEMNED as it is okay 9296 * to send a packet or two with the IRE_CACHE that is going 9297 * away. 9298 */ 9299 mutex_enter(&connp->conn_lock); 9300 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9301 if (ire != NULL && 9302 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9303 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9304 9305 IRE_REFHOLD(ire); 9306 mutex_exit(&connp->conn_lock); 9307 9308 } else { 9309 boolean_t cached = B_FALSE; 9310 9311 connp->conn_ire_cache = NULL; 9312 mutex_exit(&connp->conn_lock); 9313 /* Release the old ire */ 9314 if (ire != NULL && sctp_ire == NULL) 9315 IRE_REFRELE_NOTR(ire); 9316 9317 ire = (ire_t *)ire_cache_lookup_v6(v6dstp, zoneid); 9318 if (ire != NULL) { 9319 IRE_REFHOLD_NOTR(ire); 9320 9321 mutex_enter(&connp->conn_lock); 9322 if (!(connp->conn_state_flags & CONN_CLOSING) && 9323 (connp->conn_ire_cache == NULL)) { 9324 rw_enter(&ire->ire_bucket->irb_lock, 9325 RW_READER); 9326 if (!(ire->ire_marks & 9327 IRE_MARK_CONDEMNED)) { 9328 connp->conn_ire_cache = ire; 9329 cached = B_TRUE; 9330 } 9331 rw_exit(&ire->ire_bucket->irb_lock); 9332 } 9333 mutex_exit(&connp->conn_lock); 9334 9335 /* 9336 * We can continue to use the ire but since it 9337 * was not cached, we should drop the extra 9338 * reference. 9339 */ 9340 if (!cached) 9341 IRE_REFRELE_NOTR(ire); 9342 } 9343 } 9344 } 9345 9346 if (ire != NULL) { 9347 if (do_outrequests) { 9348 /* Handle IRE_LOCAL's that might appear here */ 9349 if (ire->ire_type == IRE_CACHE) { 9350 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9351 ill_ip6_mib; 9352 } else { 9353 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9354 } 9355 BUMP_MIB(mibptr, ipv6OutRequests); 9356 } 9357 ASSERT(!attach_if); 9358 9359 /* 9360 * Check if the ire has the RTF_MULTIRT flag, inherited 9361 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9362 */ 9363 if (ire->ire_flags & RTF_MULTIRT) { 9364 /* 9365 * Force hop limit of multirouted packets if required. 9366 * The hop limit of such packets is bounded by the 9367 * ip_multirt_ttl ndd variable. 9368 * NDP packets must have a hop limit of 255; don't 9369 * change the hop limit in that case. 
9370 */ 9371 if ((ip_multirt_ttl > 0) && 9372 (ip6h->ip6_hops > ip_multirt_ttl) && 9373 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9374 if (ip_debug > 3) { 9375 ip2dbg(("ip_wput_v6: forcing multirt " 9376 "hop limit to %d (was %d) ", 9377 ip_multirt_ttl, ip6h->ip6_hops)); 9378 pr_addr_dbg("v6dst %s\n", AF_INET6, 9379 &ire->ire_addr_v6); 9380 } 9381 ip6h->ip6_hops = ip_multirt_ttl; 9382 } 9383 9384 /* 9385 * We look at this point if there are pending 9386 * unresolved routes. ire_multirt_need_resolve_v6() 9387 * checks in O(n) that all IRE_OFFSUBNET ire 9388 * entries for the packet's destination and 9389 * flagged RTF_MULTIRT are currently resolved. 9390 * If some remain unresolved, we do a copy 9391 * of the current message. It will be used 9392 * to initiate additional route resolutions. 9393 */ 9394 multirt_need_resolve = 9395 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9396 ip2dbg(("ip_wput_v6: ire %p, " 9397 "multirt_need_resolve %d, first_mp %p\n", 9398 (void *)ire, multirt_need_resolve, 9399 (void *)first_mp)); 9400 if (multirt_need_resolve) { 9401 copy_mp = copymsg(first_mp); 9402 if (copy_mp != NULL) { 9403 MULTIRT_DEBUG_TAG(copy_mp); 9404 } 9405 } 9406 } 9407 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9408 connp, caller, 0, ip6i_flags); 9409 if (need_decref) { 9410 CONN_DEC_REF(connp); 9411 connp = NULL; 9412 } 9413 IRE_REFRELE(ire); 9414 9415 /* 9416 * Try to resolve another multiroute if 9417 * ire_multirt_need_resolve_v6() deemed it necessary. 9418 * copy_mp will be consumed (sent or freed) by 9419 * ip_newroute_v6(). 9420 */ 9421 if (copy_mp != NULL) { 9422 if (mctl_present) { 9423 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9424 } else { 9425 ip6h = (ip6_t *)copy_mp->b_rptr; 9426 } 9427 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9428 &ip6h->ip6_src, NULL, zoneid); 9429 } 9430 if (ill != NULL) 9431 ill_refrele(ill); 9432 return; 9433 } 9434 9435 /* 9436 * No full IRE for this destination. 
Send it to 9437 * ip_newroute_v6 to see if anything else matches. 9438 * Mark this packet as having originated on this 9439 * machine. 9440 * Update rptr if there was an ip6i_t header. 9441 */ 9442 mp->b_prev = NULL; 9443 mp->b_next = NULL; 9444 if (ip6i != NULL) 9445 mp->b_rptr -= sizeof (ip6i_t); 9446 9447 if (unspec_src) { 9448 if (ip6i == NULL) { 9449 /* 9450 * Add ip6i_t header to carry unspec_src 9451 * until the packet comes back in ip_wput_v6. 9452 */ 9453 mp = ip_add_info_v6(mp, NULL, v6dstp); 9454 if (mp == NULL) { 9455 if (do_outrequests) 9456 BUMP_MIB(mibptr, ipv6OutRequests); 9457 BUMP_MIB(mibptr, ipv6OutDiscards); 9458 if (mctl_present) 9459 freeb(first_mp); 9460 if (ill != NULL) 9461 ill_refrele(ill); 9462 if (need_decref) 9463 CONN_DEC_REF(connp); 9464 return; 9465 } 9466 ip6i = (ip6i_t *)mp->b_rptr; 9467 9468 if (mctl_present) { 9469 ASSERT(first_mp != mp); 9470 first_mp->b_cont = mp; 9471 } else { 9472 first_mp = mp; 9473 } 9474 9475 if ((mp->b_wptr - (uchar_t *)ip6i) == 9476 sizeof (ip6i_t)) { 9477 /* 9478 * ndp_resolver called from ip_newroute_v6 9479 * expects pulled up message. 
9480 */ 9481 if (!pullupmsg(mp, -1)) { 9482 ip1dbg(("ip_wput_v6: pullupmsg" 9483 " failed\n")); 9484 if (do_outrequests) { 9485 BUMP_MIB(mibptr, 9486 ipv6OutRequests); 9487 } 9488 BUMP_MIB(mibptr, ipv6OutDiscards); 9489 freemsg(first_mp); 9490 if (ill != NULL) 9491 ill_refrele(ill); 9492 if (need_decref) 9493 CONN_DEC_REF(connp); 9494 return; 9495 } 9496 ip6i = (ip6i_t *)mp->b_rptr; 9497 } 9498 ip6h = (ip6_t *)&ip6i[1]; 9499 v6dstp = &ip6h->ip6_dst; 9500 } 9501 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9502 if (mctl_present) { 9503 ASSERT(io != NULL); 9504 io->ipsec_out_unspec_src = unspec_src; 9505 } 9506 } 9507 if (do_outrequests) 9508 BUMP_MIB(mibptr, ipv6OutRequests); 9509 if (need_decref) 9510 CONN_DEC_REF(connp); 9511 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9512 if (ill != NULL) 9513 ill_refrele(ill); 9514 return; 9515 9516 9517 /* 9518 * Handle multicast packets with or without an conn. 9519 * Assumes that the transports set ip6_hops taking 9520 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9521 * into account. 9522 */ 9523 ipv6multicast: 9524 ip2dbg(("ip_wput_v6: multicast\n")); 9525 9526 /* 9527 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9528 * 2. If conn_nofailover_ill is set then use that ill. 9529 * 9530 * Hold the conn_lock till we refhold the ill of interest that is 9531 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9532 * while holding any locks, postpone the refrele until after the 9533 * conn_lock is dropped. 
9534 */ 9535 if (connp != NULL) { 9536 mutex_enter(&connp->conn_lock); 9537 conn_lock_held = B_TRUE; 9538 } else { 9539 conn_lock_held = B_FALSE; 9540 } 9541 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9542 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9543 if (err == ILL_LOOKUP_FAILED) { 9544 ip1dbg(("ip_output_v6: multicast" 9545 " conn_outgoing_pill no ipif\n")); 9546 multicast_discard: 9547 ASSERT(saved_ill == NULL); 9548 if (conn_lock_held) 9549 mutex_exit(&connp->conn_lock); 9550 if (ill != NULL) 9551 ill_refrele(ill); 9552 freemsg(first_mp); 9553 if (do_outrequests) 9554 BUMP_MIB(mibptr, ipv6OutDiscards); 9555 if (need_decref) 9556 CONN_DEC_REF(connp); 9557 return; 9558 } 9559 saved_ill = ill; 9560 ill = connp->conn_outgoing_pill; 9561 attach_if = B_TRUE; 9562 match_flags = MATCH_IRE_ILL; 9563 mibptr = ill->ill_ip6_mib; 9564 9565 /* 9566 * Check if we need an ire that will not be 9567 * looked up by anybody else i.e. HIDDEN. 9568 */ 9569 if (ill_is_probeonly(ill)) 9570 match_flags |= MATCH_IRE_MARK_HIDDEN; 9571 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9572 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9573 if (err == ILL_LOOKUP_FAILED) { 9574 ip1dbg(("ip_output_v6: multicast" 9575 " conn_nofailover_ill no ipif\n")); 9576 goto multicast_discard; 9577 } 9578 saved_ill = ill; 9579 ill = connp->conn_nofailover_ill; 9580 attach_if = B_TRUE; 9581 match_flags = MATCH_IRE_ILL; 9582 9583 /* 9584 * Check if we need an ire that will not be 9585 * looked up by anybody else i.e. HIDDEN. 9586 */ 9587 if (ill_is_probeonly(ill)) 9588 match_flags |= MATCH_IRE_MARK_HIDDEN; 9589 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9590 /* 9591 * Redo 1. If we did not find an IRE_CACHE the first time, 9592 * we should have an ip6i_t with IP6I_ATTACH_IF if 9593 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9594 * used on this endpoint. 
9595 */ 9596 ASSERT(ip6i->ip6i_ifindex != 0); 9597 attach_if = B_TRUE; 9598 ASSERT(ill != NULL); 9599 match_flags = MATCH_IRE_ILL; 9600 9601 /* 9602 * Check if we need an ire that will not be 9603 * looked up by anybody else i.e. HIDDEN. 9604 */ 9605 if (ill_is_probeonly(ill)) 9606 match_flags |= MATCH_IRE_MARK_HIDDEN; 9607 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9608 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9609 9610 ASSERT(ill != NULL); 9611 } else if (ill != NULL) { 9612 /* 9613 * 4. If q is an ill queue and (link local or multicast 9614 * destination) then use that ill. 9615 * We don't need the ipif initialization here. 9616 * This useless assert below is just to prevent lint from 9617 * reporting a null body if statement. 9618 */ 9619 ASSERT(ill != NULL); 9620 } else if (connp != NULL) { 9621 /* 9622 * 5. If IPV6_BOUND_IF has been set use that ill. 9623 * 9624 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 9625 * Otherwise look for the best IRE match for the unspecified 9626 * group to determine the ill. 9627 * 9628 * conn_multicast_ill is used for only IPv6 packets. 9629 * conn_multicast_ipif is used for only IPv4 packets. 9630 * Thus a PF_INET6 socket send both IPv4 and IPv6 9631 * multicast packets using different IP*_MULTICAST_IF 9632 * interfaces. 
9633 */ 9634 if (connp->conn_outgoing_ill != NULL) { 9635 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9636 if (err == ILL_LOOKUP_FAILED) { 9637 ip1dbg(("ip_output_v6: multicast" 9638 " conn_outgoing_ill no ipif\n")); 9639 goto multicast_discard; 9640 } 9641 ill = connp->conn_outgoing_ill; 9642 } else if (connp->conn_multicast_ill != NULL) { 9643 err = ill_check_and_refhold(connp->conn_multicast_ill); 9644 if (err == ILL_LOOKUP_FAILED) { 9645 ip1dbg(("ip_output_v6: multicast" 9646 " conn_multicast_ill no ipif\n")); 9647 goto multicast_discard; 9648 } 9649 ill = connp->conn_multicast_ill; 9650 } else { 9651 mutex_exit(&connp->conn_lock); 9652 conn_lock_held = B_FALSE; 9653 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 9654 if (ipif == NULL) { 9655 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9656 goto multicast_discard; 9657 } 9658 /* 9659 * We have a ref to this ipif, so we can safely 9660 * access ipif_ill. 9661 */ 9662 ill = ipif->ipif_ill; 9663 mutex_enter(&ill->ill_lock); 9664 if (!ILL_CAN_LOOKUP(ill)) { 9665 mutex_exit(&ill->ill_lock); 9666 ipif_refrele(ipif); 9667 ill = NULL; 9668 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9669 goto multicast_discard; 9670 } 9671 ill_refhold_locked(ill); 9672 mutex_exit(&ill->ill_lock); 9673 ipif_refrele(ipif); 9674 /* 9675 * Save binding until IPV6_MULTICAST_IF 9676 * changes it 9677 */ 9678 mutex_enter(&connp->conn_lock); 9679 connp->conn_multicast_ill = ill; 9680 connp->conn_orig_multicast_ifindex = 9681 ill->ill_phyint->phyint_ifindex; 9682 mutex_exit(&connp->conn_lock); 9683 } 9684 } 9685 if (conn_lock_held) 9686 mutex_exit(&connp->conn_lock); 9687 9688 if (saved_ill != NULL) 9689 ill_refrele(saved_ill); 9690 9691 ASSERT(ill != NULL); 9692 /* 9693 * For multicast loopback interfaces replace the multicast address 9694 * with a unicast address for the ire lookup. 
9695 */ 9696 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 9697 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9698 9699 mibptr = ill->ill_ip6_mib; 9700 if (do_outrequests) { 9701 BUMP_MIB(mibptr, ipv6OutRequests); 9702 do_outrequests = B_FALSE; 9703 } 9704 BUMP_MIB(mibptr, ipv6OutMcastPkts); 9705 9706 /* 9707 * As we may lose the conn by the time we reach ip_wput_ire_v6 9708 * we copy conn_multicast_loop and conn_dontroute on to an 9709 * ipsec_out. In case if this datagram goes out secure, 9710 * we need the ill_index also. Copy that also into the 9711 * ipsec_out. 9712 */ 9713 if (mctl_present) { 9714 io = (ipsec_out_t *)first_mp->b_rptr; 9715 ASSERT(first_mp->b_datap->db_type == M_CTL); 9716 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9717 } else { 9718 ASSERT(mp == first_mp); 9719 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 9720 BUMP_MIB(mibptr, ipv6OutDiscards); 9721 freemsg(mp); 9722 if (ill != NULL) 9723 ill_refrele(ill); 9724 if (need_decref) 9725 CONN_DEC_REF(connp); 9726 return; 9727 } 9728 io = (ipsec_out_t *)first_mp->b_rptr; 9729 /* This is not a secure packet */ 9730 io->ipsec_out_secure = B_FALSE; 9731 io->ipsec_out_use_global_policy = B_TRUE; 9732 io->ipsec_out_zoneid = 9733 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 9734 first_mp->b_cont = mp; 9735 mctl_present = B_TRUE; 9736 } 9737 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9738 io->ipsec_out_unspec_src = unspec_src; 9739 if (connp != NULL) 9740 io->ipsec_out_dontroute = connp->conn_dontroute; 9741 9742 send_from_ill: 9743 ASSERT(ill != NULL); 9744 ASSERT(mibptr == ill->ill_ip6_mib); 9745 if (do_outrequests) { 9746 BUMP_MIB(mibptr, ipv6OutRequests); 9747 do_outrequests = B_FALSE; 9748 } 9749 9750 if (io != NULL) 9751 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9752 9753 /* 9754 * When a specific ill is specified (using IPV6_PKTINFO, 9755 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9756 * on routing entries (ftable and ctable) that have a matching 9757 * ire->ire_ipif->ipif_ill. Thus this can only be used 9758 * for destinations that are on-link for the specific ill 9759 * and that can appear on multiple links. Thus it is useful 9760 * for multicast destinations, link-local destinations, and 9761 * at some point perhaps for site-local destinations (if the 9762 * node sits at a site boundary). 9763 * We create the cache entries in the regular ctable since 9764 * it can not "confuse" things for other destinations. 9765 * table. 9766 * 9767 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9768 * It is used only when ire_cache_lookup is used above. 9769 */ 9770 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9771 zoneid, match_flags); 9772 if (ire != NULL) { 9773 /* 9774 * Check if the ire has the RTF_MULTIRT flag, inherited 9775 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9776 */ 9777 if (ire->ire_flags & RTF_MULTIRT) { 9778 /* 9779 * Force hop limit of multirouted packets if required. 9780 * The hop limit of such packets is bounded by the 9781 * ip_multirt_ttl ndd variable. 9782 * NDP packets must have a hop limit of 255; don't 9783 * change the hop limit in that case. 
9784 */ 9785 if ((ip_multirt_ttl > 0) && 9786 (ip6h->ip6_hops > ip_multirt_ttl) && 9787 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9788 if (ip_debug > 3) { 9789 ip2dbg(("ip_wput_v6: forcing multirt " 9790 "hop limit to %d (was %d) ", 9791 ip_multirt_ttl, ip6h->ip6_hops)); 9792 pr_addr_dbg("v6dst %s\n", AF_INET6, 9793 &ire->ire_addr_v6); 9794 } 9795 ip6h->ip6_hops = ip_multirt_ttl; 9796 } 9797 9798 /* 9799 * We look at this point if there are pending 9800 * unresolved routes. ire_multirt_need_resolve_v6() 9801 * checks in O(n) that all IRE_OFFSUBNET ire 9802 * entries for the packet's destination and 9803 * flagged RTF_MULTIRT are currently resolved. 9804 * If some remain unresolved, we make a copy 9805 * of the current message. It will be used 9806 * to initiate additional route resolutions. 9807 */ 9808 multirt_need_resolve = 9809 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9810 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9811 "multirt_need_resolve %d, first_mp %p\n", 9812 (void *)ire, multirt_need_resolve, 9813 (void *)first_mp)); 9814 if (multirt_need_resolve) { 9815 copy_mp = copymsg(first_mp); 9816 if (copy_mp != NULL) { 9817 MULTIRT_DEBUG_TAG(copy_mp); 9818 } 9819 } 9820 } 9821 9822 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9823 ill->ill_name, (void *)ire, 9824 ill->ill_phyint->phyint_ifindex)); 9825 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9826 connp, caller, 9827 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 9828 ip6i_flags); 9829 ire_refrele(ire); 9830 if (need_decref) { 9831 CONN_DEC_REF(connp); 9832 connp = NULL; 9833 } 9834 9835 /* 9836 * Try to resolve another multiroute if 9837 * ire_multirt_need_resolve_v6() deemed it necessary. 9838 * copy_mp will be consumed (sent or freed) by 9839 * ip_newroute_[ipif_]v6(). 
9840 */ 9841 if (copy_mp != NULL) { 9842 if (mctl_present) { 9843 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9844 } else { 9845 ip6h = (ip6_t *)copy_mp->b_rptr; 9846 } 9847 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 9848 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 9849 zoneid); 9850 if (ipif == NULL) { 9851 ip1dbg(("ip_wput_v6: No ipif for " 9852 "multicast\n")); 9853 MULTIRT_DEBUG_UNTAG(copy_mp); 9854 freemsg(copy_mp); 9855 return; 9856 } 9857 ip_newroute_ipif_v6(q, copy_mp, ipif, 9858 ip6h->ip6_dst, unspec_src, zoneid); 9859 ipif_refrele(ipif); 9860 } else { 9861 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9862 &ip6h->ip6_src, ill, zoneid); 9863 } 9864 } 9865 if (ill != NULL) 9866 ill_refrele(ill); 9867 return; 9868 } 9869 if (need_decref) { 9870 CONN_DEC_REF(connp); 9871 connp = NULL; 9872 } 9873 9874 /* Update rptr if there was an ip6i_t header. */ 9875 if (ip6i != NULL) 9876 mp->b_rptr -= sizeof (ip6i_t); 9877 if (unspec_src || attach_if) { 9878 if (ip6i == NULL) { 9879 /* 9880 * Add ip6i_t header to carry unspec_src 9881 * or attach_if until the packet comes back in 9882 * ip_wput_v6. 9883 */ 9884 if (mctl_present) { 9885 first_mp->b_cont = 9886 ip_add_info_v6(mp, NULL, v6dstp); 9887 mp = first_mp->b_cont; 9888 if (mp == NULL) 9889 freeb(first_mp); 9890 } else { 9891 first_mp = mp = ip_add_info_v6(mp, NULL, 9892 v6dstp); 9893 } 9894 if (mp == NULL) { 9895 BUMP_MIB(mibptr, ipv6OutDiscards); 9896 if (ill != NULL) 9897 ill_refrele(ill); 9898 return; 9899 } 9900 ip6i = (ip6i_t *)mp->b_rptr; 9901 if ((mp->b_wptr - (uchar_t *)ip6i) == 9902 sizeof (ip6i_t)) { 9903 /* 9904 * ndp_resolver called from ip_newroute_v6 9905 * expects a pulled up message. 
9906 */ 9907 if (!pullupmsg(mp, -1)) { 9908 ip1dbg(("ip_wput_v6: pullupmsg" 9909 " failed\n")); 9910 BUMP_MIB(mibptr, ipv6OutDiscards); 9911 freemsg(first_mp); 9912 return; 9913 } 9914 ip6i = (ip6i_t *)mp->b_rptr; 9915 } 9916 ip6h = (ip6_t *)&ip6i[1]; 9917 v6dstp = &ip6h->ip6_dst; 9918 } 9919 if (unspec_src) 9920 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9921 if (attach_if) { 9922 /* 9923 * Bind to nofailover/BOUND_PIF overrides ifindex. 9924 */ 9925 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 9926 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 9927 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 9928 if (drop_if_delayed) { 9929 /* This is a multipathing probe packet */ 9930 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 9931 } 9932 } 9933 if (mctl_present) { 9934 ASSERT(io != NULL); 9935 io->ipsec_out_unspec_src = unspec_src; 9936 } 9937 } 9938 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 9939 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 9940 unspec_src, zoneid); 9941 } else { 9942 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 9943 zoneid); 9944 } 9945 if (ill != NULL) 9946 ill_refrele(ill); 9947 return; 9948 9949 notv6: 9950 /* 9951 * XXX implement a IPv4 and IPv6 packet counter per conn and 9952 * switch when ratio exceeds e.g. 
10:1 9953 */ 9954 if (q->q_next == NULL) { 9955 connp = Q_TO_CONN(q); 9956 9957 if (IS_TCP_CONN(connp)) { 9958 /* change conn_send for the tcp_v4_connections */ 9959 connp->conn_send = ip_output; 9960 } else if (connp->conn_ulp == IPPROTO_SCTP) { 9961 /* The 'q' is the default SCTP queue */ 9962 connp = (conn_t *)arg; 9963 } else { 9964 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 9965 } 9966 } 9967 BUMP_MIB(mibptr, ipv6OutIPv4); 9968 (void) ip_output(connp, first_mp, q, caller); 9969 if (ill != NULL) 9970 ill_refrele(ill); 9971 } 9972 /* * Thin wrapper around ip_output_v6(): derives the conn from the queue with * Q_TO_CONN(q) and passes mp down with the caller argument set to IP_WPUT. * In ip_output_v6() the IP_WPUT caller value is what permits a message to be * queued with putq() when q->q_first is set or the conn is draining. * NOTE(review): presumably installed as the IPv6 write-side put procedure; * confirm against the qinit tables elsewhere in the file. */ 9973 static void 9974 ip_wput_v6(queue_t *q, mblk_t *mp) 9975 { 9976 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 9977 } 9978 /* * Mark an IPSEC_OUT control block as attached to a specific interface: * sets ipsec_out_attach_if to B_TRUE and stores attach_index in * ipsec_out_ill_index. The caller must supply an ipsec_out_t whose * ipsec_out_type is IPSEC_OUT (ASSERTed, so only checked on DEBUG builds). * NOTE(review): attach_index is presumably a phyint_ifindex value, as that * is what ip_output_v6() stores in ipsec_out_ill_index; confirm at callers. */ 9979 static void 9980 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 9981 { 9982 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9983 io->ipsec_out_attach_if = B_TRUE; 9984 io->ipsec_out_ill_index = attach_index; 9985 } 9986 9987 /* 9988 * NULL send-to queue - packet is to be delivered locally. 9989 */ 9990 void 9991 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 9992 ire_t *ire, int fanout_flags) 9993 { 9994 uint32_t ports; 9995 mblk_t *mp = first_mp, *first_mp1; 9996 boolean_t mctl_present; 9997 uint8_t nexthdr; 9998 uint16_t hdr_length = IPV6_HDR_LEN; 9999 ipsec_out_t *io; 10000 mib2_ipv6IfStatsEntry_t *mibptr; 10001 ilm_t *ilm; 10002 10003 if (DB_TYPE(mp) == M_CTL) { 10004 io = (ipsec_out_t *)mp->b_rptr; 10005 if (!io->ipsec_out_secure) { 10006 mp = mp->b_cont; 10007 freeb(first_mp); 10008 first_mp = mp; 10009 mctl_present = B_FALSE; 10010 } else { 10011 mctl_present = B_TRUE; 10012 mp = first_mp->b_cont; 10013 ipsec_out_to_in(first_mp); 10014 } 10015 } else { 10016 mctl_present = B_FALSE; 10017 } 10018 10019 nexthdr = ip6h->ip6_nxt; 10020 mibptr = ill->ill_ip6_mib; 10021 10022 UPDATE_OB_PKT_COUNT(ire); 10023 ire->ire_last_used_time = lbolt; 10024 10025 /* 10026 * Remove reacability confirmation bit from version field 10027 * before looping back the packet. 
10028 */ 10029 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10030 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10031 } 10032 10033 switch (nexthdr) { 10034 case IPPROTO_TCP: 10035 if (DB_TYPE(mp) == M_DATA) { 10036 /* 10037 * M_DATA mblk, so init mblk (chain) for 10038 * no struio(). 10039 */ 10040 mblk_t *mp1 = mp; 10041 10042 do { 10043 mp1->b_datap->db_struioflag = 0; 10044 } while ((mp1 = mp1->b_cont) != NULL); 10045 } 10046 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10047 TCP_PORTS_OFFSET); 10048 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10049 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10050 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10051 hdr_length, mctl_present, ire->ire_zoneid); 10052 return; 10053 10054 case IPPROTO_UDP: 10055 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10056 UDP_PORTS_OFFSET); 10057 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10058 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10059 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10060 return; 10061 10062 case IPPROTO_SCTP: 10063 { 10064 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10065 10066 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10067 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10068 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10069 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10070 ire->ire_zoneid); 10071 return; 10072 } 10073 case IPPROTO_ICMPV6: { 10074 icmp6_t *icmp6; 10075 10076 /* check for full IPv6+ICMPv6 header */ 10077 if ((mp->b_wptr - mp->b_rptr) < 10078 (hdr_length + ICMP6_MINLEN)) { 10079 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10080 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10081 " failed\n")); 10082 BUMP_MIB(mibptr, ipv6OutDiscards); 10083 freemsg(first_mp); 10084 return; 10085 } 10086 ip6h = (ip6_t *)mp->b_rptr; 10087 } 10088 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10089 10090 /* Update output mib stats */ 10091 icmp_update_out_mib_v6(ill, icmp6); 10092 10093 /* Check variable for testing applications */ 10094 if 
(ipv6_drop_inbound_icmpv6) { 10095 freemsg(first_mp); 10096 return; 10097 } 10098 /* 10099 * Assume that there is always at least one conn for 10100 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10101 * where there is no conn. 10102 */ 10103 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10104 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10105 /* 10106 * In the multicast case, applications may have 10107 * joined the group from different zones, so we 10108 * need to deliver the packet to each of them. 10109 * Loop through the multicast memberships 10110 * structures (ilm) on the receive ill and send 10111 * a copy of the packet up each matching one. 10112 * However, we don't do this for multicasts sent 10113 * on the loopback interface (PHYI_LOOPBACK flag 10114 * set) as they must stay in the sender's zone. 10115 */ 10116 ILM_WALKER_HOLD(ill); 10117 for (ilm = ill->ill_ilm; ilm != NULL; 10118 ilm = ilm->ilm_next) { 10119 if (ilm->ilm_flags & ILM_DELETED) 10120 continue; 10121 if (!IN6_ARE_ADDR_EQUAL( 10122 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10123 continue; 10124 if ((fanout_flags & 10125 IP_FF_NO_MCAST_LOOP) && 10126 ilm->ilm_zoneid == ire->ire_zoneid) 10127 continue; 10128 if (!ipif_lookup_zoneid(ill, 10129 ilm->ilm_zoneid, IPIF_UP, NULL)) 10130 continue; 10131 10132 first_mp1 = ip_copymsg(first_mp); 10133 if (first_mp1 == NULL) 10134 continue; 10135 icmp_inbound_v6(q, first_mp1, ill, 10136 hdr_length, mctl_present, 10137 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10138 } 10139 ILM_WALKER_RELE(ill); 10140 } else { 10141 first_mp1 = ip_copymsg(first_mp); 10142 if (first_mp1 != NULL) 10143 icmp_inbound_v6(q, first_mp1, ill, 10144 hdr_length, mctl_present, 10145 IP6_NO_IPPOLICY, ire->ire_zoneid); 10146 } 10147 } 10148 /* FALLTHRU */ 10149 default: { 10150 /* 10151 * Handle protocols with which IPv6 is less intimate. 
10152 */ 10153 uint8_t *nexthdrp; 10154 uint_t nexthdr_offset; 10155 10156 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10157 10158 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10159 &hdr_length, &nexthdrp)) { 10160 /* Malformed packet */ 10161 BUMP_MIB(mibptr, ipv6OutDiscards); 10162 freemsg(first_mp); 10163 return; 10164 } 10165 nexthdr = *nexthdrp; 10166 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10167 10168 /* 10169 * Enable sending ICMP for "Unknown" nexthdr 10170 * case. i.e. where we did not FALLTHRU from 10171 * IPPROTO_ICMPV6 processing case above. 10172 */ 10173 if (nexthdr != IPPROTO_ICMPV6) 10174 fanout_flags |= IP_FF_SEND_ICMP; 10175 /* 10176 * Note: There can be more than one stream bound 10177 * to a particular protocol. When this is the case, 10178 * each one gets a copy of any incoming packets. 10179 */ 10180 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10181 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10182 mctl_present, ire->ire_zoneid); 10183 return; 10184 } 10185 } 10186 } 10187 10188 /* 10189 * Send packet using IRE. 10190 * Checksumming is controlled by cksum_request: 10191 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10192 * 1 => Skip TCP/UDP/SCTP checksum 10193 * Otherwise => checksum_request contains insert offset for checksum 10194 * 10195 * Assumes that the following set of headers appear in the first 10196 * mblk: 10197 * ip6_t 10198 * Any extension headers 10199 * TCP/UDP/SCTP header (if present) 10200 * The routine can handle an ICMPv6 header that is not in the first mblk. 10201 * 10202 * NOTE : This function does not ire_refrele the ire passed in as the 10203 * argument unlike ip_wput_ire where the REFRELE is done. 10204 * Refer to ip_wput_ire for more on this. 
10205 */ 10206 static void 10207 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10208 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10209 { 10210 ip6_t *ip6h; 10211 uint8_t nexthdr; 10212 uint16_t hdr_length; 10213 uint_t reachable = 0x0; 10214 ill_t *ill; 10215 mib2_ipv6IfStatsEntry_t *mibptr; 10216 mblk_t *first_mp; 10217 boolean_t mctl_present; 10218 ipsec_out_t *io; 10219 boolean_t conn_dontroute; /* conn value for multicast */ 10220 boolean_t conn_multicast_loop; /* conn value for multicast */ 10221 boolean_t multicast_forward; /* Should we forward ? */ 10222 int max_frag; 10223 zoneid_t zoneid; 10224 10225 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10226 ill = ire_to_ill(ire); 10227 first_mp = mp; 10228 multicast_forward = B_FALSE; 10229 10230 if (mp->b_datap->db_type != M_CTL) { 10231 ip6h = (ip6_t *)first_mp->b_rptr; 10232 } else { 10233 io = (ipsec_out_t *)first_mp->b_rptr; 10234 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10235 /* 10236 * Grab the zone id now because the M_CTL can be discarded by 10237 * ip_wput_ire_parse_ipsec_out() below. 10238 */ 10239 zoneid = io->ipsec_out_zoneid; 10240 ASSERT(zoneid != ALL_ZONES); 10241 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10242 /* 10243 * For the multicast case, ipsec_out carries conn_dontroute and 10244 * conn_multicast_loop as conn may not be available here. We 10245 * need this for multicast loopback and forwarding which is done 10246 * later in the code. 10247 */ 10248 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10249 conn_dontroute = io->ipsec_out_dontroute; 10250 conn_multicast_loop = io->ipsec_out_multicast_loop; 10251 /* 10252 * If conn_dontroute is not set or conn_multicast_loop 10253 * is set, we need to do forwarding/loopback. For 10254 * datagrams from ip_wput_multicast, conn_dontroute is 10255 * set to B_TRUE and conn_multicast_loop is set to 10256 * B_FALSE so that we neither do forwarding nor 10257 * loopback. 
10258 */ 10259 if (!conn_dontroute || conn_multicast_loop) 10260 multicast_forward = B_TRUE; 10261 } 10262 } 10263 10264 /* 10265 * If the sender didn't supply the hop limit and there is a default 10266 * hop limit associated with the output interface, we use that. 10267 * Interface specific hop limits as set via the SIOCSLIFLNKINFO 10268 * ioctl. 10269 */ 10270 if (!(flags & IP6I_HOPLIMIT) && ill->ill_max_hops != 0) 10271 ip6h->ip6_hops = ill->ill_max_hops; 10272 10273 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid) { 10274 /* 10275 * When a zone sends a packet to another zone, we try to deliver 10276 * the packet under the same conditions as if the destination 10277 * was a real node on the network. To do so, we look for a 10278 * matching route in the forwarding table. 10279 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10280 * ip_newroute_v6() does. 10281 */ 10282 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10283 NULL, NULL, zoneid, 0, (MATCH_IRE_RECURSIVE | 10284 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10285 if (src_ire != NULL && 10286 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10287 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10288 !unspec_src) { 10289 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10290 } 10291 ire_refrele(src_ire); 10292 } else { 10293 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10294 if (src_ire != NULL) { 10295 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10296 ire_refrele(src_ire); 10297 freemsg(first_mp); 10298 return; 10299 } 10300 ire_refrele(src_ire); 10301 } 10302 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10303 /* Failed */ 10304 freemsg(first_mp); 10305 return; 10306 } 10307 icmp_unreachable_v6(q, first_mp, 10308 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10309 return; 10310 } 10311 } 10312 10313 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10314 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10315 connp, unspec_src); 10316 if (mp == NULL) 
{ 10317 return; 10318 } 10319 } 10320 10321 first_mp = mp; 10322 if (mp->b_datap->db_type == M_CTL) { 10323 io = (ipsec_out_t *)mp->b_rptr; 10324 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10325 mp = mp->b_cont; 10326 mctl_present = B_TRUE; 10327 } else { 10328 mctl_present = B_FALSE; 10329 } 10330 10331 ip6h = (ip6_t *)mp->b_rptr; 10332 nexthdr = ip6h->ip6_nxt; 10333 mibptr = ill->ill_ip6_mib; 10334 10335 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10336 /* 10337 * The ire_src_addr_v6 always contains a useable source address 10338 * for the destination (based on source address selection rules 10339 * with respect to address scope as well as deprecated vs. 10340 * preferred addresses). 10341 */ 10342 ip6h->ip6_src = ire->ire_src_addr_v6; 10343 } 10344 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10345 if ((connp != NULL && connp->conn_multicast_loop) || 10346 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10347 ilm_t *ilm; 10348 10349 ILM_WALKER_HOLD(ill); 10350 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10351 ILM_WALKER_RELE(ill); 10352 if (ilm != NULL) { 10353 mblk_t *nmp; 10354 int fanout_flags = 0; 10355 10356 if (connp != NULL && 10357 !connp->conn_multicast_loop) { 10358 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10359 } 10360 ip1dbg(("ip_wput_ire_v6: " 10361 "Loopback multicast\n")); 10362 nmp = ip_copymsg(first_mp); 10363 if (nmp != NULL) { 10364 ip6_t *nip6h; 10365 10366 if (mctl_present) { 10367 nip6h = (ip6_t *) 10368 nmp->b_cont->b_rptr; 10369 } else { 10370 nip6h = (ip6_t *)nmp->b_rptr; 10371 } 10372 /* 10373 * Deliver locally and to every local 10374 * zone, except the sending zone when 10375 * IPV6_MULTICAST_LOOP is disabled. 
10376 */ 10377 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10378 ire, fanout_flags); 10379 } else { 10380 BUMP_MIB(mibptr, ipv6OutDiscards); 10381 ip1dbg(("ip_wput_ire_v6: " 10382 "copymsg failed\n")); 10383 } 10384 } 10385 } 10386 if (ip6h->ip6_hops == 0 || 10387 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10388 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10389 /* 10390 * Local multicast or just loopback on loopback 10391 * interface. 10392 */ 10393 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10394 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10395 freemsg(first_mp); 10396 return; 10397 } 10398 } 10399 10400 /* Fastpath */ 10401 switch (nexthdr) { 10402 case IPPROTO_TCP: 10403 case IPPROTO_UDP: 10404 case IPPROTO_ICMPV6: 10405 case IPPROTO_SCTP: 10406 hdr_length = IPV6_HDR_LEN; 10407 break; 10408 default: { 10409 uint8_t *nexthdrp; 10410 10411 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10412 &hdr_length, &nexthdrp)) { 10413 /* Malformed packet */ 10414 BUMP_MIB(mibptr, ipv6OutDiscards); 10415 freemsg(first_mp); 10416 return; 10417 } 10418 nexthdr = *nexthdrp; 10419 break; 10420 } 10421 } 10422 10423 if (ire->ire_stq != NULL) { 10424 uint32_t sum; 10425 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10426 ill_phyint->phyint_ifindex; 10427 10428 /* 10429 * non-NULL send-to queue - packet is to be sent 10430 * out an interface. 10431 */ 10432 10433 /* 10434 * Look for reachability confirmations from the transport. 10435 */ 10436 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10437 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10438 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10439 if (mctl_present) 10440 io->ipsec_out_reachable = B_TRUE; 10441 } 10442 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10443 uint16_t *up; 10444 uint16_t *insp; 10445 10446 /* 10447 * The packet header is processed once for all, even 10448 * in the multirouting case. 
We disable hardware 10449 * checksum if the packet is multirouted, as it will be 10450 * replicated via several interfaces, and not all of 10451 * them may have this capability. 10452 */ 10453 if (cksum_request == 1 && 10454 !(ire->ire_flags & RTF_MULTIRT)) { 10455 /* Skip the transport checksum */ 10456 goto cksum_done; 10457 } 10458 /* 10459 * Do user-configured raw checksum. 10460 * Compute checksum and insert at offset "cksum_request" 10461 */ 10462 10463 /* check for enough headers for checksum */ 10464 cksum_request += hdr_length; /* offset from rptr */ 10465 if ((mp->b_wptr - mp->b_rptr) < 10466 (cksum_request + sizeof (int16_t))) { 10467 if (!pullupmsg(mp, 10468 cksum_request + sizeof (int16_t))) { 10469 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10470 " failed\n")); 10471 BUMP_MIB(mibptr, ipv6OutDiscards); 10472 freemsg(first_mp); 10473 return; 10474 } 10475 ip6h = (ip6_t *)mp->b_rptr; 10476 } 10477 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10478 ASSERT(((uintptr_t)insp & 0x1) == 0); 10479 up = (uint16_t *)&ip6h->ip6_src; 10480 /* 10481 * icmp has placed length and routing 10482 * header adjustment in *insp. 10483 */ 10484 sum = htons(nexthdr) + 10485 up[0] + up[1] + up[2] + up[3] + 10486 up[4] + up[5] + up[6] + up[7] + 10487 up[8] + up[9] + up[10] + up[11] + 10488 up[12] + up[13] + up[14] + up[15]; 10489 sum = (sum & 0xffff) + (sum >> 16); 10490 *insp = IP_CSUM(mp, hdr_length, sum); 10491 } else if (nexthdr == IPPROTO_TCP) { 10492 uint16_t *up; 10493 10494 /* 10495 * Check for full IPv6 header + enough TCP header 10496 * to get at the checksum field. 10497 * XXX need hardware checksum support. 
10498 */ 10499 #define TCP_CSUM_OFFSET 16 10500 #define TCP_CSUM_SIZE 2 10501 if ((mp->b_wptr - mp->b_rptr) < 10502 (hdr_length + TCP_CSUM_OFFSET + TCP_CSUM_SIZE)) { 10503 if (!pullupmsg(mp, hdr_length + 10504 TCP_CSUM_OFFSET + TCP_CSUM_SIZE)) { 10505 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10506 " failed\n")); 10507 BUMP_MIB(mibptr, ipv6OutDiscards); 10508 freemsg(first_mp); 10509 return; 10510 } 10511 ip6h = (ip6_t *)mp->b_rptr; 10512 } 10513 10514 up = (uint16_t *)&ip6h->ip6_src; 10515 /* 10516 * Note: The TCP module has stored the length value 10517 * into the tcp checksum field, so we don't 10518 * need to explicitly sum it in here. 10519 */ 10520 if (hdr_length == IPV6_HDR_LEN) { 10521 /* src, dst, tcp consequtive */ 10522 up = (uint16_t *)(((uchar_t *)ip6h) + 10523 IPV6_HDR_LEN + TCP_CSUM_OFFSET); 10524 *up = IP_CSUM(mp, 10525 IPV6_HDR_LEN - 2 * sizeof (in6_addr_t), 10526 htons(IPPROTO_TCP)); 10527 } else { 10528 sum = htons(IPPROTO_TCP) + 10529 up[0] + up[1] + up[2] + up[3] + 10530 up[4] + up[5] + up[6] + up[7] + 10531 up[8] + up[9] + up[10] + up[11] + 10532 up[12] + up[13] + up[14] + up[15]; 10533 /* 10534 * Fold the initial sum. 
10535 */ 10536 sum = (sum & 0xffff) + (sum >> 16); 10537 up = (uint16_t *)(((uchar_t *)ip6h) + 10538 hdr_length + TCP_CSUM_OFFSET); 10539 *up = IP_CSUM(mp, hdr_length, sum); 10540 } 10541 #undef TCP_CSUM_OFFSET 10542 #undef TCP_CSUM_SIZE 10543 10544 } else if (nexthdr == IPPROTO_UDP) { 10545 uint16_t *up; 10546 10547 /* 10548 * check for full IPv6 header + enough UDP header 10549 * to get at the UDP checksum field 10550 */ 10551 #define UDP_CSUM_OFFSET 6 10552 #define UDP_CSUM_SIZE 2 10553 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10554 UDP_CSUM_OFFSET + UDP_CSUM_SIZE)) { 10555 if (!pullupmsg(mp, hdr_length + 10556 UDP_CSUM_OFFSET + UDP_CSUM_SIZE)) { 10557 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10558 " failed\n")); 10559 BUMP_MIB(mibptr, ipv6OutDiscards); 10560 freemsg(first_mp); 10561 return; 10562 } 10563 ip6h = (ip6_t *)mp->b_rptr; 10564 } 10565 up = (uint16_t *)&ip6h->ip6_src; 10566 /* 10567 * Note: The UDP module has stored the length value 10568 * into the udp checksum field, so we don't 10569 * need to explicitly sum it in here. 10570 */ 10571 if (hdr_length == IPV6_HDR_LEN) { 10572 /* src, dst, udp consequtive */ 10573 up = (uint16_t *)(((uchar_t *)ip6h) + 10574 IPV6_HDR_LEN + UDP_CSUM_OFFSET); 10575 *up = IP_CSUM(mp, 10576 IPV6_HDR_LEN - 2 * sizeof (in6_addr_t), 10577 htons(IPPROTO_UDP)); 10578 } else { 10579 sum = htons(IPPROTO_UDP) + 10580 up[0] + up[1] + up[2] + up[3] + 10581 up[4] + up[5] + up[6] + up[7] + 10582 up[8] + up[9] + up[10] + up[11] + 10583 up[12] + up[13] + up[14] + up[15]; 10584 sum = (sum & 0xffff) + (sum >> 16); 10585 up = (uint16_t *)(((uchar_t *)ip6h) + 10586 hdr_length + UDP_CSUM_OFFSET); 10587 *up = IP_CSUM(mp, hdr_length, sum); 10588 } 10589 10590 /* 10591 * According to RFC 2460, UDP in IPv6 shouldn't 10592 * appear with all zero checksum on the wire and 10593 * should be changed to 0xffff. 
10594 */ 10595 if (*up == 0) 10596 *up = 0xffff; 10597 #undef UDP_CSUM_OFFSET 10598 #undef UDP_CSUM_SIZE 10599 } else if (nexthdr == IPPROTO_ICMPV6) { 10600 uint16_t *up; 10601 icmp6_t *icmp6; 10602 10603 /* check for full IPv6+ICMPv6 header */ 10604 if ((mp->b_wptr - mp->b_rptr) < 10605 (hdr_length + ICMP6_MINLEN)) { 10606 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10607 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10608 " failed\n")); 10609 BUMP_MIB(mibptr, ipv6OutDiscards); 10610 freemsg(first_mp); 10611 return; 10612 } 10613 ip6h = (ip6_t *)mp->b_rptr; 10614 } 10615 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10616 up = (uint16_t *)&ip6h->ip6_src; 10617 /* 10618 * icmp has placed length and routing 10619 * header adjustment in icmp6_cksum. 10620 */ 10621 sum = htons(IPPROTO_ICMPV6) + 10622 up[0] + up[1] + up[2] + up[3] + 10623 up[4] + up[5] + up[6] + up[7] + 10624 up[8] + up[9] + up[10] + up[11] + 10625 up[12] + up[13] + up[14] + up[15]; 10626 sum = (sum & 0xffff) + (sum >> 16); 10627 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10628 /* Update output mib stats */ 10629 icmp_update_out_mib_v6(ill, icmp6); 10630 } else if (nexthdr == IPPROTO_SCTP) { 10631 sctp_hdr_t *sctph; 10632 10633 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10634 if (!pullupmsg(mp, hdr_length + 10635 sizeof (*sctph))) { 10636 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10637 " failed\n")); 10638 BUMP_MIB(ill->ill_ip6_mib, 10639 ipv6OutDiscards); 10640 freemsg(mp); 10641 return; 10642 } 10643 ip6h = (ip6_t *)mp->b_rptr; 10644 } 10645 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10646 sctph->sh_chksum = 0; 10647 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10648 } 10649 10650 cksum_done: 10651 /* 10652 * We force the insertion of a fragment header using the 10653 * IPH_FRAG_HDR flag in two cases: 10654 * - after reception of an ICMPv6 "packet too big" message 10655 * with a MTU < 1280 (cf. 
RFC 2460 section 5) 10656 * - for multirouted IPv6 packets, so that the receiver can 10657 * discard duplicates according to their fragment identifier 10658 * 10659 * Two flags modifed from the API can modify this behavior. 10660 * The first is IPV6_USE_MIN_MTU. With this API the user 10661 * can specify how to manage PMTUD for unicast and multicast. 10662 * 10663 * IPV6_DONTFRAG disallows fragmentation. 10664 */ 10665 max_frag = ire->ire_max_frag; 10666 switch (IP6I_USE_MIN_MTU_API(flags)) { 10667 case IPV6_USE_MIN_MTU_DEFAULT: 10668 case IPV6_USE_MIN_MTU_UNICAST: 10669 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10670 max_frag = IPV6_MIN_MTU; 10671 } 10672 break; 10673 10674 case IPV6_USE_MIN_MTU_NEVER: 10675 max_frag = IPV6_MIN_MTU; 10676 break; 10677 } 10678 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10679 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10680 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10681 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10682 max_frag, B_FALSE, B_TRUE); 10683 return; 10684 } 10685 10686 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10687 (mp->b_cont ? 
msgdsize(mp) : 10688 mp->b_wptr - (uchar_t *)ip6h)) { 10689 ip0dbg(("Packet length mismatch: %d, %ld\n", 10690 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10691 msgdsize(mp))); 10692 freemsg(first_mp); 10693 return; 10694 } 10695 /* Do IPSEC processing first */ 10696 if (mctl_present) { 10697 if (attach_index != 0) 10698 ipsec_out_attach_if(io, attach_index); 10699 ipsec_out_process(q, first_mp, ire, ill_index); 10700 return; 10701 } 10702 ASSERT(mp->b_prev == NULL); 10703 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10704 ntohs(ip6h->ip6_plen) + 10705 IPV6_HDR_LEN, max_frag)); 10706 ASSERT(mp == first_mp); 10707 /* Initiate IPPF processing */ 10708 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 10709 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10710 if (mp == NULL) { 10711 return; 10712 } 10713 } 10714 ip_wput_frag_v6(mp, ire, reachable, connp, 10715 caller, max_frag); 10716 return; 10717 } 10718 /* Do IPSEC processing first */ 10719 if (mctl_present) { 10720 int extra_len = ipsec_out_extra_length(first_mp); 10721 10722 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 10723 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 10724 /* 10725 * IPsec headers will push the packet over the 10726 * MTU limit. Issue an ICMPv6 Packet Too Big 10727 * message for this packet if the upper-layer 10728 * that issued this packet will be able to 10729 * react to the icmp_pkt2big_v6() that we'll 10730 * generate. 10731 */ 10732 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10733 max_frag, B_FALSE, B_TRUE); 10734 return; 10735 } 10736 if (attach_index != 0) 10737 ipsec_out_attach_if(io, attach_index); 10738 ipsec_out_process(q, first_mp, ire, ill_index); 10739 return; 10740 } 10741 /* 10742 * XXX multicast: add ip_mforward_v6() here. 10743 * Check conn_dontroute 10744 */ 10745 #ifdef lint 10746 /* 10747 * XXX The only purpose of this statement is to avoid lint 10748 * errors. See the above "XXX multicast". When that gets 10749 * fixed, remove this whole #ifdef lint section. 
10750 */ 10751 ip3dbg(("multicast forward is %s.\n", 10752 (multicast_forward ? "TRUE" : "FALSE"))); 10753 #endif 10754 10755 UPDATE_OB_PKT_COUNT(ire); 10756 ire->ire_last_used_time = lbolt; 10757 ASSERT(mp == first_mp); 10758 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 10759 } else { 10760 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 10761 } 10762 } 10763 10764 /* 10765 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 10766 * We have not optimized this in terms of number of mblks 10767 * allocated. For instance, for each fragment sent we always allocate a 10768 * mblk to hold the IPv6 header and fragment header. 10769 * 10770 * Assumes that all the extension headers are contained in the first mblk. 10771 * 10772 * The fragment header is inserted after an hop-by-hop options header 10773 * and after [an optional destinations header followed by] a routing header. 10774 * 10775 * NOTE : This function does not ire_refrele the ire passed in as 10776 * the argument. 10777 */ 10778 void 10779 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 10780 boolean_t caller, int max_frag) 10781 { 10782 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 10783 ip6_t *fip6h; 10784 mblk_t *hmp; 10785 mblk_t *hmp0; 10786 mblk_t *dmp; 10787 ip6_frag_t *fraghdr; 10788 size_t unfragmentable_len; 10789 size_t len; 10790 size_t mlen; 10791 size_t max_chunk; 10792 uint32_t ident; 10793 uint16_t off_flags; 10794 uint16_t offset = 0; 10795 ill_t *ill; 10796 uint8_t nexthdr; 10797 uint_t prev_nexthdr_offset; 10798 uint8_t *ptr; 10799 10800 ASSERT(ire->ire_type == IRE_CACHE); 10801 ill = (ill_t *)ire->ire_stq->q_ptr; 10802 10803 /* 10804 * Determine the length of the unfragmentable portion of this 10805 * datagram. This consists of the IPv6 header, a potential 10806 * hop-by-hop options header, a potential pre-routing-header 10807 * destination options header, and a potential routing header. 
/*
 * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
 * We have not optimized this in terms of number of mblks
 * allocated.  For instance, for each fragment sent we always allocate a
 * mblk to hold the IPv6 header and fragment header.
 *
 * Assumes that all the extension headers are contained in the first mblk.
 *
 * The fragment header is inserted after an hop-by-hop options header
 * and after [an optional destinations header followed by] a routing header.
 *
 *	mp		packet to fragment, starting at the IPv6 header;
 *			consumed on every path.
 *	ire		route to send on; must be an IRE_CACHE (ASSERTed).
 *			Its ire_ident supplies the fragment identification.
 *	reachable	flags passed to ip_xmit_v6() with the first fragment
 *			only (cleared afterwards).
 *	connp, caller	passed through to ip_xmit_v6().
 *	max_frag	upper bound on fragment size; the smaller of this
 *			and ire->ire_max_frag is used.
 *
 * NOTE : This function does not ire_refrele the ire passed in as
 * the argument.
 */
void
ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp,
    boolean_t caller, int max_frag)
{
	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
	ip6_t		*fip6h;
	mblk_t		*hmp;
	mblk_t		*hmp0;
	mblk_t		*dmp;
	ip6_frag_t	*fraghdr;
	size_t		unfragmentable_len;
	size_t		len;
	size_t		mlen;
	size_t		max_chunk;
	uint32_t	ident;
	uint16_t	off_flags;
	uint16_t	offset = 0;
	ill_t		*ill;
	uint8_t		nexthdr;
	uint_t		prev_nexthdr_offset;
	uint8_t		*ptr;

	ASSERT(ire->ire_type == IRE_CACHE);
	ill = (ill_t *)ire->ire_stq->q_ptr;

	/*
	 * Determine the length of the unfragmentable portion of this
	 * datagram.  This consists of the IPv6 header, a potential
	 * hop-by-hop options header, a potential pre-routing-header
	 * destination options header, and a potential routing header.
	 *
	 * prev_nexthdr_offset tracks the offset (from ip6h) of the
	 * "next header" byte that will be overwritten with
	 * IPPROTO_FRAGMENT in the copied header.
	 */
	nexthdr = ip6h->ip6_nxt;
	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
	ptr = (uint8_t *)&ip6h[1];

	if (nexthdr == IPPROTO_HOPOPTS) {
		ip6_hbh_t	*hbh_hdr;
		uint_t		hdr_len;

		hbh_hdr = (ip6_hbh_t *)ptr;
		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
		nexthdr = hbh_hdr->ip6h_nxt;
		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (nexthdr == IPPROTO_DSTOPTS) {
		ip6_dest_t	*dest_hdr;
		uint_t		hdr_len;

		dest_hdr = (ip6_dest_t *)ptr;
		/*
		 * A destination options header is unfragmentable only
		 * when a routing header follows it.
		 */
		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
			nexthdr = dest_hdr->ip6d_nxt;
			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
			    - (uint8_t *)ip6h;
			ptr += hdr_len;
		}
	}
	if (nexthdr == IPPROTO_ROUTING) {
		ip6_rthdr_t	*rthdr;
		uint_t		hdr_len;

		rthdr = (ip6_rthdr_t *)ptr;
		nexthdr = rthdr->ip6r_nxt;
		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
		    - (uint8_t *)ip6h;
		hdr_len = 8 * (rthdr->ip6r_len + 1);
		ptr += hdr_len;
	}
	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);

	/*
	 * Allocate an mblk with enough room for the link-layer
	 * header, the unfragmentable part of the datagram, and the
	 * fragment header.  This (or a copy) will be used as the
	 * first mblk for each fragment we send.
	 */
	hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra,
	    BPRI_HI);
	if (hmp == NULL) {
		BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails);
		freemsg(mp);
		return;
	}
	hmp->b_rptr += ip_wroff_extra;
	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);

	fip6h = (ip6_t *)hmp->b_rptr;
	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);

	bcopy(ip6h, fip6h, unfragmentable_len);
	/* splice the fragment header into the next-header chain */
	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;

	ident = atomic_add_32_nv(&ire->ire_ident, 1);

	fraghdr->ip6f_nxt = nexthdr;
	fraghdr->ip6f_reserved = 0;
	fraghdr->ip6f_offlg = htons(0);
	fraghdr->ip6f_ident = htonl(ident);

	/*
	 * len is the total length of the fragmentable data in this
	 * datagram.  For each fragment sent, we will decrement len
	 * by the amount of fragmentable data sent in that fragment
	 * until len reaches zero.
	 */
	len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN);

	/*
	 * Largest payload per fragment, rounded down to a multiple of 8.
	 * NOTE(review): nothing here checks that the MTU exceeds
	 * unfragmentable_len + sizeof (ip6_frag_t) by at least 8; if it did
	 * not, max_chunk would wrap or be 0 and the loop below would not
	 * make progress.  Presumably callers guarantee this - confirm.
	 */
	max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len -
	    sizeof (ip6_frag_t)) & ~7;

	/*
	 * Move read ptr past unfragmentable portion, we don't want this part
	 * of the data in our fragments.
	 */
	mp->b_rptr += unfragmentable_len;

	while (len != 0) {
		mlen = MIN(len, max_chunk);
		len -= mlen;
		if (len != 0) {
			/* Not last */
			hmp0 = copyb(hmp);
			if (hmp0 == NULL) {
				freeb(hmp);
				freemsg(mp);
				BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails);
				ip1dbg(("ip_wput_frag_v6: copyb failed\n"));
				return;
			}
			off_flags = IP6F_MORE_FRAG;
		} else {
			/* Last fragment: use the template header itself */
			hmp0 = hmp;
			hmp = NULL;
			off_flags = 0;
		}
		fip6h = (ip6_t *)(hmp0->b_rptr);
		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len);

		fip6h->ip6_plen = htons((uint16_t)(mlen +
		    unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t)));
		/*
		 * Note: Optimization alert.
		 * In IPv6 (and IPv4) protocol header, Fragment Offset
		 * ("offset") is 13 bits wide and in 8-octet units.
		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
		 * it occupies the most significant 13 bits.
		 * (least significant 13 bits in IPv4).
		 * We do not do any shifts here. Not shifting is same effect
		 * as taking offset value in octet units, dividing by 8 and
		 * then shifting 3 bits left to line it up in place in proper
		 * place protocol header.
		 */
		fraghdr->ip6f_offlg = htons(offset) | off_flags;

		if (!(dmp = ip_carve_mp(&mp, mlen))) {
			/* mp has already been freed by ip_carve_mp() */
			if (hmp != NULL)
				freeb(hmp);
			freeb(hmp0);
			ip1dbg(("ip_carve_mp: failed\n"));
			BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails);
			return;
		}
		hmp0->b_cont = dmp;
		/* Get the priority marking, if any */
		hmp0->b_band = dmp->b_band;
		UPDATE_OB_PKT_COUNT(ire);
		ire->ire_last_used_time = lbolt;
		ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp,
		    caller, NULL);
		reachable = 0;	/* No need to redo state machine in loop */
		BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates);
		offset += mlen;
	}
	BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs);
}
10932 */ 10933 fraghdr->ip6f_offlg = htons(offset) | off_flags; 10934 10935 if (!(dmp = ip_carve_mp(&mp, mlen))) { 10936 /* mp has already been freed by ip_carve_mp() */ 10937 if (hmp != NULL) 10938 freeb(hmp); 10939 freeb(hmp0); 10940 ip1dbg(("ip_carve_mp: failed\n")); 10941 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10942 return; 10943 } 10944 hmp0->b_cont = dmp; 10945 /* Get the priority marking, if any */ 10946 hmp0->b_band = dmp->b_band; 10947 UPDATE_OB_PKT_COUNT(ire); 10948 ire->ire_last_used_time = lbolt; 10949 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 10950 caller, NULL); 10951 reachable = 0; /* No need to redo state machine in loop */ 10952 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 10953 offset += mlen; 10954 } 10955 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 10956 } 10957 10958 /* 10959 * Determine if the ill and multicast aspects of that packets 10960 * "matches" the conn. 10961 */ 10962 boolean_t 10963 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 10964 zoneid_t zoneid) 10965 { 10966 ill_t *in_ill; 10967 boolean_t wantpacket = B_TRUE; 10968 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 10969 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 10970 10971 /* 10972 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 10973 * unicast and multicast reception to conn_incoming_ill. 10974 * conn_wantpacket_v6 is called both for unicast and 10975 * multicast. 10976 * 10977 * 1) The unicast copy of the packet can come anywhere in 10978 * the ill group if it is part of the group. Thus, we 10979 * need to check to see whether the ill group matches 10980 * if in_ill is part of a group. 10981 * 10982 * 2) ip_rput does not suppress duplicate multicast packets. 
10983 * If there are two interfaces in a ill group and we have 10984 * 2 applications (conns) joined a multicast group G on 10985 * both the interfaces, ilm_lookup_ill filter in ip_rput 10986 * will give us two packets because we join G on both the 10987 * interfaces rather than nominating just one interface 10988 * for receiving multicast like broadcast above. So, 10989 * we have to call ilg_lookup_ill to filter out duplicate 10990 * copies, if ill is part of a group, to supress duplicates. 10991 */ 10992 in_ill = connp->conn_incoming_ill; 10993 if (in_ill != NULL) { 10994 mutex_enter(&connp->conn_lock); 10995 in_ill = connp->conn_incoming_ill; 10996 mutex_enter(&ill->ill_lock); 10997 /* 10998 * No IPMP, and the packet did not arrive on conn_incoming_ill 10999 * OR, IPMP in use and the packet arrived on an IPMP group 11000 * different from the conn_incoming_ill's IPMP group. 11001 * Reject the packet. 11002 */ 11003 if ((in_ill->ill_group == NULL && in_ill != ill) || 11004 (in_ill->ill_group != NULL && 11005 in_ill->ill_group != ill->ill_group)) { 11006 wantpacket = B_FALSE; 11007 } 11008 mutex_exit(&ill->ill_lock); 11009 mutex_exit(&connp->conn_lock); 11010 if (!wantpacket) 11011 return (B_FALSE); 11012 } 11013 11014 if (connp->conn_multi_router) 11015 return (B_TRUE); 11016 11017 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11018 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11019 /* 11020 * Unicast case: we match the conn only if it's in the specified 11021 * zone. 11022 */ 11023 return (connp->conn_zoneid == zoneid); 11024 } 11025 11026 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11027 connp->conn_zoneid == zoneid) { 11028 /* 11029 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11030 * disabled, therefore we don't dispatch the multicast packet to 11031 * the sending zone. 
11032 */ 11033 return (B_FALSE); 11034 } 11035 11036 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11037 connp->conn_zoneid != zoneid) { 11038 /* 11039 * Multicast packet on the loopback interface: we only match 11040 * conns who joined the group in the specified zone. 11041 */ 11042 return (B_FALSE); 11043 } 11044 11045 mutex_enter(&connp->conn_lock); 11046 wantpacket = 11047 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11048 mutex_exit(&connp->conn_lock); 11049 11050 return (wantpacket); 11051 } 11052 11053 11054 /* 11055 * Transmit a packet and update any NUD state based on the flags 11056 * XXX need to "recover" any ip6i_t when doing putq! 11057 * 11058 * NOTE : This function does not ire_refrele the ire passed in as the 11059 * argument. 11060 */ 11061 void 11062 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11063 int caller, ipsec_out_t *io) 11064 { 11065 mblk_t *mp1; 11066 nce_t *nce = ire->ire_nce; 11067 ill_t *ill; 11068 uint64_t delta; 11069 ip6_t *ip6h; 11070 queue_t *stq = ire->ire_stq; 11071 ire_t *ire1 = NULL; 11072 ire_t *save_ire = ire; 11073 boolean_t multirt_send = B_FALSE; 11074 mblk_t *next_mp = NULL; 11075 11076 ip6h = (ip6_t *)mp->b_rptr; 11077 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11078 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11079 ASSERT(nce != NULL); 11080 ASSERT(mp->b_datap->db_type == M_DATA); 11081 ASSERT(stq != NULL); 11082 11083 ill = ire_to_ill(ire); 11084 if (!ill) { 11085 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11086 freemsg(mp); 11087 return; 11088 } 11089 11090 /* 11091 * If a packet is to be sent out an interface that is a 6to4 11092 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11093 * destination, must be checked to have a 6to4 prefix 11094 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11095 * address configured on the sending interface. 
Otherwise, 11096 * the packet was delivered to this interface in error and the 11097 * packet must be dropped. 11098 */ 11099 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11100 ipif_t *ipif = ill->ill_ipif; 11101 11102 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11103 &ip6h->ip6_dst)) { 11104 if (ip_debug > 2) { 11105 /* ip1dbg */ 11106 pr_addr_dbg("ip_xmit_v6: attempting to " 11107 "send 6to4 addressed IPv6 " 11108 "destination (%s) out the wrong " 11109 "interface.\n", AF_INET6, 11110 &ip6h->ip6_dst); 11111 } 11112 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11113 freemsg(mp); 11114 return; 11115 } 11116 } 11117 11118 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || canput(stq->q_next)) { 11119 uint32_t ill_index; 11120 11121 /* 11122 * In most cases, the emission loop below is entered only 11123 * once. Only in the case where the ire holds the 11124 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11125 * flagged ires in the bucket, and send the packet 11126 * through all crossed RTF_MULTIRT routes. 11127 */ 11128 if (ire->ire_flags & RTF_MULTIRT) { 11129 /* 11130 * Multirouting case. The bucket where ire is stored 11131 * probably holds other RTF_MULTIRT flagged ires 11132 * to the destination. In this call to ip_xmit_v6, 11133 * we attempt to send the packet through all 11134 * those ires. Thus, we first ensure that ire is the 11135 * first RTF_MULTIRT ire in the bucket, 11136 * before walking the ire list. 11137 */ 11138 ire_t *first_ire; 11139 irb_t *irb = ire->ire_bucket; 11140 ASSERT(irb != NULL); 11141 multirt_send = B_TRUE; 11142 11143 /* Make sure we do not omit any multiroute ire. 
*/ 11144 IRB_REFHOLD(irb); 11145 for (first_ire = irb->irb_ire; 11146 first_ire != NULL; 11147 first_ire = first_ire->ire_next) { 11148 if ((first_ire->ire_flags & RTF_MULTIRT) && 11149 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11150 &ire->ire_addr_v6)) && 11151 !(first_ire->ire_marks & 11152 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11153 break; 11154 } 11155 11156 if ((first_ire != NULL) && (first_ire != ire)) { 11157 IRE_REFHOLD(first_ire); 11158 /* ire will be released by the caller */ 11159 ire = first_ire; 11160 nce = ire->ire_nce; 11161 stq = ire->ire_stq; 11162 ill = ire_to_ill(ire); 11163 } 11164 IRB_REFRELE(irb); 11165 } else if (connp != NULL && IS_TCP_CONN(connp) && 11166 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11167 ILL_MDT_USABLE(ill)) { 11168 /* 11169 * This tcp connection was marked as MDT-capable, but 11170 * it has been turned off due changes in the interface. 11171 * Now that the interface support is back, turn it on 11172 * by notifying tcp. We don't directly modify tcp_mdt, 11173 * since we leave all the details to the tcp code that 11174 * knows better. 11175 */ 11176 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11177 11178 if (mdimp == NULL) { 11179 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11180 "connp %p (ENOMEM)\n", (void *)connp)); 11181 } else { 11182 CONN_INC_REF(connp); 11183 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11184 connp, SQTAG_TCP_INPUT_MCTL); 11185 } 11186 } 11187 11188 do { 11189 boolean_t qos_done = B_FALSE; 11190 11191 if (multirt_send) { 11192 irb_t *irb; 11193 /* 11194 * We are in a multiple send case, need to get 11195 * the next ire and make a duplicate of the 11196 * packet. ire1 holds here the next ire to 11197 * process in the bucket. If multirouting is 11198 * expected, any non-RTF_MULTIRT ire that has 11199 * the right destination address is ignored. 
11200 */ 11201 irb = ire->ire_bucket; 11202 ASSERT(irb != NULL); 11203 11204 IRB_REFHOLD(irb); 11205 for (ire1 = ire->ire_next; 11206 ire1 != NULL; 11207 ire1 = ire1->ire_next) { 11208 if (!(ire1->ire_flags & RTF_MULTIRT)) 11209 continue; 11210 if (!IN6_ARE_ADDR_EQUAL( 11211 &ire1->ire_addr_v6, 11212 &ire->ire_addr_v6)) 11213 continue; 11214 if (ire1->ire_marks & 11215 (IRE_MARK_CONDEMNED| 11216 IRE_MARK_HIDDEN)) 11217 continue; 11218 11219 /* Got one */ 11220 if (ire1 != save_ire) { 11221 IRE_REFHOLD(ire1); 11222 } 11223 break; 11224 } 11225 IRB_REFRELE(irb); 11226 11227 if (ire1 != NULL) { 11228 next_mp = copyb(mp); 11229 if ((next_mp == NULL) || 11230 ((mp->b_cont != NULL) && 11231 ((next_mp->b_cont = 11232 dupmsg(mp->b_cont)) == 11233 NULL))) { 11234 freemsg(next_mp); 11235 next_mp = NULL; 11236 ire_refrele(ire1); 11237 ire1 = NULL; 11238 } 11239 } 11240 11241 /* Last multiroute ire; don't loop anymore. */ 11242 if (ire1 == NULL) { 11243 multirt_send = B_FALSE; 11244 } 11245 } 11246 11247 ill_index = 11248 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11249 11250 /* 11251 * Check for fastpath, we need to hold nce_lock to 11252 * prevent fastpath update from chaining nce_fp_mp. 11253 */ 11254 mutex_enter(&nce->nce_lock); 11255 if ((mp1 = nce->nce_fp_mp) != NULL) { 11256 uint32_t hlen; 11257 uchar_t *rptr; 11258 11259 /* Initiate IPPF processing */ 11260 if (IP6_OUT_IPP(flags)) { 11261 /* 11262 * We have to release the nce lock since 11263 * IPPF components use 11264 * ill_lookup_on_ifindex(), 11265 * which takes the ill_g_lock and the 11266 * ill_lock locks. 
11267 */ 11268 mutex_exit(&nce->nce_lock); 11269 ip_process(IPP_LOCAL_OUT, &mp, 11270 ill_index); 11271 if (mp == NULL) { 11272 BUMP_MIB( 11273 ill->ill_ip6_mib, 11274 ipv6OutDiscards); 11275 if (next_mp != NULL) 11276 freemsg(next_mp); 11277 if (ire != save_ire) { 11278 ire_refrele(ire); 11279 } 11280 return; 11281 } 11282 mutex_enter(&nce->nce_lock); 11283 if ((mp1 = nce->nce_fp_mp) == NULL) { 11284 /* 11285 * Probably disappeared during 11286 * IPQoS processing. 11287 */ 11288 qos_done = B_TRUE; 11289 goto prepend_unitdata; 11290 } 11291 } 11292 hlen = MBLKL(mp1); 11293 rptr = mp->b_rptr - hlen; 11294 /* 11295 * make sure there is room for the fastpath 11296 * datalink header 11297 */ 11298 if (rptr < mp->b_datap->db_base) { 11299 mp1 = copyb(mp1); 11300 if (mp1 == NULL) { 11301 mutex_exit(&nce->nce_lock); 11302 BUMP_MIB(ill->ill_ip6_mib, 11303 ipv6OutDiscards); 11304 freemsg(mp); 11305 if (next_mp != NULL) 11306 freemsg(next_mp); 11307 if (ire != save_ire) { 11308 ire_refrele(ire); 11309 } 11310 return; 11311 } 11312 mp1->b_cont = mp; 11313 11314 /* Get the priority marking, if any */ 11315 mp1->b_band = mp->b_band; 11316 mp = mp1; 11317 } else { 11318 mp->b_rptr = rptr; 11319 /* 11320 * fastpath - pre-pend datalink 11321 * header 11322 */ 11323 bcopy(mp1->b_rptr, rptr, hlen); 11324 } 11325 11326 mutex_exit(&nce->nce_lock); 11327 11328 } else { 11329 prepend_unitdata: 11330 mutex_exit(&nce->nce_lock); 11331 mp1 = nce->nce_res_mp; 11332 if (mp1 == NULL) { 11333 ip1dbg(("ip_xmit_v6: No resolution " 11334 "block ire = %p\n", (void *)ire)); 11335 freemsg(mp); 11336 if (next_mp != NULL) 11337 freemsg(next_mp); 11338 if (ire != save_ire) { 11339 ire_refrele(ire); 11340 } 11341 return; 11342 } 11343 /* 11344 * Prepend the DL_UNITDATA_REQ. 
11345 */ 11346 mp1 = copyb(mp1); 11347 if (mp1 == NULL) { 11348 BUMP_MIB(ill->ill_ip6_mib, 11349 ipv6OutDiscards); 11350 freemsg(mp); 11351 if (next_mp != NULL) 11352 freemsg(next_mp); 11353 if (ire != save_ire) { 11354 ire_refrele(ire); 11355 } 11356 return; 11357 } 11358 mp1->b_cont = mp; 11359 mp = mp1; 11360 /* 11361 * Initiate IPPF processing, if it is 11362 * already done, bypass. 11363 */ 11364 if (!qos_done && IP6_OUT_IPP(flags)) { 11365 ip_process(IPP_LOCAL_OUT, &mp, 11366 ill_index); 11367 if (mp == NULL) { 11368 BUMP_MIB(ill->ill_ip6_mib, 11369 ipv6OutDiscards); 11370 if (next_mp != NULL) 11371 freemsg(next_mp); 11372 if (ire != save_ire) { 11373 ire_refrele(ire); 11374 } 11375 return; 11376 } 11377 } 11378 } 11379 11380 /* 11381 * Update ire counters; for save_ire, this has been 11382 * done by the caller. 11383 */ 11384 if (ire != save_ire) { 11385 UPDATE_OB_PKT_COUNT(ire); 11386 ire->ire_last_used_time = lbolt; 11387 } 11388 11389 /* 11390 * Send it down. XXX Do we want to flow control AH/ESP 11391 * packets that carry TCP payloads? We don't flow 11392 * control TCP packets, but we should also not 11393 * flow-control TCP packets that have been protected. 11394 * We don't have an easy way to find out if an AH/ESP 11395 * packet was originally TCP or not currently. 11396 */ 11397 if (io == NULL) { 11398 putnext(stq, mp); 11399 } else { 11400 /* 11401 * Safety Pup says: make sure this is 11402 * going to the right interface! 11403 */ 11404 if (io->ipsec_out_capab_ill_index != 11405 ill_index) { 11406 /* IPsec kstats: bump lose counter */ 11407 freemsg(mp1); 11408 } else { 11409 ipsec_hw_putnext(stq, mp); 11410 } 11411 } 11412 11413 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11414 if (ire != save_ire) { 11415 ire_refrele(ire); 11416 } 11417 if (multirt_send) { 11418 ASSERT(ire1 != NULL); 11419 /* 11420 * Proceed with the next RTF_MULTIRT 11421 * ire, also set up the send-to queue 11422 * accordingly. 
11423 */ 11424 ire = ire1; 11425 ire1 = NULL; 11426 stq = ire->ire_stq; 11427 nce = ire->ire_nce; 11428 ill = ire_to_ill(ire); 11429 mp = next_mp; 11430 next_mp = NULL; 11431 continue; 11432 } 11433 ASSERT(next_mp == NULL); 11434 ASSERT(ire1 == NULL); 11435 return; 11436 } 11437 11438 ASSERT(nce->nce_state != ND_INCOMPLETE); 11439 11440 /* 11441 * Check for upper layer advice 11442 */ 11443 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11444 /* 11445 * It should be o.k. to check the state without 11446 * a lock here, at most we lose an advice. 11447 */ 11448 nce->nce_last = TICK_TO_MSEC(lbolt64); 11449 if (nce->nce_state != ND_REACHABLE) { 11450 11451 mutex_enter(&nce->nce_lock); 11452 nce->nce_state = ND_REACHABLE; 11453 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 11454 mutex_exit(&nce->nce_lock); 11455 (void) untimeout(nce->nce_timeout_id); 11456 if (ip_debug > 2) { 11457 /* ip1dbg */ 11458 pr_addr_dbg("ip_xmit_v6: state" 11459 " for %s changed to" 11460 " REACHABLE\n", AF_INET6, 11461 &ire->ire_addr_v6); 11462 } 11463 } 11464 if (ire != save_ire) { 11465 ire_refrele(ire); 11466 } 11467 if (multirt_send) { 11468 ASSERT(ire1 != NULL); 11469 /* 11470 * Proceed with the next RTF_MULTIRT 11471 * ire, also set up the send-to queue 11472 * accordingly. 11473 */ 11474 ire = ire1; 11475 ire1 = NULL; 11476 stq = ire->ire_stq; 11477 nce = ire->ire_nce; 11478 ill = ire_to_ill(ire); 11479 mp = next_mp; 11480 next_mp = NULL; 11481 continue; 11482 } 11483 ASSERT(next_mp == NULL); 11484 ASSERT(ire1 == NULL); 11485 return; 11486 } 11487 11488 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 11489 ip1dbg(("ip_xmit_v6: delta = %" PRId64 11490 " ill_reachable_time = %d \n", delta, 11491 ill->ill_reachable_time)); 11492 if (delta > (uint64_t)ill->ill_reachable_time) { 11493 nce = ire->ire_nce; 11494 mutex_enter(&nce->nce_lock); 11495 switch (nce->nce_state) { 11496 case ND_REACHABLE: 11497 case ND_STALE: 11498 /* 11499 * ND_REACHABLE is identical to 11500 * ND_STALE in this specific case. 
If 11501 * reachable time has expired for this 11502 * neighbor (delta is greater than 11503 * reachable time), conceptually, the 11504 * neighbor cache is no longer in 11505 * REACHABLE state, but already in 11506 * STALE state. So the correct 11507 * transition here is to ND_DELAY. 11508 */ 11509 nce->nce_state = ND_DELAY; 11510 mutex_exit(&nce->nce_lock); 11511 NDP_RESTART_TIMER(nce, 11512 delay_first_probe_time); 11513 if (ip_debug > 3) { 11514 /* ip2dbg */ 11515 pr_addr_dbg("ip_xmit_v6: state" 11516 " for %s changed to" 11517 " DELAY\n", AF_INET6, 11518 &ire->ire_addr_v6); 11519 } 11520 break; 11521 case ND_DELAY: 11522 case ND_PROBE: 11523 mutex_exit(&nce->nce_lock); 11524 /* Timers have already started */ 11525 break; 11526 case ND_UNREACHABLE: 11527 /* 11528 * ndp timer has detected that this nce 11529 * is unreachable and initiated deleting 11530 * this nce and all its associated IREs. 11531 * This is a race where we found the 11532 * ire before it was deleted and have 11533 * just sent out a packet using this 11534 * unreachable nce. 11535 */ 11536 mutex_exit(&nce->nce_lock); 11537 break; 11538 default: 11539 ASSERT(0); 11540 } 11541 } 11542 11543 if (multirt_send) { 11544 ASSERT(ire1 != NULL); 11545 /* 11546 * Proceed with the next RTF_MULTIRT ire, 11547 * Also set up the send-to queue accordingly. 11548 */ 11549 if (ire != save_ire) { 11550 ire_refrele(ire); 11551 } 11552 ire = ire1; 11553 ire1 = NULL; 11554 stq = ire->ire_stq; 11555 nce = ire->ire_nce; 11556 ill = ire_to_ill(ire); 11557 mp = next_mp; 11558 next_mp = NULL; 11559 } 11560 } while (multirt_send); 11561 /* 11562 * In the multirouting case, release the last ire used for 11563 * emission. save_ire will be released by the caller. 11564 */ 11565 if (ire != save_ire) { 11566 ire_refrele(ire); 11567 } 11568 } else { 11569 /* 11570 * Queue packet if we have an conn to give back pressure. 
11571 * We can't queue packets intended for hardware acceleration 11572 * since we've tossed that state already. If the packet is 11573 * being fed back from ire_send_v6, we don't know the 11574 * position in the queue to enqueue the packet and we discard 11575 * the packet. 11576 */ 11577 if (ip_output_queue && (connp != NULL) && (io == NULL) && 11578 (caller != IRE_SEND)) { 11579 if (caller == IP_WSRV) { 11580 connp->conn_did_putbq = 1; 11581 (void) putbq(connp->conn_wq, mp); 11582 conn_drain_insert(connp); 11583 /* 11584 * called_from_wsrv implies we are 11585 * the service thread, and the 11586 * queue is already noenabled. 11587 * The check for canput and 11588 * the putbq is not atomic. 11589 * So we need to check again. 11590 */ 11591 if (canput(stq->q_next)) 11592 connp->conn_did_putbq = 0; 11593 } else { 11594 (void) putq(connp->conn_wq, mp); 11595 } 11596 return; 11597 } 11598 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11599 freemsg(mp); 11600 return; 11601 } 11602 } 11603 11604 /* 11605 * pr_addr_dbg function provides the needed buffer space to call 11606 * inet_ntop() function's 3rd argument. This function should be 11607 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 11608 * stack buffer space in it's own stack frame. This function uses 11609 * a buffer from it's own stack and prints the information. 11610 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 11611 * 11612 * Note: This function can call inet_ntop() once. 11613 */ 11614 void 11615 pr_addr_dbg(char *fmt1, int af, const void *addr) 11616 { 11617 char buf[INET6_ADDRSTRLEN]; 11618 11619 if (fmt1 == NULL) { 11620 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 11621 return; 11622 } 11623 11624 /* 11625 * This does not compare debug level and just prints 11626 * out. Thus it is the responsibility of the caller 11627 * to check the appropriate debug-level before calling 11628 * this function. 
11629 */ 11630 if (ip_debug > 0) { 11631 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 11632 } 11633 11634 11635 } 11636 11637 11638 /* 11639 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 11640 * if needed and extension headers) that will be needed based on the 11641 * ip6_pkt_t structure passed by the caller. 11642 * 11643 * The returned length does not include the length of the upper level 11644 * protocol (ULP) header. 11645 */ 11646 int 11647 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 11648 { 11649 int len; 11650 11651 len = IPV6_HDR_LEN; 11652 if (ipp->ipp_fields & IPPF_HAS_IP6I) 11653 len += sizeof (ip6i_t); 11654 if (ipp->ipp_fields & IPPF_HOPOPTS) { 11655 ASSERT(ipp->ipp_hopoptslen != 0); 11656 len += ipp->ipp_hopoptslen; 11657 } 11658 if (ipp->ipp_fields & IPPF_RTHDR) { 11659 ASSERT(ipp->ipp_rthdrlen != 0); 11660 len += ipp->ipp_rthdrlen; 11661 } 11662 /* 11663 * En-route destination options 11664 * Only do them if there's a routing header as well 11665 */ 11666 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 11667 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 11668 ASSERT(ipp->ipp_rtdstoptslen != 0); 11669 len += ipp->ipp_rtdstoptslen; 11670 } 11671 if (ipp->ipp_fields & IPPF_DSTOPTS) { 11672 ASSERT(ipp->ipp_dstoptslen != 0); 11673 len += ipp->ipp_dstoptslen; 11674 } 11675 return (len); 11676 } 11677 11678 /* 11679 * All-purpose routine to build a header chain of an IPv6 header 11680 * followed by any required extension headers and a proto header, 11681 * preceeded (where necessary) by an ip6i_t private header. 11682 * 11683 * The fields of the IPv6 header that are derived from the ip6_pkt_t 11684 * will be filled in appropriately. 11685 * Thus the caller must fill in the rest of the IPv6 header, such as 11686 * traffic class/flowid, source address (if not set here), hoplimit (if not 11687 * set here) and destination address. 11688 * 11689 * The extension headers and ip6i_t header will all be fully filled in. 
11690 */ 11691 void 11692 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 11693 ip6_pkt_t *ipp, uint8_t protocol) 11694 { 11695 uint8_t *nxthdr_ptr; 11696 uint8_t *cp; 11697 ip6i_t *ip6i; 11698 ip6_t *ip6h = (ip6_t *)ext_hdrs; 11699 11700 /* 11701 * If sending private ip6i_t header down (checksum info, nexthop, 11702 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 11703 * then fill it in. (The checksum info will be filled in by icmp). 11704 */ 11705 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 11706 ip6i = (ip6i_t *)ip6h; 11707 ip6h = (ip6_t *)&ip6i[1]; 11708 11709 ip6i->ip6i_flags = 0; 11710 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 11711 if (ipp->ipp_fields & IPPF_IFINDEX || 11712 ipp->ipp_fields & IPPF_SCOPE_ID) { 11713 ASSERT(ipp->ipp_ifindex != 0); 11714 ip6i->ip6i_flags |= IP6I_IFINDEX; 11715 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 11716 } 11717 if (ipp->ipp_fields & IPPF_ADDR) { 11718 /* 11719 * Enable per-packet source address verification if 11720 * IPV6_PKTINFO specified the source address. 11721 * ip6_src is set in the transport's _wput function. 11722 */ 11723 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 11724 &ipp->ipp_addr)); 11725 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 11726 } 11727 if (ipp->ipp_fields & IPPF_HOPLIMIT) { 11728 ip6i->ip6i_hops = ip6h->ip6_hops = ipp->ipp_hoplimit; 11729 /* 11730 * We need to set this flag so that IP doesn't 11731 * rewrite the IPv6 header's hoplimit with the 11732 * current default value. 
11733 */ 11734 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 11735 } 11736 if (ipp->ipp_fields & IPPF_NEXTHOP) { 11737 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 11738 &ipp->ipp_nexthop)); 11739 ip6i->ip6i_flags |= IP6I_NEXTHOP; 11740 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 11741 } 11742 /* 11743 * tell IP this is an ip6i_t private header 11744 */ 11745 ip6i->ip6i_nxt = IPPROTO_RAW; 11746 } 11747 /* Initialize IPv6 header */ 11748 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 11749 if (ipp->ipp_fields & IPPF_TCLASS) { 11750 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 11751 (ipp->ipp_tclass << 20); 11752 } 11753 if (ipp->ipp_fields & IPPF_ADDR) 11754 ip6h->ip6_src = ipp->ipp_addr; 11755 11756 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 11757 cp = (uint8_t *)&ip6h[1]; 11758 /* 11759 * Here's where we have to start stringing together 11760 * any extension headers in the right order: 11761 * Hop-by-hop, destination, routing, and final destination opts. 11762 */ 11763 if (ipp->ipp_fields & IPPF_HOPOPTS) { 11764 /* Hop-by-hop options */ 11765 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 11766 11767 *nxthdr_ptr = IPPROTO_HOPOPTS; 11768 nxthdr_ptr = &hbh->ip6h_nxt; 11769 11770 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 11771 cp += ipp->ipp_hopoptslen; 11772 } 11773 /* 11774 * En-route destination options 11775 * Only do them if there's a routing header as well 11776 */ 11777 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 11778 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 11779 ip6_dest_t *dst = (ip6_dest_t *)cp; 11780 11781 *nxthdr_ptr = IPPROTO_DSTOPTS; 11782 nxthdr_ptr = &dst->ip6d_nxt; 11783 11784 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 11785 cp += ipp->ipp_rtdstoptslen; 11786 } 11787 /* 11788 * Routing header next 11789 */ 11790 if (ipp->ipp_fields & IPPF_RTHDR) { 11791 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 11792 11793 *nxthdr_ptr = IPPROTO_ROUTING; 11794 nxthdr_ptr = &rt->ip6r_nxt; 11795 11796 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 11797 cp += ipp->ipp_rthdrlen; 11798 } 
11799 /* 11800 * Do ultimate destination options 11801 */ 11802 if (ipp->ipp_fields & IPPF_DSTOPTS) { 11803 ip6_dest_t *dest = (ip6_dest_t *)cp; 11804 11805 *nxthdr_ptr = IPPROTO_DSTOPTS; 11806 nxthdr_ptr = &dest->ip6d_nxt; 11807 11808 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 11809 cp += ipp->ipp_dstoptslen; 11810 } 11811 /* 11812 * Now set the last header pointer to the proto passed in 11813 */ 11814 *nxthdr_ptr = protocol; 11815 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 11816 } 11817 11818 /* 11819 * Return a pointer to the routing header extension header 11820 * in the IPv6 header(s) chain passed in. 11821 * If none found, return NULL 11822 * Assumes that all extension headers are in same mblk as the v6 header 11823 */ 11824 ip6_rthdr_t * 11825 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 11826 { 11827 ip6_dest_t *desthdr; 11828 ip6_frag_t *fraghdr; 11829 uint_t hdrlen; 11830 uint8_t nexthdr; 11831 uint8_t *ptr = (uint8_t *)&ip6h[1]; 11832 11833 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 11834 return ((ip6_rthdr_t *)ptr); 11835 11836 /* 11837 * The routing header will precede all extension headers 11838 * other than the hop-by-hop and destination options 11839 * extension headers, so if we see anything other than those, 11840 * we're done and didn't find it. 11841 * We could see a destination options header alone but no 11842 * routing header, in which case we'll return NULL as soon as 11843 * we see anything after that. 11844 * Hop-by-hop and destination option headers are identical, 11845 * so we can use either one we want as a template. 11846 */ 11847 nexthdr = ip6h->ip6_nxt; 11848 while (ptr < endptr) { 11849 /* Is there enough left for len + nexthdr? 
*/ 11850 if (ptr + MIN_EHDR_LEN > endptr) 11851 return (NULL); 11852 11853 switch (nexthdr) { 11854 case IPPROTO_HOPOPTS: 11855 case IPPROTO_DSTOPTS: 11856 /* Assumes the headers are identical for hbh and dst */ 11857 desthdr = (ip6_dest_t *)ptr; 11858 hdrlen = 8 * (desthdr->ip6d_len + 1); 11859 nexthdr = desthdr->ip6d_nxt; 11860 break; 11861 11862 case IPPROTO_ROUTING: 11863 return ((ip6_rthdr_t *)ptr); 11864 11865 case IPPROTO_FRAGMENT: 11866 fraghdr = (ip6_frag_t *)ptr; 11867 hdrlen = sizeof (ip6_frag_t); 11868 nexthdr = fraghdr->ip6f_nxt; 11869 break; 11870 11871 default: 11872 return (NULL); 11873 } 11874 ptr += hdrlen; 11875 } 11876 return (NULL); 11877 } 11878 11879 /* 11880 * Called for source-routed packets originating on this node. 11881 * Manipulates the original routing header by moving every entry up 11882 * one slot, placing the first entry in the v6 header's v6_dst field, 11883 * and placing the ultimate destination in the routing header's last 11884 * slot. 11885 * 11886 * Returns the checksum diference between the ultimate destination 11887 * (last hop in the routing header when the packet is sent) and 11888 * the first hop (ip6_dst when the packet is sent) 11889 */ 11890 uint32_t 11891 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 11892 { 11893 uint_t numaddr; 11894 uint_t i; 11895 in6_addr_t *addrptr; 11896 in6_addr_t tmp; 11897 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 11898 uint32_t cksm; 11899 uint32_t addrsum = 0; 11900 uint16_t *ptr; 11901 11902 /* 11903 * Perform any processing needed for source routing. 11904 * We know that all extension headers will be in the same mblk 11905 * as the IPv6 header. 11906 */ 11907 11908 /* 11909 * If no segments left in header, or the header length field is zero, 11910 * don't move hop addresses around; 11911 * Checksum difference is zero. 
11912 */ 11913 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 11914 return (0); 11915 11916 ptr = (uint16_t *)&ip6h->ip6_dst; 11917 cksm = 0; 11918 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 11919 cksm += ptr[i]; 11920 } 11921 cksm = (cksm & 0xFFFF) + (cksm >> 16); 11922 11923 /* 11924 * Here's where the fun begins - we have to 11925 * move all addresses up one spot, take the 11926 * first hop and make it our first ip6_dst, 11927 * and place the ultimate destination in the 11928 * newly-opened last slot. 11929 */ 11930 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 11931 numaddr = rthdr->ip6r0_len / 2; 11932 tmp = *addrptr; 11933 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 11934 *addrptr = addrptr[1]; 11935 } 11936 *addrptr = ip6h->ip6_dst; 11937 ip6h->ip6_dst = tmp; 11938 11939 /* 11940 * From the checksummed ultimate destination subtract the checksummed 11941 * current ip6_dst (the first hop address). Return that number. 11942 * (In the v4 case, the second part of this is done in each routine 11943 * that calls ip_massage_options(). We do it all in this one place 11944 * for v6). 11945 */ 11946 ptr = (uint16_t *)&ip6h->ip6_dst; 11947 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 11948 addrsum += ptr[i]; 11949 } 11950 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 11951 if ((int)cksm < 0) 11952 cksm--; 11953 cksm = (cksm & 0xFFFF) + (cksm >> 16); 11954 11955 return (cksm); 11956 } 11957 11958 /* 11959 * See if the upper-level protocol indicated by 'proto' will be able 11960 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 11961 * ICMP6_PACKET_TOO_BIG (IPv6). 11962 */ 11963 static boolean_t 11964 ip_ulp_cando_pkt2big(int proto) 11965 { 11966 /* 11967 * For now, only TCP can handle this. 11968 * Tunnels may be able to also, but since tun isn't working over 11969 * IPv6 yet, don't worry about it for now. 
11970 */ 11971 return (proto == IPPROTO_TCP); 11972 } 11973 11974 11975 /* 11976 * Propagate a multicast group membership operation (join/leave) (*fn) on 11977 * all interfaces crossed by the related multirt routes. 11978 * The call is considered successful if the operation succeeds 11979 * on at least one interface. 11980 * The function is called if the destination address in the packet to send 11981 * is multirouted. 11982 */ 11983 int 11984 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 11985 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 11986 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 11987 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 11988 { 11989 ire_t *ire_gw; 11990 irb_t *irb; 11991 int index, error = 0; 11992 opt_restart_t *or; 11993 11994 irb = ire->ire_bucket; 11995 ASSERT(irb != NULL); 11996 11997 ASSERT(DB_TYPE(first_mp) == M_CTL); 11998 or = (opt_restart_t *)first_mp->b_rptr; 11999 12000 IRB_REFHOLD(irb); 12001 for (; ire != NULL; ire = ire->ire_next) { 12002 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12003 continue; 12004 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12005 continue; 12006 12007 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12008 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, 12009 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12010 /* No resolver exists for the gateway; skip this ire. */ 12011 if (ire_gw == NULL) 12012 continue; 12013 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12014 /* 12015 * A resolver exists: we can get the interface on which we have 12016 * to apply the operation. 
12017 */ 12018 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12019 first_mp); 12020 if (error == 0) 12021 or->or_private = CGTP_MCAST_SUCCESS; 12022 12023 if (ip_debug > 0) { 12024 ulong_t off; 12025 char *ksym; 12026 12027 ksym = kobj_getsymname((uintptr_t)fn, &off); 12028 ip2dbg(("ip_multirt_apply_membership_v6: " 12029 "called %s, multirt group 0x%08x via itf 0x%08x, " 12030 "error %d [success %u]\n", 12031 ksym ? ksym : "?", 12032 ntohl(V4_PART_OF_V6((*v6grp))), 12033 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12034 error, or->or_private)); 12035 } 12036 12037 ire_refrele(ire_gw); 12038 if (error == EINPROGRESS) { 12039 IRB_REFRELE(irb); 12040 return (error); 12041 } 12042 } 12043 IRB_REFRELE(irb); 12044 /* 12045 * Consider the call as successful if we succeeded on at least 12046 * one interface. Otherwise, return the last encountered error. 12047 */ 12048 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12049 } 12050 12051 void 12052 ip6_kstat_init(void) 12053 { 12054 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12055 "net", KSTAT_TYPE_NAMED, 12056 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12057 KSTAT_FLAG_VIRTUAL)) != NULL) { 12058 ip6_kstat->ks_data = &ip6_statistics; 12059 kstat_install(ip6_kstat); 12060 } 12061 } 12062 12063 /* 12064 * The following two functions set and get the value for the 12065 * IPV6_SRC_PREFERENCES socket option. 12066 */ 12067 int 12068 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12069 { 12070 /* 12071 * We only support preferences that are covered by 12072 * IPV6_PREFER_SRC_MASK. 12073 */ 12074 if (prefs & ~IPV6_PREFER_SRC_MASK) 12075 return (EINVAL); 12076 12077 /* 12078 * Look for conflicting preferences or default preferences. If 12079 * both bits of a related pair are clear, the application wants the 12080 * system's default value for that pair. Both bits in a pair can't 12081 * be set. 
12082 */ 12083 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12084 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12085 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12086 IPV6_PREFER_SRC_MIPMASK) { 12087 return (EINVAL); 12088 } 12089 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12090 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12091 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12092 IPV6_PREFER_SRC_TMPMASK) { 12093 return (EINVAL); 12094 } 12095 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12096 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12097 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12098 IPV6_PREFER_SRC_CGAMASK) { 12099 return (EINVAL); 12100 } 12101 12102 connp->conn_src_preferences = prefs; 12103 return (0); 12104 } 12105 12106 size_t 12107 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12108 { 12109 *val = connp->conn_src_preferences; 12110 return (sizeof (connp->conn_src_preferences)); 12111 } 12112 12113 int 12114 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12115 { 12116 ill_t *ill; 12117 ire_t *ire; 12118 int error; 12119 12120 /* 12121 * Verify the source address and ifindex. Privileged users can use 12122 * any source address. For ancillary data the source address is 12123 * checked in ip_wput_v6. 12124 */ 12125 if (pkti->ipi6_ifindex != 0) { 12126 ASSERT(connp != NULL); 12127 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12128 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12129 if (ill == NULL) { 12130 /* 12131 * We just want to know if the interface exists, we 12132 * don't really care about the ill pointer itself. 
12133 */ 12134 if (error != EINPROGRESS) 12135 return (error); 12136 error = 0; /* Ensure we don't use it below */ 12137 } else { 12138 ill_refrele(ill); 12139 } 12140 } 12141 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12142 secpolicy_net_rawaccess(cr) != 0) { 12143 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12144 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12145 connp->conn_zoneid, MATCH_IRE_TYPE); 12146 if (ire != NULL) 12147 ire_refrele(ire); 12148 else 12149 return (ENXIO); 12150 } 12151 return (0); 12152 } 12153 12154 /* 12155 * Get the size of the IP options (including the IP headers size) 12156 * without including the AH header's size. If till_ah is B_FALSE, 12157 * and if AH header is present, dest options beyond AH header will 12158 * also be included in the returned size. 12159 */ 12160 int 12161 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12162 { 12163 ip6_t *ip6h; 12164 uint8_t nexthdr; 12165 uint8_t *whereptr; 12166 ip6_hbh_t *hbhhdr; 12167 ip6_dest_t *dsthdr; 12168 ip6_rthdr_t *rthdr; 12169 int ehdrlen; 12170 int size; 12171 ah_t *ah; 12172 12173 ip6h = (ip6_t *)mp->b_rptr; 12174 size = IPV6_HDR_LEN; 12175 nexthdr = ip6h->ip6_nxt; 12176 whereptr = (uint8_t *)&ip6h[1]; 12177 for (;;) { 12178 /* Assume IP has already stripped it */ 12179 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12180 switch (nexthdr) { 12181 case IPPROTO_HOPOPTS: 12182 hbhhdr = (ip6_hbh_t *)whereptr; 12183 nexthdr = hbhhdr->ip6h_nxt; 12184 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12185 break; 12186 case IPPROTO_DSTOPTS: 12187 dsthdr = (ip6_dest_t *)whereptr; 12188 nexthdr = dsthdr->ip6d_nxt; 12189 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12190 break; 12191 case IPPROTO_ROUTING: 12192 rthdr = (ip6_rthdr_t *)whereptr; 12193 nexthdr = rthdr->ip6r_nxt; 12194 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12195 break; 12196 default : 12197 if (till_ah) { 12198 ASSERT(nexthdr == IPPROTO_AH); 12199 return (size); 12200 } 12201 /* 12202 * If we don't have a AH header 
to traverse, 12203 * return now. This happens normally for 12204 * outbound datagrams where we have not inserted 12205 * the AH header. 12206 */ 12207 if (nexthdr != IPPROTO_AH) { 12208 return (size); 12209 } 12210 12211 /* 12212 * We don't include the AH header's size 12213 * to be symmetrical with other cases where 12214 * we either don't have a AH header (outbound) 12215 * or peek into the AH header yet (inbound and 12216 * not pulled up yet). 12217 */ 12218 ah = (ah_t *)whereptr; 12219 nexthdr = ah->ah_nexthdr; 12220 ehdrlen = (ah->ah_length << 2) + 8; 12221 12222 if (nexthdr == IPPROTO_DSTOPTS) { 12223 if (whereptr + ehdrlen >= mp->b_wptr) { 12224 /* 12225 * The destination options header 12226 * is not part of the first mblk. 12227 */ 12228 whereptr = mp->b_cont->b_rptr; 12229 } else { 12230 whereptr += ehdrlen; 12231 } 12232 12233 dsthdr = (ip6_dest_t *)whereptr; 12234 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12235 size += ehdrlen; 12236 } 12237 return (size); 12238 } 12239 whereptr += ehdrlen; 12240 size += ehdrlen; 12241 } 12242 } 12243