1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 1990 Mentat Inc. 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/dlpi.h> 35 #include <sys/stropts.h> 36 #include <sys/sysmacros.h> 37 #include <sys/strsun.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/kobj.h> 48 #include <sys/zone.h> 49 50 #include <sys/kmem.h> 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <sys/vtrace.h> 55 #include <sys/isa_defs.h> 56 #include <sys/atomic.h> 57 /* EXPORT DELETE START */ 58 #include <sys/iphada.h> 59 /* EXPORT DELETE END */ 60 #include <sys/policy.h> 61 #include <net/if.h> 62 #include <net/if_arp.h> 63 #include <net/route.h> 64 #include <net/if_dl.h> 65 #include <sys/sockio.h> 66 #include <netinet/in.h> 67 #include <netinet/ip6.h> 68 #include <netinet/icmp6.h> 69 #include <netinet/sctp.h> 70 71 #include <inet/common.h> 72 #include <inet/mi.h> 73 #include <inet/mib2.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 #include <inet/snmpcom.h> 77 78 #include <inet/ip.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/optcom.h> 89 #include <inet/ip_ndp.h> 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 #include <inet/sadb.h> 93 #include <inet/ipsec_impl.h> 94 #include <inet/tun.h> 95 #include <inet/sctp_ip.h> 96 #include <sys/multidata.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 extern squeue_func_t ip_input_proc; 104 105 /* 106 * IP statistics. 
107 */ 108 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 109 110 typedef struct ip6_stat { 111 kstat_named_t ip6_udp_fast_path; 112 kstat_named_t ip6_udp_slow_path; 113 kstat_named_t ip6_udp_fannorm; 114 kstat_named_t ip6_udp_fanmb; 115 } ip6_stat_t; 116 117 static ip6_stat_t ip6_statistics = { 118 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 119 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 120 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 121 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 122 }; 123 124 static kstat_t *ip6_kstat; 125 126 /* 127 * Naming conventions: 128 * These rules should be judiciously applied 129 * if there is a need to identify something as IPv6 versus IPv4 130 * IPv6 functions will end with _v6 in the ip module. 131 * IPv6 functions will end with _ipv6 in the transport modules. 132 * IPv6 macros: 133 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 134 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 135 * And then there are ..V4_PART_OF_V6. 136 * The intent is that macros in the ip module end with _V6. 137 * IPv6 global variables will start with ipv6_ 138 * IPv6 structures will start with ipv6 139 * IPv6 defined constants should start with IPV6_ 140 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 141 */ 142 143 /* 144 * IPv6 mibs when the interface (ill) is not known. 145 * When the ill is known the per-interface mib in the ill is used. 
146 */ 147 mib2_ipv6IfStatsEntry_t ip6_mib; 148 mib2_ipv6IfIcmpEntry_t icmp6_mib; 149 150 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 151 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 152 153 const in6_addr_t ipv6_all_ones = 154 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 155 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 156 157 #ifdef _BIG_ENDIAN 158 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 159 #else /* _BIG_ENDIAN */ 160 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 161 #endif /* _BIG_ENDIAN */ 162 163 #ifdef _BIG_ENDIAN 164 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 165 #else /* _BIG_ENDIAN */ 166 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 167 #endif /* _BIG_ENDIAN */ 168 169 #ifdef _BIG_ENDIAN 170 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 171 #else /* _BIG_ENDIAN */ 172 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 173 #endif /* _BIG_ENDIAN */ 174 175 #ifdef _BIG_ENDIAN 176 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 177 #else /* _BIG_ENDIAN */ 178 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 179 #endif /* _BIG_ENDIAN */ 180 181 #ifdef _BIG_ENDIAN 182 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 183 #else /* _BIG_ENDIAN */ 184 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 185 #endif /* _BIG_ENDIAN */ 186 187 #ifdef _BIG_ENDIAN 188 const in6_addr_t ipv6_solicited_node_mcast = 189 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 190 #else /* _BIG_ENDIAN */ 191 const in6_addr_t ipv6_solicited_node_mcast = 192 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 193 #endif /* _BIG_ENDIAN */ 194 195 /* 196 * Used by icmp_send_redirect_v6 for picking random src. 
197 */ 198 uint_t icmp_redirect_v6_src_index; 199 200 /* Leave room for ip_newroute to tack on the src and target addresses */ 201 #define OK_RESOLVER_MP_V6(mp) \ 202 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 203 204 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 205 boolean_t, zoneid_t); 206 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 207 const in6_addr_t *, boolean_t); 208 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 209 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 210 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 211 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 212 boolean_t, boolean_t, boolean_t, boolean_t); 213 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 214 iulp_t *); 215 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 216 uint16_t, boolean_t, boolean_t, boolean_t); 217 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 218 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 219 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 220 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 221 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 222 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 223 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 224 uint8_t *, uint_t, uint8_t); 225 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 226 ip6_frag_t *, uint_t, uint_t *); 227 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 228 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 229 conn_t *, int, int, int); 230 static boolean_t ip_ulp_cando_pkt2big(int); 231 232 static void ip_rput_v6(queue_t *, mblk_t *); 233 static void ip_wput_v6(queue_t *, mblk_t *); 234 235 /* 236 * A template for an IPv6 AR_ENTRY_QUERY 237 */ 238 static areq_t ipv6_areq_template = { 239 AR_ENTRY_QUERY, /* cmd */ 240 sizeof 
(areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 241 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 242 IP6_DL_SAP, /* protocol, from arps perspective */ 243 sizeof (areq_t), /* target addr offset */ 244 IPV6_ADDR_LEN, /* target addr_length */ 245 0, /* flags */ 246 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 247 IPV6_ADDR_LEN, /* sender addr length */ 248 6, /* xmit_count */ 249 1000, /* (re)xmit_interval in milliseconds */ 250 4 /* max # of requests to buffer */ 251 /* anything else filled in by the code */ 252 }; 253 254 struct qinit rinit_ipv6 = { 255 (pfi_t)ip_rput_v6, 256 NULL, 257 ip_open, 258 ip_close, 259 NULL, 260 &ip_mod_info 261 }; 262 263 struct qinit winit_ipv6 = { 264 (pfi_t)ip_wput_v6, 265 (pfi_t)ip_wsrv, 266 ip_open, 267 ip_close, 268 NULL, 269 &ip_mod_info 270 }; 271 272 /* 273 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 274 * The message has already been checksummed and if needed, 275 * a copy has been made to be sent any interested ICMP client (conn) 276 * Note that this is different than icmp_inbound() which does the fanout 277 * to conn's as well as local processing of the ICMP packets. 278 * 279 * All error messages are passed to the matching transport stream. 280 * 281 * Zones notes: 282 * The packet is only processed in the context of the specified zone: typically 283 * only this zone will reply to an echo request. This means that the caller must 284 * call icmp_inbound_v6() for each relevant zone. 
285 */ 286 static void 287 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 288 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 289 { 290 icmp6_t *icmp6; 291 ip6_t *ip6h; 292 boolean_t interested; 293 ip6i_t *ip6i; 294 in6_addr_t origsrc; 295 ire_t *ire; 296 mblk_t *first_mp; 297 ipsec_in_t *ii; 298 299 ASSERT(ill != NULL); 300 first_mp = mp; 301 if (mctl_present) { 302 mp = first_mp->b_cont; 303 ASSERT(mp != NULL); 304 305 ii = (ipsec_in_t *)first_mp->b_rptr; 306 ASSERT(ii->ipsec_in_type == IPSEC_IN); 307 } 308 309 ip6h = (ip6_t *)mp->b_rptr; 310 311 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 312 313 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 314 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 315 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 316 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 317 freemsg(first_mp); 318 return; 319 } 320 ip6h = (ip6_t *)mp->b_rptr; 321 } 322 if (icmp_accept_clear_messages == 0) { 323 first_mp = ipsec_check_global_policy(first_mp, NULL, 324 NULL, ip6h, mctl_present); 325 if (first_mp == NULL) 326 return; 327 } 328 329 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 330 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 331 icmp6->icmp6_code)); 332 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 333 334 /* Initiate IPPF processing here */ 335 if (IP6_IN_IPP(flags)) { 336 337 /* 338 * If the ifindex changes due to SIOCSLIFINDEX 339 * packet may return to IP on the wrong ill. 
340 */ 341 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 342 if (mp == NULL) { 343 if (mctl_present) { 344 freeb(first_mp); 345 } 346 return; 347 } 348 } 349 350 switch (icmp6->icmp6_type) { 351 case ICMP6_DST_UNREACH: 352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 353 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 354 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 355 break; 356 357 case ICMP6_TIME_EXCEEDED: 358 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 359 break; 360 361 case ICMP6_PARAM_PROB: 362 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 363 break; 364 365 case ICMP6_PACKET_TOO_BIG: 366 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 367 zoneid); 368 return; 369 case ICMP6_ECHO_REQUEST: 370 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 371 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 372 !ipv6_resp_echo_mcast) 373 break; 374 375 /* 376 * We must have exclusive use of the mblk to convert it to 377 * a response. 378 * If not, we copy it. 379 */ 380 if (mp->b_datap->db_ref > 1) { 381 mblk_t *mp1; 382 383 mp1 = copymsg(mp); 384 freemsg(mp); 385 if (mp1 == NULL) { 386 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 387 if (mctl_present) 388 freeb(first_mp); 389 return; 390 } 391 mp = mp1; 392 ip6h = (ip6_t *)mp->b_rptr; 393 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 394 if (mctl_present) 395 first_mp->b_cont = mp; 396 else 397 first_mp = mp; 398 } 399 400 /* 401 * Turn the echo into an echo reply. 402 * Remove any extension headers (do not reverse a source route) 403 * and clear the flow id (keep traffic class for now). 
404 */ 405 if (hdr_length != IPV6_HDR_LEN) { 406 int i; 407 408 for (i = 0; i < IPV6_HDR_LEN; i++) 409 mp->b_rptr[hdr_length - i - 1] = 410 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 411 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 412 ip6h = (ip6_t *)mp->b_rptr; 413 ip6h->ip6_nxt = IPPROTO_ICMPV6; 414 hdr_length = IPV6_HDR_LEN; 415 } 416 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 417 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 418 419 ip6h->ip6_plen = 420 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 421 origsrc = ip6h->ip6_src; 422 /* 423 * Reverse the source and destination addresses. 424 * If the return address is a multicast, zero out the source 425 * (ip_wput_v6 will set an address). 426 */ 427 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 428 ip6h->ip6_src = ipv6_all_zeros; 429 ip6h->ip6_dst = origsrc; 430 } else { 431 ip6h->ip6_src = ip6h->ip6_dst; 432 ip6h->ip6_dst = origsrc; 433 } 434 435 /* set the hop limit */ 436 ip6h->ip6_hops = ipv6_def_hops; 437 438 /* 439 * Prepare for checksum by putting icmp length in the icmp 440 * checksum field. The checksum is calculated in ip_wput_v6. 441 */ 442 icmp6->icmp6_cksum = ip6h->ip6_plen; 443 /* 444 * ICMP echo replies should go out on the same interface 445 * the request came on as probes used by in.mpathd for 446 * detecting NIC failures are ECHO packets. We turn-off load 447 * spreading by allocating a ip6i and setting ip6i_attach_if 448 * to B_TRUE which is handled both by ip_wput_v6 and 449 * ip_newroute_v6. If we don't turnoff load spreading, 450 * the packets might get dropped if there are no 451 * non-FAILED/INACTIVE interfaces for it to go out on and 452 * in.mpathd would wrongly detect a failure or mis-detect 453 * a NIC failure as a link failure. As load spreading can 454 * happen only if ill_group is not NULL, we do only for 455 * that case and this does not affect the normal case. 456 * 457 * We force this only on echo packets that came from on-link 458 * hosts. 
We restrict this to link-local addresses which 459 * is used by in.mpathd for probing. In the IPv6 case, 460 * default routes typically have an ire_ipif pointer and 461 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 462 * might work. As a default route out of this interface 463 * may not be present, enforcing this packet to go out in 464 * this case may not work. 465 */ 466 if (ill->ill_group != NULL && 467 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 468 /* 469 * If we are sending replies to ourselves, don't 470 * set ATTACH_IF as we may not be able to find 471 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 472 * causes ip_wput_v6 to look for an IRE_LOCAL on 473 * "ill" which it may not find and will try to 474 * create an IRE_CACHE for our local address. Once 475 * we do this, we will try to forward all packets 476 * meant to our LOCAL address. 477 */ 478 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 479 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 480 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 481 if (mp == NULL) { 482 BUMP_MIB(ill->ill_icmp6_mib, 483 ipv6IfIcmpInErrors); 484 if (ire != NULL) 485 ire_refrele(ire); 486 if (mctl_present) 487 freeb(first_mp); 488 return; 489 } else if (mctl_present) { 490 first_mp->b_cont = mp; 491 } else { 492 first_mp = mp; 493 } 494 ip6i = (ip6i_t *)mp->b_rptr; 495 ip6i->ip6i_flags = IP6I_ATTACH_IF; 496 ip6i->ip6i_ifindex = 497 ill->ill_phyint->phyint_ifindex; 498 } 499 if (ire != NULL) 500 ire_refrele(ire); 501 } 502 503 if (!mctl_present) { 504 /* 505 * This packet should go out the same way as it 506 * came in i.e in clear. To make sure that global 507 * policy will not be applied to this in ip_wput, 508 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
509 */ 510 ASSERT(first_mp == mp); 511 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 512 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 513 freemsg(mp); 514 return; 515 } 516 ii = (ipsec_in_t *)first_mp->b_rptr; 517 518 /* This is not a secure packet */ 519 ii->ipsec_in_secure = B_FALSE; 520 first_mp->b_cont = mp; 521 } 522 ii->ipsec_in_zoneid = zoneid; 523 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 524 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 525 return; 526 } 527 put(WR(q), first_mp); 528 return; 529 530 case ICMP6_ECHO_REPLY: 531 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 532 break; 533 534 case ND_ROUTER_SOLICIT: 535 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 536 break; 537 538 case ND_ROUTER_ADVERT: 539 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 540 break; 541 542 case ND_NEIGHBOR_SOLICIT: 543 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 544 if (mctl_present) 545 freeb(first_mp); 546 /* XXX may wish to pass first_mp up to ndp_input someday. */ 547 ndp_input(ill, mp); 548 return; 549 550 case ND_NEIGHBOR_ADVERT: 551 BUMP_MIB(ill->ill_icmp6_mib, 552 ipv6IfIcmpInNeighborAdvertisements); 553 if (mctl_present) 554 freeb(first_mp); 555 /* XXX may wish to pass first_mp up to ndp_input someday. */ 556 ndp_input(ill, mp); 557 return; 558 559 case ND_REDIRECT: { 560 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 561 562 if (ipv6_ignore_redirect) 563 break; 564 565 /* 566 * As there is no upper client to deliver, we don't 567 * need the first_mp any more. 568 */ 569 if (mctl_present) 570 freeb(first_mp); 571 if (!pullupmsg(mp, -1) || 572 !icmp_redirect_ok_v6(ill, mp)) { 573 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 574 break; 575 } 576 icmp_redirect_v6(q, mp, ill); 577 return; 578 } 579 580 /* 581 * The next three icmp messages will be handled by MLD. 582 * Pass all valid MLD packets up to any process(es) 583 * listening on a raw ICMP socket. 
MLD messages are 584 * freed by mld_input function. 585 */ 586 case MLD_LISTENER_QUERY: 587 case MLD_LISTENER_REPORT: 588 case MLD_LISTENER_REDUCTION: 589 if (mctl_present) 590 freeb(first_mp); 591 mld_input(q, mp, ill); 592 return; 593 default: 594 break; 595 } 596 if (interested) { 597 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 598 mctl_present, zoneid); 599 } else { 600 freemsg(first_mp); 601 } 602 } 603 604 /* 605 * Process received IPv6 ICMP Packet too big. 606 * After updating any IRE it does the fanout to any matching transport streams. 607 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 608 */ 609 /* ARGSUSED */ 610 static void 611 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 612 boolean_t mctl_present, zoneid_t zoneid) 613 { 614 ip6_t *ip6h; 615 ip6_t *inner_ip6h; 616 icmp6_t *icmp6; 617 uint16_t hdr_length; 618 uint32_t mtu; 619 ire_t *ire, *first_ire; 620 mblk_t *first_mp; 621 622 first_mp = mp; 623 if (mctl_present) 624 mp = first_mp->b_cont; 625 /* 626 * We must have exclusive use of the mblk to update the MTU 627 * in the packet. 628 * If not, we copy it. 629 * 630 * If there's an M_CTL present, we know that allocated first_mp 631 * earlier in this function, so we know first_mp has refcnt of one. 
632 */ 633 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 634 if (mp->b_datap->db_ref > 1) { 635 mblk_t *mp1; 636 637 mp1 = copymsg(mp); 638 freemsg(mp); 639 if (mp1 == NULL) { 640 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 641 if (mctl_present) 642 freeb(first_mp); 643 return; 644 } 645 mp = mp1; 646 if (mctl_present) 647 first_mp->b_cont = mp; 648 else 649 first_mp = mp; 650 } 651 ip6h = (ip6_t *)mp->b_rptr; 652 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 653 hdr_length = ip_hdr_length_v6(mp, ip6h); 654 else 655 hdr_length = IPV6_HDR_LEN; 656 657 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 658 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 659 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 660 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 661 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 662 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 663 freemsg(first_mp); 664 return; 665 } 666 ip6h = (ip6_t *)mp->b_rptr; 667 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 668 inner_ip6h = (ip6_t *)&icmp6[1]; 669 } 670 671 /* 672 * For link local destinations matching simply on IRE type is not 673 * sufficient. Same link local addresses for different ILL's is 674 * possible. 
675 */ 676 677 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 678 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 679 IRE_CACHE, ill->ill_ipif, ALL_ZONES, 680 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 681 682 if (first_ire == NULL) { 683 if (ip_debug > 2) { 684 /* ip1dbg */ 685 pr_addr_dbg("icmp_inbound_too_big_v6:" 686 "no ire for dst %s\n", AF_INET6, 687 &inner_ip6h->ip6_dst); 688 } 689 freemsg(first_mp); 690 return; 691 } 692 693 mtu = ntohl(icmp6->icmp6_mtu); 694 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 695 for (ire = first_ire; ire != NULL && 696 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 697 ire = ire->ire_next) { 698 mutex_enter(&ire->ire_lock); 699 if (mtu < IPV6_MIN_MTU) { 700 ip1dbg(("Received mtu less than IPv6 " 701 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 702 mtu = IPV6_MIN_MTU; 703 /* 704 * If an mtu less than IPv6 min mtu is received, 705 * we must include a fragment header in 706 * subsequent packets. 707 */ 708 ire->ire_frag_flag |= IPH_FRAG_HDR; 709 } 710 ip1dbg(("Received mtu from router: %d\n", mtu)); 711 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 712 /* Record the new max frag size for the ULP. */ 713 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 714 /* 715 * If we need a fragment header in every packet 716 * (above case or multirouting), make sure the 717 * ULP takes it into account when computing the 718 * payload size. 
719 */ 720 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 721 sizeof (ip6_frag_t)); 722 } else { 723 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 724 } 725 mutex_exit(&ire->ire_lock); 726 } 727 rw_exit(&first_ire->ire_bucket->irb_lock); 728 ire_refrele(first_ire); 729 } else { 730 irb_t *irb = NULL; 731 /* 732 * for non-link local destinations we match only on the IRE type 733 */ 734 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 735 IRE_CACHE, ill->ill_ipif, ALL_ZONES, MATCH_IRE_TYPE); 736 if (ire == NULL) { 737 if (ip_debug > 2) { 738 /* ip1dbg */ 739 pr_addr_dbg("icmp_inbound_too_big_v6:" 740 "no ire for dst %s\n", 741 AF_INET6, &inner_ip6h->ip6_dst); 742 } 743 freemsg(first_mp); 744 return; 745 } 746 irb = ire->ire_bucket; 747 ire_refrele(ire); 748 rw_enter(&irb->irb_lock, RW_READER); 749 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 750 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 751 &inner_ip6h->ip6_dst)) { 752 mtu = ntohl(icmp6->icmp6_mtu); 753 mutex_enter(&ire->ire_lock); 754 if (mtu < IPV6_MIN_MTU) { 755 ip1dbg(("Received mtu less than IPv6" 756 "min mtu %d: %d\n", 757 IPV6_MIN_MTU, mtu)); 758 mtu = IPV6_MIN_MTU; 759 /* 760 * If an mtu less than IPv6 min mtu is 761 * received, we must include a fragment 762 * header in subsequent packets. 763 */ 764 ire->ire_frag_flag |= IPH_FRAG_HDR; 765 } 766 767 ip1dbg(("Received mtu from router: %d\n", mtu)); 768 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 769 /* Record the new max frag size for the ULP. */ 770 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 771 /* 772 * If we need a fragment header in 773 * every packet (above case or 774 * multirouting), make sure the ULP 775 * takes it into account when computing 776 * the payload size. 
777 */ 778 icmp6->icmp6_mtu = 779 htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = 783 htonl(ire->ire_max_frag); 784 } 785 mutex_exit(&ire->ire_lock); 786 } 787 } 788 rw_exit(&irb->irb_lock); 789 } 790 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 791 mctl_present, zoneid); 792 } 793 794 static void 795 pkt_too_big(conn_t *connp, void *arg) 796 { 797 mblk_t *mp; 798 799 if (!connp->conn_ipv6_recvpathmtu) 800 return; 801 802 /* create message and drop it on this connections read queue */ 803 if ((mp = dupb((mblk_t *)arg)) == NULL) { 804 return; 805 } 806 mp->b_datap->db_type = M_CTL; 807 808 putnext(connp->conn_rq, mp); 809 } 810 811 /* 812 * Fanout received ICMPv6 error packets to the transports. 813 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 814 */ 815 void 816 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 817 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 818 { 819 uint16_t *up; /* Pointer to ports in ULP header */ 820 uint32_t ports; /* reversed ports for fanout */ 821 ip6_t rip6h; /* With reversed addresses */ 822 uint16_t hdr_length; 823 uint8_t *nexthdrp; 824 uint8_t nexthdr; 825 mblk_t *first_mp; 826 ipsec_in_t *ii; 827 tcpha_t *tcpha; 828 conn_t *connp; 829 830 first_mp = mp; 831 if (mctl_present) { 832 mp = first_mp->b_cont; 833 ASSERT(mp != NULL); 834 835 ii = (ipsec_in_t *)first_mp->b_rptr; 836 ASSERT(ii->ipsec_in_type == IPSEC_IN); 837 } else { 838 ii = NULL; 839 } 840 841 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 842 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 843 844 /* 845 * Need to pullup everything in order to use 846 * ip_hdr_length_nexthdr_v6() 847 */ 848 if (mp->b_cont != NULL) { 849 if (!pullupmsg(mp, -1)) { 850 ip1dbg(("icmp_inbound_error_fanout_v6: " 851 "pullupmsg failed\n")); 852 goto drop_pkt; 853 } 854 ip6h = (ip6_t *)mp->b_rptr; 855 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 856 } 857 858 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 859 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 860 goto drop_pkt; 861 862 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 863 goto drop_pkt; 864 nexthdr = *nexthdrp; 865 866 /* Set message type, must be done after pullups */ 867 mp->b_datap->db_type = M_CTL; 868 869 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 870 /* 871 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 872 * sockets. 873 * 874 * Note I don't like walking every connection to deliver 875 * this information to a set of listeners. A separate 876 * list could be kept to keep the cost of this down. 877 */ 878 ipcl_walk(pkt_too_big, (void *)mp); 879 } 880 881 /* Try to pass the ICMP message to clients who need it */ 882 switch (nexthdr) { 883 case IPPROTO_UDP: { 884 /* 885 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 886 * UDP header to get the port information. 887 */ 888 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 889 mp->b_wptr) { 890 break; 891 } 892 /* 893 * Attempt to find a client stream based on port. 894 * Note that we do a reverse lookup since the header is 895 * in the form we sent it out. 896 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 897 * and we only set the src and dst addresses and nexthdr. 898 */ 899 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 900 rip6h.ip6_src = ip6h->ip6_dst; 901 rip6h.ip6_dst = ip6h->ip6_src; 902 rip6h.ip6_nxt = nexthdr; 903 ((uint16_t *)&ports)[0] = up[1]; 904 ((uint16_t *)&ports)[1] = up[0]; 905 906 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 907 IP6_NO_IPPOLICY, mctl_present, zoneid); 908 return; 909 } 910 case IPPROTO_TCP: { 911 /* 912 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 913 * the TCP header to get the port information. 
914 */ 915 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 916 mp->b_wptr) { 917 break; 918 } 919 920 /* 921 * Attempt to find a client stream based on port. 922 * Note that we do a reverse lookup since the header is 923 * in the form we sent it out. 924 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 925 * we only set the src and dst addresses and nexthdr. 926 */ 927 928 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 929 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 930 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 931 if (connp == NULL) { 932 goto drop_pkt; 933 } 934 935 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 936 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 937 return; 938 939 } 940 case IPPROTO_SCTP: 941 /* 942 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 943 * the SCTP header to get the port information. 944 */ 945 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 946 mp->b_wptr) { 947 break; 948 } 949 950 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 951 ((uint16_t *)&ports)[0] = up[1]; 952 ((uint16_t *)&ports)[1] = up[0]; 953 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 954 IP6_NO_IPPOLICY, 0, zoneid); 955 return; 956 case IPPROTO_ESP: 957 case IPPROTO_AH: { 958 int ipsec_rc; 959 960 /* 961 * We need a IPSEC_IN in the front to fanout to AH/ESP. 962 * We will re-use the IPSEC_IN if it is already present as 963 * AH/ESP will not affect any fields in the IPSEC_IN for 964 * ICMP errors. If there is no IPSEC_IN, allocate a new 965 * one and attach it in the front. 966 */ 967 if (ii != NULL) { 968 /* 969 * ip_fanout_proto_again converts the ICMP errors 970 * that come back from AH/ESP to M_DATA so that 971 * if it is non-AH/ESP and we do a pullupmsg in 972 * this function, it would work. Convert it back 973 * to M_CTL before we send up as this is a ICMP 974 * error. This could have been generated locally or 975 * by some router. Validate the inner IPSEC 976 * headers. 
977 * 978 * NOTE : ill_index is used by ip_fanout_proto_again 979 * to locate the ill. 980 */ 981 ASSERT(ill != NULL); 982 ii->ipsec_in_ill_index = 983 ill->ill_phyint->phyint_ifindex; 984 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 985 first_mp->b_cont->b_datap->db_type = M_CTL; 986 } else { 987 /* 988 * IPSEC_IN is not present. We attach a ipsec_in 989 * message and send up to IPSEC for validating 990 * and removing the IPSEC headers. Clear 991 * ipsec_in_secure so that when we return 992 * from IPSEC, we don't mistakenly think that this 993 * is a secure packet came from the network. 994 * 995 * NOTE : ill_index is used by ip_fanout_proto_again 996 * to locate the ill. 997 */ 998 ASSERT(first_mp == mp); 999 first_mp = ipsec_in_alloc(B_FALSE); 1000 if (first_mp == NULL) { 1001 freemsg(mp); 1002 BUMP_MIB(&ip_mib, ipInDiscards); 1003 return; 1004 } 1005 ii = (ipsec_in_t *)first_mp->b_rptr; 1006 1007 /* This is not a secure packet */ 1008 ii->ipsec_in_secure = B_FALSE; 1009 first_mp->b_cont = mp; 1010 mp->b_datap->db_type = M_CTL; 1011 ASSERT(ill != NULL); 1012 ii->ipsec_in_ill_index = 1013 ill->ill_phyint->phyint_ifindex; 1014 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1015 } 1016 1017 if (!ipsec_loaded()) { 1018 ip_proto_not_sup(q, first_mp, 0, zoneid); 1019 return; 1020 } 1021 1022 if (nexthdr == IPPROTO_ESP) 1023 ipsec_rc = ipsecesp_icmp_error(first_mp); 1024 else 1025 ipsec_rc = ipsecah_icmp_error(first_mp); 1026 if (ipsec_rc == IPSEC_STATUS_FAILED) 1027 return; 1028 1029 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1030 return; 1031 } 1032 case IPPROTO_ENCAP: 1033 case IPPROTO_IPV6: 1034 if ((uint8_t *)ip6h + hdr_length + 1035 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1036 sizeof (ip6_t)) > mp->b_wptr) 1037 goto drop_pkt; 1038 1039 if (nexthdr == IPPROTO_ENCAP || 1040 !IN6_ARE_ADDR_EQUAL( 1041 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1042 &ip6h->ip6_src) || 1043 !IN6_ARE_ADDR_EQUAL( 1044 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1045 &ip6h->ip6_dst)) { 1046 /* 1047 * For tunnels that have used IPsec protection, 1048 * we need to adjust the MTU to take into account 1049 * the IPsec overhead. 1050 */ 1051 if (ii != NULL) 1052 icmp6->icmp6_mtu = htons( 1053 ntohs(icmp6->icmp6_mtu) - 1054 ipsec_in_extra_length(first_mp)); 1055 } else { 1056 /* 1057 * Self-encapsulated case. As in the ipv4 case, 1058 * we need to strip the 2nd IP header. Since mp 1059 * is already pulled-up, we can simply bcopy 1060 * the 3rd header + data over the 2nd header. 1061 */ 1062 uint16_t unused_len; 1063 ip6_t *inner_ip6h = (ip6_t *) 1064 ((uchar_t *)ip6h + hdr_length); 1065 1066 /* 1067 * Make sure we don't do recursion more than once. 1068 */ 1069 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1070 &unused_len, &nexthdrp) || 1071 *nexthdrp == IPPROTO_IPV6) { 1072 goto drop_pkt; 1073 } 1074 1075 /* 1076 * We are about to modify the packet. Make a copy if 1077 * someone else has a reference to it. 1078 */ 1079 if (DB_REF(mp) > 1) { 1080 mblk_t *mp1; 1081 uint16_t icmp6_offset; 1082 1083 mp1 = copymsg(mp); 1084 if (mp1 == NULL) { 1085 goto drop_pkt; 1086 } 1087 icmp6_offset = (uint16_t) 1088 ((uchar_t *)icmp6 - mp->b_rptr); 1089 freemsg(mp); 1090 mp = mp1; 1091 1092 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1093 ip6h = (ip6_t *)&icmp6[1]; 1094 inner_ip6h = (ip6_t *) 1095 ((uchar_t *)ip6h + hdr_length); 1096 1097 if (mctl_present) 1098 first_mp->b_cont = mp; 1099 else 1100 first_mp = mp; 1101 } 1102 1103 /* 1104 * Need to set db_type back to M_DATA before 1105 * refeeding mp into this function. 
1106 */ 1107 DB_TYPE(mp) = M_DATA; 1108 1109 /* 1110 * Copy the 3rd header + remaining data on top 1111 * of the 2nd header. 1112 */ 1113 bcopy(inner_ip6h, ip6h, 1114 mp->b_wptr - (uchar_t *)inner_ip6h); 1115 1116 /* 1117 * Subtract length of the 2nd header. 1118 */ 1119 mp->b_wptr -= hdr_length; 1120 1121 /* 1122 * Now recurse, and see what I _really_ should be 1123 * doing here. 1124 */ 1125 icmp_inbound_error_fanout_v6(q, first_mp, 1126 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1127 zoneid); 1128 return; 1129 } 1130 /* FALLTHRU */ 1131 default: 1132 /* 1133 * The rip6h header is only used for the lookup and we 1134 * only set the src and dst addresses and nexthdr. 1135 */ 1136 rip6h.ip6_src = ip6h->ip6_dst; 1137 rip6h.ip6_dst = ip6h->ip6_src; 1138 rip6h.ip6_nxt = nexthdr; 1139 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1140 IP6_NO_IPPOLICY, mctl_present, zoneid); 1141 return; 1142 } 1143 /* NOTREACHED */ 1144 drop_pkt: 1145 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1146 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1147 freemsg(first_mp); 1148 } 1149 1150 /* 1151 * Validate the incoming redirect message, if valid redirect 1152 * processing is done later. This is separated from the actual 1153 * redirect processing to avoid becoming single threaded when not 1154 * necessary. (i.e invalid packet) 1155 * Assumes that any AH or ESP headers have already been removed. 1156 * The mp has already been pulled up. 
1157 */ 1158 boolean_t 1159 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1160 { 1161 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1162 nd_redirect_t *rd; 1163 ire_t *ire; 1164 uint16_t len; 1165 uint16_t hdr_length; 1166 1167 ASSERT(mp->b_cont == NULL); 1168 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1169 hdr_length = ip_hdr_length_v6(mp, ip6h); 1170 else 1171 hdr_length = IPV6_HDR_LEN; 1172 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1173 len = mp->b_wptr - mp->b_rptr - hdr_length; 1174 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1175 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1176 (rd->nd_rd_code != 0) || 1177 (len < sizeof (nd_redirect_t)) || 1178 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1179 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1180 return (B_FALSE); 1181 } 1182 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1183 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1184 return (B_FALSE); 1185 } 1186 1187 /* 1188 * Verify that the IP source address of the redirect is 1189 * the same as the current first-hop router for the specified 1190 * ICMP destination address. Just to be cautious, this test 1191 * will be done again before we add the redirect, in case 1192 * router goes away between now and then. 1193 */ 1194 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1195 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, 1196 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1197 if (ire == NULL) 1198 return (B_FALSE); 1199 ire_refrele(ire); 1200 if (len > sizeof (nd_redirect_t)) { 1201 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1202 len - sizeof (nd_redirect_t))) 1203 return (B_FALSE); 1204 } 1205 return (B_TRUE); 1206 } 1207 1208 /* 1209 * Process received IPv6 ICMP Redirect messages. 1210 * Assumes that the icmp packet has already been verfied to be 1211 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1212 */ 1213 /* ARGSUSED */ 1214 static void 1215 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1216 { 1217 ip6_t *ip6h; 1218 uint16_t hdr_length; 1219 nd_redirect_t *rd; 1220 ire_t *ire; 1221 ire_t *prev_ire; 1222 ire_t *redir_ire; 1223 in6_addr_t *src, *dst, *gateway; 1224 nd_opt_hdr_t *opt; 1225 nce_t *nce; 1226 int nce_flags = 0; 1227 int err = 0; 1228 boolean_t redirect_to_router = B_FALSE; 1229 int len; 1230 iulp_t ulp_info = { 0 }; 1231 ill_t *prev_ire_ill; 1232 ipif_t *ipif; 1233 1234 ip6h = (ip6_t *)mp->b_rptr; 1235 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1236 hdr_length = ip_hdr_length_v6(mp, ip6h); 1237 else 1238 hdr_length = IPV6_HDR_LEN; 1239 1240 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1241 src = &ip6h->ip6_src; 1242 dst = &rd->nd_rd_dst; 1243 gateway = &rd->nd_rd_target; 1244 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1245 redirect_to_router = B_TRUE; 1246 nce_flags |= NCE_F_ISROUTER; 1247 } 1248 /* 1249 * Make sure we had a route for the dest in question and that 1250 * route was pointing to the old gateway (the source of the 1251 * redirect packet.) 1252 */ 1253 ipif = ipif_get_next_ipif(NULL, ill); 1254 if (ipif == NULL) { 1255 freemsg(mp); 1256 return; 1257 } 1258 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1259 ALL_ZONES, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1260 ipif_refrele(ipif); 1261 /* 1262 * Check that 1263 * the redirect was not from ourselves 1264 * old gateway is still directly reachable 1265 */ 1266 if (prev_ire == NULL || 1267 prev_ire->ire_type == IRE_LOCAL) { 1268 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1269 goto fail_redirect; 1270 } 1271 prev_ire_ill = ire_to_ill(prev_ire); 1272 ASSERT(prev_ire_ill != NULL); 1273 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1274 nce_flags |= NCE_F_NONUD; 1275 1276 /* 1277 * Should we use the old ULP info to create the new gateway? From 1278 * a user's perspective, we should inherit the info so that it 1279 * is a "smooth" transition. 
If we do not do that, then new 1280 * connections going thru the new gateway will have no route metrics, 1281 * which is counter-intuitive to user. From a network point of 1282 * view, this may or may not make sense even though the new gateway 1283 * is still directly connected to us so the route metrics should not 1284 * change much. 1285 * 1286 * But if the old ire_uinfo is not initialized, we do another 1287 * recursive lookup on the dest using the new gateway. There may 1288 * be a route to that. If so, use it to initialize the redirect 1289 * route. 1290 */ 1291 if (prev_ire->ire_uinfo.iulp_set) { 1292 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1293 } else if (redirect_to_router) { 1294 /* 1295 * Only do the following if the redirection is really to 1296 * a router. 1297 */ 1298 ire_t *tmp_ire; 1299 ire_t *sire; 1300 1301 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1302 ALL_ZONES, 0, 1303 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1304 if (sire != NULL) { 1305 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1306 ASSERT(tmp_ire != NULL); 1307 ire_refrele(tmp_ire); 1308 ire_refrele(sire); 1309 } else if (tmp_ire != NULL) { 1310 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1311 sizeof (iulp_t)); 1312 ire_refrele(tmp_ire); 1313 } 1314 } 1315 1316 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1317 opt = (nd_opt_hdr_t *)&rd[1]; 1318 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1319 if (opt != NULL) { 1320 err = ndp_lookup_then_add(ill, 1321 (uchar_t *)&opt[1], /* Link layer address */ 1322 gateway, 1323 &ipv6_all_ones, /* prefix mask */ 1324 &ipv6_all_zeros, /* Mapping mask */ 1325 0, 1326 nce_flags, 1327 ND_STALE, 1328 &nce); 1329 switch (err) { 1330 case 0: 1331 NCE_REFRELE(nce); 1332 break; 1333 case EEXIST: 1334 /* 1335 * Check to see if link layer address has changed and 1336 * process the nce_state accordingly. 
1337 */ 1338 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1339 NCE_REFRELE(nce); 1340 break; 1341 default: 1342 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1343 err)); 1344 goto fail_redirect; 1345 } 1346 } 1347 if (redirect_to_router) { 1348 /* icmp_redirect_ok_v6() must have already verified this */ 1349 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1350 1351 /* 1352 * Create a Route Association. This will allow us to remember 1353 * a router told us to use the particular gateway. 1354 */ 1355 ire = ire_create_v6( 1356 dst, 1357 &ipv6_all_ones, /* mask */ 1358 &prev_ire->ire_src_addr_v6, /* source addr */ 1359 gateway, /* gateway addr */ 1360 &prev_ire->ire_max_frag, /* max frag */ 1361 NULL, /* Fast Path header */ 1362 NULL, /* no rfq */ 1363 NULL, /* no stq */ 1364 IRE_HOST_REDIRECT, 1365 NULL, 1366 prev_ire->ire_ipif, 1367 NULL, 1368 0, 1369 0, 1370 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1371 &ulp_info); 1372 } else { 1373 /* 1374 * Just create an on link entry, may or may not be a router 1375 * If there is no link layer address option ire_add() won't 1376 * add this. 1377 */ 1378 ire = ire_create_v6( 1379 dst, /* gateway == dst */ 1380 &ipv6_all_ones, /* mask */ 1381 &prev_ire->ire_src_addr_v6, /* source addr */ 1382 &ipv6_all_zeros, /* gateway addr */ 1383 &prev_ire->ire_max_frag, /* max frag */ 1384 NULL, /* Fast Path header */ 1385 prev_ire->ire_rfq, /* ire rfq */ 1386 prev_ire->ire_stq, /* ire stq */ 1387 IRE_CACHE, 1388 NULL, 1389 prev_ire->ire_ipif, 1390 &ipv6_all_ones, 1391 0, 1392 0, 1393 0, 1394 &ulp_info); 1395 } 1396 if (ire == NULL) 1397 goto fail_redirect; 1398 1399 /* 1400 * XXX If there is no nce i.e there is no target link layer address 1401 * option with the redirect message, ire_add will fail. In that 1402 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1403 * to fix this. 
1404 */ 1405 if (ire_add(&ire, NULL, NULL, NULL) == 0) { 1406 1407 /* tell routing sockets that we received a redirect */ 1408 ip_rts_change_v6(RTM_REDIRECT, 1409 &rd->nd_rd_dst, 1410 &rd->nd_rd_target, 1411 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1412 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1413 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1414 1415 /* 1416 * Delete any existing IRE_HOST_REDIRECT for this destination. 1417 * This together with the added IRE has the effect of 1418 * modifying an existing redirect. 1419 */ 1420 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1421 ire->ire_ipif, NULL, ALL_ZONES, 0, 1422 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1423 1424 ire_refrele(ire); /* Held in ire_add_v6 */ 1425 1426 if (redir_ire != NULL) { 1427 ire_delete(redir_ire); 1428 ire_refrele(redir_ire); 1429 } 1430 } 1431 1432 if (prev_ire->ire_type == IRE_CACHE) 1433 ire_delete(prev_ire); 1434 ire_refrele(prev_ire); 1435 prev_ire = NULL; 1436 1437 fail_redirect: 1438 if (prev_ire != NULL) 1439 ire_refrele(prev_ire); 1440 freemsg(mp); 1441 } 1442 1443 static ill_t * 1444 ip_queue_to_ill_v6(queue_t *q) 1445 { 1446 ill_t *ill; 1447 1448 ASSERT(WR(q) == q); 1449 1450 if (q->q_next != NULL) { 1451 ill = (ill_t *)q->q_ptr; 1452 if (ILL_CAN_LOOKUP(ill)) 1453 ill_refhold(ill); 1454 else 1455 ill = NULL; 1456 } else { 1457 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1458 NULL, NULL, NULL, NULL, NULL); 1459 } 1460 if (ill == NULL) 1461 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1462 return (ill); 1463 } 1464 1465 /* 1466 * Assigns an appropriate source address to the packet. 1467 * If origdst is one of our IP addresses that use it as the source. 1468 * If the queue is an ill queue then select a source from that ill. 1469 * Otherwise pick a source based on a route lookup back to the origsrc. 1470 * 1471 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1472 */ 1473 static in6_addr_t * 1474 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1475 in6_addr_t *src) 1476 { 1477 ill_t *ill; 1478 ire_t *ire; 1479 ipif_t *ipif; 1480 zoneid_t zoneid; 1481 1482 ASSERT(!(wq->q_flag & QREADR)); 1483 if (wq->q_next != NULL) { 1484 ill = (ill_t *)wq->q_ptr; 1485 zoneid = GLOBAL_ZONEID; 1486 } else { 1487 ill = NULL; 1488 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1489 } 1490 1491 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1492 NULL, NULL, zoneid, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1493 if (ire != NULL) { 1494 /* Destined to one of our addresses */ 1495 *src = *origdst; 1496 ire_refrele(ire); 1497 return (src); 1498 } 1499 if (ire != NULL) { 1500 ire_refrele(ire); 1501 ire = NULL; 1502 } 1503 if (ill == NULL) { 1504 /* What is the route back to the original source? */ 1505 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1506 NULL, NULL, zoneid, 1507 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1508 if (ire == NULL) { 1509 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1510 return (NULL); 1511 } 1512 /* 1513 * Does not matter whether we use ire_stq or ire_ipif here. 1514 * Just pick an ill for ICMP replies. 1515 */ 1516 ASSERT(ire->ire_ipif != NULL); 1517 ill = ire->ire_ipif->ipif_ill; 1518 ire_refrele(ire); 1519 } 1520 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1521 IPV6_PREFER_SRC_DEFAULT, zoneid); 1522 if (ipif != NULL) { 1523 *src = ipif->ipif_v6src_addr; 1524 ipif_refrele(ipif); 1525 return (src); 1526 } 1527 /* 1528 * Unusual case - can't find a usable source address to reach the 1529 * original source. Use what in the route to the source. 
1530 */ 1531 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1532 NULL, NULL, zoneid, (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1533 if (ire == NULL) { 1534 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1535 return (NULL); 1536 } 1537 ASSERT(ire != NULL); 1538 *src = ire->ire_src_addr_v6; 1539 ire_refrele(ire); 1540 return (src); 1541 } 1542 1543 /* 1544 * Build and ship an IPv6 ICMP message using the packet data in mp, 1545 * and the ICMP header pointed to by "stuff". (May be called as 1546 * writer.) 1547 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1548 * verify that an icmp error packet can be sent. 1549 * 1550 * If q is an ill write side queue (which is the case when packets 1551 * arrive from ip_rput) then ip_wput code will ensure that packets to 1552 * link-local destinations are sent out that ill. 1553 * 1554 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1555 * source address (see above function). 1556 */ 1557 static void 1558 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1559 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1560 { 1561 ip6_t *ip6h; 1562 in6_addr_t v6dst; 1563 size_t len_needed; 1564 size_t msg_len; 1565 mblk_t *mp1; 1566 icmp6_t *icmp6; 1567 ill_t *ill; 1568 in6_addr_t v6src; 1569 mblk_t *ipsec_mp; 1570 ipsec_out_t *io; 1571 1572 ill = ip_queue_to_ill_v6(q); 1573 if (ill == NULL) { 1574 freemsg(mp); 1575 return; 1576 } 1577 1578 if (mctl_present) { 1579 /* 1580 * If it is : 1581 * 1582 * 1) a IPSEC_OUT, then this is caused by outbound 1583 * datagram originating on this host. IPSEC processing 1584 * may or may not have been done. Refer to comments above 1585 * icmp_inbound_error_fanout for details. 1586 * 1587 * 2) a IPSEC_IN if we are generating a icmp_message 1588 * for an incoming datagram destined for us i.e called 1589 * from ip_fanout_send_icmp. 
1590 */ 1591 ipsec_info_t *in; 1592 1593 ipsec_mp = mp; 1594 mp = ipsec_mp->b_cont; 1595 1596 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1597 ip6h = (ip6_t *)mp->b_rptr; 1598 1599 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1600 in->ipsec_info_type == IPSEC_IN); 1601 1602 if (in->ipsec_info_type == IPSEC_IN) { 1603 /* 1604 * Convert the IPSEC_IN to IPSEC_OUT. 1605 */ 1606 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1607 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1608 ill_refrele(ill); 1609 return; 1610 } 1611 } else { 1612 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1613 io = (ipsec_out_t *)in; 1614 /* 1615 * Clear out ipsec_out_proc_begin, so we do a fresh 1616 * ire lookup. 1617 */ 1618 io->ipsec_out_proc_begin = B_FALSE; 1619 } 1620 } else { 1621 /* 1622 * This is in clear. The icmp message we are building 1623 * here should go out in clear. 1624 */ 1625 ipsec_in_t *ii; 1626 ASSERT(mp->b_datap->db_type == M_DATA); 1627 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1628 freemsg(mp); 1629 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1630 ill_refrele(ill); 1631 return; 1632 } 1633 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1634 1635 /* This is not a secure packet */ 1636 ii->ipsec_in_secure = B_FALSE; 1637 ipsec_mp->b_cont = mp; 1638 ip6h = (ip6_t *)mp->b_rptr; 1639 /* 1640 * Convert the IPSEC_IN to IPSEC_OUT. 
1641 */ 1642 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1643 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1644 ill_refrele(ill); 1645 return; 1646 } 1647 } 1648 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1649 1650 if (v6src_ptr != NULL) { 1651 v6src = *v6src_ptr; 1652 } else { 1653 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1654 &v6src) == NULL) { 1655 freemsg(ipsec_mp); 1656 ill_refrele(ill); 1657 return; 1658 } 1659 } 1660 v6dst = ip6h->ip6_src; 1661 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1662 msg_len = msgdsize(mp); 1663 if (msg_len > len_needed) { 1664 if (!adjmsg(mp, len_needed - msg_len)) { 1665 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1666 freemsg(ipsec_mp); 1667 ill_refrele(ill); 1668 return; 1669 } 1670 msg_len = len_needed; 1671 } 1672 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1673 if (mp1 == NULL) { 1674 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1675 freemsg(ipsec_mp); 1676 ill_refrele(ill); 1677 return; 1678 } 1679 ill_refrele(ill); 1680 mp1->b_cont = mp; 1681 mp = mp1; 1682 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1683 io->ipsec_out_type == IPSEC_OUT); 1684 ipsec_mp->b_cont = mp; 1685 1686 /* 1687 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1688 * node generates be accepted in peace by all on-host destinations. 1689 * If we do NOT assume that all on-host destinations trust 1690 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1691 * (Look for ipsec_out_icmp_loopback). 
1692 */ 1693 io->ipsec_out_icmp_loopback = B_TRUE; 1694 1695 ip6h = (ip6_t *)mp->b_rptr; 1696 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1697 1698 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1699 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1700 ip6h->ip6_hops = ipv6_def_hops; 1701 ip6h->ip6_dst = v6dst; 1702 ip6h->ip6_src = v6src; 1703 msg_len += IPV6_HDR_LEN + len; 1704 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1705 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1706 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1707 } 1708 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1709 icmp6 = (icmp6_t *)&ip6h[1]; 1710 bcopy(stuff, (char *)icmp6, len); 1711 /* 1712 * Prepare for checksum by putting icmp length in the icmp 1713 * checksum field. The checksum is calculated in ip_wput_v6. 1714 */ 1715 icmp6->icmp6_cksum = ip6h->ip6_plen; 1716 if (icmp6->icmp6_type == ND_REDIRECT) { 1717 ip6h->ip6_hops = IPV6_MAX_HOPS; 1718 } 1719 /* Send to V6 writeside put routine */ 1720 put(q, ipsec_mp); 1721 } 1722 1723 /* 1724 * Update the output mib when ICMPv6 packets are sent. 
1725 */ 1726 static void 1727 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1728 { 1729 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1730 1731 switch (icmp6->icmp6_type) { 1732 case ICMP6_DST_UNREACH: 1733 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1734 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1735 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1736 break; 1737 1738 case ICMP6_TIME_EXCEEDED: 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1740 break; 1741 1742 case ICMP6_PARAM_PROB: 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1744 break; 1745 1746 case ICMP6_PACKET_TOO_BIG: 1747 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1748 break; 1749 1750 case ICMP6_ECHO_REQUEST: 1751 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1752 break; 1753 1754 case ICMP6_ECHO_REPLY: 1755 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1756 break; 1757 1758 case ND_ROUTER_SOLICIT: 1759 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1760 break; 1761 1762 case ND_ROUTER_ADVERT: 1763 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1764 break; 1765 1766 case ND_NEIGHBOR_SOLICIT: 1767 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1768 break; 1769 1770 case ND_NEIGHBOR_ADVERT: 1771 BUMP_MIB(ill->ill_icmp6_mib, 1772 ipv6IfIcmpOutNeighborAdvertisements); 1773 break; 1774 1775 case ND_REDIRECT: 1776 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1777 break; 1778 1779 case MLD_LISTENER_QUERY: 1780 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1781 break; 1782 1783 case MLD_LISTENER_REPORT: 1784 case MLD_V2_LISTENER_REPORT: 1785 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1786 break; 1787 1788 case MLD_LISTENER_REDUCTION: 1789 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1790 break; 1791 } 1792 } 1793 1794 /* 1795 * Check if it is ok to send an ICMPv6 error packet in 1796 * response to the IP packet in 
mp. 1797 * Free the message and return null if no 1798 * ICMP error packet should be sent. 1799 */ 1800 static mblk_t * 1801 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1802 boolean_t llbcast, boolean_t mcast_ok) 1803 { 1804 ip6_t *ip6h; 1805 1806 if (!mp) 1807 return (NULL); 1808 1809 ip6h = (ip6_t *)mp->b_rptr; 1810 1811 /* Check if source address uniquely identifies the host */ 1812 1813 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1814 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1815 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1816 freemsg(mp); 1817 return (NULL); 1818 } 1819 1820 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1821 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1822 icmp6_t *icmp6; 1823 1824 if (mp->b_wptr - mp->b_rptr < len_needed) { 1825 if (!pullupmsg(mp, len_needed)) { 1826 ill_t *ill; 1827 1828 ill = ip_queue_to_ill_v6(q); 1829 if (ill == NULL) { 1830 BUMP_MIB(&icmp6_mib, 1831 ipv6IfIcmpInErrors); 1832 } else { 1833 BUMP_MIB(ill->ill_icmp6_mib, 1834 ipv6IfIcmpInErrors); 1835 ill_refrele(ill); 1836 } 1837 freemsg(mp); 1838 return (NULL); 1839 } 1840 ip6h = (ip6_t *)mp->b_rptr; 1841 } 1842 icmp6 = (icmp6_t *)&ip6h[1]; 1843 /* Explicitly do not generate errors in response to redirects */ 1844 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1845 icmp6->icmp6_type == ND_REDIRECT) { 1846 freemsg(mp); 1847 return (NULL); 1848 } 1849 } 1850 /* 1851 * Check that the destination is not multicast and that the packet 1852 * was not sent on link layer broadcast or multicast. (Exception 1853 * is Packet too big message as per the draft - when mcast_ok is set.) 1854 */ 1855 if (!mcast_ok && 1856 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1857 freemsg(mp); 1858 return (NULL); 1859 } 1860 if (icmp_err_rate_limit()) { 1861 /* 1862 * Only send ICMP error packets every so often. 1863 * This should be done on a per port/source basis, 1864 * but for now this will suffice. 
1865 */ 1866 freemsg(mp); 1867 return (NULL); 1868 } 1869 return (mp); 1870 } 1871 1872 /* 1873 * Generate an ICMPv6 redirect message. 1874 * Include target link layer address option if it exits. 1875 * Always include redirect header. 1876 */ 1877 static void 1878 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1879 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1880 { 1881 nd_redirect_t *rd; 1882 nd_opt_rd_hdr_t *rdh; 1883 uchar_t *buf; 1884 nce_t *nce = NULL; 1885 nd_opt_hdr_t *opt; 1886 int len; 1887 int ll_opt_len = 0; 1888 int max_redir_hdr_data_len; 1889 int pkt_len; 1890 in6_addr_t *srcp; 1891 1892 /* 1893 * We are called from ip_rput where we could 1894 * not have attached an IPSEC_IN. 1895 */ 1896 ASSERT(mp->b_datap->db_type == M_DATA); 1897 1898 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1899 if (mp == NULL) 1900 return; 1901 nce = ndp_lookup(ill, targetp, B_FALSE); 1902 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1903 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1904 ill->ill_phys_addr_length + 7)/8 * 8; 1905 } 1906 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1907 ASSERT(len % 4 == 0); 1908 buf = kmem_alloc(len, KM_NOSLEEP); 1909 if (buf == NULL) { 1910 if (nce != NULL) 1911 NCE_REFRELE(nce); 1912 freemsg(mp); 1913 return; 1914 } 1915 1916 rd = (nd_redirect_t *)buf; 1917 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1918 rd->nd_rd_code = 0; 1919 rd->nd_rd_reserved = 0; 1920 rd->nd_rd_target = *targetp; 1921 rd->nd_rd_dst = *dest; 1922 1923 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1924 if (nce != NULL && ll_opt_len != 0) { 1925 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1926 opt->nd_opt_len = ll_opt_len/8; 1927 bcopy((char *)nce->nce_res_mp->b_rptr + 1928 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1929 ill->ill_phys_addr_length); 1930 } 1931 if (nce != NULL) 1932 NCE_REFRELE(nce); 1933 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1934 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1935 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1936 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1937 pkt_len = msgdsize(mp); 1938 /* Make sure mp is 8 byte aligned */ 1939 if (pkt_len > max_redir_hdr_data_len) { 1940 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1941 sizeof (nd_opt_rd_hdr_t))/8; 1942 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1943 } else { 1944 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1945 (void) adjmsg(mp, -(pkt_len % 8)); 1946 } 1947 rdh->nd_opt_rh_reserved1 = 0; 1948 rdh->nd_opt_rh_reserved2 = 0; 1949 /* ipif_v6src_addr contains the link-local source address */ 1950 rw_enter(&ill_g_lock, RW_READER); 1951 if (ill->ill_group != NULL) { 1952 /* 1953 * The receiver of the redirect will verify whether it 1954 * had a route through us (srcp that we will use in 1955 * the redirect) or not. As we load spread even link-locals, 1956 * we don't know which source address the receiver of 1957 * redirect has in its route for communicating with us. 1958 * Thus we randomly choose a source here and finally we 1959 * should get to the right one and it will eventually 1960 * accept the redirect from us. We can't call 1961 * ip_lookup_scope_v6 because we don't have the right 1962 * link-local address here. Thus we randomly choose one. 1963 */ 1964 int cnt = ill->ill_group->illgrp_ill_count; 1965 1966 ill = ill->ill_group->illgrp_ill; 1967 cnt = ++icmp_redirect_v6_src_index % cnt; 1968 while (cnt--) 1969 ill = ill->ill_group_next; 1970 srcp = &ill->ill_ipif->ipif_v6src_addr; 1971 } else { 1972 srcp = &ill->ill_ipif->ipif_v6src_addr; 1973 } 1974 rw_exit(&ill_g_lock); 1975 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 1976 kmem_free(buf, len); 1977 } 1978 1979 1980 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 1981 void 1982 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1983 boolean_t llbcast, boolean_t mcast_ok) 1984 { 1985 icmp6_t icmp6; 1986 boolean_t mctl_present; 1987 mblk_t *first_mp; 1988 1989 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1990 1991 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 1992 if (mp == NULL) { 1993 if (mctl_present) 1994 freeb(first_mp); 1995 return; 1996 } 1997 bzero(&icmp6, sizeof (icmp6_t)); 1998 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1999 icmp6.icmp6_code = code; 2000 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2001 } 2002 2003 /* 2004 * Generate an ICMP unreachable message. 2005 */ 2006 void 2007 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2008 boolean_t llbcast, boolean_t mcast_ok) 2009 { 2010 icmp6_t icmp6; 2011 boolean_t mctl_present; 2012 mblk_t *first_mp; 2013 2014 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2015 2016 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2017 if (mp == NULL) { 2018 if (mctl_present) 2019 freeb(first_mp); 2020 return; 2021 } 2022 bzero(&icmp6, sizeof (icmp6_t)); 2023 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2024 icmp6.icmp6_code = code; 2025 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2026 } 2027 2028 /* 2029 * Generate an ICMP pkt too big message. 
2030 */ 2031 static void 2032 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2033 boolean_t llbcast, boolean_t mcast_ok) 2034 { 2035 icmp6_t icmp6; 2036 mblk_t *first_mp; 2037 boolean_t mctl_present; 2038 2039 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2040 2041 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2042 if (mp == NULL) { 2043 if (mctl_present) 2044 freeb(first_mp); 2045 return; 2046 } 2047 bzero(&icmp6, sizeof (icmp6_t)); 2048 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2049 icmp6.icmp6_code = 0; 2050 icmp6.icmp6_mtu = htonl(mtu); 2051 2052 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2053 } 2054 2055 /* 2056 * Generate an ICMP parameter problem message. (May be called as writer.) 2057 * 'offset' is the offset from the beginning of the packet in error. 2058 */ 2059 static void 2060 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2061 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2062 { 2063 icmp6_t icmp6; 2064 boolean_t mctl_present; 2065 mblk_t *first_mp; 2066 2067 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2068 2069 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2070 if (mp == NULL) { 2071 if (mctl_present) 2072 freeb(first_mp); 2073 return; 2074 } 2075 bzero((char *)&icmp6, sizeof (icmp6_t)); 2076 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2077 icmp6.icmp6_code = code; 2078 icmp6.icmp6_pptr = htonl(offset); 2079 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2080 } 2081 2082 /* 2083 * This code will need to take into account the possibility of binding 2084 * to a link local address on a multi-homed host, in which case the 2085 * outgoing interface (from the conn) will need to be used when getting 2086 * an ire for the dst. 
Going through proper outgoing interface and
 * choosing the source address corresponding to the outgoing interface
 * is necessary when the destination address is a link-local address and
 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
 * This can happen when active connection is setup; thus ipp pointer
 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL
 * pointer is passed as ipp pointer.
 *
 * The last byte of the T_bind_req message (the byte just before b_wptr on
 * entry) carries the protocol number.  It is stripped from the message
 * here and recorded in conn_ulp.
 *
 * Returns mp rewritten as an M_PCPROTO T_BIND_ACK on success, the result
 * of mi_tpi_err_ack_alloc() on failure (TSYSERR carrying the errno when
 * error > 0, TBADADDR otherwise), or NULL when a helper reported
 * EINPROGRESS.
 */
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
	ssize_t			len;
	int			protocol;
	struct T_bind_req	*tbr;
	sin6_t			*sin6;
	ipa6_conn_t		*ac6;
	in6_addr_t		*v6srcp;
	in6_addr_t		*v6dstp;
	uint16_t		lport;
	uint16_t		fport;
	uchar_t			*ucp;
	mblk_t			*mp1;
	boolean_t		ire_requested;
	boolean_t		ipsec_policy_set;
	int			error = 0;
	boolean_t		local_bind;
	boolean_t		orig_pkt_isv6 = connp->conn_pkt_isv6;
	ipa6_conn_x_t		*acx6;
	boolean_t		verify_dst;

	ASSERT(connp->conn_af_isv6);
	/* The message must hold a T_bind_req plus the trailing protocol byte */
	len = mp->b_wptr - mp->b_rptr;
	if (len < (sizeof (*tbr) + 1)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "ip_bind_v6: bogus msg, len %ld", len);
		goto bad_addr;
	}
	/* Back up and extract the protocol identifier. */
	mp->b_wptr--;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* Reset the message type in preparation for shipping it back. */
	mp->b_datap->db_type = M_PCPROTO;

	protocol = *mp->b_wptr & 0xFF;
	connp->conn_ulp = (uint8_t)protocol;

	/*
	 * Check for a zero length address.  This is from a protocol that
	 * wants to register to receive all packets of its type.
	 */
	if (tbr->ADDR_length == 0) {
		if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
		    protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
		    ipcl_proto_fanout_v6[protocol].connf_head != NULL) {
			/*
			 * TCP, SCTP, AH, and ESP have single protocol fanouts.
			 * Do not allow others to bind to these.
			 */
			goto bad_addr;
		}

		connp->conn_srcv6 = ipv6_all_zeros;
		ipcl_proto_insert_v6(connp, protocol);

		tbr->PRIM_type = T_BIND_ACK;
		return (mp);
	}

	/* Extract the address pointer from the message. */
	ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
	    tbr->ADDR_length);
	if (ucp == NULL) {
		ip1dbg(("ip_bind_v6: no address\n"));
		goto bad_addr;
	}
	if (!OK_32PTR(ucp)) {
		ip1dbg(("ip_bind_v6: unaligned address\n"));
		goto bad_addr;
	}
	mp1 = mp->b_cont;	/* trailing mp if any */
	ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE);
	ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET);

	/*
	 * The address length selects the request format.  The first two
	 * formats are local-only binds; the last two carry both endpoints.
	 * v6dstp, fport and verify_dst are only assigned (and only used)
	 * for the two connected formats.
	 */
	switch (tbr->ADDR_length) {
	default:
		ip1dbg(("ip_bind_v6: bad address length %d\n",
		    (int)tbr->ADDR_length));
		goto bad_addr;

	case IPV6_ADDR_LEN:
		/* Verification of local address only */
		v6srcp = (in6_addr_t *)ucp;
		lport = 0;
		local_bind = B_TRUE;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)ucp;
		v6srcp = &sin6->sin6_addr;
		lport = sin6->sin6_port;
		local_bind = B_TRUE;
		break;

	case sizeof (ipa6_conn_t):
		/*
		 * Verify that both the source and destination addresses
		 * are valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		ac6 = (ipa6_conn_t *)ucp;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		/* For raw socket, the local port is not set. */
		lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
		    connp->conn_lport;
		local_bind = B_FALSE;
		/* Always verify destination reachability. */
		verify_dst = B_TRUE;
		break;

	case sizeof (ipa6_conn_x_t):
		/*
		 * Verify that the source address is valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		acx6 = (ipa6_conn_x_t *)ucp;
		ac6 = &acx6->ac6x_conn;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		lport = ac6->ac6_lport;
		local_bind = B_FALSE;
		/*
		 * Client that passed ipa6_conn_x_t to us specifies whether to
		 * verify destination reachability.
		 */
		verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
		break;
	}
	if (local_bind) {
		if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
			/* Bind to IPv4 address */
			ipaddr_t v4src;

			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);

			/*
			 * The last argument asks for a fanout insert unless
			 * this was a bare address (verification only).
			 */
			error = ip_bind_laddr(connp, mp, v4src, lport,
			    ire_requested, ipsec_policy_set,
			    tbr->ADDR_length != IPV6_ADDR_LEN);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_FALSE;
		} else {
			/*
			 * A v4-mapped source on a v6only conn is rejected;
			 * error stays 0 so the reply is TBADADDR.
			 */
			if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
				error = 0;
				goto bad_addr;
			}
			error = ip_bind_laddr_v6(connp, mp, v6srcp, lport,
			    ire_requested, ipsec_policy_set,
			    (tbr->ADDR_length != IPV6_ADDR_LEN));
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
		if (protocol == IPPROTO_TCP)
			connp->conn_recv = tcp_conn_request;
	} else {
		/*
		 * Bind to local and remote address. Local might be
		 * unspecified in which case it will be extracted from
		 * ire_src_addr_v6
		 */
		if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
			/* Connect to IPv4 address */
			ipaddr_t v4src;
			ipaddr_t v4dst;

			/* Is the source unspecified or mapped? */
			if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
			    !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
				ip1dbg(("ip_bind_v6: "
				    "dst is mapped, but not the src\n"));
				goto bad_addr;
			}
			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
			IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);

			/*
			 * XXX Fix needed. Need to pass ipsec_policy_set
			 * instead of B_FALSE.
			 *
			 * NOTE(review): the call below already passes
			 * ipsec_policy_set, so the XXX above looks stale;
			 * confirm and remove.
			 */

			/*
			 * Always verify destination reachability.  Note that
			 * verify_dst is not consulted on this path; B_TRUE is
			 * passed unconditionally.
			 */
			error = ip_bind_connected(connp, mp, &v4src, lport,
			    v4dst, fport, ire_requested, ipsec_policy_set,
			    B_TRUE, B_TRUE);
			if (error != 0)
				goto bad_addr;
			/* Hand the (possibly selected) source back, v4-mapped */
			IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
			connp->conn_pkt_isv6 = B_FALSE;
		} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
			ip1dbg(("ip_bind_v6: "
			    "src is mapped, but not the dst\n"));
			goto bad_addr;
		} else {
			error = ip_bind_connected_v6(connp, mp, v6srcp,
			    lport, v6dstp, ipp, fport, ire_requested,
			    ipsec_policy_set, B_TRUE, verify_dst);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
		if (protocol == IPPROTO_TCP)
			connp->conn_recv = tcp_input;
	}
	/* Update qinfo if v4/v6 changed */
	if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && !IS_TCP_CONN(connp)) {
		if (connp->conn_pkt_isv6)
			ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE);
		else
			ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE);
	}

	/*
	 * Pass the IPSEC headers size in ire_ipsec_overhead.
	 * We can't do this in ip_bind_insert_ire because the policy
	 * may not have been inherited at that point in time and hence
	 * conn_out_enforce_policy may not be set.
	 *
	 * NOTE(review): ip_bind_insert_ire_v6() rewrites the trailing
	 * mblk's type to IRE_DB_TYPE when it fills in an IRE; confirm that
	 * the IRE_DB_REQ_TYPE test below can still match after a
	 * successful bind.
	 */
	mp1 = mp->b_cont;
	if (ire_requested && connp->conn_out_enforce_policy &&
	    mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) {
		ire_t *ire = (ire_t *)mp1->b_rptr;
		ASSERT(MBLKL(mp1) >= sizeof (ire_t));
		ire->ire_ipsec_overhead = (conn_ipsec_length(connp));
	}

	/* Send it home. */
	mp->b_datap->db_type = M_PCPROTO;
	tbr->PRIM_type = T_BIND_ACK;
	return (mp);

bad_addr:
	/*
	 * EINPROGRESS means a helper queued mp for later re-processing
	 * (see ip_bind_connected_resume_v6()), so there is nothing to
	 * return to the caller yet.
	 */
	if (error == EINPROGRESS)
		return (NULL);
	/* Positive values are errnos; 0 and -1 are reported as TBADADDR */
	if (error > 0)
		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
	else
		mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
	return (mp);
}

/*
 * Here address is verified to be a valid local address.
 * If the IRE_DB_REQ_TYPE mp is present, a multicast
 * address is also considered a valid local address.
 * In the case of a multicast address, however, the
 * upper protocol is expected to reset the src address
 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
 * no packets are emitted with multicast address as
 * source address.
 * The addresses valid for bind are:
 *	(1) - in6addr_any
 *	(2) - IP address of an UP interface
 *	(3) - IP address of a DOWN interface
 *	(4) - a multicast address. In this case
 *	the conn will only receive packets destined to
 *	the specified multicast address. Note: the
 *	application still has to issue an
 *	IPV6_JOIN_GROUP socket option.
 *
 * In all the above cases, the bound address must be valid in the current zone.
 * When the address is loopback or multicast, there might be many matching IREs
 * so bind has to look up based on the zone.
 *
 * Returns 0 on success.  On failure returns EADDRNOTAVAIL for an address
 * that is not valid for bind, whatever ipcl_bind_insert_v6() reported, or
 * -1 when the IRE or IPsec policy could not be attached.  EINPROGRESS is
 * returned when ipif_lookup_addr_v6() reports it; in that case the
 * protocol byte is left appended to mp and the policy mblk is not freed.
 * On every other return path a trailing IPSEC_POLICY_SET mblk is freed and
 * unlinked from mp.
 */
static int
ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
    uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set,
    boolean_t fanout_insert)
{
	int		error = 0;
	ire_t		*src_ire = NULL;
	ipif_t		*ipif = NULL;
	mblk_t		*policy_mp;
	zoneid_t	zoneid;

	/* policy_mp is only assigned, and only used, when ipsec_policy_set */
	if (ipsec_policy_set)
		policy_mp = mp->b_cont;

	/*
	 * If it was previously connected, conn_fully_bound would have
	 * been set.
	 */
	connp->conn_fully_bound = B_FALSE;

	zoneid = connp->conn_zoneid;

	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		src_ire = ire_route_lookup_v6(v6src, 0, 0,
		    0, NULL, NULL, zoneid, MATCH_IRE_ZONEONLY);
		/*
		 * If an address other than in6addr_any is requested,
		 * we verify that it is a valid address for bind
		 * Note: Following code is in if-else-if form for
		 * readability compared to a condition check.
		 */
		ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
		/* LINTED - statement has no consequent */
		if (IRE_IS_LOCAL(src_ire)) {
			/*
			 * (2) Bind to address of local UP interface
			 */
			ipif = src_ire->ire_ipif;
		} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
			ipif_t	*multi_ipif = NULL;
			ire_t	*save_ire;
			/*
			 * (4) bind to multicast address.
			 * Fake out the IRE returned to upper
			 * layer to be a broadcast IRE in
			 * ip_bind_insert_ire_v6().
			 * Pass other information that matches
			 * the ipif (e.g. the source address).
			 * conn_multicast_ill is only used for
			 * IPv6 packets
			 */
			mutex_enter(&connp->conn_lock);
			if (connp->conn_multicast_ill != NULL) {
				(void) ipif_lookup_zoneid(
				    connp->conn_multicast_ill, zoneid, 0,
				    &multi_ipif);
			} else {
				/*
				 * Look for default like
				 * ip_wput_v6
				 */
				multi_ipif = ipif_lookup_group_v6(
				    &ipv6_unspecified_group, zoneid);
			}
			mutex_exit(&connp->conn_lock);
			/*
			 * Swap src_ire for the ipif's IRE.  The multicast
			 * bind fails unless an ipif was found, the caller
			 * asked for an IRE, and the ipif has one; on
			 * failure the original lookup result is restored
			 * so that it is released at bad_addr.
			 */
			save_ire = src_ire;
			src_ire = NULL;
			if (multi_ipif == NULL ||
			    !ire_requested || (src_ire =
			    ipif_to_ire_v6(multi_ipif)) ==
			    NULL) {
				src_ire = save_ire;
				error = EADDRNOTAVAIL;
			} else {
				ASSERT(src_ire != NULL);
				if (save_ire != NULL)
					ire_refrele(save_ire);
			}
			if (multi_ipif != NULL)
				ipif_refrele(multi_ipif);
		} else {
			/*
			 * Re-append the protocol byte that ip_bind_v6()
			 * stripped before handing mp to the lookup, which
			 * is also given ip_wput_nondata and the conn's
			 * write queue.  Presumably this lets the lookup
			 * queue the complete request for a retry -- verify
			 * against ipif_lookup_addr_v6().
			 */
			*mp->b_wptr++ = (char)connp->conn_ulp;
			ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid,
			    CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error);
			if (ipif == NULL) {
				if (error == EINPROGRESS) {
					if (src_ire != NULL)
						ire_refrele(src_ire);
					return (error);
				}
				/*
				 * Not a valid address for bind
				 */
				error = EADDRNOTAVAIL;
			} else {
				/* (3) address exists; only existence matters */
				ipif_refrele(ipif);
			}
			/*
			 * Just to keep it consistent with the processing in
			 * ip_bind_v6().
			 */
			mp->b_wptr--;
		}

		if (error != 0) {
			/* Red Alert!  Attempting to be a bogon! */
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ip_bind_laddr_v6: bad src"
				    " address %s\n", AF_INET6, v6src);
			}
			goto bad_addr;
		}
	}

	/*
	 * Allow setting new policies. For example, disconnects come
	 * down as ipa_t bind. As we would have set conn_policy_cached
	 * to B_TRUE before, we should set it to B_FALSE, so that policy
	 * can change after the disconnect.
	 */
	connp->conn_policy_cached = B_FALSE;

	/* If not fanout_insert this was just an address verification */
	if (fanout_insert) {
		/*
		 * The addresses have been verified. Time to insert in
		 * the correct fanout list.
		 */
		connp->conn_srcv6 = *v6src;
		connp->conn_remv6 = ipv6_all_zeros;
		connp->conn_lport = lport;
		connp->conn_fport = 0;
		/* *mp->b_wptr is the protocol byte sitting just past b_wptr */
		error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport);
	}
	if (error == 0) {
		if (ire_requested) {
			if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) {
				error = -1;
				goto bad_addr;
			}
		} else if (ipsec_policy_set) {
			if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
				error = -1;
				goto bad_addr;
			}
		}
	}
	/* Common exit, reached on success as well as on failure */
bad_addr:
	if (src_ire != NULL)
		ire_refrele(src_ire);

	if (ipsec_policy_set) {
		ASSERT(policy_mp != NULL);
		freeb(policy_mp);
		/*
		 * As of now assume that nothing else accompanies
		 * IPSEC_POLICY_SET.
		 */
		mp->b_cont = NULL;
	}
	return (error);
}

/*
 * Resume a bind that ip_bind_connected_v6() deferred through
 * ip6_asp_pending_op().  The bind request in mp is simply run through
 * ip_bind_v6() again.  A non-NULL result is handed to
 * ip_resume_tcp_bind() on the conn's squeue for a TCP conn, or sent
 * upstream with qreply() otherwise.  A NULL result means the bind was
 * deferred again and nothing is done here.
 */
/* ARGSUSED */
static void
ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
    void *dummy_arg)
{
	conn_t	*connp = NULL;
	tcp_t	*tcp;
	t_scalar_t prim;

	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);

	if (CONN_Q(q))
		connp = Q_TO_CONN(q);
	ASSERT(connp != NULL);

	prim = ((union T_primitives *)mp->b_rptr)->type;
	ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ);

	tcp = connp->conn_tcp;
	if (tcp != NULL) {
		/* Pass sticky_ipp for scope_id and pktinfo */
		mp = ip_bind_v6(q, mp, connp, &tcp->tcp_sticky_ipp);
	} else {
		/* For UDP and ICMP */
		mp = ip_bind_v6(q, mp, connp, NULL);
	}
	if (mp != NULL) {
		if (tcp != NULL) {
			/*
			 * The conn reference taken here travels with the
			 * squeue request.
			 * NOTE(review): this path returns without
			 * CONN_OPER_PENDING_DONE(); presumably
			 * ip_resume_tcp_bind() takes care of it -- confirm.
			 */
			CONN_INC_REF(connp);
			squeue_fill(connp->conn_sqp, mp,
			    ip_resume_tcp_bind, connp, SQTAG_TCP_RPUTOTHER);
			return;
		} else {
			qreply(q, mp);
		}
		CONN_OPER_PENDING_DONE(connp);
	}
}

/*
 * Verify that both the source and destination addresses
 * are valid.  If verify_dst, then destination address must also be reachable,
 * i.e. have a route.  Protocols like TCP want this.  Tunnels do not.
 * It takes ip6_pkt_t * as one of the arguments to determine correct
 * source address when IPV6_PKTINFO or scope_id is set along with a link-local
 * destination address. Note that parameter ipp is only useful for TCP connect
 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
 * non-TCP cases, it is NULL and for all other tcp cases it is not useful.
2578 * 2579 */ 2580 static int 2581 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2582 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2583 boolean_t ire_requested, boolean_t ipsec_policy_set, 2584 boolean_t fanout_insert, boolean_t verify_dst) 2585 { 2586 ire_t *src_ire; 2587 ire_t *dst_ire; 2588 int error = 0; 2589 int protocol; 2590 mblk_t *policy_mp; 2591 ire_t *sire = NULL; 2592 ire_t *md_dst_ire = NULL; 2593 ill_t *md_ill = NULL; 2594 ill_t *dst_ill = NULL; 2595 ipif_t *src_ipif = NULL; 2596 zoneid_t zoneid; 2597 boolean_t ill_held = B_FALSE; 2598 2599 src_ire = dst_ire = NULL; 2600 /* 2601 * NOTE: The protocol is beyond the wptr because that's how 2602 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2603 */ 2604 protocol = *mp->b_wptr & 0xFF; 2605 2606 /* 2607 * If we never got a disconnect before, clear it now. 2608 */ 2609 connp->conn_fully_bound = B_FALSE; 2610 2611 if (ipsec_policy_set) { 2612 policy_mp = mp->b_cont; 2613 } 2614 2615 zoneid = connp->conn_zoneid; 2616 2617 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2618 ipif_t *ipif; 2619 2620 /* 2621 * Use an "emulated" IRE_BROADCAST to tell the transport it 2622 * is a multicast. 2623 * Pass other information that matches 2624 * the ipif (e.g. the source address). 
2625 * 2626 * conn_multicast_ill is only used for IPv6 packets 2627 */ 2628 mutex_enter(&connp->conn_lock); 2629 if (connp->conn_multicast_ill != NULL) { 2630 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2631 zoneid, 0, &ipif); 2632 } else { 2633 /* Look for default like ip_wput_v6 */ 2634 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2635 } 2636 mutex_exit(&connp->conn_lock); 2637 if (ipif == NULL || !ire_requested || 2638 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2639 if (ipif != NULL) 2640 ipif_refrele(ipif); 2641 if (ip_debug > 2) { 2642 /* ip1dbg */ 2643 pr_addr_dbg("ip_bind_connected_v6: bad " 2644 "connected multicast %s\n", AF_INET6, 2645 v6dst); 2646 } 2647 error = ENETUNREACH; 2648 goto bad_addr; 2649 } 2650 if (ipif != NULL) 2651 ipif_refrele(ipif); 2652 } else { 2653 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2654 NULL, &sire, zoneid, 2655 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2656 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE); 2657 /* 2658 * We also prevent ire's with src address INADDR_ANY to 2659 * be used, which are created temporarily for 2660 * sending out packets from endpoints that have 2661 * conn_unspec_src set. 2662 */ 2663 if (dst_ire == NULL || 2664 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2665 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2666 /* 2667 * When verifying destination reachability, we always 2668 * complain. 2669 * 2670 * When not verifying destination reachability but we 2671 * found an IRE, i.e. the destination is reachable, 2672 * then the other tests still apply and we complain. 
2673 */ 2674 if (verify_dst || (dst_ire != NULL)) { 2675 if (ip_debug > 2) { 2676 /* ip1dbg */ 2677 pr_addr_dbg("ip_bind_connected_v6: bad" 2678 " connected dst %s\n", AF_INET6, 2679 v6dst); 2680 } 2681 if (dst_ire == NULL || 2682 !(dst_ire->ire_type & IRE_HOST)) { 2683 error = ENETUNREACH; 2684 } else { 2685 error = EHOSTUNREACH; 2686 } 2687 goto bad_addr; 2688 } 2689 } 2690 } 2691 2692 /* 2693 * If the app does a connect(), it means that it will most likely 2694 * send more than 1 packet to the destination. It makes sense 2695 * to clear the temporary flag. 2696 */ 2697 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2698 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2699 irb_t *irb = dst_ire->ire_bucket; 2700 2701 rw_enter(&irb->irb_lock, RW_WRITER); 2702 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2703 irb->irb_tmp_ire_cnt--; 2704 rw_exit(&irb->irb_lock); 2705 } 2706 2707 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2708 2709 /* 2710 * See if we should notify ULP about MDT; we do this whether or not 2711 * ire_requested is TRUE, in order to handle active connects; MDT 2712 * eligibility tests for passive connects are handled separately 2713 * through tcp_adapt_ire(). We do this before the source address 2714 * selection, because dst_ire may change after a call to 2715 * ipif_select_source_v6(). This is a best-effort check, as the 2716 * packet for this connection may not actually go through 2717 * dst_ire->ire_stq, and the exact IRE can only be known after 2718 * calling ip_newroute_v6(). This is why we further check on the 2719 * IRE during Multidata packet transmission in tcp_multisend(). 
2720 */ 2721 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2722 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2723 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2724 (md_ill->ill_capabilities & ILL_CAPAB_MDT)) { 2725 md_dst_ire = dst_ire; 2726 IRE_REFHOLD(md_dst_ire); 2727 } 2728 2729 if (dst_ire != NULL && 2730 dst_ire->ire_type == IRE_LOCAL && 2731 dst_ire->ire_zoneid != zoneid) { 2732 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2733 zoneid, 0, 2734 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2735 MATCH_IRE_RJ_BHOLE); 2736 if (src_ire == NULL) { 2737 error = EHOSTUNREACH; 2738 goto bad_addr; 2739 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2740 if (!(src_ire->ire_type & IRE_HOST)) 2741 error = ENETUNREACH; 2742 else 2743 error = EHOSTUNREACH; 2744 goto bad_addr; 2745 } 2746 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2747 src_ipif = src_ire->ire_ipif; 2748 ipif_refhold(src_ipif); 2749 *v6src = src_ipif->ipif_v6lcl_addr; 2750 } 2751 ire_refrele(src_ire); 2752 src_ire = NULL; 2753 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2754 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2755 *v6src = sire->ire_src_addr_v6; 2756 ire_refrele(dst_ire); 2757 dst_ire = sire; 2758 sire = NULL; 2759 } else if (dst_ire->ire_type == IRE_CACHE && 2760 (dst_ire->ire_flags & RTF_SETSRC)) { 2761 ASSERT(dst_ire->ire_zoneid == zoneid); 2762 *v6src = dst_ire->ire_src_addr_v6; 2763 } else { 2764 /* 2765 * Pick a source address so that a proper inbound load 2766 * spreading would happen. Use dst_ill specified by the 2767 * app. when socket option or scopeid is set. 
2768 */ 2769 int err; 2770 2771 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2772 uint_t if_index; 2773 2774 /* 2775 * Scope id or IPV6_PKTINFO 2776 */ 2777 2778 if_index = ipp->ipp_ifindex; 2779 dst_ill = ill_lookup_on_ifindex( 2780 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2781 if (dst_ill == NULL) { 2782 ip1dbg(("ip_bind_connected_v6:" 2783 " bad ifindex %d\n", if_index)); 2784 error = EADDRNOTAVAIL; 2785 goto bad_addr; 2786 } 2787 ill_held = B_TRUE; 2788 } else if (connp->conn_outgoing_ill != NULL) { 2789 /* 2790 * For IPV6_BOUND_IF socket option, 2791 * conn_outgoing_ill should be set 2792 * already in TCP or UDP/ICMP. 2793 */ 2794 dst_ill = conn_get_held_ill(connp, 2795 &connp->conn_outgoing_ill, &err); 2796 if (err == ILL_LOOKUP_FAILED) { 2797 ip1dbg(("ip_bind_connected_v6:" 2798 "no ill for bound_if\n")); 2799 error = EADDRNOTAVAIL; 2800 goto bad_addr; 2801 } 2802 ill_held = B_TRUE; 2803 } else if (dst_ire->ire_stq != NULL) { 2804 /* No need to hold ill here */ 2805 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2806 } else { 2807 /* No need to hold ill here */ 2808 dst_ill = dst_ire->ire_ipif->ipif_ill; 2809 } 2810 if (!ip6_asp_can_lookup()) { 2811 *mp->b_wptr++ = (char)protocol; 2812 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2813 ip_bind_connected_resume_v6); 2814 error = EINPROGRESS; 2815 goto refrele_and_quit; 2816 } 2817 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2818 B_FALSE, connp->conn_src_preferences, zoneid); 2819 ip6_asp_table_refrele(); 2820 if (src_ipif == NULL) { 2821 pr_addr_dbg("ip_bind_connected_v6: " 2822 "no usable source address for " 2823 "connection to %s\n", AF_INET6, v6dst); 2824 error = EADDRNOTAVAIL; 2825 goto bad_addr; 2826 } 2827 *v6src = src_ipif->ipif_v6lcl_addr; 2828 } 2829 } 2830 2831 /* 2832 * We do ire_route_lookup_v6() here (and not an interface lookup) 2833 * as we assert that v6src should only come from an 2834 * UP interface for hard binding. 
2835 */ 2836 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2837 NULL, zoneid, MATCH_IRE_ZONEONLY); 2838 2839 /* src_ire must be a local|loopback */ 2840 if (!IRE_IS_LOCAL(src_ire)) { 2841 if (ip_debug > 2) { 2842 /* ip1dbg */ 2843 pr_addr_dbg("ip_bind_connected_v6: bad " 2844 "connected src %s\n", AF_INET6, v6src); 2845 } 2846 error = EADDRNOTAVAIL; 2847 goto bad_addr; 2848 } 2849 2850 /* 2851 * If the source address is a loopback address, the 2852 * destination had best be local or multicast. 2853 * The transports that can't handle multicast will reject 2854 * those addresses. 2855 */ 2856 if (src_ire->ire_type == IRE_LOOPBACK && 2857 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2858 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2859 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2860 error = -1; 2861 goto bad_addr; 2862 } 2863 /* 2864 * Allow setting new policies. For example, disconnects come 2865 * down as ipa_t bind. As we would have set conn_policy_cached 2866 * to B_TRUE before, we should set it to B_FALSE, so that policy 2867 * can change after the disconnect. 2868 */ 2869 connp->conn_policy_cached = B_FALSE; 2870 2871 /* 2872 * The addresses have been verified. Initialize the conn 2873 * before calling the policy as they expect the conns 2874 * initialized. 2875 */ 2876 connp->conn_srcv6 = *v6src; 2877 connp->conn_remv6 = *v6dst; 2878 connp->conn_lport = lport; 2879 connp->conn_fport = fport; 2880 2881 ASSERT(!(ipsec_policy_set && ire_requested)); 2882 if (ire_requested) { 2883 iulp_t *ulp_info = NULL; 2884 2885 /* 2886 * Note that sire will not be NULL if this is an off-link 2887 * connection and there is not cache for that dest yet. 2888 * 2889 * XXX Because of an existing bug, if there are multiple 2890 * default routes, the IRE returned now may not be the actual 2891 * default route used (default routes are chosen in a 2892 * round robin fashion). 
So if the metrics for different 2893 * default routes are different, we may return the wrong 2894 * metrics. This will not be a problem if the existing 2895 * bug is fixed. 2896 */ 2897 if (sire != NULL) 2898 ulp_info = &(sire->ire_uinfo); 2899 2900 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 2901 error = -1; 2902 goto bad_addr; 2903 } 2904 } else if (ipsec_policy_set) { 2905 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2906 error = -1; 2907 goto bad_addr; 2908 } 2909 } 2910 2911 /* 2912 * Cache IPsec policy in this conn. If we have per-socket policy, 2913 * we'll cache that. If we don't, we'll inherit global policy. 2914 * 2915 * We can't insert until the conn reflects the policy. Note that 2916 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2917 * connections where we don't have a policy. This is to prevent 2918 * global policy lookups in the inbound path. 2919 * 2920 * If we insert before we set conn_policy_cached, 2921 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2922 * because global policy cound be non-empty. We normally call 2923 * ipsec_check_policy() for conn_policy_cached connections only if 2924 * conn_in_enforce_policy is set. But in this case, 2925 * conn_policy_cached can get set anytime since we made the 2926 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2927 * is called, which will make the above assumption false. Thus, we 2928 * need to insert after we set conn_policy_cached. 2929 */ 2930 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2931 goto bad_addr; 2932 2933 /* If not fanout_insert this was just an address verification */ 2934 if (fanout_insert) { 2935 /* 2936 * The addresses have been verified. Time to insert in 2937 * the correct fanout list. 2938 */ 2939 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2940 connp->conn_ports, 2941 IS_TCP_CONN(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2942 } 2943 if (error == 0) { 2944 connp->conn_fully_bound = B_TRUE; 2945 /* 2946 * Our initial checks for MDT have passed; the IRE is not 2947 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2948 * be supporting MDT. Pass the IRE, IPC and ILL into 2949 * ip_mdinfo_return(), which performs further checks 2950 * against them and upon success, returns the MDT info 2951 * mblk which we will attach to the bind acknowledgment. 2952 */ 2953 if (md_dst_ire != NULL) { 2954 mblk_t *mdinfo_mp; 2955 2956 ASSERT(md_ill != NULL); 2957 ASSERT(md_ill->ill_mdt_capab != NULL); 2958 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2959 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2960 linkb(mp, mdinfo_mp); 2961 } 2962 } 2963 bad_addr: 2964 if (ipsec_policy_set) { 2965 ASSERT(policy_mp != NULL); 2966 freeb(policy_mp); 2967 /* 2968 * As of now assume that nothing else accompanies 2969 * IPSEC_POLICY_SET. 2970 */ 2971 mp->b_cont = NULL; 2972 } 2973 refrele_and_quit: 2974 if (src_ire != NULL) 2975 IRE_REFRELE(src_ire); 2976 if (dst_ire != NULL) 2977 IRE_REFRELE(dst_ire); 2978 if (sire != NULL) 2979 IRE_REFRELE(sire); 2980 if (src_ipif != NULL) 2981 ipif_refrele(src_ipif); 2982 if (md_dst_ire != NULL) 2983 IRE_REFRELE(md_dst_ire); 2984 if (ill_held && dst_ill != NULL) 2985 ill_refrele(dst_ill); 2986 return (error); 2987 } 2988 2989 /* 2990 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 2991 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2992 */ 2993 static boolean_t 2994 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 2995 iulp_t *ulp_info) 2996 { 2997 mblk_t *mp1; 2998 ire_t *ret_ire; 2999 3000 mp1 = mp->b_cont; 3001 ASSERT(mp1 != NULL); 3002 3003 if (ire != NULL) { 3004 /* 3005 * mp1 initialized above to IRE_DB_REQ_TYPE 3006 * appended mblk. Its <upper protocol>'s 3007 * job to make sure there is room. 
3008 */ 3009 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3010 return (B_FALSE); 3011 3012 mp1->b_datap->db_type = IRE_DB_TYPE; 3013 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3014 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3015 ret_ire = (ire_t *)mp1->b_rptr; 3016 if (IN6_IS_ADDR_MULTICAST(dst) || 3017 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3018 ret_ire->ire_type = IRE_BROADCAST; 3019 ret_ire->ire_addr_v6 = *dst; 3020 } 3021 if (ulp_info != NULL) { 3022 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3023 sizeof (iulp_t)); 3024 } 3025 ret_ire->ire_mp = mp1; 3026 } else { 3027 /* 3028 * No IRE was found. Remove IRE mblk. 3029 */ 3030 mp->b_cont = mp1->b_cont; 3031 freeb(mp1); 3032 } 3033 return (B_TRUE); 3034 } 3035 3036 /* 3037 * Add an ip6i_t header to the front of the mblk. 3038 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3039 * Returns NULL if allocation fails (and frees original message). 3040 * Used in outgoing path when going through ip_newroute_*v6(). 3041 * Used in incoming path to pass ifindex to transports. 
3042 */ 3043 mblk_t * 3044 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3045 { 3046 mblk_t *mp1; 3047 ip6i_t *ip6i; 3048 ip6_t *ip6h; 3049 3050 ip6h = (ip6_t *)mp->b_rptr; 3051 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3052 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3053 mp->b_datap->db_ref > 1) { 3054 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3055 if (mp1 == NULL) { 3056 freemsg(mp); 3057 return (NULL); 3058 } 3059 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3060 mp1->b_cont = mp; 3061 mp = mp1; 3062 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3063 } 3064 mp->b_rptr = (uchar_t *)ip6i; 3065 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3066 ip6i->ip6i_nxt = IPPROTO_RAW; 3067 if (ill != NULL) { 3068 ip6i->ip6i_flags = IP6I_IFINDEX; 3069 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3070 } else { 3071 ip6i->ip6i_flags = 0; 3072 } 3073 ip6i->ip6i_nexthop = *dst; 3074 return (mp); 3075 } 3076 3077 /* 3078 * Handle protocols with which IP is less intimate. There 3079 * can be more than one stream bound to a particular 3080 * protocol. When this is the case, normally each one gets a copy 3081 * of any incoming packets. 3082 * However, if the packet was tunneled and not multicast we only send to it 3083 * the first match. 3084 * 3085 * Zones notes: 3086 * Packets will be distributed to streams in all zones. This is really only 3087 * useful for ICMPv6 as only applications in the global zone can create raw 3088 * sockets for other protocols. 
3089 */ 3090 static void 3091 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3092 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3093 boolean_t mctl_present, zoneid_t zoneid) 3094 { 3095 queue_t *rq; 3096 mblk_t *mp1, *first_mp1; 3097 in6_addr_t dst = ip6h->ip6_dst; 3098 in6_addr_t src = ip6h->ip6_src; 3099 boolean_t one_only; 3100 mblk_t *first_mp = mp; 3101 boolean_t secure; 3102 conn_t *connp, *first_connp, *next_connp; 3103 connf_t *connfp; 3104 3105 if (mctl_present) { 3106 mp = first_mp->b_cont; 3107 secure = ipsec_in_is_secure(first_mp); 3108 ASSERT(mp != NULL); 3109 } else { 3110 secure = B_FALSE; 3111 } 3112 3113 /* 3114 * If the packet was tunneled and not multicast we only send to it 3115 * the first match. 3116 */ 3117 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3118 !IN6_IS_ADDR_MULTICAST(&dst)); 3119 3120 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3121 mutex_enter(&connfp->connf_lock); 3122 connp = connfp->connf_head; 3123 for (connp = connfp->connf_head; connp != NULL; 3124 connp = connp->conn_next) { 3125 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3126 zoneid)) 3127 break; 3128 } 3129 3130 if (connp == NULL || connp->conn_upq == NULL) { 3131 /* 3132 * No one bound to this port. Is 3133 * there a client that wants all 3134 * unclaimed datagrams? 3135 */ 3136 mutex_exit(&connfp->connf_lock); 3137 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3138 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3139 nexthdr_offset, mctl_present, zoneid)) { 3140 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3141 } 3142 3143 return; 3144 } 3145 3146 CONN_INC_REF(connp); 3147 first_connp = connp; 3148 3149 /* 3150 * XXX: Fix the multiple protocol listeners case. We should not 3151 * be walking the conn->next list here. 3152 */ 3153 if (one_only) { 3154 /* 3155 * Only send message to one tunnel driver by immediately 3156 * terminating the loop. 
3157 */ 3158 connp = NULL; 3159 } else { 3160 connp = connp->conn_next; 3161 3162 } 3163 for (;;) { 3164 while (connp != NULL) { 3165 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3166 flags, zoneid)) 3167 break; 3168 connp = connp->conn_next; 3169 } 3170 3171 /* 3172 * Just copy the data part alone. The mctl part is 3173 * needed just for verifying policy and it is never 3174 * sent up. 3175 */ 3176 if (connp == NULL || connp->conn_upq == NULL || 3177 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3178 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3179 /* 3180 * No more intested clients or memory 3181 * allocation failed 3182 */ 3183 connp = first_connp; 3184 break; 3185 } 3186 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3187 CONN_INC_REF(connp); 3188 mutex_exit(&connfp->connf_lock); 3189 rq = connp->conn_rq; 3190 /* 3191 * For link-local always add ifindex so that transport can set 3192 * sin6_scope_id. Avoid it for ICMP error fanout. 3193 */ 3194 if ((connp->conn_ipv6_recvpktinfo || 3195 IN6_IS_ADDR_LINKLOCAL(&src)) && 3196 (flags & IP_FF_IP6INFO)) { 3197 /* Add header */ 3198 mp1 = ip_add_info_v6(mp1, inill, &dst); 3199 } 3200 if (mp1 == NULL) { 3201 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3202 } else if (!canputnext(rq)) { 3203 if (flags & IP_FF_RAWIP) { 3204 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3205 } else { 3206 BUMP_MIB(ill->ill_icmp6_mib, 3207 ipv6IfIcmpInOverflows); 3208 } 3209 3210 freemsg(mp1); 3211 } else { 3212 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3213 first_mp1 = ipsec_check_inbound_policy 3214 (first_mp1, connp, NULL, ip6h, 3215 mctl_present); 3216 } 3217 if (first_mp1 != NULL) { 3218 if (mctl_present) 3219 freeb(first_mp1); 3220 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3221 putnext(rq, mp1); 3222 } 3223 } 3224 mutex_enter(&connfp->connf_lock); 3225 /* Follow the next pointer before releasing the conn. 
*/ 3226 next_connp = connp->conn_next; 3227 CONN_DEC_REF(connp); 3228 connp = next_connp; 3229 } 3230 3231 /* Last one. Send it upstream. */ 3232 mutex_exit(&connfp->connf_lock); 3233 3234 /* Initiate IPPF processing */ 3235 if (IP6_IN_IPP(flags)) { 3236 uint_t ifindex; 3237 3238 mutex_enter(&ill->ill_lock); 3239 ifindex = ill->ill_phyint->phyint_ifindex; 3240 mutex_exit(&ill->ill_lock); 3241 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3242 if (mp == NULL) { 3243 CONN_DEC_REF(connp); 3244 if (mctl_present) 3245 freeb(first_mp); 3246 return; 3247 } 3248 } 3249 3250 /* 3251 * For link-local always add ifindex so that transport can set 3252 * sin6_scope_id. Avoid it for ICMP error fanout. 3253 */ 3254 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3255 (flags & IP_FF_IP6INFO)) { 3256 /* Add header */ 3257 mp = ip_add_info_v6(mp, inill, &dst); 3258 if (mp == NULL) { 3259 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3260 CONN_DEC_REF(connp); 3261 if (mctl_present) 3262 freeb(first_mp); 3263 return; 3264 } else if (mctl_present) { 3265 first_mp->b_cont = mp; 3266 } else { 3267 first_mp = mp; 3268 } 3269 } 3270 3271 rq = connp->conn_rq; 3272 if (!canputnext(rq)) { 3273 if (flags & IP_FF_RAWIP) { 3274 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3275 } else { 3276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3277 } 3278 3279 freemsg(first_mp); 3280 } else { 3281 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3282 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3283 NULL, ip6h, mctl_present); 3284 if (first_mp == NULL) { 3285 CONN_DEC_REF(connp); 3286 return; 3287 } 3288 } 3289 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3290 putnext(rq, mp); 3291 if (mctl_present) 3292 freeb(first_mp); 3293 } 3294 CONN_DEC_REF(connp); 3295 } 3296 3297 /* 3298 * Send an ICMP error after patching up the packet appropriately. Returns 3299 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3300 */ 3301 int 3302 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3303 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3304 boolean_t mctl_present, zoneid_t zoneid) 3305 { 3306 ip6_t *ip6h; 3307 mblk_t *first_mp; 3308 boolean_t secure; 3309 unsigned char db_type; 3310 3311 first_mp = mp; 3312 if (mctl_present) { 3313 mp = mp->b_cont; 3314 secure = ipsec_in_is_secure(first_mp); 3315 ASSERT(mp != NULL); 3316 } else { 3317 /* 3318 * If this is an ICMP error being reported - which goes 3319 * up as M_CTLs, we need to convert them to M_DATA till 3320 * we finish checking with global policy because 3321 * ipsec_check_global_policy() assumes M_DATA as clear 3322 * and M_CTL as secure. 3323 */ 3324 db_type = mp->b_datap->db_type; 3325 mp->b_datap->db_type = M_DATA; 3326 secure = B_FALSE; 3327 } 3328 /* 3329 * We are generating an icmp error for some inbound packet. 3330 * Called from all ip_fanout_(udp, tcp, proto) functions. 3331 * Before we generate an error, check with global policy 3332 * to see whether this is allowed to enter the system. As 3333 * there is no "conn", we are checking with global policy. 
3334 */ 3335 ip6h = (ip6_t *)mp->b_rptr; 3336 if (secure || ipsec_inbound_v6_policy_present) { 3337 first_mp = ipsec_check_global_policy(first_mp, NULL, 3338 NULL, ip6h, mctl_present); 3339 if (first_mp == NULL) 3340 return (0); 3341 } 3342 3343 if (!mctl_present) 3344 mp->b_datap->db_type = db_type; 3345 3346 if (flags & IP_FF_SEND_ICMP) { 3347 if (flags & IP_FF_HDR_COMPLETE) { 3348 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3349 freemsg(first_mp); 3350 return (1); 3351 } 3352 } 3353 switch (icmp_type) { 3354 case ICMP6_DST_UNREACH: 3355 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3356 B_FALSE, B_FALSE); 3357 break; 3358 case ICMP6_PARAM_PROB: 3359 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3360 nexthdr_offset, B_FALSE, B_FALSE); 3361 break; 3362 default: 3363 #ifdef DEBUG 3364 panic("ip_fanout_send_icmp_v6: wrong type"); 3365 /*NOTREACHED*/ 3366 #else 3367 freemsg(first_mp); 3368 break; 3369 #endif 3370 } 3371 } else { 3372 freemsg(first_mp); 3373 return (0); 3374 } 3375 3376 return (1); 3377 } 3378 3379 3380 /* 3381 * Fanout for TCP packets 3382 * The caller puts <fport, lport> in the ports parameter. 3383 */ 3384 static void 3385 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3386 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3387 { 3388 mblk_t *first_mp; 3389 boolean_t secure; 3390 conn_t *connp; 3391 tcph_t *tcph; 3392 boolean_t syn_present = B_FALSE; 3393 3394 first_mp = mp; 3395 if (mctl_present) { 3396 mp = first_mp->b_cont; 3397 secure = ipsec_in_is_secure(first_mp); 3398 ASSERT(mp != NULL); 3399 } else { 3400 secure = B_FALSE; 3401 } 3402 3403 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3404 3405 if (connp == NULL || 3406 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3407 /* 3408 * No hard-bound match. Send Reset. 
3409 */ 3410 dblk_t *dp = mp->b_datap; 3411 uint32_t ill_index; 3412 3413 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3414 3415 /* Initiate IPPf processing, if needed. */ 3416 if (IPP_ENABLED(IPP_LOCAL_IN) && 3417 (flags & (IP6_NO_IPPOLICY|IP6_IN_NOCKSUM))) { 3418 ill_index = ill->ill_phyint->phyint_ifindex; 3419 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3420 if (first_mp == NULL) { 3421 if (connp != NULL) 3422 CONN_DEC_REF(connp); 3423 return; 3424 } 3425 } 3426 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3427 tcp_xmit_listeners_reset(first_mp, hdr_len); 3428 if (connp != NULL) 3429 CONN_DEC_REF(connp); 3430 return; 3431 } 3432 3433 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3434 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3435 if (connp->conn_flags & IPCL_TCP) { 3436 squeue_t *sqp; 3437 3438 /* 3439 * For fused tcp loopback, assign the eager's 3440 * squeue to be that of the active connect's. 3441 */ 3442 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3443 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3444 !IP6_IN_IPP(flags)) { 3445 ASSERT(Q_TO_CONN(q) != NULL); 3446 sqp = Q_TO_CONN(q)->conn_sqp; 3447 } else { 3448 sqp = IP_SQUEUE_GET(lbolt); 3449 } 3450 3451 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3452 mp->b_datap->db_cksumstart = (intptr_t)sqp; 3453 3454 /* 3455 * db_cksumstuff is unused in the incoming 3456 * path; Thus store the ifindex here. It will 3457 * be cleared in tcp_conn_create_v6(). 
3458 */ 3459 mp->b_datap->db_cksumstuff = 3460 (intptr_t)ill->ill_phyint->phyint_ifindex; 3461 syn_present = B_TRUE; 3462 } 3463 } 3464 3465 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3466 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3467 if ((flags & TH_RST) || (flags & TH_URG)) { 3468 CONN_DEC_REF(connp); 3469 freemsg(first_mp); 3470 return; 3471 } 3472 if (flags & TH_ACK) { 3473 tcp_xmit_listeners_reset(first_mp, hdr_len); 3474 CONN_DEC_REF(connp); 3475 return; 3476 } 3477 3478 CONN_DEC_REF(connp); 3479 freemsg(first_mp); 3480 return; 3481 } 3482 3483 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3484 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3485 NULL, ip6h, mctl_present); 3486 if (first_mp == NULL) { 3487 CONN_DEC_REF(connp); 3488 return; 3489 } 3490 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3491 ASSERT(syn_present); 3492 if (mctl_present) { 3493 ASSERT(first_mp != mp); 3494 first_mp->b_datap->db_struioflag |= 3495 STRUIO_POLICY; 3496 } else { 3497 ASSERT(first_mp == mp); 3498 mp->b_datap->db_struioflag &= 3499 ~STRUIO_EAGER; 3500 mp->b_datap->db_struioflag |= 3501 STRUIO_POLICY; 3502 } 3503 } else { 3504 /* 3505 * Discard first_mp early since we're dealing with a 3506 * fully-connected conn_t and tcp doesn't do policy in 3507 * this case. Also, if someone is bound to IPPROTO_TCP 3508 * over raw IP, they don't expect to see a M_CTL. 
3509 */ 3510 if (mctl_present) { 3511 freeb(first_mp); 3512 mctl_present = B_FALSE; 3513 } 3514 first_mp = mp; 3515 } 3516 } 3517 3518 /* Initiate IPPF processing */ 3519 if (IP6_IN_IPP(flags)) { 3520 uint_t ifindex; 3521 3522 mutex_enter(&ill->ill_lock); 3523 ifindex = ill->ill_phyint->phyint_ifindex; 3524 mutex_exit(&ill->ill_lock); 3525 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3526 if (mp == NULL) { 3527 CONN_DEC_REF(connp); 3528 if (mctl_present) { 3529 freeb(first_mp); 3530 } 3531 return; 3532 } else if (mctl_present) { 3533 /* 3534 * ip_add_info_v6 might return a new mp. 3535 */ 3536 ASSERT(first_mp != mp); 3537 first_mp->b_cont = mp; 3538 } else { 3539 first_mp = mp; 3540 } 3541 } 3542 3543 /* 3544 * For link-local always add ifindex so that TCP can bind to that 3545 * interface. Avoid it for ICMP error fanout. 3546 */ 3547 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3548 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3549 (flags & IP_FF_IP6INFO))) { 3550 /* Add header */ 3551 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3552 if (mp == NULL) { 3553 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3554 CONN_DEC_REF(connp); 3555 if (mctl_present) 3556 freeb(first_mp); 3557 return; 3558 } else if (mctl_present) { 3559 ASSERT(first_mp != mp); 3560 first_mp->b_cont = mp; 3561 } else { 3562 first_mp = mp; 3563 } 3564 } 3565 3566 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3567 if (IPCL_IS_TCP(connp)) { 3568 (*ip_input_proc)(connp->conn_sqp, first_mp, 3569 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3570 } else { 3571 putnext(connp->conn_rq, first_mp); 3572 CONN_DEC_REF(connp); 3573 } 3574 } 3575 3576 /* 3577 * Fanout for UDP packets. 3578 * The caller puts <fport, lport> in the ports parameter. 3579 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3580 * 3581 * If SO_REUSEADDR is set all multicast and broadcast packets 3582 * will be delivered to all streams bound to the same port. 
3583 * 3584 * Zones notes: 3585 * Multicast packets will be distributed to streams in all zones. 3586 */ 3587 static void 3588 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3589 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3590 zoneid_t zoneid) 3591 { 3592 queue_t *rq; 3593 uint32_t dstport, srcport; 3594 in6_addr_t dst; 3595 mblk_t *first_mp; 3596 boolean_t secure; 3597 conn_t *connp; 3598 connf_t *connfp; 3599 conn_t *first_conn; 3600 conn_t *next_conn; 3601 mblk_t *mp1, *first_mp1; 3602 in6_addr_t src; 3603 3604 first_mp = mp; 3605 if (mctl_present) { 3606 mp = first_mp->b_cont; 3607 secure = ipsec_in_is_secure(first_mp); 3608 ASSERT(mp != NULL); 3609 } else { 3610 secure = B_FALSE; 3611 } 3612 3613 /* Extract ports in net byte order */ 3614 dstport = htons(ntohl(ports) & 0xFFFF); 3615 srcport = htons(ntohl(ports) >> 16); 3616 dst = ip6h->ip6_dst; 3617 src = ip6h->ip6_src; 3618 3619 /* Attempt to find a client stream based on destination port. */ 3620 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3621 mutex_enter(&connfp->connf_lock); 3622 connp = connfp->connf_head; 3623 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3624 /* 3625 * Not multicast. Send to the one (first) client we find. 
3626 */ 3627 while (connp != NULL) { 3628 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3629 src) && connp->conn_zoneid == zoneid && 3630 conn_wantpacket_v6(connp, ill, ip6h, 3631 flags, zoneid)) { 3632 break; 3633 } 3634 connp = connp->conn_next; 3635 } 3636 if (connp == NULL || connp->conn_upq == NULL) 3637 goto notfound; 3638 3639 /* Found a client */ 3640 CONN_INC_REF(connp); 3641 mutex_exit(&connfp->connf_lock); 3642 rq = connp->conn_rq; 3643 3644 if (!canputnext(rq)) { 3645 freemsg(first_mp); 3646 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3647 CONN_DEC_REF(connp); 3648 return; 3649 } 3650 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3651 first_mp = ipsec_check_inbound_policy(first_mp, 3652 connp, NULL, ip6h, mctl_present); 3653 if (first_mp == NULL) { 3654 CONN_DEC_REF(connp); 3655 return; 3656 } 3657 } 3658 /* Initiate IPPF processing */ 3659 if (IP6_IN_IPP(flags)) { 3660 uint_t ifindex; 3661 3662 mutex_enter(&ill->ill_lock); 3663 ifindex = ill->ill_phyint->phyint_ifindex; 3664 mutex_exit(&ill->ill_lock); 3665 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3666 if (mp == NULL) { 3667 CONN_DEC_REF(connp); 3668 if (mctl_present) 3669 freeb(first_mp); 3670 return; 3671 } 3672 } 3673 /* 3674 * For link-local always add ifindex so that 3675 * transport can set sin6_scope_id. Avoid it for 3676 * ICMP error fanout. 
3677 */ 3678 if ((connp->conn_ipv6_recvpktinfo || 3679 IN6_IS_ADDR_LINKLOCAL(&src)) && 3680 (flags & IP_FF_IP6INFO)) { 3681 /* Add header */ 3682 mp = ip_add_info_v6(mp, inill, &dst); 3683 if (mp == NULL) { 3684 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3685 CONN_DEC_REF(connp); 3686 if (mctl_present) 3687 freeb(first_mp); 3688 return; 3689 } else if (mctl_present) { 3690 first_mp->b_cont = mp; 3691 } else { 3692 first_mp = mp; 3693 } 3694 } 3695 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3696 putnext(rq, mp); 3697 IP6_STAT(ip6_udp_fannorm); 3698 CONN_DEC_REF(connp); 3699 if (mctl_present) 3700 freeb(first_mp); 3701 return; 3702 } 3703 3704 /* 3705 * The code is fine but we shouldn't be walking the conn_next 3706 * list in IPv6 (its a classifier private data struct). Maybe create 3707 * a classifier API to put a REF_HOLD on all matching conn in the 3708 * list and return an array. 3709 */ 3710 while (connp != NULL) { 3711 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3712 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) 3713 break; 3714 connp = connp->conn_next; 3715 } 3716 3717 if (connp == NULL || connp->conn_upq == NULL) 3718 goto notfound; 3719 3720 first_conn = connp; 3721 3722 CONN_INC_REF(connp); 3723 connp = connp->conn_next; 3724 for (;;) { 3725 while (connp != NULL) { 3726 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3727 src) && conn_wantpacket_v6(connp, ill, ip6h, 3728 flags, zoneid)) 3729 break; 3730 connp = connp->conn_next; 3731 } 3732 /* 3733 * Just copy the data part alone. The mctl part is 3734 * needed just for verifying policy and it is never 3735 * sent up. 3736 */ 3737 if (connp == NULL || 3738 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3739 ((first_mp1 = ip_copymsg(first_mp)) 3740 == NULL))) { 3741 /* 3742 * No more interested clients or memory 3743 * allocation failed 3744 */ 3745 connp = first_conn; 3746 break; 3747 } 3748 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3749 CONN_INC_REF(connp); 3750 mutex_exit(&connfp->connf_lock); 3751 rq = connp->conn_rq; 3752 /* 3753 * For link-local always add ifindex so that transport 3754 * can set sin6_scope_id. Avoid it for ICMP error 3755 * fanout. 3756 */ 3757 if ((connp->conn_ipv6_recvpktinfo || 3758 IN6_IS_ADDR_LINKLOCAL(&src)) && 3759 (flags & IP_FF_IP6INFO)) { 3760 /* Add header */ 3761 mp1 = ip_add_info_v6(mp1, inill, &dst); 3762 } 3763 if (mp1 == NULL) { 3764 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3765 goto next_one; 3766 } 3767 if (!canputnext(rq)) { 3768 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3769 freemsg(mp1); 3770 goto next_one; 3771 } 3772 3773 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3774 secure) { 3775 first_mp1 = ipsec_check_inbound_policy 3776 (first_mp1, connp, NULL, ip6h, 3777 mctl_present); 3778 } 3779 if (first_mp1 != NULL) { 3780 if (mctl_present) 3781 freeb(first_mp1); 3782 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3783 putnext(rq, mp1); 3784 } 3785 next_one: 3786 mutex_enter(&connfp->connf_lock); 3787 /* Follow the next pointer before releasing the conn. */ 3788 next_conn = connp->conn_next; 3789 IP6_STAT(ip6_udp_fanmb); 3790 CONN_DEC_REF(connp); 3791 connp = next_conn; 3792 } 3793 3794 /* Last one. Send it upstream. */ 3795 mutex_exit(&connfp->connf_lock); 3796 rq = connp->conn_rq; 3797 3798 /* Initiate IPPF processing */ 3799 if (IP6_IN_IPP(flags)) { 3800 uint_t ifindex; 3801 3802 mutex_enter(&ill->ill_lock); 3803 ifindex = ill->ill_phyint->phyint_ifindex; 3804 mutex_exit(&ill->ill_lock); 3805 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3806 if (mp == NULL) { 3807 CONN_DEC_REF(connp); 3808 if (mctl_present) { 3809 freeb(first_mp); 3810 } 3811 return; 3812 } 3813 } 3814 3815 /* 3816 * For link-local always add ifindex so that transport can set 3817 * sin6_scope_id. Avoid it for ICMP error fanout. 
3818 */ 3819 if ((connp->conn_ipv6_recvpktinfo || 3820 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3821 /* Add header */ 3822 mp = ip_add_info_v6(mp, inill, &dst); 3823 if (mp == NULL) { 3824 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3825 CONN_DEC_REF(connp); 3826 if (mctl_present) 3827 freeb(first_mp); 3828 return; 3829 } else if (mctl_present) { 3830 first_mp->b_cont = mp; 3831 } else { 3832 first_mp = mp; 3833 } 3834 } 3835 if (!canputnext(rq)) { 3836 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3837 freemsg(mp); 3838 } else { 3839 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3840 first_mp = ipsec_check_inbound_policy(first_mp, 3841 connp, NULL, ip6h, mctl_present); 3842 if (first_mp == NULL) { 3843 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3844 CONN_DEC_REF(connp); 3845 return; 3846 } 3847 } 3848 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3849 putnext(rq, mp); 3850 } 3851 IP6_STAT(ip6_udp_fanmb); 3852 CONN_DEC_REF(connp); 3853 if (mctl_present) 3854 freeb(first_mp); 3855 return; 3856 3857 notfound: 3858 mutex_exit(&connfp->connf_lock); 3859 /* 3860 * No one bound to this port. Is 3861 * there a client that wants all 3862 * unclaimed datagrams? 3863 */ 3864 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3865 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3866 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 3867 zoneid); 3868 } else { 3869 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3870 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3871 mctl_present, zoneid)) { 3872 BUMP_MIB(&ip_mib, udpNoPorts); 3873 } 3874 } 3875 } 3876 3877 /* 3878 * int ip_find_hdr_v6() 3879 * 3880 * This routine is used by the upper layer protocols and the IP tunnel 3881 * module to: 3882 * - Set extension header pointers to appropriate locations 3883 * - Determine IPv6 header length and return it 3884 * - Return a pointer to the last nexthdr value 3885 * 3886 * The caller must initialize ipp_fields. 
3887 * 3888 * NOTE: If multiple extension headers of the same type are present, 3889 * ip_find_hdr_v6() will set the respective extension header pointers 3890 * to the first one that it encounters in the IPv6 header. It also 3891 * skips fragment headers. This routine deals with malformed packets 3892 * of various sorts in which case the returned length is up to the 3893 * malformed part. 3894 */ 3895 int 3896 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3897 { 3898 uint_t length, ehdrlen; 3899 uint8_t nexthdr; 3900 uint8_t *whereptr, *endptr; 3901 ip6_dest_t *tmpdstopts; 3902 ip6_rthdr_t *tmprthdr; 3903 ip6_hbh_t *tmphopopts; 3904 ip6_frag_t *tmpfraghdr; 3905 3906 length = IPV6_HDR_LEN; 3907 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3908 endptr = mp->b_wptr; 3909 3910 nexthdr = ip6h->ip6_nxt; 3911 while (whereptr < endptr) { 3912 /* Is there enough left for len + nexthdr? */ 3913 if (whereptr + MIN_EHDR_LEN > endptr) 3914 goto done; 3915 3916 switch (nexthdr) { 3917 case IPPROTO_HOPOPTS: 3918 tmphopopts = (ip6_hbh_t *)whereptr; 3919 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3920 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3921 goto done; 3922 nexthdr = tmphopopts->ip6h_nxt; 3923 /* return only 1st hbh */ 3924 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3925 ipp->ipp_fields |= IPPF_HOPOPTS; 3926 ipp->ipp_hopopts = tmphopopts; 3927 ipp->ipp_hopoptslen = ehdrlen; 3928 } 3929 break; 3930 case IPPROTO_DSTOPTS: 3931 tmpdstopts = (ip6_dest_t *)whereptr; 3932 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3933 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3934 goto done; 3935 nexthdr = tmpdstopts->ip6d_nxt; 3936 /* 3937 * ipp_dstopts is set to the destination header after a 3938 * routing header. 3939 * Assume it is a post-rthdr destination header 3940 * and adjust when we find an rthdr. 
3941 */ 3942 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3943 ipp->ipp_fields |= IPPF_DSTOPTS; 3944 ipp->ipp_dstopts = tmpdstopts; 3945 ipp->ipp_dstoptslen = ehdrlen; 3946 } 3947 break; 3948 case IPPROTO_ROUTING: 3949 tmprthdr = (ip6_rthdr_t *)whereptr; 3950 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3951 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3952 goto done; 3953 nexthdr = tmprthdr->ip6r_nxt; 3954 /* return only 1st rthdr */ 3955 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3956 ipp->ipp_fields |= IPPF_RTHDR; 3957 ipp->ipp_rthdr = tmprthdr; 3958 ipp->ipp_rthdrlen = ehdrlen; 3959 } 3960 /* 3961 * Make any destination header we've seen be a 3962 * pre-rthdr destination header. 3963 */ 3964 if (ipp->ipp_fields & IPPF_DSTOPTS) { 3965 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3966 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3967 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 3968 ipp->ipp_dstopts = NULL; 3969 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 3970 ipp->ipp_dstoptslen = 0; 3971 } 3972 break; 3973 case IPPROTO_FRAGMENT: 3974 /* 3975 * Fragment headers are skipped. Currently, only 3976 * IP cares for their existence. If anyone other 3977 * than IP ever has the need to know about the 3978 * location of fragment headers, support can be 3979 * added to the ip6_pkt_t at that time. 
3980 */ 3981 tmpfraghdr = (ip6_frag_t *)whereptr; 3982 ehdrlen = sizeof (ip6_frag_t); 3983 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 3984 goto done; 3985 nexthdr = tmpfraghdr->ip6f_nxt; 3986 break; 3987 case IPPROTO_NONE: 3988 default: 3989 goto done; 3990 } 3991 length += ehdrlen; 3992 whereptr += ehdrlen; 3993 } 3994 done: 3995 if (nexthdrp != NULL) 3996 *nexthdrp = nexthdr; 3997 return (length); 3998 } 3999 4000 int 4001 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4002 { 4003 ire_t *ire; 4004 4005 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4006 ire = ire_lookup_local_v6(zoneid); 4007 if (ire == NULL) { 4008 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4009 return (1); 4010 } 4011 ip6h->ip6_src = ire->ire_addr_v6; 4012 ire_refrele(ire); 4013 } 4014 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4015 ip6h->ip6_hops = ipv6_def_hops; 4016 return (0); 4017 } 4018 4019 /* 4020 * Try to determine where and what are the IPv6 header length and 4021 * pointer to nexthdr value for the upper layer protocol (or an 4022 * unknown next hdr). 4023 * 4024 * Parameters returns a pointer to the nexthdr value; 4025 * Must handle malformed packets of various sorts. 4026 * Function returns failure for malformed cases. 4027 */ 4028 boolean_t 4029 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4030 uint8_t **nexthdrpp) 4031 { 4032 uint16_t length; 4033 uint_t ehdrlen; 4034 uint8_t *nexthdrp; 4035 uint8_t *whereptr; 4036 uint8_t *endptr; 4037 ip6_dest_t *desthdr; 4038 ip6_rthdr_t *rthdr; 4039 ip6_frag_t *fraghdr; 4040 4041 length = IPV6_HDR_LEN; 4042 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4043 endptr = mp->b_wptr; 4044 4045 nexthdrp = &ip6h->ip6_nxt; 4046 while (whereptr < endptr) { 4047 /* Is there enough left for len + nexthdr? 
*/ 4048 if (whereptr + MIN_EHDR_LEN > endptr) 4049 break; 4050 4051 switch (*nexthdrp) { 4052 case IPPROTO_HOPOPTS: 4053 case IPPROTO_DSTOPTS: 4054 /* Assumes the headers are identical for hbh and dst */ 4055 desthdr = (ip6_dest_t *)whereptr; 4056 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4057 if ((uchar_t *)desthdr + ehdrlen > endptr) 4058 return (B_FALSE); 4059 nexthdrp = &desthdr->ip6d_nxt; 4060 break; 4061 case IPPROTO_ROUTING: 4062 rthdr = (ip6_rthdr_t *)whereptr; 4063 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4064 if ((uchar_t *)rthdr + ehdrlen > endptr) 4065 return (B_FALSE); 4066 nexthdrp = &rthdr->ip6r_nxt; 4067 break; 4068 case IPPROTO_FRAGMENT: 4069 fraghdr = (ip6_frag_t *)whereptr; 4070 ehdrlen = sizeof (ip6_frag_t); 4071 if ((uchar_t *)&fraghdr[1] > endptr) 4072 return (B_FALSE); 4073 nexthdrp = &fraghdr->ip6f_nxt; 4074 break; 4075 case IPPROTO_NONE: 4076 /* No next header means we're finished */ 4077 default: 4078 *hdr_length_ptr = length; 4079 *nexthdrpp = nexthdrp; 4080 return (B_TRUE); 4081 } 4082 length += ehdrlen; 4083 whereptr += ehdrlen; 4084 *hdr_length_ptr = length; 4085 *nexthdrpp = nexthdrp; 4086 } 4087 switch (*nexthdrp) { 4088 case IPPROTO_HOPOPTS: 4089 case IPPROTO_DSTOPTS: 4090 case IPPROTO_ROUTING: 4091 case IPPROTO_FRAGMENT: 4092 /* 4093 * If any know extension headers are still to be processed, 4094 * the packet's malformed (or at least all the IP header(s) are 4095 * not in the same mblk - and that should never happen. 4096 */ 4097 return (B_FALSE); 4098 4099 default: 4100 /* 4101 * If we get here, we know that all of the IP headers were in 4102 * the same mblk, even if the ULP header is in the next mblk. 4103 */ 4104 *hdr_length_ptr = length; 4105 *nexthdrpp = nexthdrp; 4106 return (B_TRUE); 4107 } 4108 } 4109 4110 /* 4111 * Return the length of the IPv6 related headers (including extension headers) 4112 * Returns a length even if the packet is malformed. 
4113 */ 4114 int 4115 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4116 { 4117 uint16_t hdr_len; 4118 uint8_t *nexthdrp; 4119 4120 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4121 return (hdr_len); 4122 } 4123 4124 /* 4125 * Select an ill for the packet by considering load spreading across 4126 * a different ill in the group if dst_ill is part of some group. 4127 */ 4128 static ill_t * 4129 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4130 { 4131 ill_t *ill; 4132 4133 /* 4134 * We schedule irrespective of whether the source address is 4135 * INADDR_UNSPECIED or not. 4136 */ 4137 ill = illgrp_scheduler(dst_ill); 4138 if (ill == NULL) 4139 return (NULL); 4140 4141 /* 4142 * For groups with names ip_sioctl_groupname ensures that all 4143 * ills are of same type. For groups without names, ifgrp_insert 4144 * ensures this. 4145 */ 4146 ASSERT(dst_ill->ill_type == ill->ill_type); 4147 4148 return (ill); 4149 } 4150 4151 /* 4152 * IPv6 - 4153 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4154 * to send out a packet to a destination address for which we do not have 4155 * specific routing information. 4156 * 4157 * Handle non-multicast packets. If ill is non-NULL the match is done 4158 * for that ill. 4159 * 4160 * When a specific ill is specified (using IPV6_PKTINFO, 4161 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4162 * on routing entries (ftable and ctable) that have a matching 4163 * ire->ire_ipif->ipif_ill. Thus this can only be used 4164 * for destinations that are on-link for the specific ill 4165 * and that can appear on multiple links. Thus it is useful 4166 * for multicast destinations, link-local destinations, and 4167 * at some point perhaps for site-local destinations (if the 4168 * node sits at a site boundary). 4169 * We create the cache entries in the regular ctable since 4170 * it can not "confuse" things for other destinations. 4171 * table. 
4172 * 4173 * When ill is part of a ill group, we subject the packets 4174 * to load spreading even if the ill is specified by the 4175 * means described above. We disable only for IPV6_BOUND_PIF 4176 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4177 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4178 * set. 4179 * 4180 * NOTE : These are the scopes of some of the variables that point at IRE, 4181 * which needs to be followed while making any future modifications 4182 * to avoid memory leaks. 4183 * 4184 * - ire and sire are the entries looked up initially by 4185 * ire_ftable_lookup_v6. 4186 * - ipif_ire is used to hold the interface ire associated with 4187 * the new cache ire. But it's scope is limited, so we always REFRELE 4188 * it before branching out to error paths. 4189 * - save_ire is initialized before ire_create, so that ire returned 4190 * by ire_create will not over-write the ire. We REFRELE save_ire 4191 * before breaking out of the switch. 4192 * 4193 * Thus on failures, we have to REFRELE only ire and sire, if they 4194 * are not NULL. 4195 * 4196 * v6srcp may be used in the future. Currently unused. 
4197 */ 4198 /* ARGSUSED */ 4199 void 4200 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4201 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4202 { 4203 in6_addr_t v6gw; 4204 in6_addr_t dst; 4205 ire_t *ire = NULL; 4206 ipif_t *src_ipif = NULL; 4207 ill_t *dst_ill = NULL; 4208 ire_t *sire = NULL; 4209 ire_t *save_ire; 4210 mblk_t *dlureq_mp; 4211 ip6_t *ip6h; 4212 int err = 0; 4213 mblk_t *first_mp; 4214 ipsec_out_t *io; 4215 ill_t *attach_ill = NULL; 4216 ushort_t ire_marks = 0; 4217 int match_flags; 4218 boolean_t ip6i_present; 4219 ire_t *first_sire = NULL; 4220 mblk_t *copy_mp = NULL; 4221 mblk_t *xmit_mp = NULL; 4222 in6_addr_t save_dst; 4223 uint32_t multirt_flags = 4224 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4225 boolean_t multirt_is_resolvable; 4226 boolean_t multirt_resolve_next; 4227 boolean_t need_rele = B_FALSE; 4228 boolean_t do_attach_ill = B_FALSE; 4229 boolean_t ip6_asp_table_held = B_FALSE; 4230 4231 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4232 4233 first_mp = mp; 4234 if (mp->b_datap->db_type == M_CTL) { 4235 mp = mp->b_cont; 4236 io = (ipsec_out_t *)first_mp->b_rptr; 4237 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4238 } else { 4239 io = NULL; 4240 } 4241 4242 /* 4243 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4244 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4245 * could be NULL. 4246 * 4247 * This information can appear either in an ip6i_t or an IPSEC_OUT 4248 * message. 4249 */ 4250 ip6h = (ip6_t *)mp->b_rptr; 4251 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4252 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4253 if (!ip6i_present || 4254 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4255 attach_ill = ip_grab_attach_ill(ill, first_mp, 4256 (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : 4257 io->ipsec_out_ill_index), B_TRUE); 4258 /* Failure case frees things for us. 
*/ 4259 if (attach_ill == NULL) 4260 return; 4261 4262 /* 4263 * Check if we need an ire that will not be 4264 * looked up by anybody else i.e. HIDDEN. 4265 */ 4266 if (ill_is_probeonly(attach_ill)) 4267 ire_marks = IRE_MARK_HIDDEN; 4268 } 4269 } 4270 4271 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4272 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4273 goto icmp_err_ret; 4274 } else if (IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4275 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4276 goto icmp_err_ret; 4277 } 4278 4279 /* 4280 * If this IRE is created for forwarding or it is not for 4281 * TCP traffic, mark it as temporary. 4282 * 4283 * Is it sufficient just to check the next header?? 4284 */ 4285 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4286 ire_marks |= IRE_MARK_TEMPORARY; 4287 4288 /* 4289 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4290 * chain until it gets the most specific information available. 4291 * For example, we know that there is no IRE_CACHE for this dest, 4292 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4293 * ire_ftable_lookup_v6 will look up the gateway, etc. 4294 */ 4295 4296 if (ill == NULL) { 4297 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4298 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE; 4299 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4300 NULL, &sire, zoneid, 0, match_flags); 4301 /* 4302 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4303 * in a NULL ill, but the packet could be a neighbor 4304 * solicitation/advertisment and could have a valid attach_ill. 4305 */ 4306 if (attach_ill != NULL) 4307 ill_refrele(attach_ill); 4308 } else { 4309 if (attach_ill != NULL) { 4310 /* 4311 * attach_ill is set only for communicating with 4312 * on-link hosts. So, don't look for DEFAULT. 4313 * ip_wput_v6 passes the right ill in this case and 4314 * hence we can assert. 
4315 */ 4316 ASSERT(ill == attach_ill); 4317 ill_refrele(attach_ill); 4318 do_attach_ill = B_TRUE; 4319 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4320 } else { 4321 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4322 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4323 } 4324 match_flags |= MATCH_IRE_PARENT; 4325 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, ill->ill_ipif, 4326 &sire, zoneid, 0, match_flags); 4327 } 4328 4329 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4330 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4331 4332 if (zoneid == ALL_ZONES && ire != NULL) { 4333 /* 4334 * In the forwarding case, we can use a route from any zone 4335 * since we won't change the source address. We can easily 4336 * assert that the source address is already set when there's no 4337 * ip6_info header - otherwise we'd have to call pullupmsg(). 4338 */ 4339 ASSERT(ip6i_present || 4340 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4341 zoneid = ire->ire_zoneid; 4342 } 4343 4344 /* 4345 * We enter a loop that will be run only once in most cases. 4346 * The loop is re-entered in the case where the destination 4347 * can be reached through multiple RTF_MULTIRT-flagged routes. 4348 * The intention is to compute multiple routes to a single 4349 * destination in a single ip_newroute_v6 call. 4350 * The information is contained in sire->ire_flags. 4351 */ 4352 do { 4353 multirt_resolve_next = B_FALSE; 4354 4355 if (dst_ill != NULL) { 4356 ill_refrele(dst_ill); 4357 dst_ill = NULL; 4358 } 4359 if (src_ipif != NULL) { 4360 ipif_refrele(src_ipif); 4361 src_ipif = NULL; 4362 } 4363 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4364 ip3dbg(("ip_newroute_v6: starting new resolution " 4365 "with first_mp %p, tag %d\n", 4366 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4367 4368 /* 4369 * We check if there are trailing unresolved routes for 4370 * the destination contained in sire. 
4371 */ 4372 multirt_is_resolvable = 4373 ire_multirt_lookup_v6(&ire, &sire, multirt_flags); 4374 4375 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4376 "ire %p, sire %p\n", 4377 multirt_is_resolvable, (void *)ire, (void *)sire)); 4378 4379 if (!multirt_is_resolvable) { 4380 /* 4381 * No more multirt routes to resolve; give up 4382 * (all routes resolved or no more resolvable 4383 * routes). 4384 */ 4385 if (ire != NULL) { 4386 ire_refrele(ire); 4387 ire = NULL; 4388 } 4389 } else { 4390 ASSERT(sire != NULL); 4391 ASSERT(ire != NULL); 4392 /* 4393 * We simply use first_sire as a flag that 4394 * indicates if a resolvable multirt route has 4395 * already been found during the preceding 4396 * loops. If it is not the case, we may have 4397 * to send an ICMP error to report that the 4398 * destination is unreachable. We do not 4399 * IRE_REFHOLD first_sire. 4400 */ 4401 if (first_sire == NULL) { 4402 first_sire = sire; 4403 } 4404 } 4405 } 4406 if ((ire == NULL) || (ire == sire)) { 4407 /* 4408 * either ire == NULL (the destination cannot be 4409 * resolved) or ire == sire (the gateway cannot be 4410 * resolved). At this point, there are no more routes 4411 * to resolve for the destination, thus we exit. 4412 */ 4413 if (ip_debug > 3) { 4414 /* ip2dbg */ 4415 pr_addr_dbg("ip_newroute_v6: " 4416 "can't resolve %s\n", AF_INET6, v6dstp); 4417 } 4418 ip3dbg(("ip_newroute_v6: " 4419 "ire %p, sire %p, first_sire %p\n", 4420 (void *)ire, (void *)sire, (void *)first_sire)); 4421 4422 if (sire != NULL) { 4423 ire_refrele(sire); 4424 sire = NULL; 4425 } 4426 4427 if (first_sire != NULL) { 4428 /* 4429 * At least one multirt route has been found 4430 * in the same ip_newroute() call; there is no 4431 * need to report an ICMP error. 4432 * first_sire was not IRE_REFHOLDed. 
4433 */ 4434 MULTIRT_DEBUG_UNTAG(first_mp); 4435 freemsg(first_mp); 4436 return; 4437 } 4438 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4439 RTA_DST); 4440 goto icmp_err_ret; 4441 } 4442 4443 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4444 4445 /* 4446 * Verify that the returned IRE does not have either the 4447 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4448 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4449 */ 4450 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4451 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4452 goto icmp_err_ret; 4453 4454 /* 4455 * Increment the ire_ob_pkt_count field for ire if it is an 4456 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4457 * increment the same for the parent IRE, sire, if it is some 4458 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4459 * and HOST_REDIRECT). 4460 */ 4461 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4462 UPDATE_OB_PKT_COUNT(ire); 4463 ire->ire_last_used_time = lbolt; 4464 } 4465 4466 if (sire != NULL) { 4467 mutex_enter(&sire->ire_lock); 4468 v6gw = sire->ire_gateway_addr_v6; 4469 mutex_exit(&sire->ire_lock); 4470 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4471 IRE_INTERFACE)) == 0); 4472 UPDATE_OB_PKT_COUNT(sire); 4473 sire->ire_last_used_time = lbolt; 4474 } else { 4475 v6gw = ipv6_all_zeros; 4476 } 4477 4478 /* 4479 * We have a route to reach the destination. 4480 * 4481 * 1) If the interface is part of ill group, try to get a new 4482 * ill taking load spreading into account. 4483 * 4484 * 2) After selecting the ill, get a source address that might 4485 * create good inbound load spreading and that matches the 4486 * right scope. ipif_select_source_v6 does this for us. 4487 * 4488 * If the application specified the ill (ifindex), we still 4489 * load spread. Only if the packets needs to go out specifically 4490 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4491 * IPV6_BOUND_PIF we don't try to use a different ill for load 4492 * spreading. 4493 */ 4494 if (!do_attach_ill) { 4495 /* 4496 * If the interface belongs to an interface group, 4497 * make sure the next possible interface in the group 4498 * is used. This encourages load spreading among 4499 * peers in an interface group. However, in the case 4500 * of multirouting, load spreading is not used, as we 4501 * actually want to replicate outgoing packets through 4502 * particular interfaces. 4503 * 4504 * Note: While we pick a dst_ill we are really only 4505 * interested in the ill for load spreading. 4506 * The source ipif is determined by source address 4507 * selection below. 4508 */ 4509 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4510 dst_ill = ire->ire_ipif->ipif_ill; 4511 /* For uniformity do a refhold */ 4512 ill_refhold(dst_ill); 4513 } else { 4514 /* 4515 * If we are here trying to create an IRE_CACHE 4516 * for an offlink destination and have the 4517 * IRE_CACHE for the next hop and the latter is 4518 * using virtual IP source address selection i.e 4519 * it's ire->ire_ipif is pointing to a virtual 4520 * network interface (vni) then 4521 * ip_newroute_get_dst_ll() will return the vni 4522 * interface as the dst_ill. Since the vni is 4523 * virtual i.e not associated with any physical 4524 * interface, it cannot be the dst_ill, hence 4525 * in such a case call ip_newroute_get_dst_ll() 4526 * with the stq_ill instead of the ire_ipif ILL. 4527 * The function returns a refheld ill. 
4528 */ 4529 if ((ire->ire_type == IRE_CACHE) && 4530 IS_VNI(ire->ire_ipif->ipif_ill)) 4531 dst_ill = ip_newroute_get_dst_ill_v6( 4532 ire->ire_stq->q_ptr); 4533 else 4534 dst_ill = ip_newroute_get_dst_ill_v6( 4535 ire->ire_ipif->ipif_ill); 4536 } 4537 if (dst_ill == NULL) { 4538 if (ip_debug > 2) { 4539 pr_addr_dbg("ip_newroute_v6 : no dst " 4540 "ill for dst %s\n", 4541 AF_INET6, v6dstp); 4542 } 4543 goto icmp_err_ret; 4544 } else if (dst_ill->ill_group == NULL && ill != NULL && 4545 dst_ill != ill) { 4546 /* 4547 * If "ill" is not part of any group, we should 4548 * have found a route matching "ill" as we 4549 * called ire_ftable_lookup_v6 with 4550 * MATCH_IRE_ILL_GROUP. 4551 * Rather than asserting when there is a 4552 * mismatch, we just drop the packet. 4553 */ 4554 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4555 "dst_ill %s ill %s\n", 4556 dst_ill->ill_name, 4557 ill->ill_name)); 4558 goto icmp_err_ret; 4559 } 4560 } else { 4561 dst_ill = ire->ire_ipif->ipif_ill; 4562 /* For uniformity do refhold */ 4563 ill_refhold(dst_ill); 4564 /* 4565 * We should have found a route matching ill as we 4566 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4567 * Rather than asserting, while there is a mismatch, 4568 * we just drop the packet. 4569 */ 4570 if (dst_ill != ill) { 4571 ip0dbg(("ip_newroute_v6: Packet dropped as " 4572 "IP6I_ATTACH_IF ill is %s, " 4573 "ire->ire_ipif->ipif_ill is %s\n", 4574 ill->ill_name, 4575 dst_ill->ill_name)); 4576 goto icmp_err_ret; 4577 } 4578 } 4579 /* 4580 * Pick a source address which matches the scope of the 4581 * destination address. 4582 * For RTF_SETSRC routes, the source address is imposed by the 4583 * parent ire (sire). 4584 */ 4585 ASSERT(src_ipif == NULL); 4586 if (ire->ire_type == IRE_IF_RESOLVER && 4587 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4588 ip6_asp_can_lookup()) { 4589 /* 4590 * The ire cache entry we're adding is for the 4591 * gateway itself. 
The source address in this case 4592 * is relative to the gateway's address. 4593 */ 4594 ip6_asp_table_held = B_TRUE; 4595 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4596 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4597 if (src_ipif != NULL) 4598 ire_marks |= IRE_MARK_USESRC_CHECK; 4599 } else { 4600 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4601 /* 4602 * Check that the ipif matching the requested 4603 * source address still exists. 4604 */ 4605 src_ipif = ipif_lookup_addr_v6( 4606 &sire->ire_src_addr_v6, NULL, zoneid, 4607 NULL, NULL, NULL, NULL); 4608 } 4609 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4610 ip6_asp_table_held = B_TRUE; 4611 src_ipif = ipif_select_source_v6(dst_ill, 4612 v6dstp, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 4613 zoneid); 4614 if (src_ipif != NULL) 4615 ire_marks |= IRE_MARK_USESRC_CHECK; 4616 } 4617 } 4618 4619 if (src_ipif == NULL) { 4620 if (ip_debug > 2) { 4621 /* ip1dbg */ 4622 pr_addr_dbg("ip_newroute_v6: no src for " 4623 "dst %s\n, ", AF_INET6, v6dstp); 4624 printf("ip_newroute_v6: interface name %s\n", 4625 dst_ill->ill_name); 4626 } 4627 goto icmp_err_ret; 4628 } 4629 4630 if (ip_debug > 3) { 4631 /* ip2dbg */ 4632 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4633 AF_INET6, &v6gw); 4634 } 4635 ip2dbg(("\tire type %s (%d)\n", 4636 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4637 4638 /* 4639 * At this point in ip_newroute_v6(), ire is either the 4640 * IRE_CACHE of the next-hop gateway for an off-subnet 4641 * destination or an IRE_INTERFACE type that should be used 4642 * to resolve an on-subnet destination or an on-subnet 4643 * next-hop gateway. 4644 * 4645 * In the IRE_CACHE case, we have the following : 4646 * 4647 * 1) src_ipif - used for getting a source address. 4648 * 4649 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4650 * means packets using this IRE_CACHE will go out on dst_ill. 
4651 * 4652 * 3) The IRE sire will point to the prefix that is the longest 4653 * matching route for the destination. These prefix types 4654 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4655 * IRE_HOST_REDIRECT. 4656 * 4657 * The newly created IRE_CACHE entry for the off-subnet 4658 * destination is tied to both the prefix route and the 4659 * interface route used to resolve the next-hop gateway 4660 * via the ire_phandle and ire_ihandle fields, respectively. 4661 * 4662 * In the IRE_INTERFACE case, we have the following : 4663 * 4664 * 1) src_ipif - used for getting a source address. 4665 * 4666 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4667 * means packets using the IRE_CACHE that we will build 4668 * here will go out on dst_ill. 4669 * 4670 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4671 * to be created will only be tied to the IRE_INTERFACE that 4672 * was derived from the ire_ihandle field. 4673 * 4674 * If sire is non-NULL, it means the destination is off-link 4675 * and we will first create the IRE_CACHE for the gateway. 4676 * Next time through ip_newroute_v6, we will create the 4677 * IRE_CACHE for the final destination as described above. 4678 */ 4679 save_ire = ire; 4680 switch (ire->ire_type) { 4681 case IRE_CACHE: { 4682 ire_t *ipif_ire; 4683 4684 ASSERT(sire != NULL); 4685 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4686 mutex_enter(&ire->ire_lock); 4687 v6gw = ire->ire_gateway_addr_v6; 4688 mutex_exit(&ire->ire_lock); 4689 } 4690 /* 4691 * We need 3 ire's to create a new cache ire for an 4692 * off-link destination from the cache ire of the 4693 * gateway. 4694 * 4695 * 1. The prefix ire 'sire' 4696 * 2. The cache ire of the gateway 'ire' 4697 * 3. The interface ire 'ipif_ire' 4698 * 4699 * We have (1) and (2). We lookup (3) below. 4700 * 4701 * If there is no interface route to the gateway, 4702 * it is a race condition, where we found the cache 4703 * but the inteface route has been deleted. 
4704 */ 4705 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4706 if (ipif_ire == NULL) { 4707 ip1dbg(("ip_newroute_v6:" 4708 "ire_ihandle_lookup_offlink_v6 failed\n")); 4709 goto icmp_err_ret; 4710 } 4711 /* 4712 * Assume DL_UNITDATA_REQ is same for all physical 4713 * interfaces in the ifgrp. If it isn't, this code will 4714 * have to be seriously rewhacked to allow the 4715 * fastpath probing (such that I cache the link 4716 * header in the IRE_CACHE) to work over ifgrps. 4717 * We have what we need to build an IRE_CACHE. 4718 */ 4719 /* 4720 * Note: the new ire inherits RTF_SETSRC 4721 * and RTF_MULTIRT to propagate these flags from prefix 4722 * to cache. 4723 */ 4724 ire = ire_create_v6( 4725 v6dstp, /* dest address */ 4726 &ipv6_all_ones, /* mask */ 4727 &src_ipif->ipif_v6src_addr, /* source address */ 4728 &v6gw, /* gateway address */ 4729 &save_ire->ire_max_frag, 4730 NULL, /* Fast Path header */ 4731 dst_ill->ill_rq, /* recv-from queue */ 4732 dst_ill->ill_wq, /* send-to queue */ 4733 IRE_CACHE, 4734 NULL, 4735 src_ipif, 4736 &sire->ire_mask_v6, /* Parent mask */ 4737 sire->ire_phandle, /* Parent handle */ 4738 ipif_ire->ire_ihandle, /* Interface handle */ 4739 sire->ire_flags & /* flags if any */ 4740 (RTF_SETSRC | RTF_MULTIRT), 4741 &(sire->ire_uinfo)); 4742 4743 if (ire == NULL) { 4744 ire_refrele(save_ire); 4745 ire_refrele(ipif_ire); 4746 break; 4747 } 4748 ire->ire_marks |= ire_marks; 4749 4750 /* 4751 * Prevent sire and ipif_ire from getting deleted. The 4752 * newly created ire is tied to both of them via the 4753 * phandle and ihandle respectively. 4754 */ 4755 IRB_REFHOLD(sire->ire_bucket); 4756 /* Has it been removed already ? */ 4757 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4758 IRB_REFRELE(sire->ire_bucket); 4759 ire_refrele(ipif_ire); 4760 ire_refrele(save_ire); 4761 break; 4762 } 4763 4764 IRB_REFHOLD(ipif_ire->ire_bucket); 4765 /* Has it been removed already ? 
*/ 4766 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4767 IRB_REFRELE(ipif_ire->ire_bucket); 4768 IRB_REFRELE(sire->ire_bucket); 4769 ire_refrele(ipif_ire); 4770 ire_refrele(save_ire); 4771 break; 4772 } 4773 4774 xmit_mp = first_mp; 4775 if (ire->ire_flags & RTF_MULTIRT) { 4776 copy_mp = copymsg(first_mp); 4777 if (copy_mp != NULL) { 4778 xmit_mp = copy_mp; 4779 MULTIRT_DEBUG_TAG(first_mp); 4780 } 4781 } 4782 ire_add_then_send(q, ire, xmit_mp); 4783 if (ip6_asp_table_held) { 4784 ip6_asp_table_refrele(); 4785 ip6_asp_table_held = B_FALSE; 4786 } 4787 ire_refrele(save_ire); 4788 4789 /* Assert that sire is not deleted yet. */ 4790 ASSERT(sire->ire_ptpn != NULL); 4791 IRB_REFRELE(sire->ire_bucket); 4792 4793 /* Assert that ipif_ire is not deleted yet. */ 4794 ASSERT(ipif_ire->ire_ptpn != NULL); 4795 IRB_REFRELE(ipif_ire->ire_bucket); 4796 ire_refrele(ipif_ire); 4797 4798 if (copy_mp != NULL) { 4799 /* 4800 * Search for the next unresolved 4801 * multirt route. 4802 */ 4803 copy_mp = NULL; 4804 ipif_ire = NULL; 4805 ire = NULL; 4806 /* re-enter the loop */ 4807 multirt_resolve_next = B_TRUE; 4808 continue; 4809 } 4810 ire_refrele(sire); 4811 ill_refrele(dst_ill); 4812 ipif_refrele(src_ipif); 4813 return; 4814 } 4815 case IRE_IF_NORESOLVER: 4816 /* 4817 * We have what we need to build an IRE_CACHE. 4818 * 4819 * Create a new dlureq_mp with the IPv6 gateway 4820 * address in destination address in the DLPI hdr 4821 * if the physical length is exactly 16 bytes. 
4822 */ 4823 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 4824 const in6_addr_t *addr; 4825 4826 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4827 addr = &v6gw; 4828 else 4829 addr = v6dstp; 4830 4831 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 4832 dst_ill->ill_phys_addr_length, 4833 dst_ill->ill_sap, 4834 dst_ill->ill_sap_length); 4835 } else { 4836 dlureq_mp = ire->ire_dlureq_mp; 4837 } 4838 if (dlureq_mp == NULL) 4839 break; 4840 4841 /* 4842 * Note: the new ire inherits sire flags RTF_SETSRC 4843 * and RTF_MULTIRT to propagate those rules from prefix 4844 * to cache. 4845 */ 4846 ire = ire_create_v6( 4847 v6dstp, /* dest address */ 4848 &ipv6_all_ones, /* mask */ 4849 &src_ipif->ipif_v6src_addr, /* source address */ 4850 &v6gw, /* gateway address */ 4851 &save_ire->ire_max_frag, 4852 NULL, /* Fast Path header */ 4853 dst_ill->ill_rq, /* recv-from queue */ 4854 dst_ill->ill_wq, /* send-to queue */ 4855 IRE_CACHE, 4856 dlureq_mp, 4857 src_ipif, 4858 &save_ire->ire_mask_v6, /* Parent mask */ 4859 (sire != NULL) ? /* Parent handle */ 4860 sire->ire_phandle : 0, 4861 save_ire->ire_ihandle, /* Interface handle */ 4862 (sire != NULL) ? /* flags if any */ 4863 sire->ire_flags & 4864 (RTF_SETSRC | RTF_MULTIRT) : 0, 4865 &(save_ire->ire_uinfo)); 4866 4867 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 4868 freeb(dlureq_mp); 4869 4870 if (ire == NULL) { 4871 ire_refrele(save_ire); 4872 break; 4873 } 4874 4875 ire->ire_marks |= ire_marks; 4876 4877 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4878 dst = v6gw; 4879 else 4880 dst = *v6dstp; 4881 err = ndp_noresolver(dst_ill, &dst); 4882 if (err != 0) { 4883 ire_refrele(save_ire); 4884 break; 4885 } 4886 4887 /* Prevent save_ire from getting deleted */ 4888 IRB_REFHOLD(save_ire->ire_bucket); 4889 /* Has it been removed already ? 
*/ 4890 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4891 IRB_REFRELE(save_ire->ire_bucket); 4892 ire_refrele(save_ire); 4893 break; 4894 } 4895 4896 xmit_mp = first_mp; 4897 /* 4898 * In case of MULTIRT, a copy of the current packet 4899 * to send is made to further re-enter the 4900 * loop and attempt another route resolution 4901 */ 4902 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4903 copy_mp = copymsg(first_mp); 4904 if (copy_mp != NULL) { 4905 xmit_mp = copy_mp; 4906 MULTIRT_DEBUG_TAG(first_mp); 4907 } 4908 } 4909 ire_add_then_send(q, ire, xmit_mp); 4910 if (ip6_asp_table_held) { 4911 ip6_asp_table_refrele(); 4912 ip6_asp_table_held = B_FALSE; 4913 } 4914 4915 /* Assert that it is not deleted yet. */ 4916 ASSERT(save_ire->ire_ptpn != NULL); 4917 IRB_REFRELE(save_ire->ire_bucket); 4918 ire_refrele(save_ire); 4919 4920 if (copy_mp != NULL) { 4921 /* 4922 * If we found a (no)resolver, we ignore any 4923 * trailing top priority IRE_CACHE in 4924 * further loops. This ensures that we do not 4925 * omit any (no)resolver despite the priority 4926 * in this call. 4927 * IRE_CACHE, if any, will be processed 4928 * by another thread entering ip_newroute(), 4929 * (on resolver response, for example). 4930 * We use this to force multiple parallel 4931 * resolution as soon as a packet needs to be 4932 * sent. The result is, after one packet 4933 * emission all reachable routes are generally 4934 * resolved. 4935 * Otherwise, complete resolution of MULTIRT 4936 * routes would require several emissions as 4937 * side effect. 4938 */ 4939 multirt_flags &= ~MULTIRT_CACHEGW; 4940 4941 /* 4942 * Search for the next unresolved multirt 4943 * route. 
4944 */ 4945 copy_mp = NULL; 4946 save_ire = NULL; 4947 ire = NULL; 4948 /* re-enter the loop */ 4949 multirt_resolve_next = B_TRUE; 4950 continue; 4951 } 4952 4953 /* Don't need sire anymore */ 4954 if (sire != NULL) 4955 ire_refrele(sire); 4956 ill_refrele(dst_ill); 4957 ipif_refrele(src_ipif); 4958 return; 4959 4960 case IRE_IF_RESOLVER: 4961 /* 4962 * We can't build an IRE_CACHE yet, but at least we 4963 * found a resolver that can help. 4964 */ 4965 dst = *v6dstp; 4966 /* 4967 * To be at this point in the code with a non-zero gw 4968 * means that dst is reachable through a gateway that 4969 * we have never resolved. By changing dst to the gw 4970 * addr we resolve the gateway first. When 4971 * ire_add_then_send() tries to put the IP dg to dst, 4972 * it will reenter ip_newroute() at which time we will 4973 * find the IRE_CACHE for the gw and create another 4974 * IRE_CACHE above (for dst itself). 4975 */ 4976 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4977 save_dst = dst; 4978 dst = v6gw; 4979 v6gw = ipv6_all_zeros; 4980 } 4981 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4982 /* 4983 * Ask the external resolver to do its thing. 
4984 * Make an mblk chain in the following form: 4985 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4986 */ 4987 mblk_t *ire_mp; 4988 mblk_t *areq_mp; 4989 areq_t *areq; 4990 in6_addr_t *addrp; 4991 4992 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4993 if (ip6_asp_table_held) { 4994 ip6_asp_table_refrele(); 4995 ip6_asp_table_held = B_FALSE; 4996 } 4997 ire = ire_create_mp_v6( 4998 &dst, /* dest address */ 4999 &ipv6_all_ones, /* mask */ 5000 &src_ipif->ipif_v6src_addr, 5001 /* source address */ 5002 &v6gw, /* gateway address */ 5003 NULL, /* Fast Path header */ 5004 dst_ill->ill_rq, /* recv-from queue */ 5005 dst_ill->ill_wq, /* send-to queue */ 5006 IRE_CACHE, 5007 NULL, 5008 src_ipif, 5009 &save_ire->ire_mask_v6, 5010 /* Parent mask */ 5011 0, 5012 save_ire->ire_ihandle, 5013 /* Interface handle */ 5014 0, /* flags if any */ 5015 &(save_ire->ire_uinfo)); 5016 5017 ire_refrele(save_ire); 5018 if (ire == NULL) { 5019 ip1dbg(("ip_newroute_v6:" 5020 "ire is NULL\n")); 5021 break; 5022 } 5023 if ((sire != NULL) && 5024 (sire->ire_flags & RTF_MULTIRT)) { 5025 /* 5026 * processing a copy of the packet to 5027 * send for further resolution loops 5028 */ 5029 copy_mp = copymsg(first_mp); 5030 if (copy_mp != NULL) 5031 MULTIRT_DEBUG_TAG(copy_mp); 5032 } 5033 ire->ire_marks |= ire_marks; 5034 ire_mp = ire->ire_mp; 5035 /* 5036 * Now create or find an nce for this interface. 5037 * The hw addr will need to to be set from 5038 * the reply to the AR_ENTRY_QUERY that 5039 * we're about to send. This will be done in 5040 * ire_add_v6(). 5041 */ 5042 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5043 switch (err) { 5044 case 0: 5045 /* 5046 * New cache entry created. 5047 * Break, then ask the external 5048 * resolver. 5049 */ 5050 break; 5051 case EINPROGRESS: 5052 /* 5053 * Resolution in progress; 5054 * packet has been queued by 5055 * ndp_resolver(). 5056 */ 5057 ire_delete(ire); 5058 ire = NULL; 5059 /* 5060 * Check if another multirt 5061 * route must be resolved. 
5062 */ 5063 if (copy_mp != NULL) { 5064 /* 5065 * If we found a resolver, we 5066 * ignore any trailing top 5067 * priority IRE_CACHE in 5068 * further loops. The reason is 5069 * the same as for noresolver. 5070 */ 5071 multirt_flags &= 5072 ~MULTIRT_CACHEGW; 5073 /* 5074 * Search for the next 5075 * unresolved multirt route. 5076 */ 5077 first_mp = copy_mp; 5078 copy_mp = NULL; 5079 mp = first_mp; 5080 if (mp->b_datap->db_type == 5081 M_CTL) { 5082 mp = mp->b_cont; 5083 } 5084 ASSERT(sire != NULL); 5085 dst = save_dst; 5086 /* 5087 * re-enter the loop 5088 */ 5089 multirt_resolve_next = 5090 B_TRUE; 5091 continue; 5092 } 5093 5094 if (sire != NULL) 5095 ire_refrele(sire); 5096 ill_refrele(dst_ill); 5097 ipif_refrele(src_ipif); 5098 return; 5099 default: 5100 /* 5101 * Transient error; packet will be 5102 * freed. 5103 */ 5104 ire_delete(ire); 5105 ire = NULL; 5106 break; 5107 } 5108 if (err != 0) 5109 break; 5110 /* 5111 * Now set up the AR_ENTRY_QUERY and send it. 5112 */ 5113 areq_mp = ill_arp_alloc(dst_ill, 5114 (uchar_t *)&ipv6_areq_template, 5115 (caddr_t)&dst); 5116 if (areq_mp == NULL) { 5117 ip1dbg(("ip_newroute_v6:" 5118 "areq_mp is NULL\n")); 5119 freemsg(ire_mp); 5120 break; 5121 } 5122 areq = (areq_t *)areq_mp->b_rptr; 5123 addrp = (in6_addr_t *)((char *)areq + 5124 areq->areq_target_addr_offset); 5125 *addrp = dst; 5126 addrp = (in6_addr_t *)((char *)areq + 5127 areq->areq_sender_addr_offset); 5128 *addrp = src_ipif->ipif_v6src_addr; 5129 /* 5130 * link the chain, then send up to the resolver. 5131 */ 5132 linkb(areq_mp, ire_mp); 5133 linkb(areq_mp, mp); 5134 ip1dbg(("ip_newroute_v6:" 5135 "putnext to resolver\n")); 5136 putnext(dst_ill->ill_rq, areq_mp); 5137 /* 5138 * Check if another multirt route 5139 * must be resolved. 5140 */ 5141 ire = NULL; 5142 if (copy_mp != NULL) { 5143 /* 5144 * If we find a resolver, we ignore any 5145 * trailing top priority IRE_CACHE in 5146 * further loops. The reason is the 5147 * same as for noresolver. 
5148 */ 5149 multirt_flags &= ~MULTIRT_CACHEGW; 5150 /* 5151 * Search for the next unresolved 5152 * multirt route. 5153 */ 5154 first_mp = copy_mp; 5155 copy_mp = NULL; 5156 mp = first_mp; 5157 if (mp->b_datap->db_type == M_CTL) { 5158 mp = mp->b_cont; 5159 } 5160 ASSERT(sire != NULL); 5161 dst = save_dst; 5162 /* 5163 * re-enter the loop 5164 */ 5165 multirt_resolve_next = B_TRUE; 5166 continue; 5167 } 5168 5169 if (sire != NULL) 5170 ire_refrele(sire); 5171 ill_refrele(dst_ill); 5172 ipif_refrele(src_ipif); 5173 return; 5174 } 5175 /* 5176 * Non-external resolver case. 5177 */ 5178 ire = ire_create_v6( 5179 &dst, /* dest address */ 5180 &ipv6_all_ones, /* mask */ 5181 &src_ipif->ipif_v6src_addr, /* source address */ 5182 &v6gw, /* gateway address */ 5183 &save_ire->ire_max_frag, 5184 NULL, /* Fast Path header */ 5185 dst_ill->ill_rq, /* recv-from queue */ 5186 dst_ill->ill_wq, /* send-to queue */ 5187 IRE_CACHE, 5188 NULL, 5189 src_ipif, 5190 &save_ire->ire_mask_v6, /* Parent mask */ 5191 0, 5192 save_ire->ire_ihandle, /* Interface handle */ 5193 0, /* flags if any */ 5194 &(save_ire->ire_uinfo)); 5195 5196 if (ire == NULL) { 5197 ire_refrele(save_ire); 5198 break; 5199 } 5200 5201 if ((sire != NULL) && 5202 (sire->ire_flags & RTF_MULTIRT)) { 5203 copy_mp = copymsg(first_mp); 5204 if (copy_mp != NULL) 5205 MULTIRT_DEBUG_TAG(copy_mp); 5206 } 5207 5208 ire->ire_marks |= ire_marks; 5209 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5210 switch (err) { 5211 case 0: 5212 /* Prevent save_ire from getting deleted */ 5213 IRB_REFHOLD(save_ire->ire_bucket); 5214 /* Has it been removed already ? */ 5215 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5216 IRB_REFRELE(save_ire->ire_bucket); 5217 ire_refrele(save_ire); 5218 break; 5219 } 5220 5221 /* 5222 * We have a resolved cache entry, 5223 * add in the IRE. 
5224 */ 5225 ire_add_then_send(q, ire, first_mp); 5226 if (ip6_asp_table_held) { 5227 ip6_asp_table_refrele(); 5228 ip6_asp_table_held = B_FALSE; 5229 } 5230 5231 /* Assert that it is not deleted yet. */ 5232 ASSERT(save_ire->ire_ptpn != NULL); 5233 IRB_REFRELE(save_ire->ire_bucket); 5234 ire_refrele(save_ire); 5235 /* 5236 * Check if another multirt route 5237 * must be resolved. 5238 */ 5239 ire = NULL; 5240 if (copy_mp != NULL) { 5241 /* 5242 * If we find a resolver, we ignore any 5243 * trailing top priority IRE_CACHE in 5244 * further loops. The reason is the 5245 * same as for noresolver. 5246 */ 5247 multirt_flags &= ~MULTIRT_CACHEGW; 5248 /* 5249 * Search for the next unresolved 5250 * multirt route. 5251 */ 5252 first_mp = copy_mp; 5253 copy_mp = NULL; 5254 mp = first_mp; 5255 if (mp->b_datap->db_type == M_CTL) { 5256 mp = mp->b_cont; 5257 } 5258 ASSERT(sire != NULL); 5259 dst = save_dst; 5260 /* 5261 * re-enter the loop 5262 */ 5263 multirt_resolve_next = B_TRUE; 5264 continue; 5265 } 5266 5267 if (sire != NULL) 5268 ire_refrele(sire); 5269 ill_refrele(dst_ill); 5270 ipif_refrele(src_ipif); 5271 return; 5272 5273 case EINPROGRESS: 5274 /* 5275 * mp was consumed - presumably queued. 5276 * No need for ire, presumably resolution is 5277 * in progress, and ire will be added when the 5278 * address is resolved. 5279 */ 5280 if (ip6_asp_table_held) { 5281 ip6_asp_table_refrele(); 5282 ip6_asp_table_held = B_FALSE; 5283 } 5284 ASSERT(ire->ire_nce == NULL); 5285 ire_delete(ire); 5286 ire_refrele(save_ire); 5287 /* 5288 * Check if another multirt route 5289 * must be resolved. 5290 */ 5291 ire = NULL; 5292 if (copy_mp != NULL) { 5293 /* 5294 * If we find a resolver, we ignore any 5295 * trailing top priority IRE_CACHE in 5296 * further loops. The reason is the 5297 * same as for noresolver. 5298 */ 5299 multirt_flags &= ~MULTIRT_CACHEGW; 5300 /* 5301 * Search for the next unresolved 5302 * multirt route. 
5303 */ 5304 first_mp = copy_mp; 5305 copy_mp = NULL; 5306 mp = first_mp; 5307 if (mp->b_datap->db_type == M_CTL) { 5308 mp = mp->b_cont; 5309 } 5310 ASSERT(sire != NULL); 5311 dst = save_dst; 5312 /* 5313 * re-enter the loop 5314 */ 5315 multirt_resolve_next = B_TRUE; 5316 continue; 5317 } 5318 if (sire != NULL) 5319 ire_refrele(sire); 5320 ill_refrele(dst_ill); 5321 ipif_refrele(src_ipif); 5322 return; 5323 default: 5324 /* Some transient error */ 5325 ASSERT(ire->ire_nce == NULL); 5326 ire_refrele(save_ire); 5327 break; 5328 } 5329 break; 5330 default: 5331 break; 5332 } 5333 if (ip6_asp_table_held) { 5334 ip6_asp_table_refrele(); 5335 ip6_asp_table_held = B_FALSE; 5336 } 5337 } while (multirt_resolve_next); 5338 5339 err_ret: 5340 ip1dbg(("ip_newroute_v6: dropped\n")); 5341 if (src_ipif != NULL) 5342 ipif_refrele(src_ipif); 5343 if (dst_ill != NULL) { 5344 need_rele = B_TRUE; 5345 ill = dst_ill; 5346 } 5347 if (ill != NULL) { 5348 if (mp->b_prev != NULL) { 5349 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5350 } else { 5351 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5352 } 5353 5354 if (need_rele) 5355 ill_refrele(ill); 5356 } else { 5357 if (mp->b_prev != NULL) { 5358 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5359 } else { 5360 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5361 } 5362 } 5363 /* Did this packet originate externally? 
*/ 5364 if (mp->b_prev) { 5365 mp->b_next = NULL; 5366 mp->b_prev = NULL; 5367 } 5368 if (copy_mp != NULL) { 5369 MULTIRT_DEBUG_UNTAG(copy_mp); 5370 freemsg(copy_mp); 5371 } 5372 MULTIRT_DEBUG_UNTAG(first_mp); 5373 freemsg(first_mp); 5374 if (ire != NULL) 5375 ire_refrele(ire); 5376 if (sire != NULL) 5377 ire_refrele(sire); 5378 return; 5379 5380 icmp_err_ret: 5381 if (ip6_asp_table_held) 5382 ip6_asp_table_refrele(); 5383 if (src_ipif != NULL) 5384 ipif_refrele(src_ipif); 5385 if (dst_ill != NULL) { 5386 need_rele = B_TRUE; 5387 ill = dst_ill; 5388 } 5389 ip1dbg(("ip_newroute_v6: no route\n")); 5390 if (sire != NULL) 5391 ire_refrele(sire); 5392 /* 5393 * We need to set sire to NULL to avoid double freeing if we 5394 * ever goto err_ret from below. 5395 */ 5396 sire = NULL; 5397 ip6h = (ip6_t *)mp->b_rptr; 5398 /* Skip ip6i_t header if present */ 5399 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5400 /* Make sure the IPv6 header is present */ 5401 if ((mp->b_wptr - (uchar_t *)ip6h) < 5402 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5403 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5404 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5405 goto err_ret; 5406 } 5407 } 5408 mp->b_rptr += sizeof (ip6i_t); 5409 ip6h = (ip6_t *)mp->b_rptr; 5410 } 5411 /* Did this packet originate externally? 
*/ 5412 if (mp->b_prev) { 5413 if (ill != NULL) { 5414 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5415 } else { 5416 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5417 } 5418 mp->b_next = NULL; 5419 mp->b_prev = NULL; 5420 q = WR(q); 5421 } else { 5422 if (ill != NULL) { 5423 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5424 } else { 5425 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5426 } 5427 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5428 /* Failed */ 5429 if (copy_mp != NULL) { 5430 MULTIRT_DEBUG_UNTAG(copy_mp); 5431 freemsg(copy_mp); 5432 } 5433 MULTIRT_DEBUG_UNTAG(first_mp); 5434 freemsg(first_mp); 5435 if (ire != NULL) 5436 ire_refrele(ire); 5437 if (need_rele) 5438 ill_refrele(ill); 5439 return; 5440 } 5441 } 5442 5443 if (need_rele) 5444 ill_refrele(ill); 5445 5446 /* 5447 * At this point we will have ire only if RTF_BLACKHOLE 5448 * or RTF_REJECT flags are set on the IRE. It will not 5449 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5450 */ 5451 if (ire != NULL) { 5452 if (ire->ire_flags & RTF_BLACKHOLE) { 5453 ire_refrele(ire); 5454 if (copy_mp != NULL) { 5455 MULTIRT_DEBUG_UNTAG(copy_mp); 5456 freemsg(copy_mp); 5457 } 5458 MULTIRT_DEBUG_UNTAG(first_mp); 5459 freemsg(first_mp); 5460 return; 5461 } 5462 ire_refrele(ire); 5463 } 5464 if (ip_debug > 3) { 5465 /* ip2dbg */ 5466 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5467 AF_INET6, v6dstp); 5468 } 5469 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5470 B_FALSE, B_FALSE); 5471 } 5472 5473 /* 5474 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5475 * we need to send out a packet to a destination address for which we do not 5476 * have specific routing information. It is only used for multicast packets. 5477 * 5478 * If unspec_src we allow creating an IRE with source address zero. 5479 * ire_send_v6() will delete it after the packet is sent. 
5480 */ 5481 void 5482 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5483 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5484 { 5485 ire_t *ire = NULL; 5486 ipif_t *src_ipif = NULL; 5487 int err = 0; 5488 ill_t *dst_ill = NULL; 5489 ire_t *save_ire; 5490 ushort_t ire_marks = 0; 5491 ipsec_out_t *io; 5492 ill_t *attach_ill = NULL; 5493 ill_t *ill; 5494 ip6_t *ip6h; 5495 mblk_t *first_mp; 5496 boolean_t ip6i_present; 5497 ire_t *fire = NULL; 5498 mblk_t *copy_mp = NULL; 5499 boolean_t multirt_resolve_next; 5500 in6_addr_t *v6dstp = &v6dst; 5501 boolean_t ipif_held = B_FALSE; 5502 boolean_t ill_held = B_FALSE; 5503 boolean_t ip6_asp_table_held = B_FALSE; 5504 5505 /* 5506 * This loop is run only once in most cases. 5507 * We loop to resolve further routes only when the destination 5508 * can be reached through multiple RTF_MULTIRT-flagged ires. 5509 */ 5510 do { 5511 multirt_resolve_next = B_FALSE; 5512 if (dst_ill != NULL) { 5513 ill_refrele(dst_ill); 5514 dst_ill = NULL; 5515 } 5516 5517 if (src_ipif != NULL) { 5518 ipif_refrele(src_ipif); 5519 src_ipif = NULL; 5520 } 5521 ASSERT(ipif != NULL); 5522 ill = ipif->ipif_ill; 5523 5524 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5525 if (ip_debug > 2) { 5526 /* ip1dbg */ 5527 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5528 AF_INET6, v6dstp); 5529 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5530 ill->ill_name, ipif->ipif_isv6); 5531 } 5532 5533 first_mp = mp; 5534 if (mp->b_datap->db_type == M_CTL) { 5535 mp = mp->b_cont; 5536 io = (ipsec_out_t *)first_mp->b_rptr; 5537 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5538 } else { 5539 io = NULL; 5540 } 5541 5542 /* 5543 * If the interface is a pt-pt interface we look for an 5544 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5545 * local_address and the pt-pt destination address. 5546 * Otherwise we just match the local address. 
5547 */ 5548 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5549 goto err_ret; 5550 } 5551 /* 5552 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5553 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5554 * as it could be NULL. 5555 * 5556 * This information can appear either in an ip6i_t or an 5557 * IPSEC_OUT message. 5558 */ 5559 ip6h = (ip6_t *)mp->b_rptr; 5560 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5561 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5562 if (!ip6i_present || 5563 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5564 attach_ill = ip_grab_attach_ill(ill, first_mp, 5565 (ip6i_present ? 5566 ((ip6i_t *)ip6h)->ip6i_ifindex : 5567 io->ipsec_out_ill_index), B_TRUE); 5568 /* Failure case frees things for us. */ 5569 if (attach_ill == NULL) 5570 return; 5571 5572 /* 5573 * Check if we need an ire that will not be 5574 * looked up by anybody else i.e. HIDDEN. 5575 */ 5576 if (ill_is_probeonly(attach_ill)) 5577 ire_marks = IRE_MARK_HIDDEN; 5578 } 5579 } 5580 5581 /* 5582 * We check if an IRE_OFFSUBNET for the addr that goes through 5583 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5584 * RTF_MULTIRT flags must be honored. 5585 */ 5586 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5587 ip2dbg(("ip_newroute_ipif_v6: " 5588 "ipif_lookup_multi_ire_v6(" 5589 "ipif %p, dst %08x) = fire %p\n", 5590 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5591 (void *)fire)); 5592 5593 /* 5594 * If the application specified the ill (ifindex), we still 5595 * load spread. Only if the packets needs to go out specifically 5596 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5597 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5598 * multirouting, then we don't try to use a different ill for 5599 * load spreading. 
5600 */ 5601 if (attach_ill == NULL) { 5602 /* 5603 * If the interface belongs to an interface group, 5604 * make sure the next possible interface in the group 5605 * is used. This encourages load spreading among peers 5606 * in an interface group. 5607 * 5608 * Note: While we pick a dst_ill we are really only 5609 * interested in the ill for load spreading. The source 5610 * ipif is determined by source address selection below. 5611 */ 5612 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5613 dst_ill = ipif->ipif_ill; 5614 /* For uniformity do a refhold */ 5615 ill_refhold(dst_ill); 5616 } else { 5617 /* refheld by ip_newroute_get_dst_ill_v6 */ 5618 dst_ill = 5619 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5620 } 5621 if (dst_ill == NULL) { 5622 if (ip_debug > 2) { 5623 pr_addr_dbg("ip_newroute_ipif_v6: " 5624 "no dst ill for dst %s\n", 5625 AF_INET6, v6dstp); 5626 } 5627 goto err_ret; 5628 } 5629 } else { 5630 dst_ill = ipif->ipif_ill; 5631 /* 5632 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5633 * and IPV6_BOUND_PIF case. 5634 */ 5635 ASSERT(dst_ill == attach_ill); 5636 /* attach_ill is already refheld */ 5637 } 5638 /* 5639 * Pick a source address which matches the scope of the 5640 * destination address. 5641 * For RTF_SETSRC routes, the source address is imposed by the 5642 * parent ire (fire). 5643 */ 5644 ASSERT(src_ipif == NULL); 5645 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5646 /* 5647 * Check that the ipif matching the requested source 5648 * address still exists. 
5649 */ 5650 src_ipif = 5651 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5652 NULL, zoneid, NULL, NULL, NULL, NULL); 5653 } 5654 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5655 ip6_asp_table_held = B_TRUE; 5656 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5657 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5658 } 5659 5660 if (src_ipif == NULL) { 5661 if (!unspec_src) { 5662 if (ip_debug > 2) { 5663 /* ip1dbg */ 5664 pr_addr_dbg("ip_newroute_ipif_v6: " 5665 "no src for dst %s\n,", 5666 AF_INET6, v6dstp); 5667 printf(" through interface %s\n", 5668 dst_ill->ill_name); 5669 } 5670 goto err_ret; 5671 } 5672 /* Use any ipif for source */ 5673 for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; 5674 src_ipif = src_ipif->ipif_next) { 5675 if ((src_ipif->ipif_flags & IPIF_UP) && 5676 IN6_IS_ADDR_UNSPECIFIED( 5677 &src_ipif->ipif_v6src_addr)) 5678 break; 5679 } 5680 if (src_ipif == NULL) { 5681 if (ip_debug > 2) { 5682 /* ip1dbg */ 5683 pr_addr_dbg("ip_newroute_ipif_v6: " 5684 "no src for dst %s\n ", 5685 AF_INET6, v6dstp); 5686 printf("ip_newroute_ipif_v6: if %s" 5687 "(UNSPEC_SRC)\n", 5688 dst_ill->ill_name); 5689 } 5690 goto err_ret; 5691 } 5692 src_ipif = ipif; 5693 ipif_refhold(src_ipif); 5694 } 5695 ire = ipif_to_ire_v6(ipif); 5696 if (ire == NULL) { 5697 if (ip_debug > 2) { 5698 /* ip1dbg */ 5699 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5700 AF_INET6, &ipif->ipif_v6lcl_addr); 5701 printf("ip_newroute_ipif_v6: " 5702 "if %s\n", dst_ill->ill_name); 5703 } 5704 goto err_ret; 5705 } 5706 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5707 goto err_ret; 5708 5709 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5710 5711 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5712 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5713 if (ip_debug > 2) { 5714 /* ip1dbg */ 5715 pr_addr_dbg(" address %s\n", 5716 AF_INET6, &ire->ire_src_addr_v6); 5717 } 5718 save_ire = ire; 5719 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5720 (void *)ire, (void 
*)ipif)); 5721 5722 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5723 /* 5724 * an IRE_OFFSUBET was looked up 5725 * on that interface. 5726 * this ire has RTF_MULTIRT flag, 5727 * so the resolution loop 5728 * will be re-entered to resolve 5729 * additional routes on other 5730 * interfaces. For that purpose, 5731 * a copy of the packet is 5732 * made at this point. 5733 */ 5734 fire->ire_last_used_time = lbolt; 5735 copy_mp = copymsg(first_mp); 5736 if (copy_mp) { 5737 MULTIRT_DEBUG_TAG(copy_mp); 5738 } 5739 } 5740 5741 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5742 switch (ire->ire_type) { 5743 case IRE_IF_NORESOLVER: { 5744 /* We have what we need to build an IRE_CACHE. */ 5745 mblk_t *dlureq_mp; 5746 5747 /* 5748 * Create a new dlureq_mp with the 5749 * IPv6 gateway address in destination address in the 5750 * DLPI hdr if the physical length is exactly 16 bytes. 5751 */ 5752 ASSERT(dst_ill->ill_isv6); 5753 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5754 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5755 dst_ill->ill_phys_addr_length, 5756 dst_ill->ill_sap, 5757 dst_ill->ill_sap_length); 5758 } else { 5759 dlureq_mp = ire->ire_dlureq_mp; 5760 } 5761 5762 if (dlureq_mp == NULL) 5763 break; 5764 /* 5765 * The newly created ire will inherit the flags of the 5766 * parent ire, if any. 5767 */ 5768 ire = ire_create_v6( 5769 v6dstp, /* dest address */ 5770 &ipv6_all_ones, /* mask */ 5771 &src_ipif->ipif_v6src_addr, /* source address */ 5772 NULL, /* gateway address */ 5773 &save_ire->ire_max_frag, 5774 NULL, /* Fast Path header */ 5775 dst_ill->ill_rq, /* recv-from queue */ 5776 dst_ill->ill_wq, /* send-to queue */ 5777 IRE_CACHE, 5778 dlureq_mp, 5779 src_ipif, 5780 NULL, 5781 (fire != NULL) ? /* Parent handle */ 5782 fire->ire_phandle : 0, 5783 save_ire->ire_ihandle, /* Interface handle */ 5784 (fire != NULL) ? 
5785 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5786 0, 5787 &ire_uinfo_null); 5788 5789 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5790 freeb(dlureq_mp); 5791 5792 if (ire == NULL) { 5793 ire_refrele(save_ire); 5794 break; 5795 } 5796 5797 ire->ire_marks |= ire_marks; 5798 5799 err = ndp_noresolver(dst_ill, v6dstp); 5800 if (err != 0) { 5801 ire_refrele(save_ire); 5802 break; 5803 } 5804 5805 /* Prevent save_ire from getting deleted */ 5806 IRB_REFHOLD(save_ire->ire_bucket); 5807 /* Has it been removed already ? */ 5808 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5809 IRB_REFRELE(save_ire->ire_bucket); 5810 ire_refrele(save_ire); 5811 break; 5812 } 5813 5814 ire_add_then_send(q, ire, first_mp); 5815 if (ip6_asp_table_held) { 5816 ip6_asp_table_refrele(); 5817 ip6_asp_table_held = B_FALSE; 5818 } 5819 5820 /* Assert that it is not deleted yet. */ 5821 ASSERT(save_ire->ire_ptpn != NULL); 5822 IRB_REFRELE(save_ire->ire_bucket); 5823 ire_refrele(save_ire); 5824 if (fire != NULL) { 5825 ire_refrele(fire); 5826 fire = NULL; 5827 } 5828 5829 /* 5830 * The resolution loop is re-entered if we 5831 * actually are in a multirouting case. 5832 */ 5833 if (copy_mp != NULL) { 5834 boolean_t need_resolve = 5835 ire_multirt_need_resolve_v6(v6dstp); 5836 if (!need_resolve) { 5837 MULTIRT_DEBUG_UNTAG(copy_mp); 5838 freemsg(copy_mp); 5839 copy_mp = NULL; 5840 } else { 5841 /* 5842 * ipif_lookup_group_v6() calls 5843 * ire_lookup_multi_v6() that uses 5844 * ire_ftable_lookup_v6() to find 5845 * an IRE_INTERFACE for the group. 5846 * In the multirt case, 5847 * ire_lookup_multi_v6() then invokes 5848 * ire_multirt_lookup_v6() to find 5849 * the next resolvable ire. 5850 * As a result, we obtain a new 5851 * interface, derived from the 5852 * next ire. 
5853 */ 5854 if (ipif_held) { 5855 ipif_refrele(ipif); 5856 ipif_held = B_FALSE; 5857 } 5858 ipif = ipif_lookup_group_v6(v6dstp, 5859 zoneid); 5860 ip2dbg(("ip_newroute_ipif: " 5861 "multirt dst %08x, ipif %p\n", 5862 ntohl(V4_PART_OF_V6((*v6dstp))), 5863 (void *)ipif)); 5864 if (ipif != NULL) { 5865 ipif_held = B_TRUE; 5866 mp = copy_mp; 5867 copy_mp = NULL; 5868 multirt_resolve_next = 5869 B_TRUE; 5870 continue; 5871 } else { 5872 freemsg(copy_mp); 5873 } 5874 } 5875 } 5876 ill_refrele(dst_ill); 5877 if (ipif_held) { 5878 ipif_refrele(ipif); 5879 ipif_held = B_FALSE; 5880 } 5881 if (src_ipif != NULL) 5882 ipif_refrele(src_ipif); 5883 return; 5884 } 5885 case IRE_IF_RESOLVER: { 5886 5887 ASSERT(dst_ill->ill_isv6); 5888 5889 /* 5890 * We obtain a partial IRE_CACHE which we will pass 5891 * along with the resolver query. When the response 5892 * comes back it will be there ready for us to add. 5893 */ 5894 /* 5895 * the newly created ire will inherit the flags of the 5896 * parent ire, if any. 5897 */ 5898 ire = ire_create_v6( 5899 v6dstp, /* dest address */ 5900 &ipv6_all_ones, /* mask */ 5901 &src_ipif->ipif_v6src_addr, /* source address */ 5902 NULL, /* gateway address */ 5903 &save_ire->ire_max_frag, 5904 NULL, /* Fast Path header */ 5905 dst_ill->ill_rq, /* recv-from queue */ 5906 dst_ill->ill_wq, /* send-to queue */ 5907 IRE_CACHE, 5908 NULL, 5909 src_ipif, 5910 NULL, 5911 (fire != NULL) ? /* Parent handle */ 5912 fire->ire_phandle : 0, 5913 save_ire->ire_ihandle, /* Interface handle */ 5914 (fire != NULL) ? 
5915 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5916 0, 5917 &ire_uinfo_null); 5918 5919 if (ire == NULL) { 5920 ire_refrele(save_ire); 5921 break; 5922 } 5923 5924 ire->ire_marks |= ire_marks; 5925 5926 /* Resolve and add ire to the ctable */ 5927 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5928 switch (err) { 5929 case 0: 5930 /* Prevent save_ire from getting deleted */ 5931 IRB_REFHOLD(save_ire->ire_bucket); 5932 /* Has it been removed already ? */ 5933 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5934 IRB_REFRELE(save_ire->ire_bucket); 5935 ire_refrele(save_ire); 5936 break; 5937 } 5938 /* 5939 * We have a resolved cache entry, 5940 * add in the IRE. 5941 */ 5942 ire_add_then_send(q, ire, first_mp); 5943 if (ip6_asp_table_held) { 5944 ip6_asp_table_refrele(); 5945 ip6_asp_table_held = B_FALSE; 5946 } 5947 5948 /* Assert that it is not deleted yet. */ 5949 ASSERT(save_ire->ire_ptpn != NULL); 5950 IRB_REFRELE(save_ire->ire_bucket); 5951 ire_refrele(save_ire); 5952 if (fire != NULL) { 5953 ire_refrele(fire); 5954 fire = NULL; 5955 } 5956 5957 /* 5958 * The resolution loop is re-entered if we 5959 * actually are in a multirouting case. 5960 */ 5961 if (copy_mp != NULL) { 5962 boolean_t need_resolve = 5963 ire_multirt_need_resolve_v6(v6dstp); 5964 if (!need_resolve) { 5965 MULTIRT_DEBUG_UNTAG(copy_mp); 5966 freemsg(copy_mp); 5967 copy_mp = NULL; 5968 } else { 5969 /* 5970 * ipif_lookup_group_v6() calls 5971 * ire_lookup_multi_v6() that 5972 * uses ire_ftable_lookup_v6() 5973 * to find an IRE_INTERFACE for 5974 * the group. In the multirt 5975 * case, ire_lookup_multi_v6() 5976 * then invokes 5977 * ire_multirt_lookup_v6() to 5978 * find the next resolvable ire. 5979 * As a result, we obtain a new 5980 * interface, derived from the 5981 * next ire. 
5982 */ 5983 if (ipif_held) { 5984 ipif_refrele(ipif); 5985 ipif_held = B_FALSE; 5986 } 5987 ipif = ipif_lookup_group_v6( 5988 v6dstp, zoneid); 5989 ip2dbg(("ip_newroute_ipif: " 5990 "multirt dst %08x, " 5991 "ipif %p\n", 5992 ntohl(V4_PART_OF_V6( 5993 (*v6dstp))), 5994 (void *)ipif)); 5995 if (ipif != NULL) { 5996 ipif_held = B_TRUE; 5997 mp = copy_mp; 5998 copy_mp = NULL; 5999 multirt_resolve_next = 6000 B_TRUE; 6001 continue; 6002 } else { 6003 freemsg(copy_mp); 6004 } 6005 } 6006 } 6007 ill_refrele(dst_ill); 6008 if (ipif_held) { 6009 ipif_refrele(ipif); 6010 ipif_held = B_FALSE; 6011 } 6012 if (src_ipif != NULL) 6013 ipif_refrele(src_ipif); 6014 return; 6015 6016 case EINPROGRESS: 6017 /* 6018 * mp was consumed - presumably queued. 6019 * No need for ire, presumably resolution is 6020 * in progress, and ire will be added when the 6021 * address is resolved. 6022 */ 6023 if (ip6_asp_table_held) { 6024 ip6_asp_table_refrele(); 6025 ip6_asp_table_held = B_FALSE; 6026 } 6027 ire_delete(ire); 6028 ire_refrele(save_ire); 6029 if (fire != NULL) { 6030 ire_refrele(fire); 6031 fire = NULL; 6032 } 6033 6034 /* 6035 * The resolution loop is re-entered if we 6036 * actually are in a multirouting case. 6037 */ 6038 if (copy_mp != NULL) { 6039 boolean_t need_resolve = 6040 ire_multirt_need_resolve_v6(v6dstp); 6041 if (!need_resolve) { 6042 MULTIRT_DEBUG_UNTAG(copy_mp); 6043 freemsg(copy_mp); 6044 copy_mp = NULL; 6045 } else { 6046 /* 6047 * ipif_lookup_group_v6() calls 6048 * ire_lookup_multi_v6() that 6049 * uses ire_ftable_lookup_v6() 6050 * to find an IRE_INTERFACE for 6051 * the group. In the multirt 6052 * case, ire_lookup_multi_v6() 6053 * then invokes 6054 * ire_multirt_lookup_v6() to 6055 * find the next resolvable ire. 6056 * As a result, we obtain a new 6057 * interface, derived from the 6058 * next ire. 
6059 */ 6060 if (ipif_held) { 6061 ipif_refrele(ipif); 6062 ipif_held = B_FALSE; 6063 } 6064 ipif = ipif_lookup_group_v6( 6065 v6dstp, zoneid); 6066 ip2dbg(("ip_newroute_ipif: " 6067 "multirt dst %08x, " 6068 "ipif %p\n", 6069 ntohl(V4_PART_OF_V6( 6070 (*v6dstp))), 6071 (void *)ipif)); 6072 if (ipif != NULL) { 6073 ipif_held = B_TRUE; 6074 mp = copy_mp; 6075 copy_mp = NULL; 6076 multirt_resolve_next = 6077 B_TRUE; 6078 continue; 6079 } else { 6080 freemsg(copy_mp); 6081 } 6082 } 6083 } 6084 ill_refrele(dst_ill); 6085 if (ipif_held) { 6086 ipif_refrele(ipif); 6087 ipif_held = B_FALSE; 6088 } 6089 if (src_ipif != NULL) 6090 ipif_refrele(src_ipif); 6091 return; 6092 default: 6093 /* Some transient error */ 6094 ire_refrele(save_ire); 6095 break; 6096 } 6097 break; 6098 } 6099 default: 6100 break; 6101 } 6102 if (ip6_asp_table_held) { 6103 ip6_asp_table_refrele(); 6104 ip6_asp_table_held = B_FALSE; 6105 } 6106 } while (multirt_resolve_next); 6107 6108 err_ret: 6109 if (ip6_asp_table_held) 6110 ip6_asp_table_refrele(); 6111 if (ire != NULL) 6112 ire_refrele(ire); 6113 if (fire != NULL) 6114 ire_refrele(fire); 6115 if (ipif != NULL && ipif_held) 6116 ipif_refrele(ipif); 6117 if (src_ipif != NULL) 6118 ipif_refrele(src_ipif); 6119 /* Multicast - no point in trying to generate ICMP error */ 6120 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6121 if (dst_ill != NULL) { 6122 ill = dst_ill; 6123 ill_held = B_TRUE; 6124 } 6125 if (mp->b_prev || mp->b_next) { 6126 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6127 } else { 6128 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6129 } 6130 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6131 mp->b_next = NULL; 6132 mp->b_prev = NULL; 6133 freemsg(first_mp); 6134 if (ill_held) 6135 ill_refrele(ill); 6136 } 6137 6138 /* 6139 * Parse and process any hop-by-hop or destination options. 6140 * 6141 * Assumes that q is an ill read queue so that ICMP errors for link-local 6142 * destinations are sent out the correct interface. 
6143 * 6144 * Returns -1 if there was an error and mp has been consumed. 6145 * Returns 0 if no special action is needed. 6146 * Returns 1 if the packet contained a router alert option for this node 6147 * which is verified to be "interesting/known" for our implementation. 6148 * 6149 * XXX Note: In future as more hbh or dest options are defined, 6150 * it may be better to have different routines for hbh and dest 6151 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6152 * may have same value in different namespaces. Or is it same namespace ?? 6153 * Current code checks for each opt_type (other than pads) if it is in 6154 * the expected nexthdr (hbh or dest) 6155 */ 6156 static int 6157 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6158 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6159 { 6160 uint8_t opt_type; 6161 uint_t optused; 6162 int ret = 0; 6163 mblk_t *first_mp; 6164 6165 first_mp = mp; 6166 if (mp->b_datap->db_type == M_CTL) { 6167 mp = mp->b_cont; 6168 } 6169 6170 while (optlen != 0) { 6171 opt_type = *optptr; 6172 if (opt_type == IP6OPT_PAD1) { 6173 optused = 1; 6174 } else { 6175 if (optlen < 2) 6176 goto bad_opt; 6177 switch (opt_type) { 6178 case IP6OPT_PADN: 6179 /* 6180 * Note:We don't verify that (N-2) pad octets 6181 * are zero as required by spec. Adhere to 6182 * "be liberal in what you accept..." part of 6183 * implementation philosophy (RFC791,RFC1122) 6184 */ 6185 optused = 2 + optptr[1]; 6186 if (optused > optlen) 6187 goto bad_opt; 6188 break; 6189 6190 case IP6OPT_JUMBO: 6191 if (hdr_type != IPPROTO_HOPOPTS) 6192 goto opt_error; 6193 goto opt_error; /* XXX Not implemented! 
*/ 6194 6195 case IP6OPT_ROUTER_ALERT: { 6196 struct ip6_opt_router *or; 6197 6198 if (hdr_type != IPPROTO_HOPOPTS) 6199 goto opt_error; 6200 optused = 2 + optptr[1]; 6201 if (optused > optlen) 6202 goto bad_opt; 6203 or = (struct ip6_opt_router *)optptr; 6204 /* Check total length and alignment */ 6205 if (optused != sizeof (*or) || 6206 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6207 goto opt_error; 6208 /* Check value */ 6209 switch (*((uint16_t *)or->ip6or_value)) { 6210 case IP6_ALERT_MLD: 6211 case IP6_ALERT_RSVP: 6212 ret = 1; 6213 } 6214 break; 6215 } 6216 case IP6OPT_HOME_ADDRESS: { 6217 /* 6218 * Minimal support for the home address option 6219 * (which is required by all IPv6 nodes). 6220 * Implement by just swapping the home address 6221 * and source address. 6222 * XXX Note: this has IPsec implications since 6223 * AH needs to take this into account. 6224 * Also, when IPsec is used we need to ensure 6225 * that this is only processed once 6226 * in the received packet (to avoid swapping 6227 * back and forth). 6228 * NOTE:This option processing is considered 6229 * to be unsafe and prone to a denial of 6230 * service attack. 6231 * The current processing is not safe even with 6232 * IPsec secured IP packets. Since the home 6233 * address option processing requirement still 6234 * is in the IETF draft and in the process of 6235 * being redefined for its usage, it has been 6236 * decided to turn off the option by default. 6237 * If this section of code needs to be executed, 6238 * ndd variable ip6_ignore_home_address_opt 6239 * should be set to 0 at the user's own risk. 6240 */ 6241 struct ip6_opt_home_address *oh; 6242 in6_addr_t tmp; 6243 6244 if (ipv6_ignore_home_address_opt) 6245 goto opt_error; 6246 6247 if (hdr_type != IPPROTO_DSTOPTS) 6248 goto opt_error; 6249 optused = 2 + optptr[1]; 6250 if (optused > optlen) 6251 goto bad_opt; 6252 6253 /* 6254 * We did this dest. opt the first time 6255 * around (i.e. before AH processing). 
6256 * If we've done AH... stop now. 6257 */ 6258 if (first_mp != mp) { 6259 ipsec_in_t *ii; 6260 6261 ii = (ipsec_in_t *)first_mp->b_rptr; 6262 if (ii->ipsec_in_ah_sa != NULL) 6263 break; 6264 } 6265 6266 oh = (struct ip6_opt_home_address *)optptr; 6267 /* Check total length and alignment */ 6268 if (optused < sizeof (*oh) || 6269 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6270 goto opt_error; 6271 /* Swap ip6_src and the home address */ 6272 tmp = ip6h->ip6_src; 6273 /* XXX Note: only 8 byte alignment option */ 6274 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6275 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6276 break; 6277 } 6278 6279 case IP6OPT_TUNNEL_LIMIT: 6280 if (hdr_type != IPPROTO_DSTOPTS) { 6281 goto opt_error; 6282 } 6283 optused = 2 + optptr[1]; 6284 if (optused > optlen) { 6285 goto bad_opt; 6286 } 6287 if (optused != 3) { 6288 goto opt_error; 6289 } 6290 break; 6291 6292 default: 6293 opt_error: 6294 ip1dbg(("ip_process_options_v6: bad opt 0x%x\n", 6295 opt_type)); 6296 switch (IP6OPT_TYPE(opt_type)) { 6297 case IP6OPT_TYPE_SKIP: 6298 optused = 2 + optptr[1]; 6299 if (optused > optlen) 6300 goto bad_opt; 6301 break; 6302 case IP6OPT_TYPE_DISCARD: 6303 freemsg(first_mp); 6304 return (-1); 6305 case IP6OPT_TYPE_ICMP: 6306 icmp_param_problem_v6(WR(q), first_mp, 6307 ICMP6_PARAMPROB_OPTION, 6308 (uint32_t)(optptr - 6309 (uint8_t *)ip6h), 6310 B_FALSE, B_FALSE); 6311 return (-1); 6312 case IP6OPT_TYPE_FORCEICMP: 6313 icmp_param_problem_v6(WR(q), first_mp, 6314 ICMP6_PARAMPROB_OPTION, 6315 (uint32_t)(optptr - 6316 (uint8_t *)ip6h), 6317 B_FALSE, B_TRUE); 6318 return (-1); 6319 } 6320 } 6321 } 6322 optlen -= optused; 6323 optptr += optused; 6324 } 6325 return (ret); 6326 6327 bad_opt: 6328 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6329 (uint32_t)(optptr - (uint8_t *)ip6h), 6330 B_FALSE, B_FALSE); 6331 return (-1); 6332 } 6333 6334 /* 6335 * Process a routing header that is not yet empty. 6336 * Only handles type 0 routing headers. 
6337 */ 6338 static void 6339 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6340 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6341 { 6342 ip6_rthdr0_t *rthdr; 6343 uint_t ehdrlen; 6344 uint_t numaddr; 6345 in6_addr_t *addrptr; 6346 in6_addr_t tmp; 6347 6348 ASSERT(rth->ip6r_segleft != 0); 6349 6350 if (!ipv6_forward_src_routed) { 6351 /* XXX Check for source routed out same interface? */ 6352 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6353 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6354 freemsg(hada_mp); 6355 freemsg(mp); 6356 return; 6357 } 6358 6359 if (rth->ip6r_type != 0) { 6360 if (hada_mp != NULL) 6361 goto hada_drop; 6362 icmp_param_problem_v6(WR(q), mp, 6363 ICMP6_PARAMPROB_HEADER, 6364 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6365 B_FALSE, B_FALSE); 6366 return; 6367 } 6368 rthdr = (ip6_rthdr0_t *)rth; 6369 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6370 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6371 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6372 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6373 if (rthdr->ip6r0_len & 0x1) { 6374 /* An odd length is impossible */ 6375 if (hada_mp != NULL) 6376 goto hada_drop; 6377 icmp_param_problem_v6(WR(q), mp, 6378 ICMP6_PARAMPROB_HEADER, 6379 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6380 B_FALSE, B_FALSE); 6381 return; 6382 } 6383 numaddr = rthdr->ip6r0_len / 2; 6384 if (rthdr->ip6r0_segleft > numaddr) { 6385 /* segleft exceeds number of addresses in routing header */ 6386 if (hada_mp != NULL) 6387 goto hada_drop; 6388 icmp_param_problem_v6(WR(q), mp, 6389 ICMP6_PARAMPROB_HEADER, 6390 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6391 (uchar_t *)ip6h), 6392 B_FALSE, B_FALSE); 6393 return; 6394 } 6395 addrptr += (numaddr - rthdr->ip6r0_segleft); 6396 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6397 IN6_IS_ADDR_MULTICAST(addrptr)) { 6398 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6399 freemsg(hada_mp); 6400 
freemsg(mp); 6401 return; 6402 } 6403 /* Swap */ 6404 tmp = *addrptr; 6405 *addrptr = ip6h->ip6_dst; 6406 ip6h->ip6_dst = tmp; 6407 rthdr->ip6r0_segleft--; 6408 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6409 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6410 if (hada_mp != NULL) 6411 goto hada_drop; 6412 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6413 B_FALSE, B_FALSE); 6414 return; 6415 } 6416 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6417 return; 6418 hada_drop: 6419 /* IPsec kstats: bean counter? */ 6420 freemsg(hada_mp); 6421 freemsg(mp); 6422 } 6423 6424 /* 6425 * Read side put procedure for IPv6 module. 6426 */ 6427 static void 6428 ip_rput_v6(queue_t *q, mblk_t *mp) 6429 { 6430 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6431 ip6_t *ip6h; 6432 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6433 ill_t *ill; 6434 struct iocblk *iocp; 6435 uint_t flags = 0; 6436 6437 ill = (ill_t *)q->q_ptr; 6438 if (ill->ill_state_flags & ILL_CONDEMNED) { 6439 union DL_primitives *dl; 6440 6441 dl = (union DL_primitives *)mp->b_rptr; 6442 /* 6443 * Things are opening or closing - only accept DLPI 6444 * ack messages. If the stream is closing and ip_wsrv 6445 * has completed, ip_close is out of the qwait, but has 6446 * not yet completed qprocsoff. Don't proceed any further 6447 * because the ill has been cleaned up and things hanging 6448 * off the ill have been freed. 6449 */ 6450 if ((mp->b_datap->db_type != M_PCPROTO) || 6451 (dl->dl_primitive == DL_UNITDATA_IND)) { 6452 ip_ioctl_freemsg(mp); 6453 return; 6454 } 6455 } 6456 6457 switch (mp->b_datap->db_type) { 6458 case M_DATA: 6459 break; 6460 6461 case M_PROTO: 6462 case M_PCPROTO: 6463 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6464 DL_UNITDATA_IND) { 6465 /* Go handle anything other than data elsewhere. 
*/ 6466 ip_rput_dlpi(q, mp); 6467 return; 6468 } 6469 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6470 ll_multicast = dlur->dl_group_address; 6471 #undef dlur 6472 /* Ditch the DLPI header. */ 6473 mp1 = mp; 6474 mp = mp->b_cont; 6475 freeb(mp1); 6476 break; 6477 case M_BREAK: 6478 panic("ip_rput_v6: got an M_BREAK"); 6479 /*NOTREACHED*/ 6480 case M_IOCACK: 6481 iocp = (struct iocblk *)mp->b_rptr; 6482 switch (iocp->ioc_cmd) { 6483 case DL_IOC_HDR_INFO: 6484 ill = (ill_t *)q->q_ptr; 6485 ill_fastpath_ack(ill, mp); 6486 return; 6487 case SIOCSTUNPARAM: 6488 case SIOCGTUNPARAM: 6489 case OSIOCSTUNPARAM: 6490 case OSIOCGTUNPARAM: 6491 /* Go through qwriter */ 6492 break; 6493 default: 6494 putnext(q, mp); 6495 return; 6496 } 6497 /* FALLTHRU */ 6498 case M_ERROR: 6499 case M_HANGUP: 6500 mutex_enter(&ill->ill_lock); 6501 if (ill->ill_state_flags & ILL_CONDEMNED) { 6502 mutex_exit(&ill->ill_lock); 6503 freemsg(mp); 6504 return; 6505 } 6506 ill_refhold_locked(ill); 6507 mutex_exit(&ill->ill_lock); 6508 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6509 return; 6510 case M_CTL: { 6511 /* EXPORT DELETE START */ 6512 if ((MBLKL(mp) > sizeof (int)) && 6513 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6514 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6515 mctl_present = B_TRUE; 6516 break; 6517 } 6518 /* EXPORT DELETE END */ 6519 putnext(q, mp); 6520 return; 6521 } 6522 case M_IOCNAK: 6523 iocp = (struct iocblk *)mp->b_rptr; 6524 switch (iocp->ioc_cmd) { 6525 case DL_IOC_HDR_INFO: 6526 case SIOCSTUNPARAM: 6527 case SIOCGTUNPARAM: 6528 case OSIOCSTUNPARAM: 6529 case OSIOCGTUNPARAM: 6530 mutex_enter(&ill->ill_lock); 6531 if (ill->ill_state_flags & ILL_CONDEMNED) { 6532 mutex_exit(&ill->ill_lock); 6533 freemsg(mp); 6534 return; 6535 } 6536 ill_refhold_locked(ill); 6537 mutex_exit(&ill->ill_lock); 6538 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6539 B_FALSE); 6540 return; 6541 default: 6542 break; 6543 } 6544 /* FALLTHRU */ 6545 default: 6546 
putnext(q, mp); 6547 return; 6548 } 6549 6550 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6551 /* 6552 * if db_ref > 1 then copymsg and free original. Packet may be 6553 * changed and do not want other entity who has a reference to this 6554 * message to trip over the changes. This is a blind change because 6555 * trying to catch all places that might change packet is too 6556 * difficult (since it may be a module above this one). 6557 */ 6558 if (mp->b_datap->db_ref > 1) { 6559 mblk_t *mp1; 6560 6561 mp1 = copymsg(mp); 6562 freemsg(mp); 6563 if (mp1 == NULL) { 6564 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6565 return; 6566 } 6567 mp = mp1; 6568 } 6569 first_mp = mp; 6570 if (mctl_present) { 6571 hada_mp = first_mp; 6572 mp = first_mp->b_cont; 6573 } 6574 6575 ip6h = (ip6_t *)mp->b_rptr; 6576 6577 /* check for alignment and full IPv6 header */ 6578 if (!OK_32PTR((uchar_t *)ip6h) || 6579 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6580 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6581 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6582 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6583 freemsg(first_mp); 6584 return; 6585 } 6586 ip6h = (ip6_t *)mp->b_rptr; 6587 } 6588 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6589 IPV6_DEFAULT_VERS_AND_FLOW) { 6590 /* 6591 * It may be a bit too expensive to do this mapped address 6592 * check here, but in the interest of robustness, it seems 6593 * like the correct place. 6594 * TODO: Avoid this check for e.g. 
connected TCP sockets 6595 */ 6596 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6597 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6598 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6599 freemsg(first_mp); 6600 return; 6601 } 6602 6603 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6604 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6605 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6606 freemsg(first_mp); 6607 return; 6608 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6609 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6610 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6611 freemsg(first_mp); 6612 return; 6613 } 6614 6615 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6616 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6617 } else { 6618 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6619 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6620 freemsg(first_mp); 6621 } 6622 } 6623 6624 /* 6625 * Walk through the IPv6 packet in mp and see if there's an AH header 6626 * in it. See if the AH header needs to get done before other headers in 6627 * the packet. (Worker function for ipsec_early_ah_v6().) 6628 */ 6629 #define IPSEC_HDR_DONT_PROCESS 0 6630 #define IPSEC_HDR_PROCESS 1 6631 #define IPSEC_MEMORY_ERROR 2 6632 static int 6633 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6634 { 6635 uint_t length; 6636 uint_t ehdrlen; 6637 uint8_t *whereptr; 6638 uint8_t *endptr; 6639 uint8_t *nexthdrp; 6640 ip6_dest_t *desthdr; 6641 ip6_rthdr_t *rthdr; 6642 ip6_t *ip6h; 6643 6644 /* 6645 * For now just pullup everything. In general, the less pullups, 6646 * the better, but there's so much squirrelling through anyway, 6647 * it's just easier this way. 
6648 */ 6649 if (!pullupmsg(mp, -1)) { 6650 return (IPSEC_MEMORY_ERROR); 6651 } 6652 6653 ip6h = (ip6_t *)mp->b_rptr; 6654 length = IPV6_HDR_LEN; 6655 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6656 endptr = mp->b_wptr; 6657 6658 /* 6659 * We can't just use the argument nexthdr in the place 6660 * of nexthdrp becaue we don't dereference nexthdrp 6661 * till we confirm whether it is a valid address. 6662 */ 6663 nexthdrp = &ip6h->ip6_nxt; 6664 while (whereptr < endptr) { 6665 /* Is there enough left for len + nexthdr? */ 6666 if (whereptr + MIN_EHDR_LEN > endptr) 6667 return (IPSEC_MEMORY_ERROR); 6668 6669 switch (*nexthdrp) { 6670 case IPPROTO_HOPOPTS: 6671 case IPPROTO_DSTOPTS: 6672 /* Assumes the headers are identical for hbh and dst */ 6673 desthdr = (ip6_dest_t *)whereptr; 6674 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6675 if ((uchar_t *)desthdr + ehdrlen > endptr) 6676 return (IPSEC_MEMORY_ERROR); 6677 /* 6678 * Return DONT_PROCESS because of potential Mobile IPv6 6679 * cruft for destination options. 6680 */ 6681 if (*nexthdrp == IPPROTO_DSTOPTS) 6682 return (IPSEC_HDR_DONT_PROCESS); 6683 nexthdrp = &desthdr->ip6d_nxt; 6684 break; 6685 case IPPROTO_ROUTING: 6686 rthdr = (ip6_rthdr_t *)whereptr; 6687 6688 /* 6689 * If there's more hops left on the routing header, 6690 * return now with DON'T PROCESS. 
6691 */ 6692 if (rthdr->ip6r_segleft > 0) 6693 return (IPSEC_HDR_DONT_PROCESS); 6694 6695 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6696 if ((uchar_t *)rthdr + ehdrlen > endptr) 6697 return (IPSEC_MEMORY_ERROR); 6698 nexthdrp = &rthdr->ip6r_nxt; 6699 break; 6700 case IPPROTO_FRAGMENT: 6701 /* Wait for reassembly */ 6702 return (IPSEC_HDR_DONT_PROCESS); 6703 case IPPROTO_AH: 6704 *nexthdr = IPPROTO_AH; 6705 return (IPSEC_HDR_PROCESS); 6706 case IPPROTO_NONE: 6707 /* No next header means we're finished */ 6708 default: 6709 return (IPSEC_HDR_DONT_PROCESS); 6710 } 6711 length += ehdrlen; 6712 whereptr += ehdrlen; 6713 } 6714 panic("ipsec_needs_processing_v6"); 6715 /*NOTREACHED*/ 6716 } 6717 6718 /* 6719 * Path for AH if options are present. If this is the first time we are 6720 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6721 * Otherwise, just fanout. Return value answers the boolean question: 6722 * "Did I consume the mblk you sent me?" 6723 * 6724 * Sometimes AH needs to be done before other IPv6 headers for security 6725 * reasons. This function (and its ipsec_needs_processing_v6() above) 6726 * indicates if that is so, and fans out to the appropriate IPsec protocol 6727 * for the datagram passed in. 6728 */ 6729 static boolean_t 6730 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6731 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6732 { 6733 mblk_t *mp; 6734 uint8_t nexthdr; 6735 ipsec_in_t *ii = NULL; 6736 ah_t *ah; 6737 ipsec_status_t ipsec_rc; 6738 6739 ASSERT((hada_mp == NULL) || (!mctl_present)); 6740 6741 switch (ipsec_needs_processing_v6( 6742 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6743 case IPSEC_MEMORY_ERROR: 6744 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6745 freemsg(hada_mp); 6746 freemsg(first_mp); 6747 return (B_TRUE); 6748 case IPSEC_HDR_DONT_PROCESS: 6749 return (B_FALSE); 6750 } 6751 6752 /* Default means send it to AH! 
*/ 6753 ASSERT(nexthdr == IPPROTO_AH); 6754 if (!mctl_present) { 6755 mp = first_mp; 6756 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 6757 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6758 "allocation failure.\n")); 6759 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6760 freemsg(hada_mp); 6761 freemsg(mp); 6762 return (B_TRUE); 6763 } 6764 /* 6765 * Store the ill_index so that when we come back 6766 * from IPSEC we ride on the same queue. 6767 */ 6768 ii = (ipsec_in_t *)first_mp->b_rptr; 6769 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6770 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 6771 first_mp->b_cont = mp; 6772 } 6773 /* 6774 * Cache hardware acceleration info. 6775 */ 6776 if (hada_mp != NULL) { 6777 ASSERT(ii != NULL); 6778 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6779 "caching data attr.\n")); 6780 ii->ipsec_in_accelerated = B_TRUE; 6781 ii->ipsec_in_da = hada_mp; 6782 } 6783 6784 if (!ipsec_loaded()) { 6785 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 6786 return (B_TRUE); 6787 } 6788 6789 ah = ipsec_inbound_ah_sa(first_mp); 6790 if (ah == NULL) 6791 return (B_TRUE); 6792 ASSERT(ii->ipsec_in_ah_sa != NULL); 6793 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6794 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6795 6796 switch (ipsec_rc) { 6797 case IPSEC_STATUS_SUCCESS: 6798 /* we're done with IPsec processing, send it up */ 6799 ip_fanout_proto_again(first_mp, ill, ill, ire); 6800 break; 6801 case IPSEC_STATUS_FAILED: 6802 BUMP_MIB(&ip6_mib, ipv6InDiscards); 6803 break; 6804 case IPSEC_STATUS_PENDING: 6805 /* no action needed */ 6806 break; 6807 } 6808 return (B_TRUE); 6809 } 6810 6811 /* 6812 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6813 * ip_rput_v6 has already verified alignment, the min length, the version, 6814 * and db_ref = 1. 6815 * 6816 * The ill passed in (the arg named inill) is the ill that the packet 6817 * actually arrived on. 
We need to remember this when saving the 6818 * input interface index into potential IPV6_PKTINFO data in 6819 * ip_add_info_v6(). 6820 */ 6821 void 6822 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6823 uint_t flags, mblk_t *hada_mp) 6824 { 6825 ire_t *ire = NULL; 6826 queue_t *rq; 6827 ill_t *ill = inill; 6828 ipif_t *ipif; 6829 uint8_t *whereptr; 6830 uint8_t nexthdr; 6831 uint16_t remlen; 6832 uint_t prev_nexthdr_offset; 6833 uint_t used; 6834 size_t pkt_len; 6835 uint16_t ip6_len; 6836 uint_t hdr_len; 6837 boolean_t mctl_present; 6838 mblk_t *first_mp; 6839 mblk_t *first_mp1; 6840 boolean_t no_forward; 6841 ip6_hbh_t *hbhhdr; 6842 boolean_t no_cksum = (flags & IP6_IN_NOCKSUM); 6843 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6844 conn_t *connp; 6845 int off; 6846 ilm_t *ilm; 6847 uint32_t ports; 6848 uint_t ipif_id = 0; 6849 zoneid_t zoneid = GLOBAL_ZONEID; 6850 6851 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6852 6853 if (hada_mp != NULL) { 6854 /* 6855 * It's an IPsec accelerated packet. 6856 * Keep a pointer to the data attributes around until 6857 * we allocate the ipsecinfo structure. 6858 */ 6859 IPSECHW_DEBUG(IPSECHW_PKT, 6860 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6861 hada_mp->b_cont = NULL; 6862 /* 6863 * Since it is accelerated, it came directly from 6864 * the ill. 6865 */ 6866 ASSERT(mctl_present == B_FALSE); 6867 ASSERT(mp->b_datap->db_type != M_CTL); 6868 } 6869 6870 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6871 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6872 6873 if (mp->b_cont == NULL) 6874 pkt_len = mp->b_wptr - mp->b_rptr; 6875 else 6876 pkt_len = msgdsize(mp); 6877 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6878 6879 /* 6880 * Check for bogus (too short packet) and packet which 6881 * was padded by the link layer. 
6882 */ 6883 if (ip6_len != pkt_len) { 6884 ssize_t diff; 6885 6886 if (ip6_len > pkt_len) { 6887 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 6888 ip6_len, pkt_len)); 6889 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 6890 freemsg(hada_mp); 6891 freemsg(first_mp); 6892 return; 6893 } 6894 diff = (ssize_t)(pkt_len - ip6_len); 6895 6896 if (!adjmsg(mp, -diff)) { 6897 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6898 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6899 freemsg(hada_mp); 6900 freemsg(first_mp); 6901 return; 6902 } 6903 pkt_len -= diff; 6904 } 6905 6906 /* 6907 * XXX When zero-copy support is added, this turning off of 6908 * checksum flag will need to be done more selectively. 6909 */ 6910 mp->b_datap->db_struioun.cksum.flags &= ~HCK_PARTIALCKSUM; 6911 6912 nexthdr = ip6h->ip6_nxt; 6913 6914 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6915 (uchar_t *)ip6h); 6916 whereptr = (uint8_t *)&ip6h[1]; 6917 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6918 6919 /* Process hop by hop header options */ 6920 if (nexthdr == IPPROTO_HOPOPTS) { 6921 uint_t ehdrlen; 6922 uint8_t *optptr; 6923 6924 if (remlen < MIN_EHDR_LEN) 6925 goto pkt_too_short; 6926 if (mp->b_cont != NULL && 6927 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6928 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6929 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6930 freemsg(hada_mp); 6931 freemsg(first_mp); 6932 return; 6933 } 6934 ip6h = (ip6_t *)mp->b_rptr; 6935 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6936 } 6937 hbhhdr = (ip6_hbh_t *)whereptr; 6938 nexthdr = hbhhdr->ip6h_nxt; 6939 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6940 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6941 6942 if (remlen < ehdrlen) 6943 goto pkt_too_short; 6944 if (mp->b_cont != NULL && 6945 whereptr + ehdrlen > mp->b_wptr) { 6946 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6947 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6948 freemsg(hada_mp); 6949 freemsg(first_mp); 
6950 return; 6951 } 6952 ip6h = (ip6_t *)mp->b_rptr; 6953 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6954 hbhhdr = (ip6_hbh_t *)whereptr; 6955 } 6956 6957 optptr = whereptr + 2; 6958 whereptr += ehdrlen; 6959 remlen -= ehdrlen; 6960 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 6961 ehdrlen - 2, IPPROTO_HOPOPTS)) { 6962 case -1: 6963 /* 6964 * Packet has been consumed and any 6965 * needed ICMP messages sent. 6966 */ 6967 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 6968 freemsg(hada_mp); 6969 return; 6970 case 0: 6971 /* no action needed */ 6972 break; 6973 case 1: 6974 /* Known router alert */ 6975 goto ipv6forus; 6976 } 6977 } 6978 6979 /* 6980 * On incoming v6 multicast packets we will bypass the ire table, 6981 * and assume that the read queue corresponds to the targetted 6982 * interface. 6983 * 6984 * The effect of this is the same as the IPv4 original code, but is 6985 * much cleaner I think. See ip_rput for how that was done. 6986 */ 6987 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 6988 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 6989 /* 6990 * XXX TODO Give to mrouted to for multicast forwarding. 
6991 */ 6992 ILM_WALKER_HOLD(ill); 6993 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 6994 ILM_WALKER_RELE(ill); 6995 if (ilm == NULL) { 6996 if (ip_debug > 3) { 6997 /* ip2dbg */ 6998 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 6999 " which is not for us: %s\n", AF_INET6, 7000 &ip6h->ip6_dst); 7001 } 7002 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7003 freemsg(hada_mp); 7004 freemsg(first_mp); 7005 return; 7006 } 7007 if (ip_debug > 3) { 7008 /* ip2dbg */ 7009 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7010 AF_INET6, &ip6h->ip6_dst); 7011 } 7012 rq = ill->ill_rq; 7013 zoneid = GLOBAL_ZONEID; 7014 goto ipv6forus; 7015 } 7016 7017 ipif = ill->ill_ipif; 7018 7019 /* 7020 * If a packet was received on an interface that is a 6to4 tunnel, 7021 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7022 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7023 * the 6to4 prefix of the address configured on the receiving interface. 7024 * Otherwise, the packet was delivered to this interface in error and 7025 * the packet must be dropped. 7026 */ 7027 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7028 7029 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7030 &ip6h->ip6_dst)) { 7031 if (ip_debug > 2) { 7032 /* ip1dbg */ 7033 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7034 "addressed packet which is not for us: " 7035 "%s\n", AF_INET6, &ip6h->ip6_dst); 7036 } 7037 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7038 freemsg(first_mp); 7039 return; 7040 } 7041 } 7042 7043 /* 7044 * Find an ire that matches destination. For link-local addresses 7045 * we have to match the ill. 7046 * TBD for site local addresses. 
7047 */ 7048 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7049 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7050 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, 7051 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7052 } else { 7053 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 7054 } 7055 if (ire == NULL) { 7056 /* 7057 * No matching IRE found. Mark this packet as having 7058 * originated externally. 7059 */ 7060 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7061 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7062 if (!(ill->ill_flags & ILLF_ROUTER)) 7063 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7064 freemsg(hada_mp); 7065 freemsg(first_mp); 7066 return; 7067 } 7068 if (ip6h->ip6_hops <= 1) { 7069 if (hada_mp != NULL) 7070 goto hada_drop; 7071 icmp_time_exceeded_v6(WR(q), first_mp, 7072 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7073 return; 7074 } 7075 /* 7076 * Per RFC 3513 section 2.5.2, we must not forward packets with 7077 * an unspecified source address. 7078 */ 7079 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7080 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7081 freemsg(hada_mp); 7082 freemsg(first_mp); 7083 return; 7084 } 7085 mp->b_prev = (mblk_t *)(uintptr_t) 7086 ill->ill_phyint->phyint_ifindex; 7087 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7088 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7089 ALL_ZONES); 7090 return; 7091 } 7092 ipif_id = ire->ire_ipif->ipif_seqid; 7093 /* we have a matching IRE */ 7094 if (ire->ire_stq != NULL) { 7095 ill_group_t *ill_group; 7096 ill_group_t *ire_group; 7097 7098 /* 7099 * To be quicker, we may wish not to chase pointers 7100 * (ire->ire_ipif->ipif_ill...) and instead store the 7101 * forwarding policy in the ire. An unfortunate side- 7102 * effect of this would be requiring an ire flush whenever 7103 * the ILLF_ROUTER flag changes. For now, chase pointers 7104 * once and store in the boolean no_forward. 
7105 * 7106 * This appears twice to keep it out of the non-forwarding, 7107 * yes-it's-for-us-on-the-right-interface case. 7108 */ 7109 no_forward = ((ill->ill_flags & 7110 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7111 7112 7113 ASSERT(first_mp == mp); 7114 /* 7115 * This ire has a send-to queue - forward the packet. 7116 */ 7117 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7118 freemsg(hada_mp); 7119 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7120 if (no_forward) 7121 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7122 freemsg(mp); 7123 ire_refrele(ire); 7124 return; 7125 } 7126 if (ip6h->ip6_hops <= 1) { 7127 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7128 icmp_time_exceeded_v6(WR(q), mp, 7129 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7130 ire_refrele(ire); 7131 return; 7132 } 7133 /* 7134 * Per RFC 3513 section 2.5.2, we must not forward packets with 7135 * an unspecified source address. 7136 */ 7137 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7138 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7139 freemsg(hada_mp); 7140 freemsg(mp); 7141 ire_refrele(ire); 7142 return; 7143 } 7144 if (pkt_len > ire->ire_max_frag) { 7145 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7146 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7147 ll_multicast, B_TRUE); 7148 ire_refrele(ire); 7149 return; 7150 } 7151 7152 /* 7153 * Check to see if we're forwarding the packet to a 7154 * different link from which it came. If so, check the 7155 * source and destination addresses since routers must not 7156 * forward any packets with link-local source or 7157 * destination addresses to other links. Otherwise (if 7158 * we're forwarding onto the same link), conditionally send 7159 * a redirect message. 
7160 */ 7161 ill_group = ill->ill_group; 7162 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7163 if (ire->ire_rfq != q && (ill_group == NULL || 7164 ill_group != ire_group)) { 7165 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7166 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7167 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7168 freemsg(mp); 7169 ire_refrele(ire); 7170 return; 7171 } 7172 /* TBD add site-local check at site boundary? */ 7173 } else if (ipv6_send_redirects) { 7174 in6_addr_t *v6targ; 7175 mblk_t *mp1; 7176 in6_addr_t gw_addr_v6; 7177 ire_t *src_ire_v6 = NULL; 7178 7179 /* 7180 * Don't send a redirect when forwarding a source 7181 * routed packet. 7182 */ 7183 if (ip_source_routed_v6(ip6h, mp)) 7184 goto forward; 7185 7186 mutex_enter(&ire->ire_lock); 7187 gw_addr_v6 = ire->ire_gateway_addr_v6; 7188 mutex_exit(&ire->ire_lock); 7189 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7190 v6targ = &gw_addr_v6; 7191 /* 7192 * We won't send redirects to a router 7193 * that doesn't have a link local 7194 * address, but will forward. 7195 */ 7196 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7197 BUMP_MIB(ill->ill_ip6_mib, 7198 ipv6InAddrErrors); 7199 goto forward; 7200 } 7201 } else { 7202 v6targ = &ip6h->ip6_dst; 7203 } 7204 7205 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7206 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7207 ALL_ZONES, 0, MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7208 7209 if (src_ire_v6 != NULL) { 7210 /* 7211 * The source is directly connected. 
7212 */ 7213 mp1 = copymsg(mp); 7214 if (mp1 != NULL) { 7215 icmp_send_redirect_v6(WR(q), 7216 mp1, v6targ, &ip6h->ip6_dst, 7217 ill, B_FALSE); 7218 } 7219 ire_refrele(src_ire_v6); 7220 } 7221 } 7222 7223 forward: 7224 /* Hoplimit verified above */ 7225 ip6h->ip6_hops--; 7226 UPDATE_IB_PKT_COUNT(ire); 7227 ire->ire_last_used_time = lbolt; 7228 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7229 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7230 IRE_REFRELE(ire); 7231 return; 7232 } 7233 rq = ire->ire_rfq; 7234 7235 /* 7236 * Need to put on correct queue for reassembly to find it. 7237 * No need to use put() since reassembly has its own locks. 7238 * Note: multicast packets and packets destined to addresses 7239 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7240 * the arriving ill. 7241 */ 7242 if (rq != q) { 7243 boolean_t check_multi = B_TRUE; 7244 ill_group_t *ill_group = NULL; 7245 ill_group_t *ire_group = NULL; 7246 ill_t *ire_ill = NULL; 7247 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7248 7249 /* 7250 * To be quicker, we may wish not to chase pointers 7251 * (ire->ire_ipif->ipif_ill...) and instead store the 7252 * forwarding policy in the ire. An unfortunate side- 7253 * effect of this would be requiring an ire flush whenever 7254 * the ILLF_ROUTER flag changes. For now, chase pointers 7255 * once and store in the boolean no_forward. 7256 */ 7257 no_forward = ((ill->ill_flags & 7258 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7259 7260 ill_group = ill->ill_group; 7261 if (rq != NULL) { 7262 ire_ill = (ill_t *)(rq->q_ptr); 7263 ire_group = ire_ill->ill_group; 7264 } 7265 7266 /* 7267 * If it's part of the same IPMP group, or if it's a legal 7268 * address on the 'usesrc' interface, then bypass strict 7269 * checks. 
7270 */ 7271 if (ill_group != NULL && ill_group == ire_group) { 7272 check_multi = B_FALSE; 7273 } else if (ill_ifindex != 0 && ire_ill != NULL && 7274 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7275 check_multi = B_FALSE; 7276 } 7277 7278 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7279 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7280 /* 7281 * This packet came in on an interface other than the 7282 * one associated with the destination address 7283 * and we are strict about matches. 7284 * 7285 * As long as the ills belong to the same group, 7286 * we don't consider them to arriving on the wrong 7287 * interface. Thus, when the switch is doing inbound 7288 * load spreading, we won't drop packets when we 7289 * are doing strict multihoming checks. 7290 */ 7291 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7292 freemsg(hada_mp); 7293 freemsg(first_mp); 7294 ire_refrele(ire); 7295 return; 7296 } 7297 7298 if (rq != NULL) 7299 q = rq; 7300 7301 ill = (ill_t *)q->q_ptr; 7302 ASSERT(ill); 7303 } 7304 7305 zoneid = ire->ire_zoneid; 7306 UPDATE_IB_PKT_COUNT(ire); 7307 ire->ire_last_used_time = lbolt; 7308 /* Don't use the ire after this point. */ 7309 ire_refrele(ire); 7310 ipv6forus: 7311 /* 7312 * Looks like this packet is for us one way or another. 7313 * This is where we'll process destination headers etc. 
7314 */ 7315 for (; ; ) { 7316 switch (nexthdr) { 7317 case IPPROTO_TCP: { 7318 uint16_t *up; 7319 uint32_t sum; 7320 dblk_t *dp; 7321 int offset; 7322 7323 hdr_len = pkt_len - remlen; 7324 7325 if (hada_mp != NULL) { 7326 ip0dbg(("tcp hada drop\n")); 7327 goto hada_drop; 7328 } 7329 7330 7331 /* TCP needs all of the TCP header */ 7332 if (remlen < TCP_MIN_HEADER_LENGTH) 7333 goto pkt_too_short; 7334 if (mp->b_cont != NULL && 7335 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7336 if (!pullupmsg(mp, 7337 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7338 BUMP_MIB(ill->ill_ip6_mib, 7339 ipv6InDiscards); 7340 freemsg(first_mp); 7341 return; 7342 } 7343 ip6h = (ip6_t *)mp->b_rptr; 7344 whereptr = (uint8_t *)ip6h + hdr_len; 7345 } 7346 /* 7347 * Extract the offset field from the TCP header. 7348 */ 7349 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7350 if (offset != 5) { 7351 if (offset < 5) { 7352 ip1dbg(("ip_rput_data_v6: short " 7353 "TCP data offset")); 7354 BUMP_MIB(ill->ill_ip6_mib, 7355 ipv6InDiscards); 7356 freemsg(first_mp); 7357 return; 7358 } 7359 /* 7360 * There must be TCP options. 7361 * Make sure we can grab them. 7362 */ 7363 offset <<= 2; 7364 if (remlen < offset) 7365 goto pkt_too_short; 7366 if (mp->b_cont != NULL && 7367 whereptr + offset > mp->b_wptr) { 7368 if (!pullupmsg(mp, 7369 hdr_len + offset)) { 7370 BUMP_MIB(ill->ill_ip6_mib, 7371 ipv6InDiscards); 7372 freemsg(first_mp); 7373 return; 7374 } 7375 ip6h = (ip6_t *)mp->b_rptr; 7376 whereptr = (uint8_t *)ip6h + hdr_len; 7377 } 7378 } 7379 7380 /* 7381 * If packet is being looped back locally checksums 7382 * aren't used 7383 */ 7384 if (no_cksum) { 7385 if (mp->b_datap->db_type == M_DATA) { 7386 /* 7387 * M_DATA mblk, so init mblk (chain) 7388 * for no struio(). 
7389 */ 7390 mblk_t *mp1 = mp; 7391 7392 do { 7393 mp1->b_datap->db_struioflag = 0; 7394 } while ((mp1 = mp1->b_cont) != NULL); 7395 } 7396 goto tcp_fanout; 7397 } 7398 7399 up = (uint16_t *)&ip6h->ip6_src; 7400 /* 7401 * TCP checksum calculation. First sum up the 7402 * pseudo-header fields: 7403 * - Source IPv6 address 7404 * - Destination IPv6 address 7405 * - TCP payload length 7406 * - TCP protocol ID 7407 * XXX need zero-copy support here 7408 */ 7409 sum = htons(IPPROTO_TCP + remlen) + 7410 up[0] + up[1] + up[2] + up[3] + 7411 up[4] + up[5] + up[6] + up[7] + 7412 up[8] + up[9] + up[10] + up[11] + 7413 up[12] + up[13] + up[14] + up[15]; 7414 sum = (sum & 0xffff) + (sum >> 16); 7415 dp = mp->b_datap; 7416 if (dp->db_type != M_DATA || dp->db_ref > 1) { 7417 /* 7418 * Not M_DATA mblk or its a dup, so do the 7419 * checksum now. 7420 */ 7421 sum = IP_CSUM(mp, hdr_len, sum); 7422 if (sum) { 7423 /* checksum failed */ 7424 ip1dbg(("ip_rput_data_v6: TCP checksum" 7425 " failed %x off %d\n", 7426 sum, hdr_len)); 7427 BUMP_MIB(&ip_mib, tcpInErrs); 7428 freemsg(first_mp); 7429 return; 7430 } 7431 } else { 7432 /* 7433 * M_DATA mblk and not a dup 7434 * compute checksum here 7435 */ 7436 off = (int)(whereptr - mp->b_rptr); 7437 7438 if (IP_CSUM(mp, off, sum)) { 7439 BUMP_MIB(&ip_mib, tcpInErrs); 7440 ipcsumdbg("ip_rput_data_v6 " 7441 "swcksumerr\n", mp); 7442 freemsg(first_mp); 7443 return; 7444 } 7445 } 7446 tcp_fanout: 7447 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7448 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7449 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7450 return; 7451 } 7452 case IPPROTO_SCTP: 7453 { 7454 sctp_hdr_t *sctph; 7455 uint32_t calcsum, pktsum; 7456 uint_t hdr_len = pkt_len - remlen; 7457 7458 /* SCTP needs all of the SCTP header */ 7459 if (remlen < sizeof (*sctph)) { 7460 goto pkt_too_short; 7461 } 7462 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7463 ASSERT(mp->b_cont != NULL); 7464 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 
7465 BUMP_MIB(ill->ill_ip6_mib, 7466 ipv6InDiscards); 7467 freemsg(mp); 7468 return; 7469 } 7470 ip6h = (ip6_t *)mp->b_rptr; 7471 whereptr = (uint8_t *)ip6h + hdr_len; 7472 } 7473 7474 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7475 if (!no_cksum) { 7476 /* checksum */ 7477 pktsum = sctph->sh_chksum; 7478 sctph->sh_chksum = 0; 7479 calcsum = sctp_cksum(mp, hdr_len); 7480 if (calcsum != pktsum) { 7481 BUMP_MIB(&sctp_mib, sctpChecksumError); 7482 freemsg(mp); 7483 return; 7484 } 7485 sctph->sh_chksum = pktsum; 7486 } 7487 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7488 if ((connp = sctp_find_conn(&ip6h->ip6_src, 7489 &ip6h->ip6_dst, ports, ipif_id, zoneid)) == NULL) { 7490 ip_fanout_sctp_raw(first_mp, ill, 7491 (ipha_t *)ip6h, B_FALSE, ports, 7492 mctl_present, 7493 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7494 B_TRUE, ipif_id, zoneid); 7495 return; 7496 } 7497 BUMP_MIB(&ip_mib, ipInDelivers); 7498 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7499 B_FALSE, mctl_present); 7500 return; 7501 } 7502 case IPPROTO_UDP: { 7503 uint16_t *up; 7504 uint32_t sum; 7505 7506 hdr_len = pkt_len - remlen; 7507 7508 #define UDPH_SIZE 8 7509 7510 if (hada_mp != NULL) { 7511 ip0dbg(("udp hada drop\n")); 7512 goto hada_drop; 7513 } 7514 7515 /* Verify that at least the ports are present */ 7516 if (remlen < UDPH_SIZE) 7517 goto pkt_too_short; 7518 if (mp->b_cont != NULL && 7519 whereptr + UDPH_SIZE > mp->b_wptr) { 7520 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7521 BUMP_MIB(ill->ill_ip6_mib, 7522 ipv6InDiscards); 7523 freemsg(first_mp); 7524 return; 7525 } 7526 ip6h = (ip6_t *)mp->b_rptr; 7527 whereptr = (uint8_t *)ip6h + hdr_len; 7528 } 7529 #undef UDPH_SIZE 7530 /* 7531 * If packet is being looped back locally checksums 7532 * aren't used 7533 */ 7534 if (no_cksum) 7535 goto udp_fanout; 7536 7537 /* 7538 * Before going through the regular checksum 7539 * calculation, make sure the received checksum 7540 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7541 * in a UDP packet (within IPv6 packet) is invalid 7542 * and should be replaced by 0xffff. This makes 7543 * sense as regular checksum calculation will 7544 * pass for both the cases i.e. 0x0000 and 0xffff. 7545 * Removing one of the case makes error detection 7546 * stronger. 7547 */ 7548 7549 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7550 /* 0x0000 checksum is invalid */ 7551 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7552 "checksum value 0x0000\n")); 7553 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7554 freemsg(first_mp); 7555 return; 7556 } 7557 7558 up = (uint16_t *)&ip6h->ip6_src; 7559 7560 /* 7561 * UDP checksum calculation. First sum up the 7562 * pseudo-header fields: 7563 * - Source IPv6 address 7564 * - Destination IPv6 address 7565 * - UDP payload length 7566 * - UDP protocol ID 7567 */ 7568 7569 sum = htons(IPPROTO_UDP + remlen) + 7570 up[0] + up[1] + up[2] + up[3] + 7571 up[4] + up[5] + up[6] + up[7] + 7572 up[8] + up[9] + up[10] + up[11] + 7573 up[12] + up[13] + up[14] + up[15]; 7574 7575 sum = (sum & 0xffff) + (sum >> 16); 7576 /* Next sum in the UDP packet */ 7577 sum = IP_CSUM(mp, hdr_len, sum); 7578 if (sum) { 7579 /* UDP checksum failed */ 7580 ip1dbg(("ip_rput_data_v6: UDP checksum " 7581 "failed %x\n", 7582 sum)); 7583 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7584 freemsg(first_mp); 7585 return; 7586 } 7587 goto udp_fanout; 7588 } 7589 case IPPROTO_ICMPV6: { 7590 uint16_t *up; 7591 uint32_t sum; 7592 uint_t hdr_len = pkt_len - remlen; 7593 7594 if (hada_mp != NULL) { 7595 ip0dbg(("icmp hada drop\n")); 7596 goto hada_drop; 7597 } 7598 7599 /* 7600 * If packet is being looped back locally checksums 7601 * aren't used 7602 */ 7603 if (no_cksum) 7604 goto icmp_fanout; 7605 7606 up = (uint16_t *)&ip6h->ip6_src; 7607 sum = htons(IPPROTO_ICMPV6 + remlen) + 7608 up[0] + up[1] + up[2] + up[3] + 7609 up[4] + up[5] + up[6] + up[7] + 7610 up[8] + up[9] + up[10] + up[11] + 7611 up[12] + up[13] + up[14] + up[15]; 
7612 sum = (sum & 0xffff) + (sum >> 16); 7613 sum = IP_CSUM(mp, hdr_len, sum); 7614 if (sum) { 7615 /* IPv6 ICMP checksum failed */ 7616 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7617 "failed %x\n", 7618 sum)); 7619 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7620 BUMP_MIB(ill->ill_icmp6_mib, 7621 ipv6IfIcmpInErrors); 7622 freemsg(first_mp); 7623 return; 7624 } 7625 7626 icmp_fanout: 7627 /* Check variable for testing applications */ 7628 if (ipv6_drop_inbound_icmpv6) { 7629 freemsg(first_mp); 7630 return; 7631 } 7632 /* 7633 * Assume that there is always at least one conn for 7634 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7635 * where there is no conn. 7636 */ 7637 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7638 ASSERT(!(ill->ill_phyint->phyint_flags & 7639 PHYI_LOOPBACK)); 7640 /* 7641 * In the multicast case, applications may have 7642 * joined the group from different zones, so we 7643 * need to deliver the packet to each of them. 7644 * Loop through the multicast memberships 7645 * structures (ilm) on the receive ill and send 7646 * a copy of the packet up each matching one. 7647 */ 7648 ILM_WALKER_HOLD(ill); 7649 for (ilm = ill->ill_ilm; ilm != NULL; 7650 ilm = ilm->ilm_next) { 7651 if (ilm->ilm_flags & ILM_DELETED) 7652 continue; 7653 if (!IN6_ARE_ADDR_EQUAL( 7654 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7655 continue; 7656 if (!ipif_lookup_zoneid(ill, 7657 ilm->ilm_zoneid, IPIF_UP, NULL)) 7658 continue; 7659 7660 first_mp1 = ip_copymsg(first_mp); 7661 if (first_mp1 == NULL) 7662 continue; 7663 icmp_inbound_v6(q, first_mp1, ill, 7664 hdr_len, mctl_present, 0, 7665 ilm->ilm_zoneid); 7666 } 7667 ILM_WALKER_RELE(ill); 7668 } else { 7669 first_mp1 = ip_copymsg(first_mp); 7670 if (first_mp1 != NULL) 7671 icmp_inbound_v6(q, first_mp1, ill, 7672 hdr_len, mctl_present, 0, zoneid); 7673 } 7674 } 7675 /* FALLTHRU */ 7676 default: { 7677 /* 7678 * Handle protocols with which IPv6 is less intimate. 
7679 */ 7680 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7681 7682 if (hada_mp != NULL) { 7683 ip0dbg(("default hada drop\n")); 7684 goto hada_drop; 7685 } 7686 7687 /* 7688 * Enable sending ICMP for "Unknown" nexthdr 7689 * case. i.e. where we did not FALLTHRU from 7690 * IPPROTO_ICMPV6 processing case above. 7691 * If we did FALLTHRU, then the packet has already been 7692 * processed for IPPF, don't process it again in 7693 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7694 * flags 7695 */ 7696 if (nexthdr != IPPROTO_ICMPV6) 7697 proto_flags |= IP_FF_SEND_ICMP; 7698 else 7699 proto_flags |= IP6_NO_IPPOLICY; 7700 7701 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7702 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7703 mctl_present, zoneid); 7704 return; 7705 } 7706 7707 case IPPROTO_DSTOPTS: { 7708 uint_t ehdrlen; 7709 uint8_t *optptr; 7710 ip6_dest_t *desthdr; 7711 7712 /* Check if AH is present. */ 7713 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7714 ire, hada_mp, zoneid)) { 7715 ip0dbg(("dst early hada drop\n")); 7716 return; 7717 } 7718 7719 /* 7720 * Reinitialize pointers, as ipsec_early_ah_v6() does 7721 * complete pullups. We don't have to do more pullups 7722 * as a result. 7723 */ 7724 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7725 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7726 ip6h = (ip6_t *)mp->b_rptr; 7727 7728 if (remlen < MIN_EHDR_LEN) 7729 goto pkt_too_short; 7730 7731 desthdr = (ip6_dest_t *)whereptr; 7732 nexthdr = desthdr->ip6d_nxt; 7733 prev_nexthdr_offset = (uint_t)(whereptr - 7734 (uint8_t *)ip6h); 7735 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7736 if (remlen < ehdrlen) 7737 goto pkt_too_short; 7738 optptr = whereptr + 2; 7739 /* 7740 * Note: XXX This code does not seem to make 7741 * distinction between Destination Options Header 7742 * being before/after Routing Header which can 7743 * happen if we are at the end of source route. 7744 * This may become significant in future. 
7745 * (No real significant Destination Options are 7746 * defined/implemented yet ). 7747 */ 7748 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7749 ehdrlen - 2, IPPROTO_DSTOPTS)) { 7750 case -1: 7751 /* 7752 * Packet has been consumed and any needed 7753 * ICMP errors sent. 7754 */ 7755 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7756 freemsg(hada_mp); 7757 return; 7758 case 0: 7759 /* No action needed continue */ 7760 break; 7761 case 1: 7762 /* 7763 * Unnexpected return value 7764 * (Router alert is a Hop-by-Hop option) 7765 */ 7766 #ifdef DEBUG 7767 panic("ip_rput_data_v6: router " 7768 "alert hbh opt indication in dest opt"); 7769 /*NOTREACHED*/ 7770 #else 7771 freemsg(hada_mp); 7772 freemsg(first_mp); 7773 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7774 return; 7775 #endif 7776 } 7777 used = ehdrlen; 7778 break; 7779 } 7780 case IPPROTO_FRAGMENT: { 7781 ip6_frag_t *fraghdr; 7782 size_t no_frag_hdr_len; 7783 7784 if (hada_mp != NULL) { 7785 ip0dbg(("frag hada drop\n")); 7786 goto hada_drop; 7787 } 7788 7789 ASSERT(first_mp == mp); 7790 if (remlen < sizeof (ip6_frag_t)) 7791 goto pkt_too_short; 7792 7793 if (mp->b_cont != NULL && 7794 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7795 if (!pullupmsg(mp, 7796 pkt_len - remlen + sizeof (ip6_frag_t))) { 7797 BUMP_MIB(ill->ill_ip6_mib, 7798 ipv6InDiscards); 7799 freemsg(mp); 7800 return; 7801 } 7802 ip6h = (ip6_t *)mp->b_rptr; 7803 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7804 } 7805 7806 fraghdr = (ip6_frag_t *)whereptr; 7807 used = (uint_t)sizeof (ip6_frag_t); 7808 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 7809 7810 /* 7811 * Invoke the CGTP (multirouting) filtering module to 7812 * process the incoming packet. Packets identified as 7813 * duplicates must be discarded. Filtering is active 7814 * only if the the ip_cgtp_filter ndd variable is 7815 * non-zero. 
7816 */ 7817 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 7818 int cgtp_flt_pkt = 7819 ip_cgtp_filter_ops->cfo_filter_v6( 7820 inill->ill_rq, ip6h, fraghdr); 7821 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7822 freemsg(mp); 7823 return; 7824 } 7825 } 7826 7827 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 7828 remlen - used, &prev_nexthdr_offset); 7829 if (mp == NULL) { 7830 /* Reassembly is still pending */ 7831 return; 7832 } 7833 /* The first mblk are the headers before the frag hdr */ 7834 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 7835 7836 first_mp = mp; /* mp has most likely changed! */ 7837 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7838 ip6h = (ip6_t *)mp->b_rptr; 7839 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7840 whereptr = mp->b_rptr + no_frag_hdr_len; 7841 remlen = ntohs(ip6h->ip6_plen) + 7842 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7843 pkt_len = msgdsize(mp); 7844 used = 0; 7845 break; 7846 } 7847 case IPPROTO_HOPOPTS: 7848 if (hada_mp != NULL) { 7849 ip0dbg(("hop hada drop\n")); 7850 goto hada_drop; 7851 } 7852 /* 7853 * Illegal header sequence. 7854 * (Hop-by-hop headers are processed above 7855 * and required to immediately follow IPv6 header) 7856 */ 7857 icmp_param_problem_v6(WR(q), first_mp, 7858 ICMP6_PARAMPROB_NEXTHEADER, 7859 prev_nexthdr_offset, 7860 B_FALSE, B_FALSE); 7861 return; 7862 7863 case IPPROTO_ROUTING: { 7864 uint_t ehdrlen; 7865 ip6_rthdr_t *rthdr; 7866 7867 /* Check if AH is present. */ 7868 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7869 ire, hada_mp, zoneid)) { 7870 ip0dbg(("routing hada drop\n")); 7871 return; 7872 } 7873 7874 /* 7875 * Reinitialize pointers, as ipsec_early_ah_v6() does 7876 * complete pullups. We don't have to do more pullups 7877 * as a result. 
7878 */ 7879 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7880 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7881 ip6h = (ip6_t *)mp->b_rptr; 7882 7883 if (remlen < MIN_EHDR_LEN) 7884 goto pkt_too_short; 7885 rthdr = (ip6_rthdr_t *)whereptr; 7886 nexthdr = rthdr->ip6r_nxt; 7887 prev_nexthdr_offset = (uint_t)(whereptr - 7888 (uint8_t *)ip6h); 7889 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7890 if (remlen < ehdrlen) 7891 goto pkt_too_short; 7892 if (rthdr->ip6r_segleft != 0) { 7893 /* Not end of source route */ 7894 if (ll_multicast) { 7895 BUMP_MIB(ill->ill_ip6_mib, 7896 ipv6ForwProhibits); 7897 freemsg(hada_mp); 7898 freemsg(mp); 7899 return; 7900 } 7901 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7902 flags, hada_mp); 7903 return; 7904 } 7905 used = ehdrlen; 7906 break; 7907 } 7908 case IPPROTO_AH: 7909 case IPPROTO_ESP: { 7910 /* 7911 * Fast path for AH/ESP. If this is the first time 7912 * we are sending a datagram to AH/ESP, allocate 7913 * a IPSEC_IN message and prepend it. Otherwise, 7914 * just fanout. 7915 */ 7916 7917 ipsec_in_t *ii; 7918 int ipsec_rc; 7919 7920 if (!mctl_present) { 7921 ASSERT(first_mp == mp); 7922 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 7923 NULL) { 7924 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 7925 "allocation failure.\n")); 7926 BUMP_MIB(ill->ill_ip6_mib, 7927 ipv6InDiscards); 7928 freemsg(mp); 7929 return; 7930 } 7931 /* 7932 * Store the ill_index so that when we come back 7933 * from IPSEC we ride on the same queue. 7934 */ 7935 ii = (ipsec_in_t *)first_mp->b_rptr; 7936 ii->ipsec_in_ill_index = 7937 ill->ill_phyint->phyint_ifindex; 7938 ii->ipsec_in_rill_index = 7939 ii->ipsec_in_ill_index; 7940 first_mp->b_cont = mp; 7941 /* 7942 * Cache hardware acceleration info. 
7943 */ 7944 if (hada_mp != NULL) { 7945 IPSECHW_DEBUG(IPSECHW_PKT, 7946 ("ip_rput_data_v6: " 7947 "caching data attr.\n")); 7948 ii->ipsec_in_accelerated = B_TRUE; 7949 ii->ipsec_in_da = hada_mp; 7950 hada_mp = NULL; 7951 } 7952 } else { 7953 ii = (ipsec_in_t *)first_mp->b_rptr; 7954 } 7955 7956 if (!ipsec_loaded()) { 7957 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 7958 ire->ire_zoneid); 7959 return; 7960 } 7961 7962 /* select inbound SA and have IPsec process the pkt */ 7963 if (nexthdr == IPPROTO_ESP) { 7964 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 7965 if (esph == NULL) 7966 return; 7967 ASSERT(ii->ipsec_in_esp_sa != NULL); 7968 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 7969 NULL); 7970 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 7971 first_mp, esph); 7972 } else { 7973 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 7974 if (ah == NULL) 7975 return; 7976 ASSERT(ii->ipsec_in_ah_sa != NULL); 7977 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 7978 NULL); 7979 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 7980 first_mp, ah); 7981 } 7982 7983 switch (ipsec_rc) { 7984 case IPSEC_STATUS_SUCCESS: 7985 break; 7986 case IPSEC_STATUS_FAILED: 7987 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7988 /* FALLTHRU */ 7989 case IPSEC_STATUS_PENDING: 7990 return; 7991 } 7992 /* we're done with IPsec processing, send it up */ 7993 ip_fanout_proto_again(first_mp, ill, inill, ire); 7994 return; 7995 } 7996 case IPPROTO_NONE: 7997 /* All processing is done. Count as "delivered". 
*/ 7998 freemsg(hada_mp); 7999 freemsg(first_mp); 8000 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8001 return; 8002 } 8003 whereptr += used; 8004 ASSERT(remlen >= used); 8005 remlen -= used; 8006 } 8007 /* NOTREACHED */ 8008 8009 pkt_too_short: 8010 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8011 ip6_len, pkt_len, remlen)); 8012 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8013 freemsg(hada_mp); 8014 freemsg(first_mp); 8015 return; 8016 udp_fanout: 8017 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8018 connp = NULL; 8019 } else { 8020 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8021 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8022 CONN_DEC_REF(connp); 8023 connp = NULL; 8024 } 8025 } 8026 8027 if (connp == NULL) { 8028 uint32_t ports; 8029 8030 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8031 UDP_PORTS_OFFSET); 8032 IP6_STAT(ip6_udp_slow_path); 8033 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8034 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8035 zoneid); 8036 return; 8037 } 8038 8039 if (!canputnext(connp->conn_upq)) { 8040 freemsg(first_mp); 8041 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8042 CONN_DEC_REF(connp); 8043 return; 8044 } 8045 8046 /* Initiate IPPF processing */ 8047 if (IP6_IN_IPP(flags)) { 8048 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8049 if (mp == NULL) { 8050 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8051 CONN_DEC_REF(connp); 8052 return; 8053 } 8054 } 8055 8056 if (connp->conn_ipv6_recvpktinfo || 8057 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8058 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8059 if (mp == NULL) { 8060 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8061 CONN_DEC_REF(connp); 8062 return; 8063 } 8064 } 8065 8066 IP6_STAT(ip6_udp_fast_path); 8067 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8068 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8069 putnext(connp->conn_upq, mp); 8070 8071 CONN_DEC_REF(connp); 8072 freemsg(hada_mp); 8073 
return; 8074 8075 hada_drop: 8076 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8077 /* IPsec kstats: bump counter here */ 8078 freemsg(hada_mp); 8079 freemsg(first_mp); 8080 } 8081 8082 /* 8083 * Reassemble fragment. 8084 * When it returns a completed message the first mblk will only contain 8085 * the headers prior to the fragment header. 8086 * 8087 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8088 * of the preceding header. This is needed to patch the previous header's 8089 * nexthdr field when reassembly completes. 8090 */ 8091 static mblk_t * 8092 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8093 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset) 8094 { 8095 ill_t *ill = (ill_t *)q->q_ptr; 8096 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8097 uint16_t offset; 8098 boolean_t more_frags; 8099 uint8_t nexthdr = fraghdr->ip6f_nxt; 8100 in6_addr_t *v6dst_ptr; 8101 in6_addr_t *v6src_ptr; 8102 uint_t end; 8103 uint_t hdr_length; 8104 size_t count; 8105 ipf_t *ipf; 8106 ipf_t **ipfp; 8107 ipfb_t *ipfb; 8108 mblk_t *mp1; 8109 uint8_t ecn_info = 0; 8110 size_t msg_len; 8111 mblk_t *tail_mp; 8112 mblk_t *t_mp; 8113 boolean_t pruned = B_FALSE; 8114 8115 /* 8116 * Note: Fragment offset in header is in 8-octet units. 8117 * Clearing least significant 3 bits not only extracts 8118 * it but also gets it in units of octets. 8119 */ 8120 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8121 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8122 8123 /* 8124 * Is the more frags flag on and the payload length not a multiple 8125 * of eight? 
8126 */ 8127 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8128 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8129 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8130 (uint32_t)((char *)&ip6h->ip6_plen - 8131 (char *)ip6h), B_FALSE, B_FALSE); 8132 return (NULL); 8133 } 8134 8135 v6src_ptr = &ip6h->ip6_src; 8136 v6dst_ptr = &ip6h->ip6_dst; 8137 end = remlen; 8138 8139 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8140 end += offset; 8141 8142 /* 8143 * Would fragment cause reassembled packet to have a payload length 8144 * greater than IP_MAXPACKET - the max payload size? 8145 */ 8146 if (end > IP_MAXPACKET) { 8147 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8148 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8149 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8150 (char *)ip6h), B_FALSE, B_FALSE); 8151 return (NULL); 8152 } 8153 8154 /* 8155 * This packet just has one fragment. Reassembly not 8156 * needed. 8157 */ 8158 if (!more_frags && offset == 0) { 8159 goto reass_done; 8160 } 8161 8162 /* 8163 * Drop the fragmented as early as possible, if 8164 * we don't have resource(s) to re-assemble. 8165 */ 8166 8167 if (ip_reass_queue_bytes == 0) { 8168 freemsg(mp); 8169 return (NULL); 8170 } 8171 8172 /* Record the ECN field info. */ 8173 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8174 /* 8175 * If this is not the first fragment, dump the unfragmentable 8176 * portion of the packet. 8177 */ 8178 if (offset) 8179 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8180 8181 /* 8182 * Fragmentation reassembly. Each ILL has a hash table for 8183 * queueing packets undergoing reassembly for all IPIFs 8184 * associated with the ILL. The hash is based on the packet 8185 * IP ident field. The ILL frag hash table was allocated 8186 * as a timer block at the time the ILL was created. Whenever 8187 * there is anything on the reassembly queue, the timer will 8188 * be running. 
8189 */ 8190 msg_len = mp->b_datap->db_lim - mp->b_datap->db_base; 8191 tail_mp = mp; 8192 while (tail_mp->b_cont != NULL) { 8193 tail_mp = tail_mp->b_cont; 8194 msg_len += tail_mp->b_datap->db_lim - 8195 tail_mp->b_datap->db_base; 8196 } 8197 /* 8198 * If the reassembly list for this ILL will get too big 8199 * prune it. 8200 */ 8201 8202 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8203 ip_reass_queue_bytes) { 8204 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8205 : (ip_reass_queue_bytes - msg_len)); 8206 pruned = B_TRUE; 8207 } 8208 8209 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8210 mutex_enter(&ipfb->ipfb_lock); 8211 8212 ipfp = &ipfb->ipfb_ipf; 8213 /* Try to find an existing fragment queue for this packet. */ 8214 for (;;) { 8215 ipf = ipfp[0]; 8216 if (ipf) { 8217 /* 8218 * It has to match on ident, source address, and 8219 * dest address. 8220 */ 8221 if (ipf->ipf_ident == ident && 8222 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8223 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8224 8225 /* 8226 * If we have received too many 8227 * duplicate fragments for this packet 8228 * free it. 8229 */ 8230 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8231 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8232 freemsg(mp); 8233 mutex_exit(&ipfb->ipfb_lock); 8234 return (NULL); 8235 } 8236 8237 break; 8238 } 8239 ipfp = &ipf->ipf_hash_next; 8240 continue; 8241 } 8242 8243 8244 /* 8245 * If we pruned the list, do we want to store this new 8246 * fragment?. We apply an optimization here based on the 8247 * fact that most fragments will be received in order. 8248 * So if the offset of this incoming fragment is zero, 8249 * it is the first fragment of a new packet. We will 8250 * keep it. Otherwise drop the fragment, as we have 8251 * probably pruned the packet already (since the 8252 * packet cannot be found). 
8253 */ 8254 8255 if (pruned && offset != 0) { 8256 mutex_exit(&ipfb->ipfb_lock); 8257 freemsg(mp); 8258 return (NULL); 8259 } 8260 8261 /* New guy. Allocate a frag message. */ 8262 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8263 if (!mp1) { 8264 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8265 freemsg(mp); 8266 partial_reass_done: 8267 mutex_exit(&ipfb->ipfb_lock); 8268 return (NULL); 8269 } 8270 8271 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8272 /* 8273 * Too many fragmented packets in this hash bucket. 8274 * Free the oldest. 8275 */ 8276 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8277 } 8278 8279 mp1->b_cont = mp; 8280 8281 /* Initialize the fragment header. */ 8282 ipf = (ipf_t *)mp1->b_rptr; 8283 ipf->ipf_mp = mp1; 8284 ipf->ipf_ptphn = ipfp; 8285 ipfp[0] = ipf; 8286 ipf->ipf_hash_next = NULL; 8287 ipf->ipf_ident = ident; 8288 ipf->ipf_v6src = *v6src_ptr; 8289 ipf->ipf_v6dst = *v6dst_ptr; 8290 /* Record reassembly start time. */ 8291 ipf->ipf_timestamp = gethrestime_sec(); 8292 /* Record ipf generation and account for frag header */ 8293 ipf->ipf_gen = ill->ill_ipf_gen++; 8294 ipf->ipf_count = mp1->b_datap->db_lim - mp1->b_datap->db_base; 8295 ipf->ipf_protocol = nexthdr; 8296 ipf->ipf_nf_hdr_len = 0; 8297 ipf->ipf_prev_nexthdr_offset = 0; 8298 ipf->ipf_last_frag_seen = B_FALSE; 8299 ipf->ipf_ecn = ecn_info; 8300 ipf->ipf_num_dups = 0; 8301 ipfb->ipfb_frag_pkts++; 8302 8303 /* 8304 * We handle reassembly two ways. In the easy case, 8305 * where all the fragments show up in order, we do 8306 * minimal bookkeeping, and just clip new pieces on 8307 * the end. If we ever see a hole, then we go off 8308 * to ip_reassemble which has to mark the pieces and 8309 * keep track of the number of holes, etc. Obviously, 8310 * the point of having both mechanisms is so we can 8311 * handle the easy case as efficiently as possible. 8312 */ 8313 if (offset == 0) { 8314 /* Easy case, in-order reassembly so far. 
*/ 8315 /* Update the byte count */ 8316 ipf->ipf_count += msg_len; 8317 ipf->ipf_tail_mp = tail_mp; 8318 /* 8319 * Keep track of next expected offset in 8320 * ipf_end. 8321 */ 8322 ipf->ipf_end = end; 8323 ipf->ipf_nf_hdr_len = hdr_length; 8324 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8325 } else { 8326 /* Hard case, hole at the beginning. */ 8327 ipf->ipf_tail_mp = NULL; 8328 /* 8329 * ipf_end == 0 means that we have given up 8330 * on easy reassembly. 8331 */ 8332 ipf->ipf_end = 0; 8333 /* 8334 * ipf_hole_cnt is set by ip_reassemble. 8335 * ipf_count is updated by ip_reassemble. 8336 * No need to check for return value here 8337 * as we don't expect reassembly to complete or 8338 * fail for the first fragment itself. 8339 */ 8340 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8341 msg_len); 8342 } 8343 /* Update per ipfb and ill byte counts */ 8344 ipfb->ipfb_count += ipf->ipf_count; 8345 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8346 ill->ill_frag_count += ipf->ipf_count; 8347 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8348 /* If the frag timer wasn't already going, start it. */ 8349 mutex_enter(&ill->ill_lock); 8350 ill_frag_timer_start(ill); 8351 mutex_exit(&ill->ill_lock); 8352 goto partial_reass_done; 8353 } 8354 8355 /* 8356 * We have a new piece of a datagram which is already being 8357 * reassembled. Update the ECN info if all IP fragments 8358 * are ECN capable. If there is one which is not, clear 8359 * all the info. If there is at least one which has CE 8360 * code point, IP needs to report that up to transport. 
8361 */ 8362 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8363 if (ecn_info == IPH_ECN_CE) 8364 ipf->ipf_ecn = IPH_ECN_CE; 8365 } else { 8366 ipf->ipf_ecn = IPH_ECN_NECT; 8367 } 8368 8369 if (offset && ipf->ipf_end == offset) { 8370 /* The new fragment fits at the end */ 8371 ipf->ipf_tail_mp->b_cont = mp; 8372 /* Update the byte count */ 8373 ipf->ipf_count += msg_len; 8374 /* Update per ipfb and ill byte counts */ 8375 ipfb->ipfb_count += msg_len; 8376 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8377 ill->ill_frag_count += msg_len; 8378 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8379 if (more_frags) { 8380 /* More to come. */ 8381 ipf->ipf_end = end; 8382 ipf->ipf_tail_mp = tail_mp; 8383 goto partial_reass_done; 8384 } 8385 } else { 8386 /* 8387 * Go do the hard cases. 8388 * Call ip_reassemble(). 8389 */ 8390 int ret; 8391 8392 if (offset == 0) { 8393 if (ipf->ipf_prev_nexthdr_offset == 0) { 8394 ipf->ipf_nf_hdr_len = hdr_length; 8395 ipf->ipf_prev_nexthdr_offset = 8396 *prev_nexthdr_offset; 8397 } 8398 } 8399 /* Save current byte count */ 8400 count = ipf->ipf_count; 8401 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8402 8403 /* Count of bytes added and subtracted (freeb()ed) */ 8404 count = ipf->ipf_count - count; 8405 if (count) { 8406 /* Update per ipfb and ill byte counts */ 8407 ipfb->ipfb_count += count; 8408 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8409 ill->ill_frag_count += count; 8410 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8411 } 8412 if (ret == IP_REASS_PARTIAL) { 8413 goto partial_reass_done; 8414 } else if (ret == IP_REASS_FAILED) { 8415 /* Reassembly failed. 
Free up all resources */ 8416 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8417 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8418 IP_REASS_SET_START(t_mp, 0); 8419 IP_REASS_SET_END(t_mp, 0); 8420 } 8421 freemsg(mp); 8422 goto partial_reass_done; 8423 } 8424 8425 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8426 } 8427 /* 8428 * We have completed reassembly. Unhook the frag header from 8429 * the reassembly list. 8430 * 8431 * Grab the unfragmentable header length next header value out 8432 * of the first fragment 8433 */ 8434 ASSERT(ipf->ipf_nf_hdr_len != 0); 8435 hdr_length = ipf->ipf_nf_hdr_len; 8436 8437 /* 8438 * Before we free the frag header, record the ECN info 8439 * to report back to the transport. 8440 */ 8441 ecn_info = ipf->ipf_ecn; 8442 8443 /* 8444 * Store the nextheader field in the header preceding the fragment 8445 * header 8446 */ 8447 nexthdr = ipf->ipf_protocol; 8448 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8449 ipfp = ipf->ipf_ptphn; 8450 mp1 = ipf->ipf_mp; 8451 count = ipf->ipf_count; 8452 ipf = ipf->ipf_hash_next; 8453 if (ipf) 8454 ipf->ipf_ptphn = ipfp; 8455 ipfp[0] = ipf; 8456 ill->ill_frag_count -= count; 8457 ASSERT(ipfb->ipfb_count >= count); 8458 ipfb->ipfb_count -= count; 8459 ipfb->ipfb_frag_pkts--; 8460 mutex_exit(&ipfb->ipfb_lock); 8461 /* Ditch the frag header. */ 8462 mp = mp1->b_cont; 8463 freeb(mp1); 8464 8465 /* 8466 * Make sure the packet is good by doing some sanity 8467 * check. If bad we can silentely drop the packet. 8468 */ 8469 reass_done: 8470 if (hdr_length < sizeof (ip6_frag_t)) { 8471 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8472 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8473 freemsg(mp); 8474 return (NULL); 8475 } 8476 8477 /* 8478 * Remove the fragment header from the initial header by 8479 * splitting the mblk into the non-fragmentable header and 8480 * everthing after the fragment extension header. 
This has the 8481 * side effect of putting all the headers that need destination 8482 * processing into the b_cont block-- on return this fact is 8483 * used in order to avoid having to look at the extensions 8484 * already processed. 8485 * 8486 * Note that this code assumes that the unfragmentable portion 8487 * of the header is in the first mblk and increments 8488 * the read pointer past it. If this assumption is broken 8489 * this code fails badly. 8490 */ 8491 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8492 mblk_t *nmp; 8493 8494 if (!(nmp = dupb(mp))) { 8495 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8496 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8497 freemsg(mp); 8498 return (NULL); 8499 } 8500 nmp->b_cont = mp->b_cont; 8501 mp->b_cont = nmp; 8502 nmp->b_rptr += hdr_length; 8503 } 8504 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8505 8506 ip6h = (ip6_t *)mp->b_rptr; 8507 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8508 8509 /* Restore original IP length in header. */ 8510 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8511 /* Record the ECN info. */ 8512 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8513 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8514 8515 return (mp); 8516 } 8517 8518 /* 8519 * Walk through the options to see if there is a routing header. 8520 * If present get the destination which is the last address of 8521 * the option. 
8522 */
/*
 * Arguments:
 *	ip6h		start of the IPv6 header; the extension headers are
 *			read directly after it.
 *			NOTE(review): no length is passed in, so this assumes
 *			all extension headers walked here are contiguous in
 *			memory after ip6h -- confirm against callers.
 *	is_fragment	optional (may be NULL).  When non-NULL it is set to
 *			B_TRUE if a fragment header is encountered and
 *			B_FALSE otherwise.
 *
 * Returns ip6_dst, or, when a routing header with a usable segments-left
 * value is found, the address at index (segleft - 1) of its address list.
 * A routing header whose segleft is zero, or larger than the number of
 * addresses implied by its length, leaves the result as ip6_dst rather
 * than reading outside the address list.
 */
in6_addr_t
ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment)
{
	uint8_t		nexthdr;
	uint8_t		*whereptr;
	ip6_hbh_t	*hbhhdr;
	ip6_dest_t	*dsthdr;
	ip6_rthdr0_t	*rthdr;
	int		ehdrlen;
	int		left;
	in6_addr_t	*ap, rv;

	if (is_fragment != NULL)
		*is_fragment = B_FALSE;

	rv = ip6h->ip6_dst;

	nexthdr = ip6h->ip6_nxt;
	whereptr = (uint8_t *)&ip6h[1];
	for (;;) {

		ASSERT(nexthdr != IPPROTO_RAW);
		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			hbhhdr = (ip6_hbh_t *)whereptr;
			nexthdr = hbhhdr->ip6h_nxt;
			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
			break;
		case IPPROTO_DSTOPTS:
			dsthdr = (ip6_dest_t *)whereptr;
			nexthdr = dsthdr->ip6d_nxt;
			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
			break;
		case IPPROTO_ROUTING:
			rthdr = (ip6_rthdr0_t *)whereptr;
			nexthdr = rthdr->ip6r0_nxt;
			ehdrlen = 8 * (rthdr->ip6r0_len + 1);

			left = rthdr->ip6r0_segleft;
			ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
			/*
			 * ip6r0_len counts 8-octet units, so the header
			 * carries ip6r0_len / 2 addresses.  Only index the
			 * list when segleft selects one of them; segleft == 0
			 * would otherwise read ap[-1], i.e. bytes of the
			 * routing header itself.
			 */
			if (left > 0 && left <= rthdr->ip6r0_len / 2)
				rv = ap[left - 1];
			/*
			 * If the caller doesn't care whether the packet
			 * is a fragment or not, we can stop here since
			 * we have our destination.
			 */
			if (is_fragment == NULL)
				goto done;
			break;
		case IPPROTO_FRAGMENT:
			/* Nothing past the fragment header is examined. */
			if (is_fragment != NULL)
				*is_fragment = B_TRUE;
			goto done;
		default:
			goto done;
		}
		whereptr += ehdrlen;
	}

done:
	return (rv);
}
8589 8590 /* 8591 * ip_source_routed_v6: 8592 * This function is called by redirect code in ip_rput_data_v6 to 8593 * know whether this packet is source routed through this node i.e 8594 * whether this node (router) is part of the journey.
This 8595 * function is called under two cases : 8596 * 8597 * case 1 : Routing header was processed by this node and 8598 * ip_process_rthdr replaced ip6_dst with the next hop 8599 * and we are forwarding the packet to the next hop. 8600 * 8601 * case 2 : Routing header was not processed by this node and we 8602 * are just forwarding the packet. 8603 * 8604 * For case (1) we don't want to send redirects. For case(2) we 8605 * want to send redirects. 8606 */ 8607 static boolean_t 8608 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8609 { 8610 uint8_t nexthdr; 8611 in6_addr_t *addrptr; 8612 ip6_rthdr0_t *rthdr; 8613 uint8_t numaddr; 8614 ip6_hbh_t *hbhhdr; 8615 uint_t ehdrlen; 8616 uint8_t *byteptr; 8617 8618 ip2dbg(("ip_source_routed_v6\n")); 8619 nexthdr = ip6h->ip6_nxt; 8620 ehdrlen = IPV6_HDR_LEN; 8621 8622 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8623 while (nexthdr == IPPROTO_HOPOPTS || 8624 nexthdr == IPPROTO_DSTOPTS) { 8625 byteptr = (uint8_t *)ip6h + ehdrlen; 8626 /* 8627 * Check if we have already processed 8628 * packets or we are just a forwarding 8629 * router which only pulled up msgs up 8630 * to IPV6HDR and one HBH ext header 8631 */ 8632 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8633 ip2dbg(("ip_source_routed_v6: Extension" 8634 " headers not processed\n")); 8635 return (B_FALSE); 8636 } 8637 hbhhdr = (ip6_hbh_t *)byteptr; 8638 nexthdr = hbhhdr->ip6h_nxt; 8639 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8640 } 8641 switch (nexthdr) { 8642 case IPPROTO_ROUTING: 8643 byteptr = (uint8_t *)ip6h + ehdrlen; 8644 /* 8645 * If for some reason, we haven't pulled up 8646 * the routing hdr data mblk, then we must 8647 * not have processed it at all. So for sure 8648 * we are not part of the source routed journey. 
8649 */ 8650 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8651 ip2dbg(("ip_source_routed_v6: Routing" 8652 " header not processed\n")); 8653 return (B_FALSE); 8654 } 8655 rthdr = (ip6_rthdr0_t *)byteptr; 8656 /* 8657 * Either we are an intermediate router or the 8658 * last hop before destination and we have 8659 * already processed the routing header. 8660 * If segment_left is greater than or equal to zero, 8661 * then we must be the (numaddr - segleft) entry 8662 * of the routing header. Although ip6r0_segleft 8663 * is a unit8_t variable, we still check for zero 8664 * or greater value, if in case the data type 8665 * is changed someday in future. 8666 */ 8667 if (rthdr->ip6r0_segleft > 0 || 8668 rthdr->ip6r0_segleft == 0) { 8669 ire_t *ire = NULL; 8670 8671 numaddr = rthdr->ip6r0_len / 2; 8672 addrptr = (in6_addr_t *)((char *)rthdr + 8673 sizeof (*rthdr)); 8674 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8675 if (addrptr != NULL) { 8676 ire = ire_ctable_lookup_v6(addrptr, NULL, 8677 IRE_LOCAL, NULL, ALL_ZONES, MATCH_IRE_TYPE); 8678 if (ire != NULL) { 8679 ire_refrele(ire); 8680 return (B_TRUE); 8681 } 8682 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8683 } 8684 } 8685 /* FALLTHRU */ 8686 default: 8687 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8688 return (B_FALSE); 8689 } 8690 } 8691 8692 /* 8693 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8694 * Assumes that the following set of headers appear in the first 8695 * mblk: 8696 * ip6i_t (if present) CAN also appear as a separate mblk. 8697 * ip6_t 8698 * Any extension headers 8699 * TCP/UDP/SCTP header (if present) 8700 * The routine can handle an ICMPv6 header that is not in the first mblk. 8701 * 8702 * The order to determine the outgoing interface is as follows: 8703 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 8704 * 2. If conn_nofailover_ill is set then use that ill. 8705 * 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8706 * 4. 
If q is an ill queue and (link local or multicast destination) then 8707 * use that ill. 8708 * 5. If IPV6_BOUND_IF has been set use that ill. 8709 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8710 * look for the best IRE match for the unspecified group to determine 8711 * the ill. 8712 * 7. For unicast: Just do an IRE lookup for the best match. 8713 */ 8714 void 8715 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8716 { 8717 conn_t *connp = NULL; 8718 queue_t *q = (queue_t *)arg2; 8719 ire_t *ire = NULL; 8720 ire_t *sctp_ire = NULL; 8721 ip6_t *ip6h; 8722 in6_addr_t *v6dstp; 8723 ill_t *ill = NULL; 8724 ipif_t *ipif; 8725 ip6i_t *ip6i; 8726 int cksum_request; /* -1 => normal. */ 8727 /* 1 => Skip TCP/UDP/SCTP checksum */ 8728 /* Otherwise contains insert offset for checksum */ 8729 int unspec_src; 8730 boolean_t do_outrequests; /* Increment OutRequests? */ 8731 mib2_ipv6IfStatsEntry_t *mibptr; 8732 int match_flags = MATCH_IRE_ILL_GROUP; 8733 boolean_t attach_if = B_FALSE; 8734 mblk_t *first_mp; 8735 boolean_t mctl_present; 8736 ipsec_out_t *io; 8737 boolean_t drop_if_delayed = B_FALSE; 8738 boolean_t multirt_need_resolve = B_FALSE; 8739 mblk_t *copy_mp = NULL; 8740 int err; 8741 int ip6i_flags = 0; 8742 zoneid_t zoneid; 8743 ill_t *saved_ill = NULL; 8744 boolean_t conn_lock_held; 8745 boolean_t need_decref = B_FALSE; 8746 8747 /* 8748 * Highest bit in version field is Reachability Confirmation bit 8749 * used by NUD in ip_xmit_v6(). 8750 */ 8751 #ifdef _BIG_ENDIAN 8752 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8753 #else 8754 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 8755 #endif 8756 8757 /* 8758 * M_CTL comes from 5 places 8759 * 8760 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 8761 * both V4 and V6 datagrams. 8762 * 8763 * 2) AH/ESP sends down M_CTL after doing their job with both 8764 * V4 and V6 datagrams. 
8765 * 8766 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 8767 * attached. 8768 * 8769 * 4) Notifications from an external resolver (for XRESOLV ifs) 8770 * 8771 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 8772 * IPsec hardware acceleration support. 8773 * 8774 * We need to handle (1)'s IPv6 case and (3) here. For the 8775 * IPv4 case in (1), and (2), IPSEC processing has already 8776 * started. The code in ip_wput() already knows how to handle 8777 * continuing IPSEC processing (for IPv4 and IPv6). All other 8778 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 8779 * for handling. 8780 */ 8781 first_mp = mp; 8782 mctl_present = B_FALSE; 8783 io = NULL; 8784 8785 /* Multidata transmit? */ 8786 if (DB_TYPE(mp) == M_MULTIDATA) { 8787 /* 8788 * We should never get here, since all Multidata messages 8789 * originating from tcp should have been directed over to 8790 * tcp_multisend() in the first place. 8791 */ 8792 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 8793 freemsg(mp); 8794 return; 8795 } else if (DB_TYPE(mp) == M_CTL) { 8796 uint32_t mctltype = 0; 8797 uint32_t mlen = MBLKL(first_mp); 8798 8799 mp = mp->b_cont; 8800 mctl_present = B_TRUE; 8801 io = (ipsec_out_t *)first_mp->b_rptr; 8802 8803 /* 8804 * Validate this M_CTL message. The only three types of 8805 * M_CTL messages we expect to see in this code path are 8806 * ipsec_out_t or ipsec_in_t structures (allocated as 8807 * ipsec_info_t unions), or ipsec_ctl_t structures. 8808 * The ipsec_out_type and ipsec_in_type overlap in the two 8809 * data structures, and they are either set to IPSEC_OUT 8810 * or IPSEC_IN depending on which data structure it is. 8811 * ipsec_ctl_t is an IPSEC_CTL. 8812 * 8813 * All other M_CTL messages are sent to ip_wput_nondata() 8814 * for handling. 
8815 */ 8816 if (mlen >= sizeof (io->ipsec_out_type)) 8817 mctltype = io->ipsec_out_type; 8818 8819 if ((mlen == sizeof (ipsec_ctl_t)) && 8820 (mctltype == IPSEC_CTL)) { 8821 ip_output(Q_TO_CONN(q), first_mp, q, caller); 8822 return; 8823 } 8824 8825 if ((mlen < sizeof (ipsec_info_t)) || 8826 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 8827 mp == NULL) { 8828 ip_wput_nondata(NULL, q, first_mp, NULL); 8829 return; 8830 } 8831 /* NDP callbacks have q_next non-NULL. That's case #3. */ 8832 if (q->q_next == NULL) { 8833 ip6h = (ip6_t *)mp->b_rptr; 8834 /* 8835 * For a freshly-generated TCP dgram that needs IPV6 8836 * processing, don't call ip_wput immediately. We can 8837 * tell this by the ipsec_out_proc_begin. In-progress 8838 * IPSEC_OUT messages have proc_begin set to TRUE, 8839 * and we want to send all IPSEC_IN messages to 8840 * ip_wput() for IPsec processing or finishing. 8841 */ 8842 if (mctltype == IPSEC_IN || 8843 IPVER(ip6h) != IPV6_VERSION || 8844 io->ipsec_out_proc_begin) { 8845 mibptr = &ip6_mib; 8846 goto notv6; 8847 } 8848 } 8849 } else if (DB_TYPE(mp) != M_DATA) { 8850 ip_wput_nondata(NULL, q, mp, NULL); 8851 return; 8852 } 8853 8854 ip6h = (ip6_t *)mp->b_rptr; 8855 8856 if (IPVER(ip6h) != IPV6_VERSION) { 8857 mibptr = &ip6_mib; 8858 goto notv6; 8859 } 8860 8861 if (q->q_next != NULL) { 8862 ill = (ill_t *)q->q_ptr; 8863 /* 8864 * We don't know if this ill will be used for IPv6 8865 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 8866 * ipif_set_values() sets the ill_isv6 flag to true if 8867 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 8868 * just drop the packet. 
8869 */ 8870 if (!ill->ill_isv6) { 8871 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 8872 "ILLF_IPV6 was set\n")); 8873 freemsg(first_mp); 8874 return; 8875 } 8876 /* For uniformity do a refhold */ 8877 mutex_enter(&ill->ill_lock); 8878 if (!ILL_CAN_LOOKUP(ill)) { 8879 mutex_exit(&ill->ill_lock); 8880 freemsg(first_mp); 8881 return; 8882 } 8883 ill_refhold_locked(ill); 8884 mutex_exit(&ill->ill_lock); 8885 mibptr = ill->ill_ip6_mib; 8886 /* 8887 * ill_ip6_mib is allocated by ipif_set_values() when 8888 * ill_isv6 is set. Thus if ill_isv6 is true, 8889 * ill_ip6_mib had better not be NULL. 8890 */ 8891 ASSERT(mibptr != NULL); 8892 unspec_src = 0; 8893 BUMP_MIB(mibptr, ipv6OutRequests); 8894 do_outrequests = B_FALSE; 8895 } else { 8896 connp = (conn_t *)arg; 8897 ASSERT(connp != NULL); 8898 8899 /* is queue flow controlled? */ 8900 if ((q->q_first || connp->conn_draining) && 8901 (caller == IP_WPUT)) { 8902 /* 8903 * 1) TCP sends down M_CTL for detached connections. 8904 * 2) AH/ESP sends down M_CTL. 8905 * 8906 * We don't flow control either of the above. Only 8907 * UDP and others are flow controlled for which we 8908 * can't have a M_CTL. 8909 */ 8910 ASSERT(first_mp == mp); 8911 (void) putq(q, mp); 8912 return; 8913 } 8914 mibptr = &ip6_mib; 8915 unspec_src = connp->conn_unspec_src; 8916 do_outrequests = B_TRUE; 8917 if (mp->b_flag & MSGHASREF) { 8918 mp->b_flag &= ~MSGHASREF; 8919 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 8920 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 8921 need_decref = B_TRUE; 8922 } 8923 8924 /* 8925 * If there is a policy, try to attach an ipsec_out in 8926 * the front. At the end, first_mp either points to a 8927 * M_DATA message or IPSEC_OUT message linked to a 8928 * M_DATA message. We have to do it now as we might 8929 * lose the "conn" if we go through ip_newroute. 
8930 */ 8931 if (!mctl_present && 8932 (connp->conn_out_enforce_policy || 8933 connp->conn_latch != NULL)) { 8934 ASSERT(first_mp == mp); 8935 /* XXX Any better way to get the protocol fast ? */ 8936 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 8937 connp->conn_ulp)) == NULL)) { 8938 if (need_decref) 8939 CONN_DEC_REF(connp); 8940 return; 8941 } else { 8942 ASSERT(mp->b_datap->db_type == M_CTL); 8943 first_mp = mp; 8944 mp = mp->b_cont; 8945 mctl_present = B_TRUE; 8946 io = (ipsec_out_t *)first_mp->b_rptr; 8947 } 8948 } 8949 } 8950 8951 /* check for alignment and full IPv6 header */ 8952 if (!OK_32PTR((uchar_t *)ip6h) || 8953 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 8954 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 8955 if (do_outrequests) 8956 BUMP_MIB(mibptr, ipv6OutRequests); 8957 BUMP_MIB(mibptr, ipv6OutDiscards); 8958 freemsg(first_mp); 8959 if (ill != NULL) 8960 ill_refrele(ill); 8961 if (need_decref) 8962 CONN_DEC_REF(connp); 8963 return; 8964 } 8965 v6dstp = &ip6h->ip6_dst; 8966 cksum_request = -1; 8967 ip6i = NULL; 8968 8969 /* 8970 * Once neighbor discovery has completed, ndp_process() will provide 8971 * locally generated packets for which processing can be reattempted. 8972 * In these cases, connp is NULL and the original zone is part of a 8973 * prepended ipsec_out_t. 8974 */ 8975 if (io != NULL) { 8976 zoneid = io->ipsec_out_zoneid; 8977 ASSERT(zoneid != ALL_ZONES); 8978 } else { 8979 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 8980 } 8981 8982 if (ip6h->ip6_nxt == IPPROTO_RAW) { 8983 /* 8984 * This is an ip6i_t header followed by an ip6_hdr. 8985 * Check which fields are set. 8986 * 8987 * When the packet comes from a transport we should have 8988 * all needed headers in the first mblk. However, when 8989 * going through ip_newroute*_v6 the ip6i might be in 8990 * a separate mblk when we return here. 
In that case 8991 * we pullup everything to ensure that extension and transport 8992 * headers "stay" in the first mblk. 8993 */ 8994 ip6i = (ip6i_t *)ip6h; 8995 ip6i_flags = ip6i->ip6i_flags; 8996 8997 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 8998 ((mp->b_wptr - (uchar_t *)ip6i) >= 8999 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9000 9001 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9002 if (!pullupmsg(mp, -1)) { 9003 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9004 if (do_outrequests) 9005 BUMP_MIB(mibptr, ipv6OutRequests); 9006 BUMP_MIB(mibptr, ipv6OutDiscards); 9007 freemsg(first_mp); 9008 if (ill != NULL) 9009 ill_refrele(ill); 9010 if (need_decref) 9011 CONN_DEC_REF(connp); 9012 return; 9013 } 9014 ip6h = (ip6_t *)mp->b_rptr; 9015 v6dstp = &ip6h->ip6_dst; 9016 ip6i = (ip6i_t *)ip6h; 9017 } 9018 ip6h = (ip6_t *)&ip6i[1]; 9019 9020 /* 9021 * Advance rptr past the ip6i_t to get ready for 9022 * transmitting the packet. However, if the packet gets 9023 * passed to ip_newroute*_v6 then rptr is moved back so 9024 * that the ip6i_t header can be inspected when the 9025 * packet comes back here after passing through 9026 * ire_add_then_send. 9027 */ 9028 mp->b_rptr = (uchar_t *)ip6h; 9029 9030 /* 9031 * IP6I_ATTACH_IF is set in this function when we had a 9032 * conn and it was either bound to the IPFF_NOFAILOVER address 9033 * or IPV6_BOUND_PIF was set. These options override other 9034 * options that set the ifindex. We come here with 9035 * IP6I_ATTACH_IF set when we can't find the ire and 9036 * ip_newroute_v6 is feeding the packet for second time. 
9037 */ 9038 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9039 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9040 ASSERT(ip6i->ip6i_ifindex != 0); 9041 if (ill != NULL) 9042 ill_refrele(ill); 9043 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9044 NULL, NULL, NULL, NULL); 9045 if (ill == NULL) { 9046 if (do_outrequests) 9047 BUMP_MIB(mibptr, ipv6OutRequests); 9048 BUMP_MIB(mibptr, ipv6OutDiscards); 9049 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9050 ip6i->ip6i_ifindex)); 9051 if (need_decref) 9052 CONN_DEC_REF(connp); 9053 freemsg(first_mp); 9054 return; 9055 } 9056 mibptr = ill->ill_ip6_mib; 9057 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9058 /* 9059 * Preserve the index so that when we return 9060 * from IPSEC processing, we know where to 9061 * send the packet. 9062 */ 9063 if (mctl_present) { 9064 ASSERT(io != NULL); 9065 io->ipsec_out_ill_index = 9066 ip6i->ip6i_ifindex; 9067 } 9068 } 9069 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9070 /* 9071 * This is a multipathing probe packet that has 9072 * been delayed in ND resolution. 
Drop the 9073 * packet for the reasons mentioned in 9074 * nce_queue_mp() 9075 */ 9076 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9077 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9078 freemsg(first_mp); 9079 ill_refrele(ill); 9080 if (need_decref) 9081 CONN_DEC_REF(connp); 9082 return; 9083 } 9084 } 9085 } 9086 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9087 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9088 9089 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9090 if (secpolicy_net_rawaccess(cr) != 0) { 9091 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9092 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9093 NULL, zoneid, 9094 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9095 if (ire == NULL) { 9096 if (do_outrequests) 9097 BUMP_MIB(mibptr, 9098 ipv6OutRequests); 9099 BUMP_MIB(mibptr, ipv6OutDiscards); 9100 ip1dbg(("ip_wput_v6: bad source " 9101 "addr\n")); 9102 freemsg(first_mp); 9103 if (ill != NULL) 9104 ill_refrele(ill); 9105 if (need_decref) 9106 CONN_DEC_REF(connp); 9107 return; 9108 } 9109 ire_refrele(ire); 9110 } 9111 /* No need to verify again when using ip_newroute */ 9112 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9113 } 9114 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9115 /* 9116 * Make sure they match since ip_newroute*_v6 etc might 9117 * (unknown to them) inspect ip6i_nexthop when 9118 * they think they access ip6_dst. 9119 */ 9120 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9121 } 9122 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9123 cksum_request = 1; 9124 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9125 cksum_request = ip6i->ip6i_checksum_off; 9126 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9127 unspec_src = 1; 9128 9129 if (do_outrequests && ill != NULL) { 9130 BUMP_MIB(mibptr, ipv6OutRequests); 9131 do_outrequests = B_FALSE; 9132 } 9133 /* 9134 * Store ip6i_t info that we need after we come back 9135 * from IPSEC processing. 
9136 */ 9137 if (mctl_present) { 9138 ASSERT(io != NULL); 9139 io->ipsec_out_unspec_src = unspec_src; 9140 } 9141 } 9142 if (connp != NULL && connp->conn_dontroute) 9143 ip6h->ip6_hops = 1; 9144 9145 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9146 goto ipv6multicast; 9147 9148 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9149 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9150 ill_t *conn_outgoing_pill; 9151 9152 conn_outgoing_pill = conn_get_held_ill(connp, 9153 &connp->conn_outgoing_pill, &err); 9154 if (err == ILL_LOOKUP_FAILED) { 9155 if (ill != NULL) 9156 ill_refrele(ill); 9157 if (need_decref) 9158 CONN_DEC_REF(connp); 9159 freemsg(first_mp); 9160 return; 9161 } 9162 if (conn_outgoing_pill != NULL) { 9163 if (ill != NULL) 9164 ill_refrele(ill); 9165 ill = conn_outgoing_pill; 9166 attach_if = B_TRUE; 9167 match_flags = MATCH_IRE_ILL; 9168 mibptr = ill->ill_ip6_mib; 9169 9170 /* 9171 * Check if we need an ire that will not be 9172 * looked up by anybody else i.e. HIDDEN. 9173 */ 9174 if (ill_is_probeonly(ill)) 9175 match_flags |= MATCH_IRE_MARK_HIDDEN; 9176 goto send_from_ill; 9177 } 9178 } 9179 9180 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9181 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9182 ill_t *conn_nofailover_ill; 9183 9184 conn_nofailover_ill = conn_get_held_ill(connp, 9185 &connp->conn_nofailover_ill, &err); 9186 if (err == ILL_LOOKUP_FAILED) { 9187 if (ill != NULL) 9188 ill_refrele(ill); 9189 if (need_decref) 9190 CONN_DEC_REF(connp); 9191 freemsg(first_mp); 9192 return; 9193 } 9194 if (conn_nofailover_ill != NULL) { 9195 if (ill != NULL) 9196 ill_refrele(ill); 9197 ill = conn_nofailover_ill; 9198 attach_if = B_TRUE; 9199 /* 9200 * Assumes that ipc_nofailover_ill is used only for 9201 * multipathing probe packets. These packets are better 9202 * dropped, if they are delayed in ND resolution, for 9203 * the reasons described in nce_queue_mp(). 
9204 * IP6I_DROP_IFDELAYED will be set later on in this 9205 * function for this packet. 9206 */ 9207 drop_if_delayed = B_TRUE; 9208 match_flags = MATCH_IRE_ILL; 9209 mibptr = ill->ill_ip6_mib; 9210 9211 /* 9212 * Check if we need an ire that will not be 9213 * looked up by anybody else i.e. HIDDEN. 9214 */ 9215 if (ill_is_probeonly(ill)) 9216 match_flags |= MATCH_IRE_MARK_HIDDEN; 9217 goto send_from_ill; 9218 } 9219 } 9220 9221 /* 9222 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9223 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9224 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9225 */ 9226 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9227 ASSERT(ip6i->ip6i_ifindex != 0); 9228 attach_if = B_TRUE; 9229 ASSERT(ill != NULL); 9230 match_flags = MATCH_IRE_ILL; 9231 9232 /* 9233 * Check if we need an ire that will not be 9234 * looked up by anybody else i.e. HIDDEN. 9235 */ 9236 if (ill_is_probeonly(ill)) 9237 match_flags |= MATCH_IRE_MARK_HIDDEN; 9238 goto send_from_ill; 9239 } 9240 9241 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9242 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9243 ASSERT(ill != NULL); 9244 goto send_from_ill; 9245 } 9246 9247 /* 9248 * 4. If q is an ill queue and (link local or multicast destination) 9249 * then use that ill. 9250 */ 9251 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9252 goto send_from_ill; 9253 } 9254 9255 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9256 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9257 ill_t *conn_outgoing_ill; 9258 9259 conn_outgoing_ill = conn_get_held_ill(connp, 9260 &connp->conn_outgoing_ill, &err); 9261 if (err == ILL_LOOKUP_FAILED) { 9262 if (ill != NULL) 9263 ill_refrele(ill); 9264 if (need_decref) 9265 CONN_DEC_REF(connp); 9266 freemsg(first_mp); 9267 return; 9268 } 9269 if (ill != NULL) 9270 ill_refrele(ill); 9271 ill = conn_outgoing_ill; 9272 mibptr = ill->ill_ip6_mib; 9273 goto send_from_ill; 9274 } 9275 9276 /* 9277 * 6. For unicast: Just do an IRE lookup for the best match. 9278 * If we get here for a link-local address it is rather random 9279 * what interface we pick on a multihomed host. 9280 * *If* there is an IRE_CACHE (and the link-local address 9281 * isn't duplicated on multi links) this will find the IRE_CACHE. 9282 * Otherwise it will use one of the matching IRE_INTERFACE routes 9283 * for the link-local prefix. Hence, applications 9284 * *should* be encouraged to specify an outgoing interface when sending 9285 * to a link local address. 9286 */ 9287 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9288 !connp->conn_fully_bound)) { 9289 /* 9290 * We cache IRE_CACHEs to avoid lookups. We don't do 9291 * this for the tcp global queue and listen end point 9292 * as it does not really have a real destination to 9293 * talk to. 9294 */ 9295 ire = ire_cache_lookup_v6(v6dstp, zoneid); 9296 } else { 9297 /* 9298 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9299 * grab a lock here to check for CONDEMNED as it is okay 9300 * to send a packet or two with the IRE_CACHE that is going 9301 * away. 9302 */ 9303 mutex_enter(&connp->conn_lock); 9304 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9305 if (ire != NULL && 9306 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9307 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9308 9309 IRE_REFHOLD(ire); 9310 mutex_exit(&connp->conn_lock); 9311 9312 } else { 9313 boolean_t cached = B_FALSE; 9314 9315 connp->conn_ire_cache = NULL; 9316 mutex_exit(&connp->conn_lock); 9317 /* Release the old ire */ 9318 if (ire != NULL && sctp_ire == NULL) 9319 IRE_REFRELE_NOTR(ire); 9320 9321 ire = (ire_t *)ire_cache_lookup_v6(v6dstp, zoneid); 9322 if (ire != NULL) { 9323 IRE_REFHOLD_NOTR(ire); 9324 9325 mutex_enter(&connp->conn_lock); 9326 if (!(connp->conn_state_flags & CONN_CLOSING) && 9327 (connp->conn_ire_cache == NULL)) { 9328 rw_enter(&ire->ire_bucket->irb_lock, 9329 RW_READER); 9330 if (!(ire->ire_marks & 9331 IRE_MARK_CONDEMNED)) { 9332 connp->conn_ire_cache = ire; 9333 cached = B_TRUE; 9334 } 9335 rw_exit(&ire->ire_bucket->irb_lock); 9336 } 9337 mutex_exit(&connp->conn_lock); 9338 9339 /* 9340 * We can continue to use the ire but since it 9341 * was not cached, we should drop the extra 9342 * reference. 9343 */ 9344 if (!cached) 9345 IRE_REFRELE_NOTR(ire); 9346 } 9347 } 9348 } 9349 9350 if (ire != NULL) { 9351 if (do_outrequests) { 9352 /* Handle IRE_LOCAL's that might appear here */ 9353 if (ire->ire_type == IRE_CACHE) { 9354 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9355 ill_ip6_mib; 9356 } else { 9357 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9358 } 9359 BUMP_MIB(mibptr, ipv6OutRequests); 9360 } 9361 ASSERT(!attach_if); 9362 9363 /* 9364 * Check if the ire has the RTF_MULTIRT flag, inherited 9365 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9366 */ 9367 if (ire->ire_flags & RTF_MULTIRT) { 9368 /* 9369 * Force hop limit of multirouted packets if required. 9370 * The hop limit of such packets is bounded by the 9371 * ip_multirt_ttl ndd variable. 9372 * NDP packets must have a hop limit of 255; don't 9373 * change the hop limit in that case. 
9374 */ 9375 if ((ip_multirt_ttl > 0) && 9376 (ip6h->ip6_hops > ip_multirt_ttl) && 9377 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9378 if (ip_debug > 3) { 9379 ip2dbg(("ip_wput_v6: forcing multirt " 9380 "hop limit to %d (was %d) ", 9381 ip_multirt_ttl, ip6h->ip6_hops)); 9382 pr_addr_dbg("v6dst %s\n", AF_INET6, 9383 &ire->ire_addr_v6); 9384 } 9385 ip6h->ip6_hops = ip_multirt_ttl; 9386 } 9387 9388 /* 9389 * We look at this point if there are pending 9390 * unresolved routes. ire_multirt_need_resolve_v6() 9391 * checks in O(n) that all IRE_OFFSUBNET ire 9392 * entries for the packet's destination and 9393 * flagged RTF_MULTIRT are currently resolved. 9394 * If some remain unresolved, we do a copy 9395 * of the current message. It will be used 9396 * to initiate additional route resolutions. 9397 */ 9398 multirt_need_resolve = 9399 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9400 ip2dbg(("ip_wput_v6: ire %p, " 9401 "multirt_need_resolve %d, first_mp %p\n", 9402 (void *)ire, multirt_need_resolve, 9403 (void *)first_mp)); 9404 if (multirt_need_resolve) { 9405 copy_mp = copymsg(first_mp); 9406 if (copy_mp != NULL) { 9407 MULTIRT_DEBUG_TAG(copy_mp); 9408 } 9409 } 9410 } 9411 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9412 connp, caller, 0, ip6i_flags); 9413 if (need_decref) { 9414 CONN_DEC_REF(connp); 9415 connp = NULL; 9416 } 9417 IRE_REFRELE(ire); 9418 9419 /* 9420 * Try to resolve another multiroute if 9421 * ire_multirt_need_resolve_v6() deemed it necessary. 9422 * copy_mp will be consumed (sent or freed) by 9423 * ip_newroute_v6(). 9424 */ 9425 if (copy_mp != NULL) { 9426 if (mctl_present) { 9427 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9428 } else { 9429 ip6h = (ip6_t *)copy_mp->b_rptr; 9430 } 9431 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9432 &ip6h->ip6_src, NULL, zoneid); 9433 } 9434 if (ill != NULL) 9435 ill_refrele(ill); 9436 return; 9437 } 9438 9439 /* 9440 * No full IRE for this destination. 
Send it to 9441 * ip_newroute_v6 to see if anything else matches. 9442 * Mark this packet as having originated on this 9443 * machine. 9444 * Update rptr if there was an ip6i_t header. 9445 */ 9446 mp->b_prev = NULL; 9447 mp->b_next = NULL; 9448 if (ip6i != NULL) 9449 mp->b_rptr -= sizeof (ip6i_t); 9450 9451 if (unspec_src) { 9452 if (ip6i == NULL) { 9453 /* 9454 * Add ip6i_t header to carry unspec_src 9455 * until the packet comes back in ip_wput_v6. 9456 */ 9457 mp = ip_add_info_v6(mp, NULL, v6dstp); 9458 if (mp == NULL) { 9459 if (do_outrequests) 9460 BUMP_MIB(mibptr, ipv6OutRequests); 9461 BUMP_MIB(mibptr, ipv6OutDiscards); 9462 if (mctl_present) 9463 freeb(first_mp); 9464 if (ill != NULL) 9465 ill_refrele(ill); 9466 if (need_decref) 9467 CONN_DEC_REF(connp); 9468 return; 9469 } 9470 ip6i = (ip6i_t *)mp->b_rptr; 9471 9472 if (mctl_present) { 9473 ASSERT(first_mp != mp); 9474 first_mp->b_cont = mp; 9475 } else { 9476 first_mp = mp; 9477 } 9478 9479 if ((mp->b_wptr - (uchar_t *)ip6i) == 9480 sizeof (ip6i_t)) { 9481 /* 9482 * ndp_resolver called from ip_newroute_v6 9483 * expects pulled up message. 
9484 */ 9485 if (!pullupmsg(mp, -1)) { 9486 ip1dbg(("ip_wput_v6: pullupmsg" 9487 " failed\n")); 9488 if (do_outrequests) { 9489 BUMP_MIB(mibptr, 9490 ipv6OutRequests); 9491 } 9492 BUMP_MIB(mibptr, ipv6OutDiscards); 9493 freemsg(first_mp); 9494 if (ill != NULL) 9495 ill_refrele(ill); 9496 if (need_decref) 9497 CONN_DEC_REF(connp); 9498 return; 9499 } 9500 ip6i = (ip6i_t *)mp->b_rptr; 9501 } 9502 ip6h = (ip6_t *)&ip6i[1]; 9503 v6dstp = &ip6h->ip6_dst; 9504 } 9505 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9506 if (mctl_present) { 9507 ASSERT(io != NULL); 9508 io->ipsec_out_unspec_src = unspec_src; 9509 } 9510 } 9511 if (do_outrequests) 9512 BUMP_MIB(mibptr, ipv6OutRequests); 9513 if (need_decref) 9514 CONN_DEC_REF(connp); 9515 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9516 if (ill != NULL) 9517 ill_refrele(ill); 9518 return; 9519 9520 9521 /* 9522 * Handle multicast packets with or without an conn. 9523 * Assumes that the transports set ip6_hops taking 9524 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9525 * into account. 9526 */ 9527 ipv6multicast: 9528 ip2dbg(("ip_wput_v6: multicast\n")); 9529 9530 /* 9531 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9532 * 2. If conn_nofailover_ill is set then use that ill. 9533 * 9534 * Hold the conn_lock till we refhold the ill of interest that is 9535 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9536 * while holding any locks, postpone the refrele until after the 9537 * conn_lock is dropped. 
9538 */ 9539 if (connp != NULL) { 9540 mutex_enter(&connp->conn_lock); 9541 conn_lock_held = B_TRUE; 9542 } else { 9543 conn_lock_held = B_FALSE; 9544 } 9545 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9546 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9547 if (err == ILL_LOOKUP_FAILED) { 9548 ip1dbg(("ip_output_v6: multicast" 9549 " conn_outgoing_pill no ipif\n")); 9550 multicast_discard: 9551 ASSERT(saved_ill == NULL); 9552 if (conn_lock_held) 9553 mutex_exit(&connp->conn_lock); 9554 if (ill != NULL) 9555 ill_refrele(ill); 9556 freemsg(first_mp); 9557 if (do_outrequests) 9558 BUMP_MIB(mibptr, ipv6OutDiscards); 9559 if (need_decref) 9560 CONN_DEC_REF(connp); 9561 return; 9562 } 9563 saved_ill = ill; 9564 ill = connp->conn_outgoing_pill; 9565 attach_if = B_TRUE; 9566 match_flags = MATCH_IRE_ILL; 9567 mibptr = ill->ill_ip6_mib; 9568 9569 /* 9570 * Check if we need an ire that will not be 9571 * looked up by anybody else i.e. HIDDEN. 9572 */ 9573 if (ill_is_probeonly(ill)) 9574 match_flags |= MATCH_IRE_MARK_HIDDEN; 9575 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9576 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9577 if (err == ILL_LOOKUP_FAILED) { 9578 ip1dbg(("ip_output_v6: multicast" 9579 " conn_nofailover_ill no ipif\n")); 9580 goto multicast_discard; 9581 } 9582 saved_ill = ill; 9583 ill = connp->conn_nofailover_ill; 9584 attach_if = B_TRUE; 9585 match_flags = MATCH_IRE_ILL; 9586 9587 /* 9588 * Check if we need an ire that will not be 9589 * looked up by anybody else i.e. HIDDEN. 9590 */ 9591 if (ill_is_probeonly(ill)) 9592 match_flags |= MATCH_IRE_MARK_HIDDEN; 9593 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9594 /* 9595 * Redo 1. If we did not find an IRE_CACHE the first time, 9596 * we should have an ip6i_t with IP6I_ATTACH_IF if 9597 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9598 * used on this endpoint. 
9599 */ 9600 ASSERT(ip6i->ip6i_ifindex != 0); 9601 attach_if = B_TRUE; 9602 ASSERT(ill != NULL); 9603 match_flags = MATCH_IRE_ILL; 9604 9605 /* 9606 * Check if we need an ire that will not be 9607 * looked up by anybody else i.e. HIDDEN. 9608 */ 9609 if (ill_is_probeonly(ill)) 9610 match_flags |= MATCH_IRE_MARK_HIDDEN; 9611 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9612 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9613 9614 ASSERT(ill != NULL); 9615 } else if (ill != NULL) { 9616 /* 9617 * 4. If q is an ill queue and (link local or multicast 9618 * destination) then use that ill. 9619 * We don't need the ipif initialization here. 9620 * This useless assert below is just to prevent lint from 9621 * reporting a null body if statement. 9622 */ 9623 ASSERT(ill != NULL); 9624 } else if (connp != NULL) { 9625 /* 9626 * 5. If IPV6_BOUND_IF has been set use that ill. 9627 * 9628 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 9629 * Otherwise look for the best IRE match for the unspecified 9630 * group to determine the ill. 9631 * 9632 * conn_multicast_ill is used for only IPv6 packets. 9633 * conn_multicast_ipif is used for only IPv4 packets. 9634 * Thus a PF_INET6 socket send both IPv4 and IPv6 9635 * multicast packets using different IP*_MULTICAST_IF 9636 * interfaces. 
9637 */ 9638 if (connp->conn_outgoing_ill != NULL) { 9639 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9640 if (err == ILL_LOOKUP_FAILED) { 9641 ip1dbg(("ip_output_v6: multicast" 9642 " conn_outgoing_ill no ipif\n")); 9643 goto multicast_discard; 9644 } 9645 ill = connp->conn_outgoing_ill; 9646 } else if (connp->conn_multicast_ill != NULL) { 9647 err = ill_check_and_refhold(connp->conn_multicast_ill); 9648 if (err == ILL_LOOKUP_FAILED) { 9649 ip1dbg(("ip_output_v6: multicast" 9650 " conn_multicast_ill no ipif\n")); 9651 goto multicast_discard; 9652 } 9653 ill = connp->conn_multicast_ill; 9654 } else { 9655 mutex_exit(&connp->conn_lock); 9656 conn_lock_held = B_FALSE; 9657 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 9658 if (ipif == NULL) { 9659 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9660 goto multicast_discard; 9661 } 9662 /* 9663 * We have a ref to this ipif, so we can safely 9664 * access ipif_ill. 9665 */ 9666 ill = ipif->ipif_ill; 9667 mutex_enter(&ill->ill_lock); 9668 if (!ILL_CAN_LOOKUP(ill)) { 9669 mutex_exit(&ill->ill_lock); 9670 ipif_refrele(ipif); 9671 ill = NULL; 9672 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9673 goto multicast_discard; 9674 } 9675 ill_refhold_locked(ill); 9676 mutex_exit(&ill->ill_lock); 9677 ipif_refrele(ipif); 9678 /* 9679 * Save binding until IPV6_MULTICAST_IF 9680 * changes it 9681 */ 9682 mutex_enter(&connp->conn_lock); 9683 connp->conn_multicast_ill = ill; 9684 connp->conn_orig_multicast_ifindex = 9685 ill->ill_phyint->phyint_ifindex; 9686 mutex_exit(&connp->conn_lock); 9687 } 9688 } 9689 if (conn_lock_held) 9690 mutex_exit(&connp->conn_lock); 9691 9692 if (saved_ill != NULL) 9693 ill_refrele(saved_ill); 9694 9695 ASSERT(ill != NULL); 9696 /* 9697 * For multicast loopback interfaces replace the multicast address 9698 * with a unicast address for the ire lookup. 
9699 */ 9700 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 9701 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9702 9703 mibptr = ill->ill_ip6_mib; 9704 if (do_outrequests) { 9705 BUMP_MIB(mibptr, ipv6OutRequests); 9706 do_outrequests = B_FALSE; 9707 } 9708 BUMP_MIB(mibptr, ipv6OutMcastPkts); 9709 9710 /* 9711 * As we may lose the conn by the time we reach ip_wput_ire_v6 9712 * we copy conn_multicast_loop and conn_dontroute on to an 9713 * ipsec_out. In case if this datagram goes out secure, 9714 * we need the ill_index also. Copy that also into the 9715 * ipsec_out. 9716 */ 9717 if (mctl_present) { 9718 io = (ipsec_out_t *)first_mp->b_rptr; 9719 ASSERT(first_mp->b_datap->db_type == M_CTL); 9720 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9721 } else { 9722 ASSERT(mp == first_mp); 9723 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 9724 BUMP_MIB(mibptr, ipv6OutDiscards); 9725 freemsg(mp); 9726 if (ill != NULL) 9727 ill_refrele(ill); 9728 if (need_decref) 9729 CONN_DEC_REF(connp); 9730 return; 9731 } 9732 io = (ipsec_out_t *)first_mp->b_rptr; 9733 /* This is not a secure packet */ 9734 io->ipsec_out_secure = B_FALSE; 9735 io->ipsec_out_use_global_policy = B_TRUE; 9736 io->ipsec_out_zoneid = 9737 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 9738 first_mp->b_cont = mp; 9739 mctl_present = B_TRUE; 9740 } 9741 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9742 io->ipsec_out_unspec_src = unspec_src; 9743 if (connp != NULL) 9744 io->ipsec_out_dontroute = connp->conn_dontroute; 9745 9746 send_from_ill: 9747 ASSERT(ill != NULL); 9748 ASSERT(mibptr == ill->ill_ip6_mib); 9749 if (do_outrequests) { 9750 BUMP_MIB(mibptr, ipv6OutRequests); 9751 do_outrequests = B_FALSE; 9752 } 9753 9754 if (io != NULL) 9755 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9756 9757 /* 9758 * When a specific ill is specified (using IPV6_PKTINFO, 9759 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9760 * on routing entries (ftable and ctable) that have a matching 9761 * ire->ire_ipif->ipif_ill. Thus this can only be used 9762 * for destinations that are on-link for the specific ill 9763 * and that can appear on multiple links. Thus it is useful 9764 * for multicast destinations, link-local destinations, and 9765 * at some point perhaps for site-local destinations (if the 9766 * node sits at a site boundary). 9767 * We create the cache entries in the regular ctable since 9768 * it can not "confuse" things for other destinations. 9769 * table. 9770 * 9771 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9772 * It is used only when ire_cache_lookup is used above. 9773 */ 9774 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9775 zoneid, match_flags); 9776 if (ire != NULL) { 9777 /* 9778 * Check if the ire has the RTF_MULTIRT flag, inherited 9779 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9780 */ 9781 if (ire->ire_flags & RTF_MULTIRT) { 9782 /* 9783 * Force hop limit of multirouted packets if required. 9784 * The hop limit of such packets is bounded by the 9785 * ip_multirt_ttl ndd variable. 9786 * NDP packets must have a hop limit of 255; don't 9787 * change the hop limit in that case. 
9788 */ 9789 if ((ip_multirt_ttl > 0) && 9790 (ip6h->ip6_hops > ip_multirt_ttl) && 9791 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9792 if (ip_debug > 3) { 9793 ip2dbg(("ip_wput_v6: forcing multirt " 9794 "hop limit to %d (was %d) ", 9795 ip_multirt_ttl, ip6h->ip6_hops)); 9796 pr_addr_dbg("v6dst %s\n", AF_INET6, 9797 &ire->ire_addr_v6); 9798 } 9799 ip6h->ip6_hops = ip_multirt_ttl; 9800 } 9801 9802 /* 9803 * We look at this point if there are pending 9804 * unresolved routes. ire_multirt_need_resolve_v6() 9805 * checks in O(n) that all IRE_OFFSUBNET ire 9806 * entries for the packet's destination and 9807 * flagged RTF_MULTIRT are currently resolved. 9808 * If some remain unresolved, we make a copy 9809 * of the current message. It will be used 9810 * to initiate additional route resolutions. 9811 */ 9812 multirt_need_resolve = 9813 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9814 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9815 "multirt_need_resolve %d, first_mp %p\n", 9816 (void *)ire, multirt_need_resolve, 9817 (void *)first_mp)); 9818 if (multirt_need_resolve) { 9819 copy_mp = copymsg(first_mp); 9820 if (copy_mp != NULL) { 9821 MULTIRT_DEBUG_TAG(copy_mp); 9822 } 9823 } 9824 } 9825 9826 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9827 ill->ill_name, (void *)ire, 9828 ill->ill_phyint->phyint_ifindex)); 9829 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9830 connp, caller, 9831 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 9832 ip6i_flags); 9833 ire_refrele(ire); 9834 if (need_decref) { 9835 CONN_DEC_REF(connp); 9836 connp = NULL; 9837 } 9838 9839 /* 9840 * Try to resolve another multiroute if 9841 * ire_multirt_need_resolve_v6() deemed it necessary. 9842 * copy_mp will be consumed (sent or freed) by 9843 * ip_newroute_[ipif_]v6(). 
9844 */ 9845 if (copy_mp != NULL) { 9846 if (mctl_present) { 9847 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9848 } else { 9849 ip6h = (ip6_t *)copy_mp->b_rptr; 9850 } 9851 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 9852 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 9853 zoneid); 9854 if (ipif == NULL) { 9855 ip1dbg(("ip_wput_v6: No ipif for " 9856 "multicast\n")); 9857 MULTIRT_DEBUG_UNTAG(copy_mp); 9858 freemsg(copy_mp); 9859 return; 9860 } 9861 ip_newroute_ipif_v6(q, copy_mp, ipif, 9862 ip6h->ip6_dst, unspec_src, zoneid); 9863 ipif_refrele(ipif); 9864 } else { 9865 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9866 &ip6h->ip6_src, ill, zoneid); 9867 } 9868 } 9869 if (ill != NULL) 9870 ill_refrele(ill); 9871 return; 9872 } 9873 if (need_decref) { 9874 CONN_DEC_REF(connp); 9875 connp = NULL; 9876 } 9877 9878 /* Update rptr if there was an ip6i_t header. */ 9879 if (ip6i != NULL) 9880 mp->b_rptr -= sizeof (ip6i_t); 9881 if (unspec_src || attach_if) { 9882 if (ip6i == NULL) { 9883 /* 9884 * Add ip6i_t header to carry unspec_src 9885 * or attach_if until the packet comes back in 9886 * ip_wput_v6. 9887 */ 9888 if (mctl_present) { 9889 first_mp->b_cont = 9890 ip_add_info_v6(mp, NULL, v6dstp); 9891 mp = first_mp->b_cont; 9892 if (mp == NULL) 9893 freeb(first_mp); 9894 } else { 9895 first_mp = mp = ip_add_info_v6(mp, NULL, 9896 v6dstp); 9897 } 9898 if (mp == NULL) { 9899 BUMP_MIB(mibptr, ipv6OutDiscards); 9900 if (ill != NULL) 9901 ill_refrele(ill); 9902 return; 9903 } 9904 ip6i = (ip6i_t *)mp->b_rptr; 9905 if ((mp->b_wptr - (uchar_t *)ip6i) == 9906 sizeof (ip6i_t)) { 9907 /* 9908 * ndp_resolver called from ip_newroute_v6 9909 * expects a pulled up message. 
9910 */ 9911 if (!pullupmsg(mp, -1)) { 9912 ip1dbg(("ip_wput_v6: pullupmsg" 9913 " failed\n")); 9914 BUMP_MIB(mibptr, ipv6OutDiscards); 9915 freemsg(first_mp); 9916 return; 9917 } 9918 ip6i = (ip6i_t *)mp->b_rptr; 9919 } 9920 ip6h = (ip6_t *)&ip6i[1]; 9921 v6dstp = &ip6h->ip6_dst; 9922 } 9923 if (unspec_src) 9924 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9925 if (attach_if) { 9926 /* 9927 * Bind to nofailover/BOUND_PIF overrides ifindex. 9928 */ 9929 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 9930 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 9931 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 9932 if (drop_if_delayed) { 9933 /* This is a multipathing probe packet */ 9934 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 9935 } 9936 } 9937 if (mctl_present) { 9938 ASSERT(io != NULL); 9939 io->ipsec_out_unspec_src = unspec_src; 9940 } 9941 } 9942 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 9943 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 9944 unspec_src, zoneid); 9945 } else { 9946 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 9947 zoneid); 9948 } 9949 if (ill != NULL) 9950 ill_refrele(ill); 9951 return; 9952 9953 notv6: 9954 /* 9955 * XXX implement a IPv4 and IPv6 packet counter per conn and 9956 * switch when ratio exceeds e.g. 
10:1 9957 */ 9958 if (q->q_next == NULL) { 9959 connp = Q_TO_CONN(q); 9960 9961 if (IS_TCP_CONN(connp)) { 9962 /* change conn_send for the tcp_v4_connections */ 9963 connp->conn_send = ip_output; 9964 } else if (connp->conn_ulp == IPPROTO_SCTP) { 9965 /* The 'q' is the default SCTP queue */ 9966 connp = (conn_t *)arg; 9967 } else { 9968 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 9969 } 9970 } 9971 BUMP_MIB(mibptr, ipv6OutIPv4); 9972 (void) ip_output(connp, first_mp, q, caller); 9973 if (ill != NULL) 9974 ill_refrele(ill); 9975 } 9976 /* ip_wput_v6: thin wrapper around ip_output_v6(). Passes the conn derived from q via Q_TO_CONN(q), the message mp, the queue q, and IP_WPUT as the caller tag; returns nothing. */ 9977 static void 9978 ip_wput_v6(queue_t *q, mblk_t *mp) 9979 { 9980 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 9981 } 9982 /* ipsec_out_attach_if: flag an ipsec_out_t as interface-attached. Asserts that io->ipsec_out_type is IPSEC_OUT, then sets ipsec_out_attach_if to B_TRUE and stores attach_index in ipsec_out_ill_index. */ 9983 static void 9984 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 9985 { 9986 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9987 io->ipsec_out_attach_if = B_TRUE; 9988 io->ipsec_out_ill_index = attach_index; 9989 } 9990 9991 /* 9992 * NULL send-to queue - packet is to be delivered locally. 9993 */ 9994 void 9995 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 9996 ire_t *ire, int fanout_flags) 9997 { 9998 uint32_t ports; 9999 mblk_t *mp = first_mp, *first_mp1; 10000 boolean_t mctl_present; 10001 uint8_t nexthdr; 10002 uint16_t hdr_length = IPV6_HDR_LEN; 10003 ipsec_out_t *io; 10004 mib2_ipv6IfStatsEntry_t *mibptr; 10005 ilm_t *ilm; 10006 10007 if (DB_TYPE(mp) == M_CTL) { 10008 io = (ipsec_out_t *)mp->b_rptr; 10009 if (!io->ipsec_out_secure) { 10010 mp = mp->b_cont; 10011 freeb(first_mp); 10012 first_mp = mp; 10013 mctl_present = B_FALSE; 10014 } else { 10015 mctl_present = B_TRUE; 10016 mp = first_mp->b_cont; 10017 ipsec_out_to_in(first_mp); 10018 } 10019 } else { 10020 mctl_present = B_FALSE; 10021 } 10022 10023 nexthdr = ip6h->ip6_nxt; 10024 mibptr = ill->ill_ip6_mib; 10025 10026 UPDATE_OB_PKT_COUNT(ire); 10027 ire->ire_last_used_time = lbolt; 10028 10029 /* 10030 * Remove reacability confirmation bit from version field 10031 * before looping back the packet.
10032 */ 10033 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10034 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10035 } 10036 10037 switch (nexthdr) { 10038 case IPPROTO_TCP: 10039 if (DB_TYPE(mp) == M_DATA) { 10040 /* 10041 * M_DATA mblk, so init mblk (chain) for 10042 * no struio(). 10043 */ 10044 mblk_t *mp1 = mp; 10045 10046 do { 10047 mp1->b_datap->db_struioflag = 0; 10048 } while ((mp1 = mp1->b_cont) != NULL); 10049 } 10050 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10051 TCP_PORTS_OFFSET); 10052 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10053 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10054 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10055 hdr_length, mctl_present, ire->ire_zoneid); 10056 return; 10057 10058 case IPPROTO_UDP: 10059 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10060 UDP_PORTS_OFFSET); 10061 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10062 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10063 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10064 return; 10065 10066 case IPPROTO_SCTP: 10067 { 10068 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10069 10070 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10071 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10072 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10073 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10074 ire->ire_zoneid); 10075 return; 10076 } 10077 case IPPROTO_ICMPV6: { 10078 icmp6_t *icmp6; 10079 10080 /* check for full IPv6+ICMPv6 header */ 10081 if ((mp->b_wptr - mp->b_rptr) < 10082 (hdr_length + ICMP6_MINLEN)) { 10083 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10084 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10085 " failed\n")); 10086 BUMP_MIB(mibptr, ipv6OutDiscards); 10087 freemsg(first_mp); 10088 return; 10089 } 10090 ip6h = (ip6_t *)mp->b_rptr; 10091 } 10092 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10093 10094 /* Update output mib stats */ 10095 icmp_update_out_mib_v6(ill, icmp6); 10096 10097 /* Check variable for testing applications */ 10098 if 
(ipv6_drop_inbound_icmpv6) { 10099 freemsg(first_mp); 10100 return; 10101 } 10102 /* 10103 * Assume that there is always at least one conn for 10104 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10105 * where there is no conn. 10106 */ 10107 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10108 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10109 /* 10110 * In the multicast case, applications may have 10111 * joined the group from different zones, so we 10112 * need to deliver the packet to each of them. 10113 * Loop through the multicast memberships 10114 * structures (ilm) on the receive ill and send 10115 * a copy of the packet up each matching one. 10116 * However, we don't do this for multicasts sent 10117 * on the loopback interface (PHYI_LOOPBACK flag 10118 * set) as they must stay in the sender's zone. 10119 */ 10120 ILM_WALKER_HOLD(ill); 10121 for (ilm = ill->ill_ilm; ilm != NULL; 10122 ilm = ilm->ilm_next) { 10123 if (ilm->ilm_flags & ILM_DELETED) 10124 continue; 10125 if (!IN6_ARE_ADDR_EQUAL( 10126 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10127 continue; 10128 if ((fanout_flags & 10129 IP_FF_NO_MCAST_LOOP) && 10130 ilm->ilm_zoneid == ire->ire_zoneid) 10131 continue; 10132 if (!ipif_lookup_zoneid(ill, 10133 ilm->ilm_zoneid, IPIF_UP, NULL)) 10134 continue; 10135 10136 first_mp1 = ip_copymsg(first_mp); 10137 if (first_mp1 == NULL) 10138 continue; 10139 icmp_inbound_v6(q, first_mp1, ill, 10140 hdr_length, mctl_present, 10141 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10142 } 10143 ILM_WALKER_RELE(ill); 10144 } else { 10145 first_mp1 = ip_copymsg(first_mp); 10146 if (first_mp1 != NULL) 10147 icmp_inbound_v6(q, first_mp1, ill, 10148 hdr_length, mctl_present, 10149 IP6_NO_IPPOLICY, ire->ire_zoneid); 10150 } 10151 } 10152 /* FALLTHRU */ 10153 default: { 10154 /* 10155 * Handle protocols with which IPv6 is less intimate. 
10156 */ 10157 uint8_t *nexthdrp; 10158 uint_t nexthdr_offset; 10159 10160 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10161 10162 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10163 &hdr_length, &nexthdrp)) { 10164 /* Malformed packet */ 10165 BUMP_MIB(mibptr, ipv6OutDiscards); 10166 freemsg(first_mp); 10167 return; 10168 } 10169 nexthdr = *nexthdrp; 10170 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10171 10172 /* 10173 * Enable sending ICMP for "Unknown" nexthdr 10174 * case. i.e. where we did not FALLTHRU from 10175 * IPPROTO_ICMPV6 processing case above. 10176 */ 10177 if (nexthdr != IPPROTO_ICMPV6) 10178 fanout_flags |= IP_FF_SEND_ICMP; 10179 /* 10180 * Note: There can be more than one stream bound 10181 * to a particular protocol. When this is the case, 10182 * each one gets a copy of any incoming packets. 10183 */ 10184 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10185 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10186 mctl_present, ire->ire_zoneid); 10187 return; 10188 } 10189 } 10190 } 10191 10192 /* 10193 * Send packet using IRE. 10194 * Checksumming is controlled by cksum_request: 10195 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10196 * 1 => Skip TCP/UDP/SCTP checksum 10197 * Otherwise => checksum_request contains insert offset for checksum 10198 * 10199 * Assumes that the following set of headers appear in the first 10200 * mblk: 10201 * ip6_t 10202 * Any extension headers 10203 * TCP/UDP/SCTP header (if present) 10204 * The routine can handle an ICMPv6 header that is not in the first mblk. 10205 * 10206 * NOTE : This function does not ire_refrele the ire passed in as the 10207 * argument unlike ip_wput_ire where the REFRELE is done. 10208 * Refer to ip_wput_ire for more on this. 
10209 */ 10210 static void 10211 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10212 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10213 { 10214 ip6_t *ip6h; 10215 uint8_t nexthdr; 10216 uint16_t hdr_length; 10217 uint_t reachable = 0x0; 10218 ill_t *ill; 10219 mib2_ipv6IfStatsEntry_t *mibptr; 10220 mblk_t *first_mp; 10221 boolean_t mctl_present; 10222 ipsec_out_t *io; 10223 boolean_t conn_dontroute; /* conn value for multicast */ 10224 boolean_t conn_multicast_loop; /* conn value for multicast */ 10225 boolean_t multicast_forward; /* Should we forward ? */ 10226 int max_frag; 10227 zoneid_t zoneid; 10228 10229 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10230 ill = ire_to_ill(ire); 10231 first_mp = mp; 10232 multicast_forward = B_FALSE; 10233 10234 if (mp->b_datap->db_type != M_CTL) { 10235 ip6h = (ip6_t *)first_mp->b_rptr; 10236 } else { 10237 io = (ipsec_out_t *)first_mp->b_rptr; 10238 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10239 /* 10240 * Grab the zone id now because the M_CTL can be discarded by 10241 * ip_wput_ire_parse_ipsec_out() below. 10242 */ 10243 zoneid = io->ipsec_out_zoneid; 10244 ASSERT(zoneid != ALL_ZONES); 10245 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10246 /* 10247 * For the multicast case, ipsec_out carries conn_dontroute and 10248 * conn_multicast_loop as conn may not be available here. We 10249 * need this for multicast loopback and forwarding which is done 10250 * later in the code. 10251 */ 10252 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10253 conn_dontroute = io->ipsec_out_dontroute; 10254 conn_multicast_loop = io->ipsec_out_multicast_loop; 10255 /* 10256 * If conn_dontroute is not set or conn_multicast_loop 10257 * is set, we need to do forwarding/loopback. For 10258 * datagrams from ip_wput_multicast, conn_dontroute is 10259 * set to B_TRUE and conn_multicast_loop is set to 10260 * B_FALSE so that we neither do forwarding nor 10261 * loopback. 
10262 */ 10263 if (!conn_dontroute || conn_multicast_loop) 10264 multicast_forward = B_TRUE; 10265 } 10266 } 10267 10268 /* 10269 * If the sender didn't supply the hop limit and there is a default 10270 * hop limit associated with the output interface, we use that. 10271 * Interface specific hop limits as set via the SIOCSLIFLNKINFO 10272 * ioctl. 10273 */ 10274 if (!(flags & IP6I_HOPLIMIT) && ill->ill_max_hops != 0) 10275 ip6h->ip6_hops = ill->ill_max_hops; 10276 10277 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid) { 10278 /* 10279 * When a zone sends a packet to another zone, we try to deliver 10280 * the packet under the same conditions as if the destination 10281 * was a real node on the network. To do so, we look for a 10282 * matching route in the forwarding table. 10283 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10284 * ip_newroute_v6() does. 10285 */ 10286 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10287 NULL, NULL, zoneid, 0, (MATCH_IRE_RECURSIVE | 10288 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10289 if (src_ire != NULL && 10290 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10291 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10292 !unspec_src) { 10293 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10294 } 10295 ire_refrele(src_ire); 10296 } else { 10297 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10298 if (src_ire != NULL) { 10299 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10300 ire_refrele(src_ire); 10301 freemsg(first_mp); 10302 return; 10303 } 10304 ire_refrele(src_ire); 10305 } 10306 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10307 /* Failed */ 10308 freemsg(first_mp); 10309 return; 10310 } 10311 icmp_unreachable_v6(q, first_mp, 10312 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10313 return; 10314 } 10315 } 10316 10317 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10318 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10319 connp, unspec_src); 10320 if (mp == NULL) 
{ 10321 return; 10322 } 10323 } 10324 10325 first_mp = mp; 10326 if (mp->b_datap->db_type == M_CTL) { 10327 io = (ipsec_out_t *)mp->b_rptr; 10328 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10329 mp = mp->b_cont; 10330 mctl_present = B_TRUE; 10331 } else { 10332 mctl_present = B_FALSE; 10333 } 10334 10335 ip6h = (ip6_t *)mp->b_rptr; 10336 nexthdr = ip6h->ip6_nxt; 10337 mibptr = ill->ill_ip6_mib; 10338 10339 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10340 /* 10341 * The ire_src_addr_v6 always contains a useable source address 10342 * for the destination (based on source address selection rules 10343 * with respect to address scope as well as deprecated vs. 10344 * preferred addresses). 10345 */ 10346 ip6h->ip6_src = ire->ire_src_addr_v6; 10347 } 10348 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10349 if ((connp != NULL && connp->conn_multicast_loop) || 10350 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10351 ilm_t *ilm; 10352 10353 ILM_WALKER_HOLD(ill); 10354 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10355 ILM_WALKER_RELE(ill); 10356 if (ilm != NULL) { 10357 mblk_t *nmp; 10358 int fanout_flags = 0; 10359 10360 if (connp != NULL && 10361 !connp->conn_multicast_loop) { 10362 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10363 } 10364 ip1dbg(("ip_wput_ire_v6: " 10365 "Loopback multicast\n")); 10366 nmp = ip_copymsg(first_mp); 10367 if (nmp != NULL) { 10368 ip6_t *nip6h; 10369 10370 if (mctl_present) { 10371 nip6h = (ip6_t *) 10372 nmp->b_cont->b_rptr; 10373 } else { 10374 nip6h = (ip6_t *)nmp->b_rptr; 10375 } 10376 /* 10377 * Deliver locally and to every local 10378 * zone, except the sending zone when 10379 * IPV6_MULTICAST_LOOP is disabled. 
10380 */ 10381 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10382 ire, fanout_flags); 10383 } else { 10384 BUMP_MIB(mibptr, ipv6OutDiscards); 10385 ip1dbg(("ip_wput_ire_v6: " 10386 "copymsg failed\n")); 10387 } 10388 } 10389 } 10390 if (ip6h->ip6_hops == 0 || 10391 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10392 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10393 /* 10394 * Local multicast or just loopback on loopback 10395 * interface. 10396 */ 10397 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10398 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10399 freemsg(first_mp); 10400 return; 10401 } 10402 } 10403 10404 /* Fastpath */ 10405 switch (nexthdr) { 10406 case IPPROTO_TCP: 10407 case IPPROTO_UDP: 10408 case IPPROTO_ICMPV6: 10409 case IPPROTO_SCTP: 10410 hdr_length = IPV6_HDR_LEN; 10411 break; 10412 default: { 10413 uint8_t *nexthdrp; 10414 10415 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10416 &hdr_length, &nexthdrp)) { 10417 /* Malformed packet */ 10418 BUMP_MIB(mibptr, ipv6OutDiscards); 10419 freemsg(first_mp); 10420 return; 10421 } 10422 nexthdr = *nexthdrp; 10423 break; 10424 } 10425 } 10426 10427 if (ire->ire_stq != NULL) { 10428 uint32_t sum; 10429 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10430 ill_phyint->phyint_ifindex; 10431 10432 /* 10433 * non-NULL send-to queue - packet is to be sent 10434 * out an interface. 10435 */ 10436 10437 /* 10438 * Look for reachability confirmations from the transport. 10439 */ 10440 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10441 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10442 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10443 if (mctl_present) 10444 io->ipsec_out_reachable = B_TRUE; 10445 } 10446 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10447 uint16_t *up; 10448 uint16_t *insp; 10449 10450 /* 10451 * The packet header is processed once for all, even 10452 * in the multirouting case. 
We disable hardware 10453 * checksum if the packet is multirouted, as it will be 10454 * replicated via several interfaces, and not all of 10455 * them may have this capability. 10456 */ 10457 if (cksum_request == 1 && 10458 !(ire->ire_flags & RTF_MULTIRT)) { 10459 /* Skip the transport checksum */ 10460 goto cksum_done; 10461 } 10462 /* 10463 * Do user-configured raw checksum. 10464 * Compute checksum and insert at offset "cksum_request" 10465 */ 10466 10467 /* check for enough headers for checksum */ 10468 cksum_request += hdr_length; /* offset from rptr */ 10469 if ((mp->b_wptr - mp->b_rptr) < 10470 (cksum_request + sizeof (int16_t))) { 10471 if (!pullupmsg(mp, 10472 cksum_request + sizeof (int16_t))) { 10473 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10474 " failed\n")); 10475 BUMP_MIB(mibptr, ipv6OutDiscards); 10476 freemsg(first_mp); 10477 return; 10478 } 10479 ip6h = (ip6_t *)mp->b_rptr; 10480 } 10481 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10482 ASSERT(((uintptr_t)insp & 0x1) == 0); 10483 up = (uint16_t *)&ip6h->ip6_src; 10484 /* 10485 * icmp has placed length and routing 10486 * header adjustment in *insp. 10487 */ 10488 sum = htons(nexthdr) + 10489 up[0] + up[1] + up[2] + up[3] + 10490 up[4] + up[5] + up[6] + up[7] + 10491 up[8] + up[9] + up[10] + up[11] + 10492 up[12] + up[13] + up[14] + up[15]; 10493 sum = (sum & 0xffff) + (sum >> 16); 10494 *insp = IP_CSUM(mp, hdr_length, sum); 10495 } else if (nexthdr == IPPROTO_TCP) { 10496 uint16_t *up; 10497 10498 /* 10499 * Check for full IPv6 header + enough TCP header 10500 * to get at the checksum field. 10501 * XXX need hardware checksum support. 
10502 */ 10503 #define TCP_CSUM_OFFSET 16 10504 #define TCP_CSUM_SIZE 2 10505 if ((mp->b_wptr - mp->b_rptr) < 10506 (hdr_length + TCP_CSUM_OFFSET + TCP_CSUM_SIZE)) { 10507 if (!pullupmsg(mp, hdr_length + 10508 TCP_CSUM_OFFSET + TCP_CSUM_SIZE)) { 10509 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10510 " failed\n")); 10511 BUMP_MIB(mibptr, ipv6OutDiscards); 10512 freemsg(first_mp); 10513 return; 10514 } 10515 ip6h = (ip6_t *)mp->b_rptr; 10516 } 10517 10518 up = (uint16_t *)&ip6h->ip6_src; 10519 /* 10520 * Note: The TCP module has stored the length value 10521 * into the tcp checksum field, so we don't 10522 * need to explicitly sum it in here. 10523 */ 10524 if (hdr_length == IPV6_HDR_LEN) { 10525 /* src, dst, tcp consequtive */ 10526 up = (uint16_t *)(((uchar_t *)ip6h) + 10527 IPV6_HDR_LEN + TCP_CSUM_OFFSET); 10528 *up = IP_CSUM(mp, 10529 IPV6_HDR_LEN - 2 * sizeof (in6_addr_t), 10530 htons(IPPROTO_TCP)); 10531 } else { 10532 sum = htons(IPPROTO_TCP) + 10533 up[0] + up[1] + up[2] + up[3] + 10534 up[4] + up[5] + up[6] + up[7] + 10535 up[8] + up[9] + up[10] + up[11] + 10536 up[12] + up[13] + up[14] + up[15]; 10537 /* 10538 * Fold the initial sum. 
10539 */ 10540 sum = (sum & 0xffff) + (sum >> 16); 10541 up = (uint16_t *)(((uchar_t *)ip6h) + 10542 hdr_length + TCP_CSUM_OFFSET); 10543 *up = IP_CSUM(mp, hdr_length, sum); 10544 } 10545 #undef TCP_CSUM_OFFSET 10546 #undef TCP_CSUM_SIZE 10547 10548 } else if (nexthdr == IPPROTO_UDP) { 10549 uint16_t *up; 10550 10551 /* 10552 * check for full IPv6 header + enough UDP header 10553 * to get at the UDP checksum field 10554 */ 10555 #define UDP_CSUM_OFFSET 6 10556 #define UDP_CSUM_SIZE 2 10557 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10558 UDP_CSUM_OFFSET + UDP_CSUM_SIZE)) { 10559 if (!pullupmsg(mp, hdr_length + 10560 UDP_CSUM_OFFSET + UDP_CSUM_SIZE)) { 10561 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10562 " failed\n")); 10563 BUMP_MIB(mibptr, ipv6OutDiscards); 10564 freemsg(first_mp); 10565 return; 10566 } 10567 ip6h = (ip6_t *)mp->b_rptr; 10568 } 10569 up = (uint16_t *)&ip6h->ip6_src; 10570 /* 10571 * Note: The UDP module has stored the length value 10572 * into the udp checksum field, so we don't 10573 * need to explicitly sum it in here. 10574 */ 10575 if (hdr_length == IPV6_HDR_LEN) { 10576 /* src, dst, udp consequtive */ 10577 up = (uint16_t *)(((uchar_t *)ip6h) + 10578 IPV6_HDR_LEN + UDP_CSUM_OFFSET); 10579 *up = IP_CSUM(mp, 10580 IPV6_HDR_LEN - 2 * sizeof (in6_addr_t), 10581 htons(IPPROTO_UDP)); 10582 } else { 10583 sum = htons(IPPROTO_UDP) + 10584 up[0] + up[1] + up[2] + up[3] + 10585 up[4] + up[5] + up[6] + up[7] + 10586 up[8] + up[9] + up[10] + up[11] + 10587 up[12] + up[13] + up[14] + up[15]; 10588 sum = (sum & 0xffff) + (sum >> 16); 10589 up = (uint16_t *)(((uchar_t *)ip6h) + 10590 hdr_length + UDP_CSUM_OFFSET); 10591 *up = IP_CSUM(mp, hdr_length, sum); 10592 } 10593 10594 /* 10595 * According to RFC 2460, UDP in IPv6 shouldn't 10596 * appear with all zero checksum on the wire and 10597 * should be changed to 0xffff. 
10598 */ 10599 if (*up == 0) 10600 *up = 0xffff; 10601 #undef UDP_CSUM_OFFSET 10602 #undef UDP_CSUM_SIZE 10603 } else if (nexthdr == IPPROTO_ICMPV6) { 10604 uint16_t *up; 10605 icmp6_t *icmp6; 10606 10607 /* check for full IPv6+ICMPv6 header */ 10608 if ((mp->b_wptr - mp->b_rptr) < 10609 (hdr_length + ICMP6_MINLEN)) { 10610 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10611 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10612 " failed\n")); 10613 BUMP_MIB(mibptr, ipv6OutDiscards); 10614 freemsg(first_mp); 10615 return; 10616 } 10617 ip6h = (ip6_t *)mp->b_rptr; 10618 } 10619 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10620 up = (uint16_t *)&ip6h->ip6_src; 10621 /* 10622 * icmp has placed length and routing 10623 * header adjustment in icmp6_cksum. 10624 */ 10625 sum = htons(IPPROTO_ICMPV6) + 10626 up[0] + up[1] + up[2] + up[3] + 10627 up[4] + up[5] + up[6] + up[7] + 10628 up[8] + up[9] + up[10] + up[11] + 10629 up[12] + up[13] + up[14] + up[15]; 10630 sum = (sum & 0xffff) + (sum >> 16); 10631 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10632 /* Update output mib stats */ 10633 icmp_update_out_mib_v6(ill, icmp6); 10634 } else if (nexthdr == IPPROTO_SCTP) { 10635 sctp_hdr_t *sctph; 10636 10637 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10638 if (!pullupmsg(mp, hdr_length + 10639 sizeof (*sctph))) { 10640 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10641 " failed\n")); 10642 BUMP_MIB(ill->ill_ip6_mib, 10643 ipv6OutDiscards); 10644 freemsg(mp); 10645 return; 10646 } 10647 ip6h = (ip6_t *)mp->b_rptr; 10648 } 10649 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10650 sctph->sh_chksum = 0; 10651 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10652 } 10653 10654 cksum_done: 10655 /* 10656 * We force the insertion of a fragment header using the 10657 * IPH_FRAG_HDR flag in two cases: 10658 * - after reception of an ICMPv6 "packet too big" message 10659 * with a MTU < 1280 (cf. 
RFC 2460 section 5) 10660 * - for multirouted IPv6 packets, so that the receiver can 10661 * discard duplicates according to their fragment identifier 10662 * 10663 * Two flags modifed from the API can modify this behavior. 10664 * The first is IPV6_USE_MIN_MTU. With this API the user 10665 * can specify how to manage PMTUD for unicast and multicast. 10666 * 10667 * IPV6_DONTFRAG disallows fragmentation. 10668 */ 10669 max_frag = ire->ire_max_frag; 10670 switch (IP6I_USE_MIN_MTU_API(flags)) { 10671 case IPV6_USE_MIN_MTU_DEFAULT: 10672 case IPV6_USE_MIN_MTU_UNICAST: 10673 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10674 max_frag = IPV6_MIN_MTU; 10675 } 10676 break; 10677 10678 case IPV6_USE_MIN_MTU_NEVER: 10679 max_frag = IPV6_MIN_MTU; 10680 break; 10681 } 10682 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10683 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10684 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10685 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10686 max_frag, B_FALSE, B_TRUE); 10687 return; 10688 } 10689 10690 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10691 (mp->b_cont ? 
msgdsize(mp) : 10692 mp->b_wptr - (uchar_t *)ip6h)) { 10693 ip0dbg(("Packet length mismatch: %d, %ld\n", 10694 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10695 msgdsize(mp))); 10696 freemsg(first_mp); 10697 return; 10698 } 10699 /* Do IPSEC processing first */ 10700 if (mctl_present) { 10701 if (attach_index != 0) 10702 ipsec_out_attach_if(io, attach_index); 10703 ipsec_out_process(q, first_mp, ire, ill_index); 10704 return; 10705 } 10706 ASSERT(mp->b_prev == NULL); 10707 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10708 ntohs(ip6h->ip6_plen) + 10709 IPV6_HDR_LEN, max_frag)); 10710 ASSERT(mp == first_mp); 10711 /* Initiate IPPF processing */ 10712 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 10713 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10714 if (mp == NULL) { 10715 return; 10716 } 10717 } 10718 ip_wput_frag_v6(mp, ire, reachable, connp, 10719 caller, max_frag); 10720 return; 10721 } 10722 /* Do IPSEC processing first */ 10723 if (mctl_present) { 10724 int extra_len = ipsec_out_extra_length(first_mp); 10725 10726 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 10727 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 10728 /* 10729 * IPsec headers will push the packet over the 10730 * MTU limit. Issue an ICMPv6 Packet Too Big 10731 * message for this packet if the upper-layer 10732 * that issued this packet will be able to 10733 * react to the icmp_pkt2big_v6() that we'll 10734 * generate. 10735 */ 10736 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10737 max_frag, B_FALSE, B_TRUE); 10738 return; 10739 } 10740 if (attach_index != 0) 10741 ipsec_out_attach_if(io, attach_index); 10742 ipsec_out_process(q, first_mp, ire, ill_index); 10743 return; 10744 } 10745 /* 10746 * XXX multicast: add ip_mforward_v6() here. 10747 * Check conn_dontroute 10748 */ 10749 #ifdef lint 10750 /* 10751 * XXX The only purpose of this statement is to avoid lint 10752 * errors. See the above "XXX multicast". When that gets 10753 * fixed, remove this whole #ifdef lint section. 
10754 */ 10755 ip3dbg(("multicast forward is %s.\n", 10756 (multicast_forward ? "TRUE" : "FALSE"))); 10757 #endif 10758 10759 UPDATE_OB_PKT_COUNT(ire); 10760 ire->ire_last_used_time = lbolt; 10761 ASSERT(mp == first_mp); 10762 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 10763 } else { 10764 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 10765 } 10766 } 10767 10768 /* 10769 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 10770 * We have not optimized this in terms of number of mblks 10771 * allocated. For instance, for each fragment sent we always allocate a 10772 * mblk to hold the IPv6 header and fragment header. 10773 * 10774 * Assumes that all the extension headers are contained in the first mblk. 10775 * 10776 * The fragment header is inserted after an hop-by-hop options header 10777 * and after [an optional destinations header followed by] a routing header. 10778 * 10779 * NOTE : This function does not ire_refrele the ire passed in as 10780 * the argument. 10781 */ 10782 void 10783 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 10784 boolean_t caller, int max_frag) 10785 { 10786 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 10787 ip6_t *fip6h; 10788 mblk_t *hmp; 10789 mblk_t *hmp0; 10790 mblk_t *dmp; 10791 ip6_frag_t *fraghdr; 10792 size_t unfragmentable_len; 10793 size_t len; 10794 size_t mlen; 10795 size_t max_chunk; 10796 uint32_t ident; 10797 uint16_t off_flags; 10798 uint16_t offset = 0; 10799 ill_t *ill; 10800 uint8_t nexthdr; 10801 uint_t prev_nexthdr_offset; 10802 uint8_t *ptr; 10803 10804 ASSERT(ire->ire_type == IRE_CACHE); 10805 ill = (ill_t *)ire->ire_stq->q_ptr; 10806 10807 /* 10808 * Determine the length of the unfragmentable portion of this 10809 * datagram. This consists of the IPv6 header, a potential 10810 * hop-by-hop options header, a potential pre-routing-header 10811 * destination options header, and a potential routing header. 
10812 */ 10813 nexthdr = ip6h->ip6_nxt; 10814 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 10815 ptr = (uint8_t *)&ip6h[1]; 10816 10817 if (nexthdr == IPPROTO_HOPOPTS) { 10818 ip6_hbh_t *hbh_hdr; 10819 uint_t hdr_len; 10820 10821 hbh_hdr = (ip6_hbh_t *)ptr; 10822 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 10823 nexthdr = hbh_hdr->ip6h_nxt; 10824 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 10825 - (uint8_t *)ip6h; 10826 ptr += hdr_len; 10827 } 10828 if (nexthdr == IPPROTO_DSTOPTS) { 10829 ip6_dest_t *dest_hdr; 10830 uint_t hdr_len; 10831 10832 dest_hdr = (ip6_dest_t *)ptr; 10833 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 10834 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 10835 nexthdr = dest_hdr->ip6d_nxt; 10836 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 10837 - (uint8_t *)ip6h; 10838 ptr += hdr_len; 10839 } 10840 } 10841 if (nexthdr == IPPROTO_ROUTING) { 10842 ip6_rthdr_t *rthdr; 10843 uint_t hdr_len; 10844 10845 rthdr = (ip6_rthdr_t *)ptr; 10846 nexthdr = rthdr->ip6r_nxt; 10847 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 10848 - (uint8_t *)ip6h; 10849 hdr_len = 8 * (rthdr->ip6r_len + 1); 10850 ptr += hdr_len; 10851 } 10852 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 10853 10854 /* 10855 * Allocate an mblk with enough room for the link-layer 10856 * header, the unfragmentable part of the datagram, and the 10857 * fragment header. This (or a copy) will be used as the 10858 * first mblk for each fragment we send. 
10859 */ 10860 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 10861 BPRI_HI); 10862 if (hmp == NULL) { 10863 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10864 freemsg(mp); 10865 return; 10866 } 10867 hmp->b_rptr += ip_wroff_extra; 10868 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 10869 10870 fip6h = (ip6_t *)hmp->b_rptr; 10871 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 10872 10873 bcopy(ip6h, fip6h, unfragmentable_len); 10874 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 10875 10876 ident = atomic_add_32_nv(&ire->ire_ident, 1); 10877 10878 fraghdr->ip6f_nxt = nexthdr; 10879 fraghdr->ip6f_reserved = 0; 10880 fraghdr->ip6f_offlg = htons(0); 10881 fraghdr->ip6f_ident = htonl(ident); 10882 10883 /* 10884 * len is the total length of the fragmentable data in this 10885 * datagram. For each fragment sent, we will decrement len 10886 * by the amount of fragmentable data sent in that fragment 10887 * until len reaches zero. 10888 */ 10889 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 10890 10891 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 10892 sizeof (ip6_frag_t)) & ~7; 10893 10894 /* 10895 * Move read ptr past unfragmentable portion, we don't want this part 10896 * of the data in our fragments. 
10897 */ 10898 mp->b_rptr += unfragmentable_len; 10899 10900 while (len != 0) { 10901 mlen = MIN(len, max_chunk); 10902 len -= mlen; 10903 if (len != 0) { 10904 /* Not last */ 10905 hmp0 = copyb(hmp); 10906 if (hmp0 == NULL) { 10907 freeb(hmp); 10908 freemsg(mp); 10909 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10910 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 10911 return; 10912 } 10913 off_flags = IP6F_MORE_FRAG; 10914 } else { 10915 /* Last fragment */ 10916 hmp0 = hmp; 10917 hmp = NULL; 10918 off_flags = 0; 10919 } 10920 fip6h = (ip6_t *)(hmp0->b_rptr); 10921 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 10922 10923 fip6h->ip6_plen = htons((uint16_t)(mlen + 10924 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 10925 /* 10926 * Note: Optimization alert. 10927 * In IPv6 (and IPv4) protocol header, Fragment Offset 10928 * ("offset") is 13 bits wide and in 8-octet units. 10929 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 10930 * it occupies the most significant 13 bits. 10931 * (least significant 13 bits in IPv4). 10932 * We do not do any shifts here. Not shifting is same effect 10933 * as taking offset value in octet units, dividing by 8 and 10934 * then shifting 3 bits left to line it up in place in proper 10935 * place protocol header. 
10936 */ 10937 fraghdr->ip6f_offlg = htons(offset) | off_flags; 10938 10939 if (!(dmp = ip_carve_mp(&mp, mlen))) { 10940 /* mp has already been freed by ip_carve_mp() */ 10941 if (hmp != NULL) 10942 freeb(hmp); 10943 freeb(hmp0); 10944 ip1dbg(("ip_carve_mp: failed\n")); 10945 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10946 return; 10947 } 10948 hmp0->b_cont = dmp; 10949 /* Get the priority marking, if any */ 10950 hmp0->b_band = dmp->b_band; 10951 UPDATE_OB_PKT_COUNT(ire); 10952 ire->ire_last_used_time = lbolt; 10953 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 10954 caller, NULL); 10955 reachable = 0; /* No need to redo state machine in loop */ 10956 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 10957 offset += mlen; 10958 } 10959 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 10960 } 10961 10962 /* 10963 * Determine if the ill and multicast aspects of that packets 10964 * "matches" the conn. 10965 */ 10966 boolean_t 10967 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 10968 zoneid_t zoneid) 10969 { 10970 ill_t *in_ill; 10971 boolean_t wantpacket = B_TRUE; 10972 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 10973 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 10974 10975 /* 10976 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 10977 * unicast and multicast reception to conn_incoming_ill. 10978 * conn_wantpacket_v6 is called both for unicast and 10979 * multicast. 10980 * 10981 * 1) The unicast copy of the packet can come anywhere in 10982 * the ill group if it is part of the group. Thus, we 10983 * need to check to see whether the ill group matches 10984 * if in_ill is part of a group. 10985 * 10986 * 2) ip_rput does not suppress duplicate multicast packets. 
10987 * If there are two interfaces in a ill group and we have 10988 * 2 applications (conns) joined a multicast group G on 10989 * both the interfaces, ilm_lookup_ill filter in ip_rput 10990 * will give us two packets because we join G on both the 10991 * interfaces rather than nominating just one interface 10992 * for receiving multicast like broadcast above. So, 10993 * we have to call ilg_lookup_ill to filter out duplicate 10994 * copies, if ill is part of a group, to supress duplicates. 10995 */ 10996 in_ill = connp->conn_incoming_ill; 10997 if (in_ill != NULL) { 10998 mutex_enter(&connp->conn_lock); 10999 in_ill = connp->conn_incoming_ill; 11000 mutex_enter(&ill->ill_lock); 11001 /* 11002 * No IPMP, and the packet did not arrive on conn_incoming_ill 11003 * OR, IPMP in use and the packet arrived on an IPMP group 11004 * different from the conn_incoming_ill's IPMP group. 11005 * Reject the packet. 11006 */ 11007 if ((in_ill->ill_group == NULL && in_ill != ill) || 11008 (in_ill->ill_group != NULL && 11009 in_ill->ill_group != ill->ill_group)) { 11010 wantpacket = B_FALSE; 11011 } 11012 mutex_exit(&ill->ill_lock); 11013 mutex_exit(&connp->conn_lock); 11014 if (!wantpacket) 11015 return (B_FALSE); 11016 } 11017 11018 if (connp->conn_multi_router) 11019 return (B_TRUE); 11020 11021 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11022 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11023 /* 11024 * Unicast case: we match the conn only if it's in the specified 11025 * zone. 11026 */ 11027 return (connp->conn_zoneid == zoneid); 11028 } 11029 11030 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11031 connp->conn_zoneid == zoneid) { 11032 /* 11033 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11034 * disabled, therefore we don't dispatch the multicast packet to 11035 * the sending zone. 
11036 */ 11037 return (B_FALSE); 11038 } 11039 11040 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11041 connp->conn_zoneid != zoneid) { 11042 /* 11043 * Multicast packet on the loopback interface: we only match 11044 * conns who joined the group in the specified zone. 11045 */ 11046 return (B_FALSE); 11047 } 11048 11049 mutex_enter(&connp->conn_lock); 11050 wantpacket = 11051 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11052 mutex_exit(&connp->conn_lock); 11053 11054 return (wantpacket); 11055 } 11056 11057 11058 /* 11059 * Transmit a packet and update any NUD state based on the flags 11060 * XXX need to "recover" any ip6i_t when doing putq! 11061 * 11062 * NOTE : This function does not ire_refrele the ire passed in as the 11063 * argument. 11064 */ 11065 void 11066 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11067 int caller, ipsec_out_t *io) 11068 { 11069 mblk_t *mp1; 11070 nce_t *nce = ire->ire_nce; 11071 ill_t *ill; 11072 uint64_t delta; 11073 ip6_t *ip6h; 11074 queue_t *stq = ire->ire_stq; 11075 ire_t *ire1 = NULL; 11076 ire_t *save_ire = ire; 11077 boolean_t multirt_send = B_FALSE; 11078 mblk_t *next_mp = NULL; 11079 11080 ip6h = (ip6_t *)mp->b_rptr; 11081 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11082 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11083 ASSERT(nce != NULL); 11084 ASSERT(mp->b_datap->db_type == M_DATA); 11085 ASSERT(stq != NULL); 11086 11087 ill = ire_to_ill(ire); 11088 if (!ill) { 11089 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11090 freemsg(mp); 11091 return; 11092 } 11093 11094 /* 11095 * If a packet is to be sent out an interface that is a 6to4 11096 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11097 * destination, must be checked to have a 6to4 prefix 11098 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11099 * address configured on the sending interface. 
Otherwise, 11100 * the packet was delivered to this interface in error and the 11101 * packet must be dropped. 11102 */ 11103 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11104 ipif_t *ipif = ill->ill_ipif; 11105 11106 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11107 &ip6h->ip6_dst)) { 11108 if (ip_debug > 2) { 11109 /* ip1dbg */ 11110 pr_addr_dbg("ip_xmit_v6: attempting to " 11111 "send 6to4 addressed IPv6 " 11112 "destination (%s) out the wrong " 11113 "interface.\n", AF_INET6, 11114 &ip6h->ip6_dst); 11115 } 11116 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11117 freemsg(mp); 11118 return; 11119 } 11120 } 11121 11122 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || canput(stq->q_next)) { 11123 uint32_t ill_index; 11124 11125 /* 11126 * In most cases, the emission loop below is entered only 11127 * once. Only in the case where the ire holds the 11128 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11129 * flagged ires in the bucket, and send the packet 11130 * through all crossed RTF_MULTIRT routes. 11131 */ 11132 if (ire->ire_flags & RTF_MULTIRT) { 11133 /* 11134 * Multirouting case. The bucket where ire is stored 11135 * probably holds other RTF_MULTIRT flagged ires 11136 * to the destination. In this call to ip_xmit_v6, 11137 * we attempt to send the packet through all 11138 * those ires. Thus, we first ensure that ire is the 11139 * first RTF_MULTIRT ire in the bucket, 11140 * before walking the ire list. 11141 */ 11142 ire_t *first_ire; 11143 irb_t *irb = ire->ire_bucket; 11144 ASSERT(irb != NULL); 11145 multirt_send = B_TRUE; 11146 11147 /* Make sure we do not omit any multiroute ire. 
*/ 11148 IRB_REFHOLD(irb); 11149 for (first_ire = irb->irb_ire; 11150 first_ire != NULL; 11151 first_ire = first_ire->ire_next) { 11152 if ((first_ire->ire_flags & RTF_MULTIRT) && 11153 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11154 &ire->ire_addr_v6)) && 11155 !(first_ire->ire_marks & 11156 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11157 break; 11158 } 11159 11160 if ((first_ire != NULL) && (first_ire != ire)) { 11161 IRE_REFHOLD(first_ire); 11162 /* ire will be released by the caller */ 11163 ire = first_ire; 11164 nce = ire->ire_nce; 11165 stq = ire->ire_stq; 11166 ill = ire_to_ill(ire); 11167 } 11168 IRB_REFRELE(irb); 11169 } else if (connp != NULL && IS_TCP_CONN(connp) && 11170 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11171 ILL_MDT_USABLE(ill)) { 11172 /* 11173 * This tcp connection was marked as MDT-capable, but 11174 * it has been turned off due changes in the interface. 11175 * Now that the interface support is back, turn it on 11176 * by notifying tcp. We don't directly modify tcp_mdt, 11177 * since we leave all the details to the tcp code that 11178 * knows better. 11179 */ 11180 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11181 11182 if (mdimp == NULL) { 11183 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11184 "connp %p (ENOMEM)\n", (void *)connp)); 11185 } else { 11186 CONN_INC_REF(connp); 11187 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11188 connp, SQTAG_TCP_INPUT_MCTL); 11189 } 11190 } 11191 11192 do { 11193 boolean_t qos_done = B_FALSE; 11194 11195 if (multirt_send) { 11196 irb_t *irb; 11197 /* 11198 * We are in a multiple send case, need to get 11199 * the next ire and make a duplicate of the 11200 * packet. ire1 holds here the next ire to 11201 * process in the bucket. If multirouting is 11202 * expected, any non-RTF_MULTIRT ire that has 11203 * the right destination address is ignored. 
11204 */ 11205 irb = ire->ire_bucket; 11206 ASSERT(irb != NULL); 11207 11208 IRB_REFHOLD(irb); 11209 for (ire1 = ire->ire_next; 11210 ire1 != NULL; 11211 ire1 = ire1->ire_next) { 11212 if (!(ire1->ire_flags & RTF_MULTIRT)) 11213 continue; 11214 if (!IN6_ARE_ADDR_EQUAL( 11215 &ire1->ire_addr_v6, 11216 &ire->ire_addr_v6)) 11217 continue; 11218 if (ire1->ire_marks & 11219 (IRE_MARK_CONDEMNED| 11220 IRE_MARK_HIDDEN)) 11221 continue; 11222 11223 /* Got one */ 11224 if (ire1 != save_ire) { 11225 IRE_REFHOLD(ire1); 11226 } 11227 break; 11228 } 11229 IRB_REFRELE(irb); 11230 11231 if (ire1 != NULL) { 11232 next_mp = copyb(mp); 11233 if ((next_mp == NULL) || 11234 ((mp->b_cont != NULL) && 11235 ((next_mp->b_cont = 11236 dupmsg(mp->b_cont)) == 11237 NULL))) { 11238 freemsg(next_mp); 11239 next_mp = NULL; 11240 ire_refrele(ire1); 11241 ire1 = NULL; 11242 } 11243 } 11244 11245 /* Last multiroute ire; don't loop anymore. */ 11246 if (ire1 == NULL) { 11247 multirt_send = B_FALSE; 11248 } 11249 } 11250 11251 ill_index = 11252 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11253 11254 /* 11255 * Check for fastpath, we need to hold nce_lock to 11256 * prevent fastpath update from chaining nce_fp_mp. 11257 */ 11258 mutex_enter(&nce->nce_lock); 11259 if ((mp1 = nce->nce_fp_mp) != NULL) { 11260 uint32_t hlen; 11261 uchar_t *rptr; 11262 11263 /* Initiate IPPF processing */ 11264 if (IP6_OUT_IPP(flags)) { 11265 /* 11266 * We have to release the nce lock since 11267 * IPPF components use 11268 * ill_lookup_on_ifindex(), 11269 * which takes the ill_g_lock and the 11270 * ill_lock locks. 
11271 */ 11272 mutex_exit(&nce->nce_lock); 11273 ip_process(IPP_LOCAL_OUT, &mp, 11274 ill_index); 11275 if (mp == NULL) { 11276 BUMP_MIB( 11277 ill->ill_ip6_mib, 11278 ipv6OutDiscards); 11279 if (next_mp != NULL) 11280 freemsg(next_mp); 11281 if (ire != save_ire) { 11282 ire_refrele(ire); 11283 } 11284 return; 11285 } 11286 mutex_enter(&nce->nce_lock); 11287 if ((mp1 = nce->nce_fp_mp) == NULL) { 11288 /* 11289 * Probably disappeared during 11290 * IPQoS processing. 11291 */ 11292 qos_done = B_TRUE; 11293 goto prepend_unitdata; 11294 } 11295 } 11296 hlen = MBLKL(mp1); 11297 rptr = mp->b_rptr - hlen; 11298 /* 11299 * make sure there is room for the fastpath 11300 * datalink header 11301 */ 11302 if (rptr < mp->b_datap->db_base) { 11303 mp1 = copyb(mp1); 11304 if (mp1 == NULL) { 11305 mutex_exit(&nce->nce_lock); 11306 BUMP_MIB(ill->ill_ip6_mib, 11307 ipv6OutDiscards); 11308 freemsg(mp); 11309 if (next_mp != NULL) 11310 freemsg(next_mp); 11311 if (ire != save_ire) { 11312 ire_refrele(ire); 11313 } 11314 return; 11315 } 11316 mp1->b_cont = mp; 11317 11318 /* Get the priority marking, if any */ 11319 mp1->b_band = mp->b_band; 11320 mp = mp1; 11321 } else { 11322 mp->b_rptr = rptr; 11323 /* 11324 * fastpath - pre-pend datalink 11325 * header 11326 */ 11327 bcopy(mp1->b_rptr, rptr, hlen); 11328 } 11329 11330 mutex_exit(&nce->nce_lock); 11331 11332 } else { 11333 prepend_unitdata: 11334 mutex_exit(&nce->nce_lock); 11335 mp1 = nce->nce_res_mp; 11336 if (mp1 == NULL) { 11337 ip1dbg(("ip_xmit_v6: No resolution " 11338 "block ire = %p\n", (void *)ire)); 11339 freemsg(mp); 11340 if (next_mp != NULL) 11341 freemsg(next_mp); 11342 if (ire != save_ire) { 11343 ire_refrele(ire); 11344 } 11345 return; 11346 } 11347 /* 11348 * Prepend the DL_UNITDATA_REQ. 
11349 */ 11350 mp1 = copyb(mp1); 11351 if (mp1 == NULL) { 11352 BUMP_MIB(ill->ill_ip6_mib, 11353 ipv6OutDiscards); 11354 freemsg(mp); 11355 if (next_mp != NULL) 11356 freemsg(next_mp); 11357 if (ire != save_ire) { 11358 ire_refrele(ire); 11359 } 11360 return; 11361 } 11362 mp1->b_cont = mp; 11363 mp = mp1; 11364 /* 11365 * Initiate IPPF processing, if it is 11366 * already done, bypass. 11367 */ 11368 if (!qos_done && IP6_OUT_IPP(flags)) { 11369 ip_process(IPP_LOCAL_OUT, &mp, 11370 ill_index); 11371 if (mp == NULL) { 11372 BUMP_MIB(ill->ill_ip6_mib, 11373 ipv6OutDiscards); 11374 if (next_mp != NULL) 11375 freemsg(next_mp); 11376 if (ire != save_ire) { 11377 ire_refrele(ire); 11378 } 11379 return; 11380 } 11381 } 11382 } 11383 11384 /* 11385 * Update ire counters; for save_ire, this has been 11386 * done by the caller. 11387 */ 11388 if (ire != save_ire) { 11389 UPDATE_OB_PKT_COUNT(ire); 11390 ire->ire_last_used_time = lbolt; 11391 } 11392 11393 /* 11394 * Send it down. XXX Do we want to flow control AH/ESP 11395 * packets that carry TCP payloads? We don't flow 11396 * control TCP packets, but we should also not 11397 * flow-control TCP packets that have been protected. 11398 * We don't have an easy way to find out if an AH/ESP 11399 * packet was originally TCP or not currently. 11400 */ 11401 if (io == NULL) { 11402 putnext(stq, mp); 11403 } else { 11404 /* 11405 * Safety Pup says: make sure this is 11406 * going to the right interface! 11407 */ 11408 if (io->ipsec_out_capab_ill_index != 11409 ill_index) { 11410 /* IPsec kstats: bump lose counter */ 11411 freemsg(mp1); 11412 } else { 11413 ipsec_hw_putnext(stq, mp); 11414 } 11415 } 11416 11417 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11418 if (ire != save_ire) { 11419 ire_refrele(ire); 11420 } 11421 if (multirt_send) { 11422 ASSERT(ire1 != NULL); 11423 /* 11424 * Proceed with the next RTF_MULTIRT 11425 * ire, also set up the send-to queue 11426 * accordingly. 
11427 */ 11428 ire = ire1; 11429 ire1 = NULL; 11430 stq = ire->ire_stq; 11431 nce = ire->ire_nce; 11432 ill = ire_to_ill(ire); 11433 mp = next_mp; 11434 next_mp = NULL; 11435 continue; 11436 } 11437 ASSERT(next_mp == NULL); 11438 ASSERT(ire1 == NULL); 11439 return; 11440 } 11441 11442 ASSERT(nce->nce_state != ND_INCOMPLETE); 11443 11444 /* 11445 * Check for upper layer advice 11446 */ 11447 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11448 /* 11449 * It should be o.k. to check the state without 11450 * a lock here, at most we lose an advice. 11451 */ 11452 nce->nce_last = TICK_TO_MSEC(lbolt64); 11453 if (nce->nce_state != ND_REACHABLE) { 11454 11455 mutex_enter(&nce->nce_lock); 11456 nce->nce_state = ND_REACHABLE; 11457 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 11458 mutex_exit(&nce->nce_lock); 11459 (void) untimeout(nce->nce_timeout_id); 11460 if (ip_debug > 2) { 11461 /* ip1dbg */ 11462 pr_addr_dbg("ip_xmit_v6: state" 11463 " for %s changed to" 11464 " REACHABLE\n", AF_INET6, 11465 &ire->ire_addr_v6); 11466 } 11467 } 11468 if (ire != save_ire) { 11469 ire_refrele(ire); 11470 } 11471 if (multirt_send) { 11472 ASSERT(ire1 != NULL); 11473 /* 11474 * Proceed with the next RTF_MULTIRT 11475 * ire, also set up the send-to queue 11476 * accordingly. 11477 */ 11478 ire = ire1; 11479 ire1 = NULL; 11480 stq = ire->ire_stq; 11481 nce = ire->ire_nce; 11482 ill = ire_to_ill(ire); 11483 mp = next_mp; 11484 next_mp = NULL; 11485 continue; 11486 } 11487 ASSERT(next_mp == NULL); 11488 ASSERT(ire1 == NULL); 11489 return; 11490 } 11491 11492 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 11493 ip1dbg(("ip_xmit_v6: delta = %" PRId64 11494 " ill_reachable_time = %d \n", delta, 11495 ill->ill_reachable_time)); 11496 if (delta > (uint64_t)ill->ill_reachable_time) { 11497 nce = ire->ire_nce; 11498 mutex_enter(&nce->nce_lock); 11499 switch (nce->nce_state) { 11500 case ND_REACHABLE: 11501 case ND_STALE: 11502 /* 11503 * ND_REACHABLE is identical to 11504 * ND_STALE in this specific case. 
If 11505 * reachable time has expired for this 11506 * neighbor (delta is greater than 11507 * reachable time), conceptually, the 11508 * neighbor cache is no longer in 11509 * REACHABLE state, but already in 11510 * STALE state. So the correct 11511 * transition here is to ND_DELAY. 11512 */ 11513 nce->nce_state = ND_DELAY; 11514 mutex_exit(&nce->nce_lock); 11515 NDP_RESTART_TIMER(nce, 11516 delay_first_probe_time); 11517 if (ip_debug > 3) { 11518 /* ip2dbg */ 11519 pr_addr_dbg("ip_xmit_v6: state" 11520 " for %s changed to" 11521 " DELAY\n", AF_INET6, 11522 &ire->ire_addr_v6); 11523 } 11524 break; 11525 case ND_DELAY: 11526 case ND_PROBE: 11527 mutex_exit(&nce->nce_lock); 11528 /* Timers have already started */ 11529 break; 11530 case ND_UNREACHABLE: 11531 /* 11532 * ndp timer has detected that this nce 11533 * is unreachable and initiated deleting 11534 * this nce and all its associated IREs. 11535 * This is a race where we found the 11536 * ire before it was deleted and have 11537 * just sent out a packet using this 11538 * unreachable nce. 11539 */ 11540 mutex_exit(&nce->nce_lock); 11541 break; 11542 default: 11543 ASSERT(0); 11544 } 11545 } 11546 11547 if (multirt_send) { 11548 ASSERT(ire1 != NULL); 11549 /* 11550 * Proceed with the next RTF_MULTIRT ire, 11551 * Also set up the send-to queue accordingly. 11552 */ 11553 if (ire != save_ire) { 11554 ire_refrele(ire); 11555 } 11556 ire = ire1; 11557 ire1 = NULL; 11558 stq = ire->ire_stq; 11559 nce = ire->ire_nce; 11560 ill = ire_to_ill(ire); 11561 mp = next_mp; 11562 next_mp = NULL; 11563 } 11564 } while (multirt_send); 11565 /* 11566 * In the multirouting case, release the last ire used for 11567 * emission. save_ire will be released by the caller. 11568 */ 11569 if (ire != save_ire) { 11570 ire_refrele(ire); 11571 } 11572 } else { 11573 /* 11574 * Queue packet if we have an conn to give back pressure. 
11575 * We can't queue packets intended for hardware acceleration 11576 * since we've tossed that state already. If the packet is 11577 * being fed back from ire_send_v6, we don't know the 11578 * position in the queue to enqueue the packet and we discard 11579 * the packet. 11580 */ 11581 if (ip_output_queue && (connp != NULL) && (io == NULL) && 11582 (caller != IRE_SEND)) { 11583 if (caller == IP_WSRV) { 11584 connp->conn_did_putbq = 1; 11585 (void) putbq(connp->conn_wq, mp); 11586 conn_drain_insert(connp); 11587 /* 11588 * called_from_wsrv implies we are 11589 * the service thread, and the 11590 * queue is already noenabled. 11591 * The check for canput and 11592 * the putbq is not atomic. 11593 * So we need to check again. 11594 */ 11595 if (canput(stq->q_next)) 11596 connp->conn_did_putbq = 0; 11597 } else { 11598 (void) putq(connp->conn_wq, mp); 11599 } 11600 return; 11601 } 11602 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11603 freemsg(mp); 11604 return; 11605 } 11606 } 11607 11608 /* 11609 * pr_addr_dbg function provides the needed buffer space to call 11610 * inet_ntop() function's 3rd argument. This function should be 11611 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 11612 * stack buffer space in it's own stack frame. This function uses 11613 * a buffer from it's own stack and prints the information. 11614 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 11615 * 11616 * Note: This function can call inet_ntop() once. 11617 */ 11618 void 11619 pr_addr_dbg(char *fmt1, int af, const void *addr) 11620 { 11621 char buf[INET6_ADDRSTRLEN]; 11622 11623 if (fmt1 == NULL) { 11624 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 11625 return; 11626 } 11627 11628 /* 11629 * This does not compare debug level and just prints 11630 * out. Thus it is the responsibility of the caller 11631 * to check the appropriate debug-level before calling 11632 * this function. 
11633 */ 11634 if (ip_debug > 0) { 11635 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 11636 } 11637 11638 11639 } 11640 11641 11642 /* 11643 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 11644 * if needed and extension headers) that will be needed based on the 11645 * ip6_pkt_t structure passed by the caller. 11646 * 11647 * The returned length does not include the length of the upper level 11648 * protocol (ULP) header. 11649 */ 11650 int 11651 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 11652 { 11653 int len; 11654 11655 len = IPV6_HDR_LEN; 11656 if (ipp->ipp_fields & IPPF_HAS_IP6I) 11657 len += sizeof (ip6i_t); 11658 if (ipp->ipp_fields & IPPF_HOPOPTS) { 11659 ASSERT(ipp->ipp_hopoptslen != 0); 11660 len += ipp->ipp_hopoptslen; 11661 } 11662 if (ipp->ipp_fields & IPPF_RTHDR) { 11663 ASSERT(ipp->ipp_rthdrlen != 0); 11664 len += ipp->ipp_rthdrlen; 11665 } 11666 /* 11667 * En-route destination options 11668 * Only do them if there's a routing header as well 11669 */ 11670 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 11671 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 11672 ASSERT(ipp->ipp_rtdstoptslen != 0); 11673 len += ipp->ipp_rtdstoptslen; 11674 } 11675 if (ipp->ipp_fields & IPPF_DSTOPTS) { 11676 ASSERT(ipp->ipp_dstoptslen != 0); 11677 len += ipp->ipp_dstoptslen; 11678 } 11679 return (len); 11680 } 11681 11682 /* 11683 * All-purpose routine to build a header chain of an IPv6 header 11684 * followed by any required extension headers and a proto header, 11685 * preceeded (where necessary) by an ip6i_t private header. 11686 * 11687 * The fields of the IPv6 header that are derived from the ip6_pkt_t 11688 * will be filled in appropriately. 11689 * Thus the caller must fill in the rest of the IPv6 header, such as 11690 * traffic class/flowid, source address (if not set here), hoplimit (if not 11691 * set here) and destination address. 11692 * 11693 * The extension headers and ip6i_t header will all be fully filled in. 
11694 */ 11695 void 11696 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 11697 ip6_pkt_t *ipp, uint8_t protocol) 11698 { 11699 uint8_t *nxthdr_ptr; 11700 uint8_t *cp; 11701 ip6i_t *ip6i; 11702 ip6_t *ip6h = (ip6_t *)ext_hdrs; 11703 11704 /* 11705 * If sending private ip6i_t header down (checksum info, nexthop, 11706 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 11707 * then fill it in. (The checksum info will be filled in by icmp). 11708 */ 11709 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 11710 ip6i = (ip6i_t *)ip6h; 11711 ip6h = (ip6_t *)&ip6i[1]; 11712 11713 ip6i->ip6i_flags = 0; 11714 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 11715 if (ipp->ipp_fields & IPPF_IFINDEX || 11716 ipp->ipp_fields & IPPF_SCOPE_ID) { 11717 ASSERT(ipp->ipp_ifindex != 0); 11718 ip6i->ip6i_flags |= IP6I_IFINDEX; 11719 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 11720 } 11721 if (ipp->ipp_fields & IPPF_ADDR) { 11722 /* 11723 * Enable per-packet source address verification if 11724 * IPV6_PKTINFO specified the source address. 11725 * ip6_src is set in the transport's _wput function. 11726 */ 11727 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 11728 &ipp->ipp_addr)); 11729 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 11730 } 11731 if (ipp->ipp_fields & IPPF_HOPLIMIT) { 11732 ip6i->ip6i_hops = ip6h->ip6_hops = ipp->ipp_hoplimit; 11733 /* 11734 * We need to set this flag so that IP doesn't 11735 * rewrite the IPv6 header's hoplimit with the 11736 * current default value. 
11737 */ 11738 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 11739 } 11740 if (ipp->ipp_fields & IPPF_NEXTHOP) { 11741 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 11742 &ipp->ipp_nexthop)); 11743 ip6i->ip6i_flags |= IP6I_NEXTHOP; 11744 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 11745 } 11746 /* 11747 * tell IP this is an ip6i_t private header 11748 */ 11749 ip6i->ip6i_nxt = IPPROTO_RAW; 11750 } 11751 /* Initialize IPv6 header */ 11752 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 11753 if (ipp->ipp_fields & IPPF_TCLASS) { 11754 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 11755 (ipp->ipp_tclass << 20); 11756 } 11757 if (ipp->ipp_fields & IPPF_ADDR) 11758 ip6h->ip6_src = ipp->ipp_addr; 11759 11760 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 11761 cp = (uint8_t *)&ip6h[1]; 11762 /* 11763 * Here's where we have to start stringing together 11764 * any extension headers in the right order: 11765 * Hop-by-hop, destination, routing, and final destination opts. 11766 */ 11767 if (ipp->ipp_fields & IPPF_HOPOPTS) { 11768 /* Hop-by-hop options */ 11769 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 11770 11771 *nxthdr_ptr = IPPROTO_HOPOPTS; 11772 nxthdr_ptr = &hbh->ip6h_nxt; 11773 11774 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 11775 cp += ipp->ipp_hopoptslen; 11776 } 11777 /* 11778 * En-route destination options 11779 * Only do them if there's a routing header as well 11780 */ 11781 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 11782 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 11783 ip6_dest_t *dst = (ip6_dest_t *)cp; 11784 11785 *nxthdr_ptr = IPPROTO_DSTOPTS; 11786 nxthdr_ptr = &dst->ip6d_nxt; 11787 11788 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 11789 cp += ipp->ipp_rtdstoptslen; 11790 } 11791 /* 11792 * Routing header next 11793 */ 11794 if (ipp->ipp_fields & IPPF_RTHDR) { 11795 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 11796 11797 *nxthdr_ptr = IPPROTO_ROUTING; 11798 nxthdr_ptr = &rt->ip6r_nxt; 11799 11800 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 11801 cp += ipp->ipp_rthdrlen; 11802 } 
11803 /* 11804 * Do ultimate destination options 11805 */ 11806 if (ipp->ipp_fields & IPPF_DSTOPTS) { 11807 ip6_dest_t *dest = (ip6_dest_t *)cp; 11808 11809 *nxthdr_ptr = IPPROTO_DSTOPTS; 11810 nxthdr_ptr = &dest->ip6d_nxt; 11811 11812 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 11813 cp += ipp->ipp_dstoptslen; 11814 } 11815 /* 11816 * Now set the last header pointer to the proto passed in 11817 */ 11818 *nxthdr_ptr = protocol; 11819 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 11820 } 11821 11822 /* 11823 * Return a pointer to the routing header extension header 11824 * in the IPv6 header(s) chain passed in. 11825 * If none found, return NULL 11826 * Assumes that all extension headers are in same mblk as the v6 header 11827 */ 11828 ip6_rthdr_t * 11829 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 11830 { 11831 ip6_dest_t *desthdr; 11832 ip6_frag_t *fraghdr; 11833 uint_t hdrlen; 11834 uint8_t nexthdr; 11835 uint8_t *ptr = (uint8_t *)&ip6h[1]; 11836 11837 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 11838 return ((ip6_rthdr_t *)ptr); 11839 11840 /* 11841 * The routing header will precede all extension headers 11842 * other than the hop-by-hop and destination options 11843 * extension headers, so if we see anything other than those, 11844 * we're done and didn't find it. 11845 * We could see a destination options header alone but no 11846 * routing header, in which case we'll return NULL as soon as 11847 * we see anything after that. 11848 * Hop-by-hop and destination option headers are identical, 11849 * so we can use either one we want as a template. 11850 */ 11851 nexthdr = ip6h->ip6_nxt; 11852 while (ptr < endptr) { 11853 /* Is there enough left for len + nexthdr? 
*/ 11854 if (ptr + MIN_EHDR_LEN > endptr) 11855 return (NULL); 11856 11857 switch (nexthdr) { 11858 case IPPROTO_HOPOPTS: 11859 case IPPROTO_DSTOPTS: 11860 /* Assumes the headers are identical for hbh and dst */ 11861 desthdr = (ip6_dest_t *)ptr; 11862 hdrlen = 8 * (desthdr->ip6d_len + 1); 11863 nexthdr = desthdr->ip6d_nxt; 11864 break; 11865 11866 case IPPROTO_ROUTING: 11867 return ((ip6_rthdr_t *)ptr); 11868 11869 case IPPROTO_FRAGMENT: 11870 fraghdr = (ip6_frag_t *)ptr; 11871 hdrlen = sizeof (ip6_frag_t); 11872 nexthdr = fraghdr->ip6f_nxt; 11873 break; 11874 11875 default: 11876 return (NULL); 11877 } 11878 ptr += hdrlen; 11879 } 11880 return (NULL); 11881 } 11882 11883 /* 11884 * Called for source-routed packets originating on this node. 11885 * Manipulates the original routing header by moving every entry up 11886 * one slot, placing the first entry in the v6 header's v6_dst field, 11887 * and placing the ultimate destination in the routing header's last 11888 * slot. 11889 * 11890 * Returns the checksum diference between the ultimate destination 11891 * (last hop in the routing header when the packet is sent) and 11892 * the first hop (ip6_dst when the packet is sent) 11893 */ 11894 uint32_t 11895 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 11896 { 11897 uint_t numaddr; 11898 uint_t i; 11899 in6_addr_t *addrptr; 11900 in6_addr_t tmp; 11901 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 11902 uint32_t cksm; 11903 uint32_t addrsum = 0; 11904 uint16_t *ptr; 11905 11906 /* 11907 * Perform any processing needed for source routing. 11908 * We know that all extension headers will be in the same mblk 11909 * as the IPv6 header. 11910 */ 11911 11912 /* 11913 * If no segments left in header, or the header length field is zero, 11914 * don't move hop addresses around; 11915 * Checksum difference is zero. 
11916 */ 11917 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 11918 return (0); 11919 11920 ptr = (uint16_t *)&ip6h->ip6_dst; 11921 cksm = 0; 11922 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 11923 cksm += ptr[i]; 11924 } 11925 cksm = (cksm & 0xFFFF) + (cksm >> 16); 11926 11927 /* 11928 * Here's where the fun begins - we have to 11929 * move all addresses up one spot, take the 11930 * first hop and make it our first ip6_dst, 11931 * and place the ultimate destination in the 11932 * newly-opened last slot. 11933 */ 11934 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 11935 numaddr = rthdr->ip6r0_len / 2; 11936 tmp = *addrptr; 11937 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 11938 *addrptr = addrptr[1]; 11939 } 11940 *addrptr = ip6h->ip6_dst; 11941 ip6h->ip6_dst = tmp; 11942 11943 /* 11944 * From the checksummed ultimate destination subtract the checksummed 11945 * current ip6_dst (the first hop address). Return that number. 11946 * (In the v4 case, the second part of this is done in each routine 11947 * that calls ip_massage_options(). We do it all in this one place 11948 * for v6). 11949 */ 11950 ptr = (uint16_t *)&ip6h->ip6_dst; 11951 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 11952 addrsum += ptr[i]; 11953 } 11954 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 11955 if ((int)cksm < 0) 11956 cksm--; 11957 cksm = (cksm & 0xFFFF) + (cksm >> 16); 11958 11959 return (cksm); 11960 } 11961 11962 /* 11963 * See if the upper-level protocol indicated by 'proto' will be able 11964 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 11965 * ICMP6_PACKET_TOO_BIG (IPv6). 11966 */ 11967 static boolean_t 11968 ip_ulp_cando_pkt2big(int proto) 11969 { 11970 /* 11971 * For now, only TCP can handle this. 11972 * Tunnels may be able to also, but since tun isn't working over 11973 * IPv6 yet, don't worry about it for now. 
11974 */ 11975 return (proto == IPPROTO_TCP); 11976 } 11977 11978 11979 /* 11980 * Propagate a multicast group membership operation (join/leave) (*fn) on 11981 * all interfaces crossed by the related multirt routes. 11982 * The call is considered successful if the operation succeeds 11983 * on at least one interface. 11984 * The function is called if the destination address in the packet to send 11985 * is multirouted. 11986 */ 11987 int 11988 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 11989 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 11990 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 11991 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 11992 { 11993 ire_t *ire_gw; 11994 irb_t *irb; 11995 int index, error = 0; 11996 opt_restart_t *or; 11997 11998 irb = ire->ire_bucket; 11999 ASSERT(irb != NULL); 12000 12001 ASSERT(DB_TYPE(first_mp) == M_CTL); 12002 or = (opt_restart_t *)first_mp->b_rptr; 12003 12004 IRB_REFHOLD(irb); 12005 for (; ire != NULL; ire = ire->ire_next) { 12006 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12007 continue; 12008 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12009 continue; 12010 12011 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12012 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, 12013 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12014 /* No resolver exists for the gateway; skip this ire. */ 12015 if (ire_gw == NULL) 12016 continue; 12017 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12018 /* 12019 * A resolver exists: we can get the interface on which we have 12020 * to apply the operation. 
12021 */ 12022 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12023 first_mp); 12024 if (error == 0) 12025 or->or_private = CGTP_MCAST_SUCCESS; 12026 12027 if (ip_debug > 0) { 12028 ulong_t off; 12029 char *ksym; 12030 12031 ksym = kobj_getsymname((uintptr_t)fn, &off); 12032 ip2dbg(("ip_multirt_apply_membership_v6: " 12033 "called %s, multirt group 0x%08x via itf 0x%08x, " 12034 "error %d [success %u]\n", 12035 ksym ? ksym : "?", 12036 ntohl(V4_PART_OF_V6((*v6grp))), 12037 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12038 error, or->or_private)); 12039 } 12040 12041 ire_refrele(ire_gw); 12042 if (error == EINPROGRESS) { 12043 IRB_REFRELE(irb); 12044 return (error); 12045 } 12046 } 12047 IRB_REFRELE(irb); 12048 /* 12049 * Consider the call as successful if we succeeded on at least 12050 * one interface. Otherwise, return the last encountered error. 12051 */ 12052 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12053 } 12054 12055 void 12056 ip6_kstat_init(void) 12057 { 12058 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12059 "net", KSTAT_TYPE_NAMED, 12060 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12061 KSTAT_FLAG_VIRTUAL)) != NULL) { 12062 ip6_kstat->ks_data = &ip6_statistics; 12063 kstat_install(ip6_kstat); 12064 } 12065 } 12066 12067 /* 12068 * The following two functions set and get the value for the 12069 * IPV6_SRC_PREFERENCES socket option. 12070 */ 12071 int 12072 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12073 { 12074 /* 12075 * We only support preferences that are covered by 12076 * IPV6_PREFER_SRC_MASK. 12077 */ 12078 if (prefs & ~IPV6_PREFER_SRC_MASK) 12079 return (EINVAL); 12080 12081 /* 12082 * Look for conflicting preferences or default preferences. If 12083 * both bits of a related pair are clear, the application wants the 12084 * system's default value for that pair. Both bits in a pair can't 12085 * be set. 
12086 */ 12087 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12088 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12089 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12090 IPV6_PREFER_SRC_MIPMASK) { 12091 return (EINVAL); 12092 } 12093 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12094 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12095 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12096 IPV6_PREFER_SRC_TMPMASK) { 12097 return (EINVAL); 12098 } 12099 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12100 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12101 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12102 IPV6_PREFER_SRC_CGAMASK) { 12103 return (EINVAL); 12104 } 12105 12106 connp->conn_src_preferences = prefs; 12107 return (0); 12108 } 12109 12110 size_t 12111 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12112 { 12113 *val = connp->conn_src_preferences; 12114 return (sizeof (connp->conn_src_preferences)); 12115 } 12116 12117 int 12118 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12119 { 12120 ill_t *ill; 12121 ire_t *ire; 12122 int error; 12123 12124 /* 12125 * Verify the source address and ifindex. Privileged users can use 12126 * any source address. For ancillary data the source address is 12127 * checked in ip_wput_v6. 12128 */ 12129 if (pkti->ipi6_ifindex != 0) { 12130 ASSERT(connp != NULL); 12131 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12132 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12133 if (ill == NULL) { 12134 /* 12135 * We just want to know if the interface exists, we 12136 * don't really care about the ill pointer itself. 
12137 */ 12138 if (error != EINPROGRESS) 12139 return (error); 12140 error = 0; /* Ensure we don't use it below */ 12141 } else { 12142 ill_refrele(ill); 12143 } 12144 } 12145 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12146 secpolicy_net_rawaccess(cr) != 0) { 12147 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12148 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12149 connp->conn_zoneid, MATCH_IRE_TYPE); 12150 if (ire != NULL) 12151 ire_refrele(ire); 12152 else 12153 return (ENXIO); 12154 } 12155 return (0); 12156 } 12157 12158 /* 12159 * Get the size of the IP options (including the IP headers size) 12160 * without including the AH header's size. If till_ah is B_FALSE, 12161 * and if AH header is present, dest options beyond AH header will 12162 * also be included in the returned size. 12163 */ 12164 int 12165 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12166 { 12167 ip6_t *ip6h; 12168 uint8_t nexthdr; 12169 uint8_t *whereptr; 12170 ip6_hbh_t *hbhhdr; 12171 ip6_dest_t *dsthdr; 12172 ip6_rthdr_t *rthdr; 12173 int ehdrlen; 12174 int size; 12175 ah_t *ah; 12176 12177 ip6h = (ip6_t *)mp->b_rptr; 12178 size = IPV6_HDR_LEN; 12179 nexthdr = ip6h->ip6_nxt; 12180 whereptr = (uint8_t *)&ip6h[1]; 12181 for (;;) { 12182 /* Assume IP has already stripped it */ 12183 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12184 switch (nexthdr) { 12185 case IPPROTO_HOPOPTS: 12186 hbhhdr = (ip6_hbh_t *)whereptr; 12187 nexthdr = hbhhdr->ip6h_nxt; 12188 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12189 break; 12190 case IPPROTO_DSTOPTS: 12191 dsthdr = (ip6_dest_t *)whereptr; 12192 nexthdr = dsthdr->ip6d_nxt; 12193 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12194 break; 12195 case IPPROTO_ROUTING: 12196 rthdr = (ip6_rthdr_t *)whereptr; 12197 nexthdr = rthdr->ip6r_nxt; 12198 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12199 break; 12200 default : 12201 if (till_ah) { 12202 ASSERT(nexthdr == IPPROTO_AH); 12203 return (size); 12204 } 12205 /* 12206 * If we don't have a AH header 
to traverse, 12207 * return now. This happens normally for 12208 * outbound datagrams where we have not inserted 12209 * the AH header. 12210 */ 12211 if (nexthdr != IPPROTO_AH) { 12212 return (size); 12213 } 12214 12215 /* 12216 * We don't include the AH header's size 12217 * to be symmetrical with other cases where 12218 * we either don't have a AH header (outbound) 12219 * or peek into the AH header yet (inbound and 12220 * not pulled up yet). 12221 */ 12222 ah = (ah_t *)whereptr; 12223 nexthdr = ah->ah_nexthdr; 12224 ehdrlen = (ah->ah_length << 2) + 8; 12225 12226 if (nexthdr == IPPROTO_DSTOPTS) { 12227 if (whereptr + ehdrlen >= mp->b_wptr) { 12228 /* 12229 * The destination options header 12230 * is not part of the first mblk. 12231 */ 12232 whereptr = mp->b_cont->b_rptr; 12233 } else { 12234 whereptr += ehdrlen; 12235 } 12236 12237 dsthdr = (ip6_dest_t *)whereptr; 12238 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12239 size += ehdrlen; 12240 } 12241 return (size); 12242 } 12243 whereptr += ehdrlen; 12244 size += ehdrlen; 12245 } 12246 } 12247