1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 1990 Mentat Inc. 
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/dlpi.h> 35 #include <sys/stropts.h> 36 #include <sys/sysmacros.h> 37 #include <sys/strsun.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/kobj.h> 48 #include <sys/zone.h> 49 50 #include <sys/kmem.h> 51 #include <sys/systm.h> 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <sys/vtrace.h> 55 #include <sys/isa_defs.h> 56 #include <sys/atomic.h> 57 #include <sys/iphada.h> 58 #include <sys/policy.h> 59 #include <net/if.h> 60 #include <net/if_arp.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 #include <inet/snmpcom.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/tcp_impl.h> 83 #include <inet/udp_impl.h> 84 #include <inet/ipp_common.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/optcom.h> 91 #include <inet/ip_ndp.h> 92 #include <net/pfkeyv2.h> 93 #include <inet/ipsec_info.h> 94 #include <inet/sadb.h> 95 #include <inet/ipsec_impl.h> 96 #include <inet/tun.h> 97 #include <inet/sctp_ip.h> 98 #include <sys/multidata.h> 99 #include <sys/pattr.h> 100 #include <inet/ipclassifier.h> 101 #include <inet/ipsecah.h> 102 #include <inet/udp_impl.h> 103 #include <sys/squeue.h> 104 105 extern squeue_func_t ip_input_proc; 106 107 /* 108 * IP 
statistics. 109 */ 110 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 111 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 112 113 typedef struct ip6_stat { 114 kstat_named_t ip6_udp_fast_path; 115 kstat_named_t ip6_udp_slow_path; 116 kstat_named_t ip6_udp_fannorm; 117 kstat_named_t ip6_udp_fanmb; 118 kstat_named_t ip6_out_sw_cksum; 119 kstat_named_t ip6_in_sw_cksum; 120 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 121 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 122 kstat_named_t ip6_tcp_in_sw_cksum_err; 123 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 124 kstat_named_t ip6_udp_in_full_hw_cksum_err; 125 kstat_named_t ip6_udp_in_part_hw_cksum_err; 126 kstat_named_t ip6_udp_in_sw_cksum_err; 127 kstat_named_t ip6_udp_out_sw_cksum_bytes; 128 kstat_named_t ip6_frag_mdt_pkt_out; 129 kstat_named_t ip6_frag_mdt_discarded; 130 kstat_named_t ip6_frag_mdt_allocfail; 131 kstat_named_t ip6_frag_mdt_addpdescfail; 132 kstat_named_t ip6_frag_mdt_allocd; 133 } ip6_stat_t; 134 135 static ip6_stat_t ip6_statistics = { 136 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 137 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 138 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 139 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 140 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 141 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 142 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 143 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 144 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 145 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 146 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 147 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 148 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 149 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 150 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 151 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 152 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 153 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 154 { 
"ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 155 }; 156 157 static kstat_t *ip6_kstat; 158 159 /* 160 * Naming conventions: 161 * These rules should be judiciously applied 162 * if there is a need to identify something as IPv6 versus IPv4 163 * IPv6 funcions will end with _v6 in the ip module. 164 * IPv6 funcions will end with _ipv6 in the transport modules. 165 * IPv6 macros: 166 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 167 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 168 * And then there are ..V4_PART_OF_V6. 169 * The intent is that macros in the ip module end with _V6. 170 * IPv6 global variables will start with ipv6_ 171 * IPv6 structures will start with ipv6 172 * IPv6 defined constants should start with IPV6_ 173 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 174 */ 175 176 /* 177 * IPv6 mibs when the interface (ill) is not known. 178 * When the ill is known the per-interface mib in the ill is used. 179 */ 180 mib2_ipv6IfStatsEntry_t ip6_mib; 181 mib2_ipv6IfIcmpEntry_t icmp6_mib; 182 183 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 184 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 185 186 const in6_addr_t ipv6_all_ones = 187 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 188 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 189 190 #ifdef _BIG_ENDIAN 191 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 192 #else /* _BIG_ENDIAN */ 193 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 194 #endif /* _BIG_ENDIAN */ 195 196 #ifdef _BIG_ENDIAN 197 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 198 #else /* _BIG_ENDIAN */ 199 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 200 #endif /* _BIG_ENDIAN */ 201 202 #ifdef _BIG_ENDIAN 203 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 204 #else /* _BIG_ENDIAN */ 205 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 206 
#endif /* _BIG_ENDIAN */ 207 208 #ifdef _BIG_ENDIAN 209 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 210 #else /* _BIG_ENDIAN */ 211 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 212 #endif /* _BIG_ENDIAN */ 213 214 #ifdef _BIG_ENDIAN 215 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 216 #else /* _BIG_ENDIAN */ 217 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 218 #endif /* _BIG_ENDIAN */ 219 220 #ifdef _BIG_ENDIAN 221 const in6_addr_t ipv6_solicited_node_mcast = 222 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 223 #else /* _BIG_ENDIAN */ 224 const in6_addr_t ipv6_solicited_node_mcast = 225 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 226 #endif /* _BIG_ENDIAN */ 227 228 /* 229 * Used by icmp_send_redirect_v6 for picking random src. 230 */ 231 uint_t icmp_redirect_v6_src_index; 232 233 /* Leave room for ip_newroute to tack on the src and target addresses */ 234 #define OK_RESOLVER_MP_V6(mp) \ 235 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 236 237 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 238 boolean_t, zoneid_t); 239 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 240 const in6_addr_t *, boolean_t); 241 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 242 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 243 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 244 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 245 boolean_t, boolean_t, boolean_t, boolean_t); 246 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 247 iulp_t *); 248 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 249 uint16_t, boolean_t, boolean_t, boolean_t); 250 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 251 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 252 static void 
ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 253 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 254 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 255 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 256 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 257 uint8_t *, uint_t, uint8_t); 258 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 259 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 260 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 261 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 262 conn_t *, int, int, int); 263 static boolean_t ip_ulp_cando_pkt2big(int); 264 265 static void ip_rput_v6(queue_t *, mblk_t *); 266 static void ip_wput_v6(queue_t *, mblk_t *); 267 268 /* 269 * A template for an IPv6 AR_ENTRY_QUERY 270 */ 271 static areq_t ipv6_areq_template = { 272 AR_ENTRY_QUERY, /* cmd */ 273 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 274 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 275 IP6_DL_SAP, /* protocol, from arps perspective */ 276 sizeof (areq_t), /* target addr offset */ 277 IPV6_ADDR_LEN, /* target addr_length */ 278 0, /* flags */ 279 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 280 IPV6_ADDR_LEN, /* sender addr length */ 281 6, /* xmit_count */ 282 1000, /* (re)xmit_interval in milliseconds */ 283 4 /* max # of requests to buffer */ 284 /* anything else filled in by the code */ 285 }; 286 287 struct qinit rinit_ipv6 = { 288 (pfi_t)ip_rput_v6, 289 NULL, 290 ip_open, 291 ip_close, 292 NULL, 293 &ip_mod_info 294 }; 295 296 struct qinit winit_ipv6 = { 297 (pfi_t)ip_wput_v6, 298 (pfi_t)ip_wsrv, 299 ip_open, 300 ip_close, 301 NULL, 302 &ip_mod_info 303 }; 304 305 /* 306 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 
307 * The message has already been checksummed and if needed, 308 * a copy has been made to be sent any interested ICMP client (conn) 309 * Note that this is different than icmp_inbound() which does the fanout 310 * to conn's as well as local processing of the ICMP packets. 311 * 312 * All error messages are passed to the matching transport stream. 313 * 314 * Zones notes: 315 * The packet is only processed in the context of the specified zone: typically 316 * only this zone will reply to an echo request. This means that the caller must 317 * call icmp_inbound_v6() for each relevant zone. 318 */ 319 static void 320 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 321 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 322 { 323 icmp6_t *icmp6; 324 ip6_t *ip6h; 325 boolean_t interested; 326 ip6i_t *ip6i; 327 in6_addr_t origsrc; 328 ire_t *ire; 329 mblk_t *first_mp; 330 ipsec_in_t *ii; 331 332 ASSERT(ill != NULL); 333 first_mp = mp; 334 if (mctl_present) { 335 mp = first_mp->b_cont; 336 ASSERT(mp != NULL); 337 338 ii = (ipsec_in_t *)first_mp->b_rptr; 339 ASSERT(ii->ipsec_in_type == IPSEC_IN); 340 } 341 342 ip6h = (ip6_t *)mp->b_rptr; 343 344 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 345 346 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 347 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 348 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 350 freemsg(first_mp); 351 return; 352 } 353 ip6h = (ip6_t *)mp->b_rptr; 354 } 355 if (icmp_accept_clear_messages == 0) { 356 first_mp = ipsec_check_global_policy(first_mp, NULL, 357 NULL, ip6h, mctl_present); 358 if (first_mp == NULL) 359 return; 360 } 361 362 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 363 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 364 icmp6->icmp6_code)); 365 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 366 367 /* Initiate IPPF processing here */ 368 if (IP6_IN_IPP(flags)) { 
369 370 /* 371 * If the ifindex changes due to SIOCSLIFINDEX 372 * packet may return to IP on the wrong ill. 373 */ 374 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 375 if (mp == NULL) { 376 if (mctl_present) { 377 freeb(first_mp); 378 } 379 return; 380 } 381 } 382 383 switch (icmp6->icmp6_type) { 384 case ICMP6_DST_UNREACH: 385 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 386 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 387 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 388 break; 389 390 case ICMP6_TIME_EXCEEDED: 391 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 392 break; 393 394 case ICMP6_PARAM_PROB: 395 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 396 break; 397 398 case ICMP6_PACKET_TOO_BIG: 399 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 400 zoneid); 401 return; 402 case ICMP6_ECHO_REQUEST: 403 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 404 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 405 !ipv6_resp_echo_mcast) 406 break; 407 408 /* 409 * We must have exclusive use of the mblk to convert it to 410 * a response. 411 * If not, we copy it. 412 */ 413 if (mp->b_datap->db_ref > 1) { 414 mblk_t *mp1; 415 416 mp1 = copymsg(mp); 417 freemsg(mp); 418 if (mp1 == NULL) { 419 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 420 if (mctl_present) 421 freeb(first_mp); 422 return; 423 } 424 mp = mp1; 425 ip6h = (ip6_t *)mp->b_rptr; 426 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 427 if (mctl_present) 428 first_mp->b_cont = mp; 429 else 430 first_mp = mp; 431 } 432 433 /* 434 * Turn the echo into an echo reply. 435 * Remove any extension headers (do not reverse a source route) 436 * and clear the flow id (keep traffic class for now). 
437 */ 438 if (hdr_length != IPV6_HDR_LEN) { 439 int i; 440 441 for (i = 0; i < IPV6_HDR_LEN; i++) 442 mp->b_rptr[hdr_length - i - 1] = 443 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 444 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 445 ip6h = (ip6_t *)mp->b_rptr; 446 ip6h->ip6_nxt = IPPROTO_ICMPV6; 447 hdr_length = IPV6_HDR_LEN; 448 } 449 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 450 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 451 452 ip6h->ip6_plen = 453 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 454 origsrc = ip6h->ip6_src; 455 /* 456 * Reverse the source and destination addresses. 457 * If the return address is a multicast, zero out the source 458 * (ip_wput_v6 will set an address). 459 */ 460 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 461 ip6h->ip6_src = ipv6_all_zeros; 462 ip6h->ip6_dst = origsrc; 463 } else { 464 ip6h->ip6_src = ip6h->ip6_dst; 465 ip6h->ip6_dst = origsrc; 466 } 467 468 /* set the hop limit */ 469 ip6h->ip6_hops = ipv6_def_hops; 470 471 /* 472 * Prepare for checksum by putting icmp length in the icmp 473 * checksum field. The checksum is calculated in ip_wput_v6. 474 */ 475 icmp6->icmp6_cksum = ip6h->ip6_plen; 476 /* 477 * ICMP echo replies should go out on the same interface 478 * the request came on as probes used by in.mpathd for 479 * detecting NIC failures are ECHO packets. We turn-off load 480 * spreading by allocating a ip6i and setting ip6i_attach_if 481 * to B_TRUE which is handled both by ip_wput_v6 and 482 * ip_newroute_v6. If we don't turnoff load spreading, 483 * the packets might get dropped if there are no 484 * non-FAILED/INACTIVE interfaces for it to go out on and 485 * in.mpathd would wrongly detect a failure or mis-detect 486 * a NIC failure as a link failure. As load spreading can 487 * happen only if ill_group is not NULL, we do only for 488 * that case and this does not affect the normal case. 489 * 490 * We force this only on echo packets that came from on-link 491 * hosts. 
We restrict this to link-local addresses which 492 * is used by in.mpathd for probing. In the IPv6 case, 493 * default routes typically have an ire_ipif pointer and 494 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 495 * might work. As a default route out of this interface 496 * may not be present, enforcing this packet to go out in 497 * this case may not work. 498 */ 499 if (ill->ill_group != NULL && 500 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 501 /* 502 * If we are sending replies to ourselves, don't 503 * set ATTACH_IF as we may not be able to find 504 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 505 * causes ip_wput_v6 to look for an IRE_LOCAL on 506 * "ill" which it may not find and will try to 507 * create an IRE_CACHE for our local address. Once 508 * we do this, we will try to forward all packets 509 * meant to our LOCAL address. 510 */ 511 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 512 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 513 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 514 if (mp == NULL) { 515 BUMP_MIB(ill->ill_icmp6_mib, 516 ipv6IfIcmpInErrors); 517 if (ire != NULL) 518 ire_refrele(ire); 519 if (mctl_present) 520 freeb(first_mp); 521 return; 522 } else if (mctl_present) { 523 first_mp->b_cont = mp; 524 } else { 525 first_mp = mp; 526 } 527 ip6i = (ip6i_t *)mp->b_rptr; 528 ip6i->ip6i_flags = IP6I_ATTACH_IF; 529 ip6i->ip6i_ifindex = 530 ill->ill_phyint->phyint_ifindex; 531 } 532 if (ire != NULL) 533 ire_refrele(ire); 534 } 535 536 if (!mctl_present) { 537 /* 538 * This packet should go out the same way as it 539 * came in i.e in clear. To make sure that global 540 * policy will not be applied to this in ip_wput, 541 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
542 */ 543 ASSERT(first_mp == mp); 544 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 545 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 546 freemsg(mp); 547 return; 548 } 549 ii = (ipsec_in_t *)first_mp->b_rptr; 550 551 /* This is not a secure packet */ 552 ii->ipsec_in_secure = B_FALSE; 553 first_mp->b_cont = mp; 554 } 555 ii->ipsec_in_zoneid = zoneid; 556 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 557 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 558 return; 559 } 560 put(WR(q), first_mp); 561 return; 562 563 case ICMP6_ECHO_REPLY: 564 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 565 break; 566 567 case ND_ROUTER_SOLICIT: 568 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 569 break; 570 571 case ND_ROUTER_ADVERT: 572 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 573 break; 574 575 case ND_NEIGHBOR_SOLICIT: 576 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 577 if (mctl_present) 578 freeb(first_mp); 579 /* XXX may wish to pass first_mp up to ndp_input someday. */ 580 ndp_input(ill, mp); 581 return; 582 583 case ND_NEIGHBOR_ADVERT: 584 BUMP_MIB(ill->ill_icmp6_mib, 585 ipv6IfIcmpInNeighborAdvertisements); 586 if (mctl_present) 587 freeb(first_mp); 588 /* XXX may wish to pass first_mp up to ndp_input someday. */ 589 ndp_input(ill, mp); 590 return; 591 592 case ND_REDIRECT: { 593 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 594 595 if (ipv6_ignore_redirect) 596 break; 597 598 /* 599 * As there is no upper client to deliver, we don't 600 * need the first_mp any more. 601 */ 602 if (mctl_present) 603 freeb(first_mp); 604 if (!pullupmsg(mp, -1) || 605 !icmp_redirect_ok_v6(ill, mp)) { 606 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 607 break; 608 } 609 icmp_redirect_v6(q, mp, ill); 610 return; 611 } 612 613 /* 614 * The next three icmp messages will be handled by MLD. 615 * Pass all valid MLD packets up to any process(es) 616 * listening on a raw ICMP socket. 
MLD messages are 617 * freed by mld_input function. 618 */ 619 case MLD_LISTENER_QUERY: 620 case MLD_LISTENER_REPORT: 621 case MLD_LISTENER_REDUCTION: 622 if (mctl_present) 623 freeb(first_mp); 624 mld_input(q, mp, ill); 625 return; 626 default: 627 break; 628 } 629 if (interested) { 630 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 631 mctl_present, zoneid); 632 } else { 633 freemsg(first_mp); 634 } 635 } 636 637 /* 638 * Process received IPv6 ICMP Packet too big. 639 * After updating any IRE it does the fanout to any matching transport streams. 640 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 641 */ 642 /* ARGSUSED */ 643 static void 644 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 645 boolean_t mctl_present, zoneid_t zoneid) 646 { 647 ip6_t *ip6h; 648 ip6_t *inner_ip6h; 649 icmp6_t *icmp6; 650 uint16_t hdr_length; 651 uint32_t mtu; 652 ire_t *ire, *first_ire; 653 mblk_t *first_mp; 654 655 first_mp = mp; 656 if (mctl_present) 657 mp = first_mp->b_cont; 658 /* 659 * We must have exclusive use of the mblk to update the MTU 660 * in the packet. 661 * If not, we copy it. 662 * 663 * If there's an M_CTL present, we know that allocated first_mp 664 * earlier in this function, so we know first_mp has refcnt of one. 
665 */ 666 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 667 if (mp->b_datap->db_ref > 1) { 668 mblk_t *mp1; 669 670 mp1 = copymsg(mp); 671 freemsg(mp); 672 if (mp1 == NULL) { 673 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 674 if (mctl_present) 675 freeb(first_mp); 676 return; 677 } 678 mp = mp1; 679 if (mctl_present) 680 first_mp->b_cont = mp; 681 else 682 first_mp = mp; 683 } 684 ip6h = (ip6_t *)mp->b_rptr; 685 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 686 hdr_length = ip_hdr_length_v6(mp, ip6h); 687 else 688 hdr_length = IPV6_HDR_LEN; 689 690 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 691 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 692 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 693 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 694 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 695 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 696 freemsg(first_mp); 697 return; 698 } 699 ip6h = (ip6_t *)mp->b_rptr; 700 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 701 inner_ip6h = (ip6_t *)&icmp6[1]; 702 } 703 704 /* 705 * For link local destinations matching simply on IRE type is not 706 * sufficient. Same link local addresses for different ILL's is 707 * possible. 
708 */ 709 710 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 711 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 712 IRE_CACHE, ill->ill_ipif, ALL_ZONES, 713 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 714 715 if (first_ire == NULL) { 716 if (ip_debug > 2) { 717 /* ip1dbg */ 718 pr_addr_dbg("icmp_inbound_too_big_v6:" 719 "no ire for dst %s\n", AF_INET6, 720 &inner_ip6h->ip6_dst); 721 } 722 freemsg(first_mp); 723 return; 724 } 725 726 mtu = ntohl(icmp6->icmp6_mtu); 727 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 728 for (ire = first_ire; ire != NULL && 729 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 730 ire = ire->ire_next) { 731 mutex_enter(&ire->ire_lock); 732 if (mtu < IPV6_MIN_MTU) { 733 ip1dbg(("Received mtu less than IPv6 " 734 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 735 mtu = IPV6_MIN_MTU; 736 /* 737 * If an mtu less than IPv6 min mtu is received, 738 * we must include a fragment header in 739 * subsequent packets. 740 */ 741 ire->ire_frag_flag |= IPH_FRAG_HDR; 742 } 743 ip1dbg(("Received mtu from router: %d\n", mtu)); 744 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 745 /* Record the new max frag size for the ULP. */ 746 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 747 /* 748 * If we need a fragment header in every packet 749 * (above case or multirouting), make sure the 750 * ULP takes it into account when computing the 751 * payload size. 
752 */ 753 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 754 sizeof (ip6_frag_t)); 755 } else { 756 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 757 } 758 mutex_exit(&ire->ire_lock); 759 } 760 rw_exit(&first_ire->ire_bucket->irb_lock); 761 ire_refrele(first_ire); 762 } else { 763 irb_t *irb = NULL; 764 /* 765 * for non-link local destinations we match only on the IRE type 766 */ 767 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 768 IRE_CACHE, ill->ill_ipif, ALL_ZONES, MATCH_IRE_TYPE); 769 if (ire == NULL) { 770 if (ip_debug > 2) { 771 /* ip1dbg */ 772 pr_addr_dbg("icmp_inbound_too_big_v6:" 773 "no ire for dst %s\n", 774 AF_INET6, &inner_ip6h->ip6_dst); 775 } 776 freemsg(first_mp); 777 return; 778 } 779 irb = ire->ire_bucket; 780 ire_refrele(ire); 781 rw_enter(&irb->irb_lock, RW_READER); 782 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 783 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 784 &inner_ip6h->ip6_dst)) { 785 mtu = ntohl(icmp6->icmp6_mtu); 786 mutex_enter(&ire->ire_lock); 787 if (mtu < IPV6_MIN_MTU) { 788 ip1dbg(("Received mtu less than IPv6" 789 "min mtu %d: %d\n", 790 IPV6_MIN_MTU, mtu)); 791 mtu = IPV6_MIN_MTU; 792 /* 793 * If an mtu less than IPv6 min mtu is 794 * received, we must include a fragment 795 * header in subsequent packets. 796 */ 797 ire->ire_frag_flag |= IPH_FRAG_HDR; 798 } 799 800 ip1dbg(("Received mtu from router: %d\n", mtu)); 801 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 802 /* Record the new max frag size for the ULP. */ 803 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 804 /* 805 * If we need a fragment header in 806 * every packet (above case or 807 * multirouting), make sure the ULP 808 * takes it into account when computing 809 * the payload size. 
810 */ 811 icmp6->icmp6_mtu = 812 htonl(ire->ire_max_frag - 813 sizeof (ip6_frag_t)); 814 } else { 815 icmp6->icmp6_mtu = 816 htonl(ire->ire_max_frag); 817 } 818 mutex_exit(&ire->ire_lock); 819 } 820 } 821 rw_exit(&irb->irb_lock); 822 } 823 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 824 mctl_present, zoneid); 825 } 826 827 static void 828 pkt_too_big(conn_t *connp, void *arg) 829 { 830 mblk_t *mp; 831 832 if (!connp->conn_ipv6_recvpathmtu) 833 return; 834 835 /* create message and drop it on this connections read queue */ 836 if ((mp = dupb((mblk_t *)arg)) == NULL) { 837 return; 838 } 839 mp->b_datap->db_type = M_CTL; 840 841 putnext(connp->conn_rq, mp); 842 } 843 844 /* 845 * Fanout received ICMPv6 error packets to the transports. 846 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 847 */ 848 void 849 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 850 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 851 { 852 uint16_t *up; /* Pointer to ports in ULP header */ 853 uint32_t ports; /* reversed ports for fanout */ 854 ip6_t rip6h; /* With reversed addresses */ 855 uint16_t hdr_length; 856 uint8_t *nexthdrp; 857 uint8_t nexthdr; 858 mblk_t *first_mp; 859 ipsec_in_t *ii; 860 tcpha_t *tcpha; 861 conn_t *connp; 862 863 first_mp = mp; 864 if (mctl_present) { 865 mp = first_mp->b_cont; 866 ASSERT(mp != NULL); 867 868 ii = (ipsec_in_t *)first_mp->b_rptr; 869 ASSERT(ii->ipsec_in_type == IPSEC_IN); 870 } else { 871 ii = NULL; 872 } 873 874 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 875 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 876 877 /* 878 * Need to pullup everything in order to use 879 * ip_hdr_length_nexthdr_v6() 880 */ 881 if (mp->b_cont != NULL) { 882 if (!pullupmsg(mp, -1)) { 883 ip1dbg(("icmp_inbound_error_fanout_v6: " 884 "pullupmsg failed\n")); 885 goto drop_pkt; 886 } 887 ip6h = (ip6_t *)mp->b_rptr; 888 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 889 } 890 891 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 892 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 893 goto drop_pkt; 894 895 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 896 goto drop_pkt; 897 nexthdr = *nexthdrp; 898 899 /* Set message type, must be done after pullups */ 900 mp->b_datap->db_type = M_CTL; 901 902 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 903 /* 904 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 905 * sockets. 906 * 907 * Note I don't like walking every connection to deliver 908 * this information to a set of listeners. A separate 909 * list could be kept to keep the cost of this down. 910 */ 911 ipcl_walk(pkt_too_big, (void *)mp); 912 } 913 914 /* Try to pass the ICMP message to clients who need it */ 915 switch (nexthdr) { 916 case IPPROTO_UDP: { 917 /* 918 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 919 * UDP header to get the port information. 920 */ 921 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 922 mp->b_wptr) { 923 break; 924 } 925 /* 926 * Attempt to find a client stream based on port. 927 * Note that we do a reverse lookup since the header is 928 * in the form we sent it out. 929 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 930 * and we only set the src and dst addresses and nexthdr. 931 */ 932 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 933 rip6h.ip6_src = ip6h->ip6_dst; 934 rip6h.ip6_dst = ip6h->ip6_src; 935 rip6h.ip6_nxt = nexthdr; 936 ((uint16_t *)&ports)[0] = up[1]; 937 ((uint16_t *)&ports)[1] = up[0]; 938 939 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 940 IP6_NO_IPPOLICY, mctl_present, zoneid); 941 return; 942 } 943 case IPPROTO_TCP: { 944 /* 945 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 946 * the TCP header to get the port information. 
947 */ 948 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 949 mp->b_wptr) { 950 break; 951 } 952 953 /* 954 * Attempt to find a client stream based on port. 955 * Note that we do a reverse lookup since the header is 956 * in the form we sent it out. 957 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 958 * we only set the src and dst addresses and nexthdr. 959 */ 960 961 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 962 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 963 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 964 if (connp == NULL) { 965 goto drop_pkt; 966 } 967 968 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 969 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 970 return; 971 972 } 973 case IPPROTO_SCTP: 974 /* 975 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 976 * the SCTP header to get the port information. 977 */ 978 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 979 mp->b_wptr) { 980 break; 981 } 982 983 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 984 ((uint16_t *)&ports)[0] = up[1]; 985 ((uint16_t *)&ports)[1] = up[0]; 986 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 987 IP6_NO_IPPOLICY, 0, zoneid); 988 return; 989 case IPPROTO_ESP: 990 case IPPROTO_AH: { 991 int ipsec_rc; 992 993 /* 994 * We need a IPSEC_IN in the front to fanout to AH/ESP. 995 * We will re-use the IPSEC_IN if it is already present as 996 * AH/ESP will not affect any fields in the IPSEC_IN for 997 * ICMP errors. If there is no IPSEC_IN, allocate a new 998 * one and attach it in the front. 999 */ 1000 if (ii != NULL) { 1001 /* 1002 * ip_fanout_proto_again converts the ICMP errors 1003 * that come back from AH/ESP to M_DATA so that 1004 * if it is non-AH/ESP and we do a pullupmsg in 1005 * this function, it would work. Convert it back 1006 * to M_CTL before we send up as this is a ICMP 1007 * error. This could have been generated locally or 1008 * by some router. Validate the inner IPSEC 1009 * headers. 
1010 * 1011 * NOTE : ill_index is used by ip_fanout_proto_again 1012 * to locate the ill. 1013 */ 1014 ASSERT(ill != NULL); 1015 ii->ipsec_in_ill_index = 1016 ill->ill_phyint->phyint_ifindex; 1017 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1018 first_mp->b_cont->b_datap->db_type = M_CTL; 1019 } else { 1020 /* 1021 * IPSEC_IN is not present. We attach a ipsec_in 1022 * message and send up to IPSEC for validating 1023 * and removing the IPSEC headers. Clear 1024 * ipsec_in_secure so that when we return 1025 * from IPSEC, we don't mistakenly think that this 1026 * is a secure packet came from the network. 1027 * 1028 * NOTE : ill_index is used by ip_fanout_proto_again 1029 * to locate the ill. 1030 */ 1031 ASSERT(first_mp == mp); 1032 first_mp = ipsec_in_alloc(B_FALSE); 1033 if (first_mp == NULL) { 1034 freemsg(mp); 1035 BUMP_MIB(&ip_mib, ipInDiscards); 1036 return; 1037 } 1038 ii = (ipsec_in_t *)first_mp->b_rptr; 1039 1040 /* This is not a secure packet */ 1041 ii->ipsec_in_secure = B_FALSE; 1042 first_mp->b_cont = mp; 1043 mp->b_datap->db_type = M_CTL; 1044 ASSERT(ill != NULL); 1045 ii->ipsec_in_ill_index = 1046 ill->ill_phyint->phyint_ifindex; 1047 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1048 } 1049 1050 if (!ipsec_loaded()) { 1051 ip_proto_not_sup(q, first_mp, 0, zoneid); 1052 return; 1053 } 1054 1055 if (nexthdr == IPPROTO_ESP) 1056 ipsec_rc = ipsecesp_icmp_error(first_mp); 1057 else 1058 ipsec_rc = ipsecah_icmp_error(first_mp); 1059 if (ipsec_rc == IPSEC_STATUS_FAILED) 1060 return; 1061 1062 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1063 return; 1064 } 1065 case IPPROTO_ENCAP: 1066 case IPPROTO_IPV6: 1067 if ((uint8_t *)ip6h + hdr_length + 1068 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1069 sizeof (ip6_t)) > mp->b_wptr) 1070 goto drop_pkt; 1071 1072 if (nexthdr == IPPROTO_ENCAP || 1073 !IN6_ARE_ADDR_EQUAL( 1074 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1075 &ip6h->ip6_src) || 1076 !IN6_ARE_ADDR_EQUAL( 1077 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1078 &ip6h->ip6_dst)) { 1079 /* 1080 * For tunnels that have used IPsec protection, 1081 * we need to adjust the MTU to take into account 1082 * the IPsec overhead. 1083 */ 1084 if (ii != NULL) 1085 icmp6->icmp6_mtu = htons( 1086 ntohs(icmp6->icmp6_mtu) - 1087 ipsec_in_extra_length(first_mp)); 1088 } else { 1089 /* 1090 * Self-encapsulated case. As in the ipv4 case, 1091 * we need to strip the 2nd IP header. Since mp 1092 * is already pulled-up, we can simply bcopy 1093 * the 3rd header + data over the 2nd header. 1094 */ 1095 uint16_t unused_len; 1096 ip6_t *inner_ip6h = (ip6_t *) 1097 ((uchar_t *)ip6h + hdr_length); 1098 1099 /* 1100 * Make sure we don't do recursion more than once. 1101 */ 1102 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1103 &unused_len, &nexthdrp) || 1104 *nexthdrp == IPPROTO_IPV6) { 1105 goto drop_pkt; 1106 } 1107 1108 /* 1109 * We are about to modify the packet. Make a copy if 1110 * someone else has a reference to it. 1111 */ 1112 if (DB_REF(mp) > 1) { 1113 mblk_t *mp1; 1114 uint16_t icmp6_offset; 1115 1116 mp1 = copymsg(mp); 1117 if (mp1 == NULL) { 1118 goto drop_pkt; 1119 } 1120 icmp6_offset = (uint16_t) 1121 ((uchar_t *)icmp6 - mp->b_rptr); 1122 freemsg(mp); 1123 mp = mp1; 1124 1125 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1126 ip6h = (ip6_t *)&icmp6[1]; 1127 inner_ip6h = (ip6_t *) 1128 ((uchar_t *)ip6h + hdr_length); 1129 1130 if (mctl_present) 1131 first_mp->b_cont = mp; 1132 else 1133 first_mp = mp; 1134 } 1135 1136 /* 1137 * Need to set db_type back to M_DATA before 1138 * refeeding mp into this function. 
1139 */ 1140 DB_TYPE(mp) = M_DATA; 1141 1142 /* 1143 * Copy the 3rd header + remaining data on top 1144 * of the 2nd header. 1145 */ 1146 bcopy(inner_ip6h, ip6h, 1147 mp->b_wptr - (uchar_t *)inner_ip6h); 1148 1149 /* 1150 * Subtract length of the 2nd header. 1151 */ 1152 mp->b_wptr -= hdr_length; 1153 1154 /* 1155 * Now recurse, and see what I _really_ should be 1156 * doing here. 1157 */ 1158 icmp_inbound_error_fanout_v6(q, first_mp, 1159 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1160 zoneid); 1161 return; 1162 } 1163 /* FALLTHRU */ 1164 default: 1165 /* 1166 * The rip6h header is only used for the lookup and we 1167 * only set the src and dst addresses and nexthdr. 1168 */ 1169 rip6h.ip6_src = ip6h->ip6_dst; 1170 rip6h.ip6_dst = ip6h->ip6_src; 1171 rip6h.ip6_nxt = nexthdr; 1172 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1173 IP6_NO_IPPOLICY, mctl_present, zoneid); 1174 return; 1175 } 1176 /* NOTREACHED */ 1177 drop_pkt: 1178 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1179 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1180 freemsg(first_mp); 1181 } 1182 1183 /* 1184 * Validate the incoming redirect message, if valid redirect 1185 * processing is done later. This is separated from the actual 1186 * redirect processing to avoid becoming single threaded when not 1187 * necessary. (i.e invalid packet) 1188 * Assumes that any AH or ESP headers have already been removed. 1189 * The mp has already been pulled up. 
1190 */ 1191 boolean_t 1192 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1193 { 1194 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1195 nd_redirect_t *rd; 1196 ire_t *ire; 1197 uint16_t len; 1198 uint16_t hdr_length; 1199 1200 ASSERT(mp->b_cont == NULL); 1201 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1202 hdr_length = ip_hdr_length_v6(mp, ip6h); 1203 else 1204 hdr_length = IPV6_HDR_LEN; 1205 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1206 len = mp->b_wptr - mp->b_rptr - hdr_length; 1207 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1208 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1209 (rd->nd_rd_code != 0) || 1210 (len < sizeof (nd_redirect_t)) || 1211 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1212 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1213 return (B_FALSE); 1214 } 1215 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1216 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1217 return (B_FALSE); 1218 } 1219 1220 /* 1221 * Verify that the IP source address of the redirect is 1222 * the same as the current first-hop router for the specified 1223 * ICMP destination address. Just to be cautious, this test 1224 * will be done again before we add the redirect, in case 1225 * router goes away between now and then. 1226 */ 1227 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1228 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, 1229 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1230 if (ire == NULL) 1231 return (B_FALSE); 1232 ire_refrele(ire); 1233 if (len > sizeof (nd_redirect_t)) { 1234 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1235 len - sizeof (nd_redirect_t))) 1236 return (B_FALSE); 1237 } 1238 return (B_TRUE); 1239 } 1240 1241 /* 1242 * Process received IPv6 ICMP Redirect messages. 1243 * Assumes that the icmp packet has already been verfied to be 1244 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1245 */ 1246 /* ARGSUSED */ 1247 static void 1248 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1249 { 1250 ip6_t *ip6h; 1251 uint16_t hdr_length; 1252 nd_redirect_t *rd; 1253 ire_t *ire; 1254 ire_t *prev_ire; 1255 ire_t *redir_ire; 1256 in6_addr_t *src, *dst, *gateway; 1257 nd_opt_hdr_t *opt; 1258 nce_t *nce; 1259 int nce_flags = 0; 1260 int err = 0; 1261 boolean_t redirect_to_router = B_FALSE; 1262 int len; 1263 iulp_t ulp_info = { 0 }; 1264 ill_t *prev_ire_ill; 1265 ipif_t *ipif; 1266 1267 ip6h = (ip6_t *)mp->b_rptr; 1268 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1269 hdr_length = ip_hdr_length_v6(mp, ip6h); 1270 else 1271 hdr_length = IPV6_HDR_LEN; 1272 1273 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1274 src = &ip6h->ip6_src; 1275 dst = &rd->nd_rd_dst; 1276 gateway = &rd->nd_rd_target; 1277 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1278 redirect_to_router = B_TRUE; 1279 nce_flags |= NCE_F_ISROUTER; 1280 } 1281 /* 1282 * Make sure we had a route for the dest in question and that 1283 * route was pointing to the old gateway (the source of the 1284 * redirect packet.) 1285 */ 1286 ipif = ipif_get_next_ipif(NULL, ill); 1287 if (ipif == NULL) { 1288 freemsg(mp); 1289 return; 1290 } 1291 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1292 ALL_ZONES, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1293 ipif_refrele(ipif); 1294 /* 1295 * Check that 1296 * the redirect was not from ourselves 1297 * old gateway is still directly reachable 1298 */ 1299 if (prev_ire == NULL || 1300 prev_ire->ire_type == IRE_LOCAL) { 1301 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1302 goto fail_redirect; 1303 } 1304 prev_ire_ill = ire_to_ill(prev_ire); 1305 ASSERT(prev_ire_ill != NULL); 1306 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1307 nce_flags |= NCE_F_NONUD; 1308 1309 /* 1310 * Should we use the old ULP info to create the new gateway? From 1311 * a user's perspective, we should inherit the info so that it 1312 * is a "smooth" transition. 
If we do not do that, then new 1313 * connections going thru the new gateway will have no route metrics, 1314 * which is counter-intuitive to user. From a network point of 1315 * view, this may or may not make sense even though the new gateway 1316 * is still directly connected to us so the route metrics should not 1317 * change much. 1318 * 1319 * But if the old ire_uinfo is not initialized, we do another 1320 * recursive lookup on the dest using the new gateway. There may 1321 * be a route to that. If so, use it to initialize the redirect 1322 * route. 1323 */ 1324 if (prev_ire->ire_uinfo.iulp_set) { 1325 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1326 } else if (redirect_to_router) { 1327 /* 1328 * Only do the following if the redirection is really to 1329 * a router. 1330 */ 1331 ire_t *tmp_ire; 1332 ire_t *sire; 1333 1334 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1335 ALL_ZONES, 0, 1336 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1337 if (sire != NULL) { 1338 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1339 ASSERT(tmp_ire != NULL); 1340 ire_refrele(tmp_ire); 1341 ire_refrele(sire); 1342 } else if (tmp_ire != NULL) { 1343 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1344 sizeof (iulp_t)); 1345 ire_refrele(tmp_ire); 1346 } 1347 } 1348 1349 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1350 opt = (nd_opt_hdr_t *)&rd[1]; 1351 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1352 if (opt != NULL) { 1353 err = ndp_lookup_then_add(ill, 1354 (uchar_t *)&opt[1], /* Link layer address */ 1355 gateway, 1356 &ipv6_all_ones, /* prefix mask */ 1357 &ipv6_all_zeros, /* Mapping mask */ 1358 0, 1359 nce_flags, 1360 ND_STALE, 1361 &nce); 1362 switch (err) { 1363 case 0: 1364 NCE_REFRELE(nce); 1365 break; 1366 case EEXIST: 1367 /* 1368 * Check to see if link layer address has changed and 1369 * process the nce_state accordingly. 
1370 */ 1371 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1372 NCE_REFRELE(nce); 1373 break; 1374 default: 1375 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1376 err)); 1377 goto fail_redirect; 1378 } 1379 } 1380 if (redirect_to_router) { 1381 /* icmp_redirect_ok_v6() must have already verified this */ 1382 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1383 1384 /* 1385 * Create a Route Association. This will allow us to remember 1386 * a router told us to use the particular gateway. 1387 */ 1388 ire = ire_create_v6( 1389 dst, 1390 &ipv6_all_ones, /* mask */ 1391 &prev_ire->ire_src_addr_v6, /* source addr */ 1392 gateway, /* gateway addr */ 1393 &prev_ire->ire_max_frag, /* max frag */ 1394 NULL, /* Fast Path header */ 1395 NULL, /* no rfq */ 1396 NULL, /* no stq */ 1397 IRE_HOST_REDIRECT, 1398 NULL, 1399 prev_ire->ire_ipif, 1400 NULL, 1401 0, 1402 0, 1403 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1404 &ulp_info); 1405 } else { 1406 /* 1407 * Just create an on link entry, may or may not be a router 1408 * If there is no link layer address option ire_add() won't 1409 * add this. 1410 */ 1411 ire = ire_create_v6( 1412 dst, /* gateway == dst */ 1413 &ipv6_all_ones, /* mask */ 1414 &prev_ire->ire_src_addr_v6, /* source addr */ 1415 &ipv6_all_zeros, /* gateway addr */ 1416 &prev_ire->ire_max_frag, /* max frag */ 1417 NULL, /* Fast Path header */ 1418 prev_ire->ire_rfq, /* ire rfq */ 1419 prev_ire->ire_stq, /* ire stq */ 1420 IRE_CACHE, 1421 NULL, 1422 prev_ire->ire_ipif, 1423 &ipv6_all_ones, 1424 0, 1425 0, 1426 0, 1427 &ulp_info); 1428 } 1429 if (ire == NULL) 1430 goto fail_redirect; 1431 1432 /* 1433 * XXX If there is no nce i.e there is no target link layer address 1434 * option with the redirect message, ire_add will fail. In that 1435 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1436 * to fix this. 
1437 */ 1438 if (ire_add(&ire, NULL, NULL, NULL) == 0) { 1439 1440 /* tell routing sockets that we received a redirect */ 1441 ip_rts_change_v6(RTM_REDIRECT, 1442 &rd->nd_rd_dst, 1443 &rd->nd_rd_target, 1444 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1445 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1446 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1447 1448 /* 1449 * Delete any existing IRE_HOST_REDIRECT for this destination. 1450 * This together with the added IRE has the effect of 1451 * modifying an existing redirect. 1452 */ 1453 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1454 ire->ire_ipif, NULL, ALL_ZONES, 0, 1455 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1456 1457 ire_refrele(ire); /* Held in ire_add_v6 */ 1458 1459 if (redir_ire != NULL) { 1460 ire_delete(redir_ire); 1461 ire_refrele(redir_ire); 1462 } 1463 } 1464 1465 if (prev_ire->ire_type == IRE_CACHE) 1466 ire_delete(prev_ire); 1467 ire_refrele(prev_ire); 1468 prev_ire = NULL; 1469 1470 fail_redirect: 1471 if (prev_ire != NULL) 1472 ire_refrele(prev_ire); 1473 freemsg(mp); 1474 } 1475 1476 static ill_t * 1477 ip_queue_to_ill_v6(queue_t *q) 1478 { 1479 ill_t *ill; 1480 1481 ASSERT(WR(q) == q); 1482 1483 if (q->q_next != NULL) { 1484 ill = (ill_t *)q->q_ptr; 1485 if (ILL_CAN_LOOKUP(ill)) 1486 ill_refhold(ill); 1487 else 1488 ill = NULL; 1489 } else { 1490 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1491 NULL, NULL, NULL, NULL, NULL); 1492 } 1493 if (ill == NULL) 1494 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1495 return (ill); 1496 } 1497 1498 /* 1499 * Assigns an appropriate source address to the packet. 1500 * If origdst is one of our IP addresses that use it as the source. 1501 * If the queue is an ill queue then select a source from that ill. 1502 * Otherwise pick a source based on a route lookup back to the origsrc. 1503 * 1504 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1505 */ 1506 static in6_addr_t * 1507 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1508 in6_addr_t *src) 1509 { 1510 ill_t *ill; 1511 ire_t *ire; 1512 ipif_t *ipif; 1513 zoneid_t zoneid; 1514 1515 ASSERT(!(wq->q_flag & QREADR)); 1516 if (wq->q_next != NULL) { 1517 ill = (ill_t *)wq->q_ptr; 1518 zoneid = GLOBAL_ZONEID; 1519 } else { 1520 ill = NULL; 1521 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1522 } 1523 1524 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1525 NULL, NULL, zoneid, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1526 if (ire != NULL) { 1527 /* Destined to one of our addresses */ 1528 *src = *origdst; 1529 ire_refrele(ire); 1530 return (src); 1531 } 1532 if (ire != NULL) { 1533 ire_refrele(ire); 1534 ire = NULL; 1535 } 1536 if (ill == NULL) { 1537 /* What is the route back to the original source? */ 1538 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1539 NULL, NULL, zoneid, 1540 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1541 if (ire == NULL) { 1542 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1543 return (NULL); 1544 } 1545 /* 1546 * Does not matter whether we use ire_stq or ire_ipif here. 1547 * Just pick an ill for ICMP replies. 1548 */ 1549 ASSERT(ire->ire_ipif != NULL); 1550 ill = ire->ire_ipif->ipif_ill; 1551 ire_refrele(ire); 1552 } 1553 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1554 IPV6_PREFER_SRC_DEFAULT, zoneid); 1555 if (ipif != NULL) { 1556 *src = ipif->ipif_v6src_addr; 1557 ipif_refrele(ipif); 1558 return (src); 1559 } 1560 /* 1561 * Unusual case - can't find a usable source address to reach the 1562 * original source. Use what in the route to the source. 
1563 */ 1564 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1565 NULL, NULL, zoneid, (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1566 if (ire == NULL) { 1567 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1568 return (NULL); 1569 } 1570 ASSERT(ire != NULL); 1571 *src = ire->ire_src_addr_v6; 1572 ire_refrele(ire); 1573 return (src); 1574 } 1575 1576 /* 1577 * Build and ship an IPv6 ICMP message using the packet data in mp, 1578 * and the ICMP header pointed to by "stuff". (May be called as 1579 * writer.) 1580 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1581 * verify that an icmp error packet can be sent. 1582 * 1583 * If q is an ill write side queue (which is the case when packets 1584 * arrive from ip_rput) then ip_wput code will ensure that packets to 1585 * link-local destinations are sent out that ill. 1586 * 1587 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1588 * source address (see above function). 1589 */ 1590 static void 1591 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1592 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1593 { 1594 ip6_t *ip6h; 1595 in6_addr_t v6dst; 1596 size_t len_needed; 1597 size_t msg_len; 1598 mblk_t *mp1; 1599 icmp6_t *icmp6; 1600 ill_t *ill; 1601 in6_addr_t v6src; 1602 mblk_t *ipsec_mp; 1603 ipsec_out_t *io; 1604 1605 ill = ip_queue_to_ill_v6(q); 1606 if (ill == NULL) { 1607 freemsg(mp); 1608 return; 1609 } 1610 1611 if (mctl_present) { 1612 /* 1613 * If it is : 1614 * 1615 * 1) a IPSEC_OUT, then this is caused by outbound 1616 * datagram originating on this host. IPSEC processing 1617 * may or may not have been done. Refer to comments above 1618 * icmp_inbound_error_fanout for details. 1619 * 1620 * 2) a IPSEC_IN if we are generating a icmp_message 1621 * for an incoming datagram destined for us i.e called 1622 * from ip_fanout_send_icmp. 
1623 */ 1624 ipsec_info_t *in; 1625 1626 ipsec_mp = mp; 1627 mp = ipsec_mp->b_cont; 1628 1629 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1630 ip6h = (ip6_t *)mp->b_rptr; 1631 1632 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1633 in->ipsec_info_type == IPSEC_IN); 1634 1635 if (in->ipsec_info_type == IPSEC_IN) { 1636 /* 1637 * Convert the IPSEC_IN to IPSEC_OUT. 1638 */ 1639 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1640 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1641 ill_refrele(ill); 1642 return; 1643 } 1644 } else { 1645 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1646 io = (ipsec_out_t *)in; 1647 /* 1648 * Clear out ipsec_out_proc_begin, so we do a fresh 1649 * ire lookup. 1650 */ 1651 io->ipsec_out_proc_begin = B_FALSE; 1652 } 1653 } else { 1654 /* 1655 * This is in clear. The icmp message we are building 1656 * here should go out in clear. 1657 */ 1658 ipsec_in_t *ii; 1659 ASSERT(mp->b_datap->db_type == M_DATA); 1660 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1661 freemsg(mp); 1662 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1663 ill_refrele(ill); 1664 return; 1665 } 1666 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1667 1668 /* This is not a secure packet */ 1669 ii->ipsec_in_secure = B_FALSE; 1670 ipsec_mp->b_cont = mp; 1671 ip6h = (ip6_t *)mp->b_rptr; 1672 /* 1673 * Convert the IPSEC_IN to IPSEC_OUT. 
1674 */ 1675 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1676 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1677 ill_refrele(ill); 1678 return; 1679 } 1680 } 1681 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1682 1683 if (v6src_ptr != NULL) { 1684 v6src = *v6src_ptr; 1685 } else { 1686 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1687 &v6src) == NULL) { 1688 freemsg(ipsec_mp); 1689 ill_refrele(ill); 1690 return; 1691 } 1692 } 1693 v6dst = ip6h->ip6_src; 1694 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1695 msg_len = msgdsize(mp); 1696 if (msg_len > len_needed) { 1697 if (!adjmsg(mp, len_needed - msg_len)) { 1698 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1699 freemsg(ipsec_mp); 1700 ill_refrele(ill); 1701 return; 1702 } 1703 msg_len = len_needed; 1704 } 1705 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1706 if (mp1 == NULL) { 1707 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1708 freemsg(ipsec_mp); 1709 ill_refrele(ill); 1710 return; 1711 } 1712 ill_refrele(ill); 1713 mp1->b_cont = mp; 1714 mp = mp1; 1715 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1716 io->ipsec_out_type == IPSEC_OUT); 1717 ipsec_mp->b_cont = mp; 1718 1719 /* 1720 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1721 * node generates be accepted in peace by all on-host destinations. 1722 * If we do NOT assume that all on-host destinations trust 1723 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1724 * (Look for ipsec_out_icmp_loopback). 
1725 */ 1726 io->ipsec_out_icmp_loopback = B_TRUE; 1727 1728 ip6h = (ip6_t *)mp->b_rptr; 1729 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1730 1731 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1732 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1733 ip6h->ip6_hops = ipv6_def_hops; 1734 ip6h->ip6_dst = v6dst; 1735 ip6h->ip6_src = v6src; 1736 msg_len += IPV6_HDR_LEN + len; 1737 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1738 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1739 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1740 } 1741 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1742 icmp6 = (icmp6_t *)&ip6h[1]; 1743 bcopy(stuff, (char *)icmp6, len); 1744 /* 1745 * Prepare for checksum by putting icmp length in the icmp 1746 * checksum field. The checksum is calculated in ip_wput_v6. 1747 */ 1748 icmp6->icmp6_cksum = ip6h->ip6_plen; 1749 if (icmp6->icmp6_type == ND_REDIRECT) { 1750 ip6h->ip6_hops = IPV6_MAX_HOPS; 1751 } 1752 /* Send to V6 writeside put routine */ 1753 put(q, ipsec_mp); 1754 } 1755 1756 /* 1757 * Update the output mib when ICMPv6 packets are sent. 
1758 */ 1759 static void 1760 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1761 { 1762 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1763 1764 switch (icmp6->icmp6_type) { 1765 case ICMP6_DST_UNREACH: 1766 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1767 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1768 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1769 break; 1770 1771 case ICMP6_TIME_EXCEEDED: 1772 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1773 break; 1774 1775 case ICMP6_PARAM_PROB: 1776 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1777 break; 1778 1779 case ICMP6_PACKET_TOO_BIG: 1780 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1781 break; 1782 1783 case ICMP6_ECHO_REQUEST: 1784 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1785 break; 1786 1787 case ICMP6_ECHO_REPLY: 1788 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1789 break; 1790 1791 case ND_ROUTER_SOLICIT: 1792 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1793 break; 1794 1795 case ND_ROUTER_ADVERT: 1796 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1797 break; 1798 1799 case ND_NEIGHBOR_SOLICIT: 1800 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1801 break; 1802 1803 case ND_NEIGHBOR_ADVERT: 1804 BUMP_MIB(ill->ill_icmp6_mib, 1805 ipv6IfIcmpOutNeighborAdvertisements); 1806 break; 1807 1808 case ND_REDIRECT: 1809 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1810 break; 1811 1812 case MLD_LISTENER_QUERY: 1813 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1814 break; 1815 1816 case MLD_LISTENER_REPORT: 1817 case MLD_V2_LISTENER_REPORT: 1818 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1819 break; 1820 1821 case MLD_LISTENER_REDUCTION: 1822 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1823 break; 1824 } 1825 } 1826 1827 /* 1828 * Check if it is ok to send an ICMPv6 error packet in 1829 * response to the IP packet in 
mp. 1830 * Free the message and return null if no 1831 * ICMP error packet should be sent. 1832 */ 1833 static mblk_t * 1834 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1835 boolean_t llbcast, boolean_t mcast_ok) 1836 { 1837 ip6_t *ip6h; 1838 1839 if (!mp) 1840 return (NULL); 1841 1842 ip6h = (ip6_t *)mp->b_rptr; 1843 1844 /* Check if source address uniquely identifies the host */ 1845 1846 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1847 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1848 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1849 freemsg(mp); 1850 return (NULL); 1851 } 1852 1853 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1854 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1855 icmp6_t *icmp6; 1856 1857 if (mp->b_wptr - mp->b_rptr < len_needed) { 1858 if (!pullupmsg(mp, len_needed)) { 1859 ill_t *ill; 1860 1861 ill = ip_queue_to_ill_v6(q); 1862 if (ill == NULL) { 1863 BUMP_MIB(&icmp6_mib, 1864 ipv6IfIcmpInErrors); 1865 } else { 1866 BUMP_MIB(ill->ill_icmp6_mib, 1867 ipv6IfIcmpInErrors); 1868 ill_refrele(ill); 1869 } 1870 freemsg(mp); 1871 return (NULL); 1872 } 1873 ip6h = (ip6_t *)mp->b_rptr; 1874 } 1875 icmp6 = (icmp6_t *)&ip6h[1]; 1876 /* Explicitly do not generate errors in response to redirects */ 1877 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1878 icmp6->icmp6_type == ND_REDIRECT) { 1879 freemsg(mp); 1880 return (NULL); 1881 } 1882 } 1883 /* 1884 * Check that the destination is not multicast and that the packet 1885 * was not sent on link layer broadcast or multicast. (Exception 1886 * is Packet too big message as per the draft - when mcast_ok is set.) 1887 */ 1888 if (!mcast_ok && 1889 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1890 freemsg(mp); 1891 return (NULL); 1892 } 1893 if (icmp_err_rate_limit()) { 1894 /* 1895 * Only send ICMP error packets every so often. 1896 * This should be done on a per port/source basis, 1897 * but for now this will suffice. 
1898 */ 1899 freemsg(mp); 1900 return (NULL); 1901 } 1902 return (mp); 1903 } 1904 1905 /* 1906 * Generate an ICMPv6 redirect message. 1907 * Include target link layer address option if it exits. 1908 * Always include redirect header. 1909 */ 1910 static void 1911 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1912 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1913 { 1914 nd_redirect_t *rd; 1915 nd_opt_rd_hdr_t *rdh; 1916 uchar_t *buf; 1917 nce_t *nce = NULL; 1918 nd_opt_hdr_t *opt; 1919 int len; 1920 int ll_opt_len = 0; 1921 int max_redir_hdr_data_len; 1922 int pkt_len; 1923 in6_addr_t *srcp; 1924 1925 /* 1926 * We are called from ip_rput where we could 1927 * not have attached an IPSEC_IN. 1928 */ 1929 ASSERT(mp->b_datap->db_type == M_DATA); 1930 1931 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1932 if (mp == NULL) 1933 return; 1934 nce = ndp_lookup(ill, targetp, B_FALSE); 1935 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1936 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1937 ill->ill_phys_addr_length + 7)/8 * 8; 1938 } 1939 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1940 ASSERT(len % 4 == 0); 1941 buf = kmem_alloc(len, KM_NOSLEEP); 1942 if (buf == NULL) { 1943 if (nce != NULL) 1944 NCE_REFRELE(nce); 1945 freemsg(mp); 1946 return; 1947 } 1948 1949 rd = (nd_redirect_t *)buf; 1950 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1951 rd->nd_rd_code = 0; 1952 rd->nd_rd_reserved = 0; 1953 rd->nd_rd_target = *targetp; 1954 rd->nd_rd_dst = *dest; 1955 1956 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1957 if (nce != NULL && ll_opt_len != 0) { 1958 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1959 opt->nd_opt_len = ll_opt_len/8; 1960 bcopy((char *)nce->nce_res_mp->b_rptr + 1961 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1962 ill->ill_phys_addr_length); 1963 } 1964 if (nce != NULL) 1965 NCE_REFRELE(nce); 1966 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1967 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1968 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1969 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1970 pkt_len = msgdsize(mp); 1971 /* Make sure mp is 8 byte aligned */ 1972 if (pkt_len > max_redir_hdr_data_len) { 1973 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1974 sizeof (nd_opt_rd_hdr_t))/8; 1975 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1976 } else { 1977 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1978 (void) adjmsg(mp, -(pkt_len % 8)); 1979 } 1980 rdh->nd_opt_rh_reserved1 = 0; 1981 rdh->nd_opt_rh_reserved2 = 0; 1982 /* ipif_v6src_addr contains the link-local source address */ 1983 rw_enter(&ill_g_lock, RW_READER); 1984 if (ill->ill_group != NULL) { 1985 /* 1986 * The receiver of the redirect will verify whether it 1987 * had a route through us (srcp that we will use in 1988 * the redirect) or not. As we load spread even link-locals, 1989 * we don't know which source address the receiver of 1990 * redirect has in its route for communicating with us. 1991 * Thus we randomly choose a source here and finally we 1992 * should get to the right one and it will eventually 1993 * accept the redirect from us. We can't call 1994 * ip_lookup_scope_v6 because we don't have the right 1995 * link-local address here. Thus we randomly choose one. 1996 */ 1997 int cnt = ill->ill_group->illgrp_ill_count; 1998 1999 ill = ill->ill_group->illgrp_ill; 2000 cnt = ++icmp_redirect_v6_src_index % cnt; 2001 while (cnt--) 2002 ill = ill->ill_group_next; 2003 srcp = &ill->ill_ipif->ipif_v6src_addr; 2004 } else { 2005 srcp = &ill->ill_ipif->ipif_v6src_addr; 2006 } 2007 rw_exit(&ill_g_lock); 2008 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2009 kmem_free(buf, len); 2010 } 2011 2012 2013 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2014 void 2015 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2016 boolean_t llbcast, boolean_t mcast_ok) 2017 { 2018 icmp6_t icmp6; 2019 boolean_t mctl_present; 2020 mblk_t *first_mp; 2021 2022 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2023 2024 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2025 if (mp == NULL) { 2026 if (mctl_present) 2027 freeb(first_mp); 2028 return; 2029 } 2030 bzero(&icmp6, sizeof (icmp6_t)); 2031 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2032 icmp6.icmp6_code = code; 2033 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2034 } 2035 2036 /* 2037 * Generate an ICMP unreachable message. 2038 */ 2039 void 2040 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2041 boolean_t llbcast, boolean_t mcast_ok) 2042 { 2043 icmp6_t icmp6; 2044 boolean_t mctl_present; 2045 mblk_t *first_mp; 2046 2047 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2048 2049 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2050 if (mp == NULL) { 2051 if (mctl_present) 2052 freeb(first_mp); 2053 return; 2054 } 2055 bzero(&icmp6, sizeof (icmp6_t)); 2056 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2057 icmp6.icmp6_code = code; 2058 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2059 } 2060 2061 /* 2062 * Generate an ICMP pkt too big message. 
2063 */ 2064 static void 2065 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2066 boolean_t llbcast, boolean_t mcast_ok) 2067 { 2068 icmp6_t icmp6; 2069 mblk_t *first_mp; 2070 boolean_t mctl_present; 2071 2072 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2073 2074 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2075 if (mp == NULL) { 2076 if (mctl_present) 2077 freeb(first_mp); 2078 return; 2079 } 2080 bzero(&icmp6, sizeof (icmp6_t)); 2081 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2082 icmp6.icmp6_code = 0; 2083 icmp6.icmp6_mtu = htonl(mtu); 2084 2085 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2086 } 2087 2088 /* 2089 * Generate an ICMP parameter problem message. (May be called as writer.) 2090 * 'offset' is the offset from the beginning of the packet in error. 2091 */ 2092 static void 2093 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2094 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2095 { 2096 icmp6_t icmp6; 2097 boolean_t mctl_present; 2098 mblk_t *first_mp; 2099 2100 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2101 2102 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2103 if (mp == NULL) { 2104 if (mctl_present) 2105 freeb(first_mp); 2106 return; 2107 } 2108 bzero((char *)&icmp6, sizeof (icmp6_t)); 2109 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2110 icmp6.icmp6_code = code; 2111 icmp6.icmp6_pptr = htonl(offset); 2112 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2113 } 2114 2115 /* 2116 * This code will need to take into account the possibility of binding 2117 * to a link local address on a multi-homed host, in which case the 2118 * outgoing interface (from the conn) will need to be used when getting 2119 * an ire for the dst. 
Going through proper outgoing interface and 2120 * choosing the source address corresponding to the outgoing interface 2121 * is necessary when the destination address is a link-local address and 2122 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2123 * This can happen when active connection is setup; thus ipp pointer 2124 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2125 * pointer is passed as ipp pointer. 2126 */ 2127 mblk_t * 2128 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2129 { 2130 ssize_t len; 2131 int protocol; 2132 struct T_bind_req *tbr; 2133 sin6_t *sin6; 2134 ipa6_conn_t *ac6; 2135 in6_addr_t *v6srcp; 2136 in6_addr_t *v6dstp; 2137 uint16_t lport; 2138 uint16_t fport; 2139 uchar_t *ucp; 2140 mblk_t *mp1; 2141 boolean_t ire_requested; 2142 boolean_t ipsec_policy_set; 2143 int error = 0; 2144 boolean_t local_bind; 2145 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2146 ipa6_conn_x_t *acx6; 2147 boolean_t verify_dst; 2148 2149 ASSERT(connp->conn_af_isv6); 2150 len = mp->b_wptr - mp->b_rptr; 2151 if (len < (sizeof (*tbr) + 1)) { 2152 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2153 "ip_bind_v6: bogus msg, len %ld", len); 2154 goto bad_addr; 2155 } 2156 /* Back up and extract the protocol identifier. */ 2157 mp->b_wptr--; 2158 tbr = (struct T_bind_req *)mp->b_rptr; 2159 /* Reset the message type in preparation for shipping it back. */ 2160 mp->b_datap->db_type = M_PCPROTO; 2161 2162 protocol = *mp->b_wptr & 0xFF; 2163 connp->conn_ulp = (uint8_t)protocol; 2164 2165 /* 2166 * Check for a zero length address. This is from a protocol that 2167 * wants to register to receive all packets of its type. 2168 */ 2169 if (tbr->ADDR_length == 0) { 2170 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2171 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2172 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2173 /* 2174 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2175 * Do not allow others to bind to these. 2176 */ 2177 goto bad_addr; 2178 } 2179 2180 connp->conn_srcv6 = ipv6_all_zeros; 2181 ipcl_proto_insert_v6(connp, protocol); 2182 2183 tbr->PRIM_type = T_BIND_ACK; 2184 return (mp); 2185 } 2186 2187 /* Extract the address pointer from the message. */ 2188 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2189 tbr->ADDR_length); 2190 if (ucp == NULL) { 2191 ip1dbg(("ip_bind_v6: no address\n")); 2192 goto bad_addr; 2193 } 2194 if (!OK_32PTR(ucp)) { 2195 ip1dbg(("ip_bind_v6: unaligned address\n")); 2196 goto bad_addr; 2197 } 2198 mp1 = mp->b_cont; /* trailing mp if any */ 2199 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2200 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2201 2202 switch (tbr->ADDR_length) { 2203 default: 2204 ip1dbg(("ip_bind_v6: bad address length %d\n", 2205 (int)tbr->ADDR_length)); 2206 goto bad_addr; 2207 2208 case IPV6_ADDR_LEN: 2209 /* Verification of local address only */ 2210 v6srcp = (in6_addr_t *)ucp; 2211 lport = 0; 2212 local_bind = B_TRUE; 2213 break; 2214 2215 case sizeof (sin6_t): 2216 sin6 = (sin6_t *)ucp; 2217 v6srcp = &sin6->sin6_addr; 2218 lport = sin6->sin6_port; 2219 local_bind = B_TRUE; 2220 break; 2221 2222 case sizeof (ipa6_conn_t): 2223 /* 2224 * Verify that both the source and destination addresses 2225 * are valid. 2226 * Note that we allow connect to broadcast and multicast 2227 * addresses when ire_requested is set. Thus the ULP 2228 * has to check for IRE_BROADCAST and multicast. 2229 */ 2230 ac6 = (ipa6_conn_t *)ucp; 2231 v6srcp = &ac6->ac6_laddr; 2232 v6dstp = &ac6->ac6_faddr; 2233 fport = ac6->ac6_fport; 2234 /* For raw socket, the local port is not set. */ 2235 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2236 connp->conn_lport; 2237 local_bind = B_FALSE; 2238 /* Always verify destination reachability. 
*/ 2239 verify_dst = B_TRUE; 2240 break; 2241 2242 case sizeof (ipa6_conn_x_t): 2243 /* 2244 * Verify that the source address is valid. 2245 * Note that we allow connect to broadcast and multicast 2246 * addresses when ire_requested is set. Thus the ULP 2247 * has to check for IRE_BROADCAST and multicast. 2248 */ 2249 acx6 = (ipa6_conn_x_t *)ucp; 2250 ac6 = &acx6->ac6x_conn; 2251 v6srcp = &ac6->ac6_laddr; 2252 v6dstp = &ac6->ac6_faddr; 2253 fport = ac6->ac6_fport; 2254 lport = ac6->ac6_lport; 2255 local_bind = B_FALSE; 2256 /* 2257 * Client that passed ipa6_conn_x_t to us specifies whether to 2258 * verify destination reachability. 2259 */ 2260 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2261 break; 2262 } 2263 if (local_bind) { 2264 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2265 /* Bind to IPv4 address */ 2266 ipaddr_t v4src; 2267 2268 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2269 2270 error = ip_bind_laddr(connp, mp, v4src, lport, 2271 ire_requested, ipsec_policy_set, 2272 tbr->ADDR_length != IPV6_ADDR_LEN); 2273 if (error != 0) 2274 goto bad_addr; 2275 connp->conn_pkt_isv6 = B_FALSE; 2276 } else { 2277 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2278 error = 0; 2279 goto bad_addr; 2280 } 2281 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2282 ire_requested, ipsec_policy_set, 2283 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2284 if (error != 0) 2285 goto bad_addr; 2286 connp->conn_pkt_isv6 = B_TRUE; 2287 } 2288 if (protocol == IPPROTO_TCP) 2289 connp->conn_recv = tcp_conn_request; 2290 } else { 2291 /* 2292 * Bind to local and remote address. Local might be 2293 * unspecified in which case it will be extracted from 2294 * ire_src_addr_v6 2295 */ 2296 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2297 /* Connect to IPv4 address */ 2298 ipaddr_t v4src; 2299 ipaddr_t v4dst; 2300 2301 /* Is the source unspecified or mapped? 
*/ 2302 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2303 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2304 ip1dbg(("ip_bind_v6: " 2305 "dst is mapped, but not the src\n")); 2306 goto bad_addr; 2307 } 2308 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2309 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2310 2311 /* 2312 * XXX Fix needed. Need to pass ipsec_policy_set 2313 * instead of B_FALSE. 2314 */ 2315 2316 /* Always verify destination reachability. */ 2317 error = ip_bind_connected(connp, mp, &v4src, lport, 2318 v4dst, fport, ire_requested, ipsec_policy_set, 2319 B_TRUE, B_TRUE); 2320 if (error != 0) 2321 goto bad_addr; 2322 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2323 connp->conn_pkt_isv6 = B_FALSE; 2324 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2325 ip1dbg(("ip_bind_v6: " 2326 "src is mapped, but not the dst\n")); 2327 goto bad_addr; 2328 } else { 2329 error = ip_bind_connected_v6(connp, mp, v6srcp, 2330 lport, v6dstp, ipp, fport, ire_requested, 2331 ipsec_policy_set, B_TRUE, verify_dst); 2332 if (error != 0) 2333 goto bad_addr; 2334 connp->conn_pkt_isv6 = B_TRUE; 2335 } 2336 if (protocol == IPPROTO_TCP) 2337 connp->conn_recv = tcp_input; 2338 } 2339 /* Update qinfo if v4/v6 changed */ 2340 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2341 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2342 if (connp->conn_pkt_isv6) 2343 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2344 else 2345 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2346 } 2347 2348 /* 2349 * Pass the IPSEC headers size in ire_ipsec_overhead. 2350 * We can't do this in ip_bind_insert_ire because the policy 2351 * may not have been inherited at that point in time and hence 2352 * conn_out_enforce_policy may not be set. 2353 */ 2354 mp1 = mp->b_cont; 2355 if (ire_requested && connp->conn_out_enforce_policy && 2356 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2357 ire_t *ire = (ire_t *)mp1->b_rptr; 2358 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2359 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2360 } 2361 2362 /* Send it home. 
*/ 2363 mp->b_datap->db_type = M_PCPROTO; 2364 tbr->PRIM_type = T_BIND_ACK; 2365 return (mp); 2366 2367 bad_addr: 2368 if (error == EINPROGRESS) 2369 return (NULL); 2370 if (error > 0) 2371 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2372 else 2373 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2374 return (mp); 2375 } 2376 2377 /* 2378 * Here address is verified to be a valid local address. 2379 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2380 * address is also considered a valid local address. 2381 * In the case of a multicast address, however, the 2382 * upper protocol is expected to reset the src address 2383 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2384 * no packets are emitted with multicast address as 2385 * source address. 2386 * The addresses valid for bind are: 2387 * (1) - in6addr_any 2388 * (2) - IP address of an UP interface 2389 * (3) - IP address of a DOWN interface 2390 * (4) - a multicast address. In this case 2391 * the conn will only receive packets destined to 2392 * the specified multicast address. Note: the 2393 * application still has to issue an 2394 * IPV6_JOIN_GROUP socket option. 2395 * 2396 * In all the above cases, the bound address must be valid in the current zone. 2397 * When the address is loopback or multicast, there might be many matching IREs 2398 * so bind has to look up based on the zone. 2399 */ 2400 static int 2401 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2402 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2403 boolean_t fanout_insert) 2404 { 2405 int error = 0; 2406 ire_t *src_ire = NULL; 2407 ipif_t *ipif = NULL; 2408 mblk_t *policy_mp; 2409 zoneid_t zoneid; 2410 2411 if (ipsec_policy_set) 2412 policy_mp = mp->b_cont; 2413 2414 /* 2415 * If it was previously connected, conn_fully_bound would have 2416 * been set. 
2417 */ 2418 connp->conn_fully_bound = B_FALSE; 2419 2420 zoneid = connp->conn_zoneid; 2421 2422 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2423 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2424 0, NULL, NULL, zoneid, MATCH_IRE_ZONEONLY); 2425 /* 2426 * If an address other than in6addr_any is requested, 2427 * we verify that it is a valid address for bind 2428 * Note: Following code is in if-else-if form for 2429 * readability compared to a condition check. 2430 */ 2431 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2432 /* LINTED - statement has no consequent */ 2433 if (IRE_IS_LOCAL(src_ire)) { 2434 /* 2435 * (2) Bind to address of local UP interface 2436 */ 2437 ipif = src_ire->ire_ipif; 2438 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2439 ipif_t *multi_ipif = NULL; 2440 ire_t *save_ire; 2441 /* 2442 * (4) bind to multicast address. 2443 * Fake out the IRE returned to upper 2444 * layer to be a broadcast IRE in 2445 * ip_bind_insert_ire_v6(). 2446 * Pass other information that matches 2447 * the ipif (e.g. the source address). 
2448 * conn_multicast_ill is only used for 2449 * IPv6 packets 2450 */ 2451 mutex_enter(&connp->conn_lock); 2452 if (connp->conn_multicast_ill != NULL) { 2453 (void) ipif_lookup_zoneid( 2454 connp->conn_multicast_ill, zoneid, 0, 2455 &multi_ipif); 2456 } else { 2457 /* 2458 * Look for default like 2459 * ip_wput_v6 2460 */ 2461 multi_ipif = ipif_lookup_group_v6( 2462 &ipv6_unspecified_group, zoneid); 2463 } 2464 mutex_exit(&connp->conn_lock); 2465 save_ire = src_ire; 2466 src_ire = NULL; 2467 if (multi_ipif == NULL || 2468 !ire_requested || (src_ire = 2469 ipif_to_ire_v6(multi_ipif)) == 2470 NULL) { 2471 src_ire = save_ire; 2472 error = EADDRNOTAVAIL; 2473 } else { 2474 ASSERT(src_ire != NULL); 2475 if (save_ire != NULL) 2476 ire_refrele(save_ire); 2477 } 2478 if (multi_ipif != NULL) 2479 ipif_refrele(multi_ipif); 2480 } else { 2481 *mp->b_wptr++ = (char)connp->conn_ulp; 2482 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2483 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2484 if (ipif == NULL) { 2485 if (error == EINPROGRESS) { 2486 if (src_ire != NULL) 2487 ire_refrele(src_ire); 2488 return (error); 2489 } 2490 /* 2491 * Not a valid address for bind 2492 */ 2493 error = EADDRNOTAVAIL; 2494 } else { 2495 ipif_refrele(ipif); 2496 } 2497 /* 2498 * Just to keep it consistent with the processing in 2499 * ip_bind_v6(). 2500 */ 2501 mp->b_wptr--; 2502 } 2503 2504 if (error != 0) { 2505 /* Red Alert! Attempting to be a bogon! */ 2506 if (ip_debug > 2) { 2507 /* ip1dbg */ 2508 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2509 " address %s\n", AF_INET6, v6src); 2510 } 2511 goto bad_addr; 2512 } 2513 } 2514 2515 /* 2516 * Allow setting new policies. For example, disconnects come 2517 * down as ipa_t bind. As we would have set conn_policy_cached 2518 * to B_TRUE before, we should set it to B_FALSE, so that policy 2519 * can change after the disconnect. 
2520 */ 2521 connp->conn_policy_cached = B_FALSE; 2522 2523 /* If not fanout_insert this was just an address verification */ 2524 if (fanout_insert) { 2525 /* 2526 * The addresses have been verified. Time to insert in 2527 * the correct fanout list. 2528 */ 2529 connp->conn_srcv6 = *v6src; 2530 connp->conn_remv6 = ipv6_all_zeros; 2531 connp->conn_lport = lport; 2532 connp->conn_fport = 0; 2533 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2534 } 2535 if (error == 0) { 2536 if (ire_requested) { 2537 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2538 error = -1; 2539 goto bad_addr; 2540 } 2541 } else if (ipsec_policy_set) { 2542 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2543 error = -1; 2544 goto bad_addr; 2545 } 2546 } 2547 } 2548 bad_addr: 2549 if (src_ire != NULL) 2550 ire_refrele(src_ire); 2551 2552 if (ipsec_policy_set) { 2553 ASSERT(policy_mp != NULL); 2554 freeb(policy_mp); 2555 /* 2556 * As of now assume that nothing else accompanies 2557 * IPSEC_POLICY_SET. 
2558 */ 2559 mp->b_cont = NULL; 2560 } 2561 return (error); 2562 } 2563 2564 /* ARGSUSED */ 2565 static void 2566 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2567 void *dummy_arg) 2568 { 2569 conn_t *connp = NULL; 2570 t_scalar_t prim; 2571 2572 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2573 2574 if (CONN_Q(q)) 2575 connp = Q_TO_CONN(q); 2576 ASSERT(connp != NULL); 2577 2578 prim = ((union T_primitives *)mp->b_rptr)->type; 2579 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2580 2581 if (IPCL_IS_TCP(connp)) { 2582 /* Pass sticky_ipp for scope_id and pktinfo */ 2583 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2584 } else { 2585 /* For UDP and ICMP */ 2586 mp = ip_bind_v6(q, mp, connp, NULL); 2587 } 2588 if (mp != NULL) { 2589 if (IPCL_IS_TCP(connp)) { 2590 CONN_INC_REF(connp); 2591 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2592 connp, SQTAG_TCP_RPUTOTHER); 2593 } else if (IPCL_IS_UDP(connp)) { 2594 udp_resume_bind(connp, mp); 2595 } else { 2596 qreply(q, mp); 2597 CONN_OPER_PENDING_DONE(connp); 2598 } 2599 } 2600 } 2601 2602 /* 2603 * Verify that both the source and destination addresses 2604 * are valid. If verify_dst, then destination address must also be reachable, 2605 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2606 * It takes ip6_pkt_t * as one of the arguments to determine correct 2607 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2608 * destination address. Note that parameter ipp is only useful for TCP connect 2609 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2610 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2611 * 2612 */ 2613 static int 2614 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2615 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2616 boolean_t ire_requested, boolean_t ipsec_policy_set, 2617 boolean_t fanout_insert, boolean_t verify_dst) 2618 { 2619 ire_t *src_ire; 2620 ire_t *dst_ire; 2621 int error = 0; 2622 int protocol; 2623 mblk_t *policy_mp; 2624 ire_t *sire = NULL; 2625 ire_t *md_dst_ire = NULL; 2626 ill_t *md_ill = NULL; 2627 ill_t *dst_ill = NULL; 2628 ipif_t *src_ipif = NULL; 2629 zoneid_t zoneid; 2630 boolean_t ill_held = B_FALSE; 2631 2632 src_ire = dst_ire = NULL; 2633 /* 2634 * NOTE: The protocol is beyond the wptr because that's how 2635 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2636 */ 2637 protocol = *mp->b_wptr & 0xFF; 2638 2639 /* 2640 * If we never got a disconnect before, clear it now. 2641 */ 2642 connp->conn_fully_bound = B_FALSE; 2643 2644 if (ipsec_policy_set) { 2645 policy_mp = mp->b_cont; 2646 } 2647 2648 zoneid = connp->conn_zoneid; 2649 2650 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2651 ipif_t *ipif; 2652 2653 /* 2654 * Use an "emulated" IRE_BROADCAST to tell the transport it 2655 * is a multicast. 2656 * Pass other information that matches 2657 * the ipif (e.g. the source address). 
2658 * 2659 * conn_multicast_ill is only used for IPv6 packets 2660 */ 2661 mutex_enter(&connp->conn_lock); 2662 if (connp->conn_multicast_ill != NULL) { 2663 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2664 zoneid, 0, &ipif); 2665 } else { 2666 /* Look for default like ip_wput_v6 */ 2667 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2668 } 2669 mutex_exit(&connp->conn_lock); 2670 if (ipif == NULL || !ire_requested || 2671 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2672 if (ipif != NULL) 2673 ipif_refrele(ipif); 2674 if (ip_debug > 2) { 2675 /* ip1dbg */ 2676 pr_addr_dbg("ip_bind_connected_v6: bad " 2677 "connected multicast %s\n", AF_INET6, 2678 v6dst); 2679 } 2680 error = ENETUNREACH; 2681 goto bad_addr; 2682 } 2683 if (ipif != NULL) 2684 ipif_refrele(ipif); 2685 } else { 2686 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2687 NULL, &sire, zoneid, 2688 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2689 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE); 2690 /* 2691 * We also prevent ire's with src address INADDR_ANY to 2692 * be used, which are created temporarily for 2693 * sending out packets from endpoints that have 2694 * conn_unspec_src set. 2695 */ 2696 if (dst_ire == NULL || 2697 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2698 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2699 /* 2700 * When verifying destination reachability, we always 2701 * complain. 2702 * 2703 * When not verifying destination reachability but we 2704 * found an IRE, i.e. the destination is reachable, 2705 * then the other tests still apply and we complain. 
2706 */ 2707 if (verify_dst || (dst_ire != NULL)) { 2708 if (ip_debug > 2) { 2709 /* ip1dbg */ 2710 pr_addr_dbg("ip_bind_connected_v6: bad" 2711 " connected dst %s\n", AF_INET6, 2712 v6dst); 2713 } 2714 if (dst_ire == NULL || 2715 !(dst_ire->ire_type & IRE_HOST)) { 2716 error = ENETUNREACH; 2717 } else { 2718 error = EHOSTUNREACH; 2719 } 2720 goto bad_addr; 2721 } 2722 } 2723 } 2724 2725 /* 2726 * If the app does a connect(), it means that it will most likely 2727 * send more than 1 packet to the destination. It makes sense 2728 * to clear the temporary flag. 2729 */ 2730 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2731 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2732 irb_t *irb = dst_ire->ire_bucket; 2733 2734 rw_enter(&irb->irb_lock, RW_WRITER); 2735 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2736 irb->irb_tmp_ire_cnt--; 2737 rw_exit(&irb->irb_lock); 2738 } 2739 2740 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2741 2742 /* 2743 * See if we should notify ULP about MDT; we do this whether or not 2744 * ire_requested is TRUE, in order to handle active connects; MDT 2745 * eligibility tests for passive connects are handled separately 2746 * through tcp_adapt_ire(). We do this before the source address 2747 * selection, because dst_ire may change after a call to 2748 * ipif_select_source_v6(). This is a best-effort check, as the 2749 * packet for this connection may not actually go through 2750 * dst_ire->ire_stq, and the exact IRE can only be known after 2751 * calling ip_newroute_v6(). This is why we further check on the 2752 * IRE during Multidata packet transmission in tcp_multisend(). 
2753 */ 2754 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2755 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2756 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2757 ILL_MDT_CAPABLE(md_ill)) { 2758 md_dst_ire = dst_ire; 2759 IRE_REFHOLD(md_dst_ire); 2760 } 2761 2762 if (dst_ire != NULL && 2763 dst_ire->ire_type == IRE_LOCAL && 2764 dst_ire->ire_zoneid != zoneid) { 2765 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2766 zoneid, 0, 2767 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2768 MATCH_IRE_RJ_BHOLE); 2769 if (src_ire == NULL) { 2770 error = EHOSTUNREACH; 2771 goto bad_addr; 2772 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2773 if (!(src_ire->ire_type & IRE_HOST)) 2774 error = ENETUNREACH; 2775 else 2776 error = EHOSTUNREACH; 2777 goto bad_addr; 2778 } 2779 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2780 src_ipif = src_ire->ire_ipif; 2781 ipif_refhold(src_ipif); 2782 *v6src = src_ipif->ipif_v6lcl_addr; 2783 } 2784 ire_refrele(src_ire); 2785 src_ire = NULL; 2786 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2787 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2788 *v6src = sire->ire_src_addr_v6; 2789 ire_refrele(dst_ire); 2790 dst_ire = sire; 2791 sire = NULL; 2792 } else if (dst_ire->ire_type == IRE_CACHE && 2793 (dst_ire->ire_flags & RTF_SETSRC)) { 2794 ASSERT(dst_ire->ire_zoneid == zoneid); 2795 *v6src = dst_ire->ire_src_addr_v6; 2796 } else { 2797 /* 2798 * Pick a source address so that a proper inbound load 2799 * spreading would happen. Use dst_ill specified by the 2800 * app. when socket option or scopeid is set. 
2801 */ 2802 int err; 2803 2804 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2805 uint_t if_index; 2806 2807 /* 2808 * Scope id or IPV6_PKTINFO 2809 */ 2810 2811 if_index = ipp->ipp_ifindex; 2812 dst_ill = ill_lookup_on_ifindex( 2813 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2814 if (dst_ill == NULL) { 2815 ip1dbg(("ip_bind_connected_v6:" 2816 " bad ifindex %d\n", if_index)); 2817 error = EADDRNOTAVAIL; 2818 goto bad_addr; 2819 } 2820 ill_held = B_TRUE; 2821 } else if (connp->conn_outgoing_ill != NULL) { 2822 /* 2823 * For IPV6_BOUND_IF socket option, 2824 * conn_outgoing_ill should be set 2825 * already in TCP or UDP/ICMP. 2826 */ 2827 dst_ill = conn_get_held_ill(connp, 2828 &connp->conn_outgoing_ill, &err); 2829 if (err == ILL_LOOKUP_FAILED) { 2830 ip1dbg(("ip_bind_connected_v6:" 2831 "no ill for bound_if\n")); 2832 error = EADDRNOTAVAIL; 2833 goto bad_addr; 2834 } 2835 ill_held = B_TRUE; 2836 } else if (dst_ire->ire_stq != NULL) { 2837 /* No need to hold ill here */ 2838 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2839 } else { 2840 /* No need to hold ill here */ 2841 dst_ill = dst_ire->ire_ipif->ipif_ill; 2842 } 2843 if (!ip6_asp_can_lookup()) { 2844 *mp->b_wptr++ = (char)protocol; 2845 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2846 ip_bind_connected_resume_v6); 2847 error = EINPROGRESS; 2848 goto refrele_and_quit; 2849 } 2850 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2851 B_FALSE, connp->conn_src_preferences, zoneid); 2852 ip6_asp_table_refrele(); 2853 if (src_ipif == NULL) { 2854 pr_addr_dbg("ip_bind_connected_v6: " 2855 "no usable source address for " 2856 "connection to %s\n", AF_INET6, v6dst); 2857 error = EADDRNOTAVAIL; 2858 goto bad_addr; 2859 } 2860 *v6src = src_ipif->ipif_v6lcl_addr; 2861 } 2862 } 2863 2864 /* 2865 * We do ire_route_lookup_v6() here (and not an interface lookup) 2866 * as we assert that v6src should only come from an 2867 * UP interface for hard binding. 
2868 */ 2869 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2870 NULL, zoneid, MATCH_IRE_ZONEONLY); 2871 2872 /* src_ire must be a local|loopback */ 2873 if (!IRE_IS_LOCAL(src_ire)) { 2874 if (ip_debug > 2) { 2875 /* ip1dbg */ 2876 pr_addr_dbg("ip_bind_connected_v6: bad " 2877 "connected src %s\n", AF_INET6, v6src); 2878 } 2879 error = EADDRNOTAVAIL; 2880 goto bad_addr; 2881 } 2882 2883 /* 2884 * If the source address is a loopback address, the 2885 * destination had best be local or multicast. 2886 * The transports that can't handle multicast will reject 2887 * those addresses. 2888 */ 2889 if (src_ire->ire_type == IRE_LOOPBACK && 2890 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2891 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2892 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2893 error = -1; 2894 goto bad_addr; 2895 } 2896 /* 2897 * Allow setting new policies. For example, disconnects come 2898 * down as ipa_t bind. As we would have set conn_policy_cached 2899 * to B_TRUE before, we should set it to B_FALSE, so that policy 2900 * can change after the disconnect. 2901 */ 2902 connp->conn_policy_cached = B_FALSE; 2903 2904 /* 2905 * The addresses have been verified. Initialize the conn 2906 * before calling the policy as they expect the conns 2907 * initialized. 2908 */ 2909 connp->conn_srcv6 = *v6src; 2910 connp->conn_remv6 = *v6dst; 2911 connp->conn_lport = lport; 2912 connp->conn_fport = fport; 2913 2914 ASSERT(!(ipsec_policy_set && ire_requested)); 2915 if (ire_requested) { 2916 iulp_t *ulp_info = NULL; 2917 2918 /* 2919 * Note that sire will not be NULL if this is an off-link 2920 * connection and there is not cache for that dest yet. 2921 * 2922 * XXX Because of an existing bug, if there are multiple 2923 * default routes, the IRE returned now may not be the actual 2924 * default route used (default routes are chosen in a 2925 * round robin fashion). 
So if the metrics for different 2926 * default routes are different, we may return the wrong 2927 * metrics. This will not be a problem if the existing 2928 * bug is fixed. 2929 */ 2930 if (sire != NULL) 2931 ulp_info = &(sire->ire_uinfo); 2932 2933 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 2934 error = -1; 2935 goto bad_addr; 2936 } 2937 } else if (ipsec_policy_set) { 2938 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2939 error = -1; 2940 goto bad_addr; 2941 } 2942 } 2943 2944 /* 2945 * Cache IPsec policy in this conn. If we have per-socket policy, 2946 * we'll cache that. If we don't, we'll inherit global policy. 2947 * 2948 * We can't insert until the conn reflects the policy. Note that 2949 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2950 * connections where we don't have a policy. This is to prevent 2951 * global policy lookups in the inbound path. 2952 * 2953 * If we insert before we set conn_policy_cached, 2954 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2955 * because global policy cound be non-empty. We normally call 2956 * ipsec_check_policy() for conn_policy_cached connections only if 2957 * conn_in_enforce_policy is set. But in this case, 2958 * conn_policy_cached can get set anytime since we made the 2959 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2960 * is called, which will make the above assumption false. Thus, we 2961 * need to insert after we set conn_policy_cached. 2962 */ 2963 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2964 goto bad_addr; 2965 2966 /* If not fanout_insert this was just an address verification */ 2967 if (fanout_insert) { 2968 /* 2969 * The addresses have been verified. Time to insert in 2970 * the correct fanout list. 2971 */ 2972 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2973 connp->conn_ports, 2974 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2975 } 2976 if (error == 0) { 2977 connp->conn_fully_bound = B_TRUE; 2978 /* 2979 * Our initial checks for MDT have passed; the IRE is not 2980 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2981 * be supporting MDT. Pass the IRE, IPC and ILL into 2982 * ip_mdinfo_return(), which performs further checks 2983 * against them and upon success, returns the MDT info 2984 * mblk which we will attach to the bind acknowledgment. 2985 */ 2986 if (md_dst_ire != NULL) { 2987 mblk_t *mdinfo_mp; 2988 2989 ASSERT(md_ill != NULL); 2990 ASSERT(md_ill->ill_mdt_capab != NULL); 2991 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2992 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2993 linkb(mp, mdinfo_mp); 2994 } 2995 } 2996 bad_addr: 2997 if (ipsec_policy_set) { 2998 ASSERT(policy_mp != NULL); 2999 freeb(policy_mp); 3000 /* 3001 * As of now assume that nothing else accompanies 3002 * IPSEC_POLICY_SET. 3003 */ 3004 mp->b_cont = NULL; 3005 } 3006 refrele_and_quit: 3007 if (src_ire != NULL) 3008 IRE_REFRELE(src_ire); 3009 if (dst_ire != NULL) 3010 IRE_REFRELE(dst_ire); 3011 if (sire != NULL) 3012 IRE_REFRELE(sire); 3013 if (src_ipif != NULL) 3014 ipif_refrele(src_ipif); 3015 if (md_dst_ire != NULL) 3016 IRE_REFRELE(md_dst_ire); 3017 if (ill_held && dst_ill != NULL) 3018 ill_refrele(dst_ill); 3019 return (error); 3020 } 3021 3022 /* 3023 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3024 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3025 */ 3026 static boolean_t 3027 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3028 iulp_t *ulp_info) 3029 { 3030 mblk_t *mp1; 3031 ire_t *ret_ire; 3032 3033 mp1 = mp->b_cont; 3034 ASSERT(mp1 != NULL); 3035 3036 if (ire != NULL) { 3037 /* 3038 * mp1 initialized above to IRE_DB_REQ_TYPE 3039 * appended mblk. Its <upper protocol>'s 3040 * job to make sure there is room. 
3041 */ 3042 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3043 return (B_FALSE); 3044 3045 mp1->b_datap->db_type = IRE_DB_TYPE; 3046 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3047 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3048 ret_ire = (ire_t *)mp1->b_rptr; 3049 if (IN6_IS_ADDR_MULTICAST(dst) || 3050 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3051 ret_ire->ire_type = IRE_BROADCAST; 3052 ret_ire->ire_addr_v6 = *dst; 3053 } 3054 if (ulp_info != NULL) { 3055 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3056 sizeof (iulp_t)); 3057 } 3058 ret_ire->ire_mp = mp1; 3059 } else { 3060 /* 3061 * No IRE was found. Remove IRE mblk. 3062 */ 3063 mp->b_cont = mp1->b_cont; 3064 freeb(mp1); 3065 } 3066 return (B_TRUE); 3067 } 3068 3069 /* 3070 * Add an ip6i_t header to the front of the mblk. 3071 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3072 * Returns NULL if allocation fails (and frees original message). 3073 * Used in outgoing path when going through ip_newroute_*v6(). 3074 * Used in incoming path to pass ifindex to transports. 
3075 */ 3076 mblk_t * 3077 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3078 { 3079 mblk_t *mp1; 3080 ip6i_t *ip6i; 3081 ip6_t *ip6h; 3082 3083 ip6h = (ip6_t *)mp->b_rptr; 3084 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3085 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3086 mp->b_datap->db_ref > 1) { 3087 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3088 if (mp1 == NULL) { 3089 freemsg(mp); 3090 return (NULL); 3091 } 3092 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3093 mp1->b_cont = mp; 3094 mp = mp1; 3095 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3096 } 3097 mp->b_rptr = (uchar_t *)ip6i; 3098 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3099 ip6i->ip6i_nxt = IPPROTO_RAW; 3100 if (ill != NULL) { 3101 ip6i->ip6i_flags = IP6I_IFINDEX; 3102 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3103 } else { 3104 ip6i->ip6i_flags = 0; 3105 } 3106 ip6i->ip6i_nexthop = *dst; 3107 return (mp); 3108 } 3109 3110 /* 3111 * Handle protocols with which IP is less intimate. There 3112 * can be more than one stream bound to a particular 3113 * protocol. When this is the case, normally each one gets a copy 3114 * of any incoming packets. 3115 * However, if the packet was tunneled and not multicast we only send to it 3116 * the first match. 3117 * 3118 * Zones notes: 3119 * Packets will be distributed to streams in all zones. This is really only 3120 * useful for ICMPv6 as only applications in the global zone can create raw 3121 * sockets for other protocols. 
3122 */ 3123 static void 3124 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3125 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3126 boolean_t mctl_present, zoneid_t zoneid) 3127 { 3128 queue_t *rq; 3129 mblk_t *mp1, *first_mp1; 3130 in6_addr_t dst = ip6h->ip6_dst; 3131 in6_addr_t src = ip6h->ip6_src; 3132 boolean_t one_only; 3133 mblk_t *first_mp = mp; 3134 boolean_t secure; 3135 conn_t *connp, *first_connp, *next_connp; 3136 connf_t *connfp; 3137 3138 if (mctl_present) { 3139 mp = first_mp->b_cont; 3140 secure = ipsec_in_is_secure(first_mp); 3141 ASSERT(mp != NULL); 3142 } else { 3143 secure = B_FALSE; 3144 } 3145 3146 /* 3147 * If the packet was tunneled and not multicast we only send to it 3148 * the first match. 3149 */ 3150 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3151 !IN6_IS_ADDR_MULTICAST(&dst)); 3152 3153 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3154 mutex_enter(&connfp->connf_lock); 3155 connp = connfp->connf_head; 3156 for (connp = connfp->connf_head; connp != NULL; 3157 connp = connp->conn_next) { 3158 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3159 zoneid)) 3160 break; 3161 } 3162 3163 if (connp == NULL || connp->conn_upq == NULL) { 3164 /* 3165 * No one bound to this port. Is 3166 * there a client that wants all 3167 * unclaimed datagrams? 3168 */ 3169 mutex_exit(&connfp->connf_lock); 3170 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3171 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3172 nexthdr_offset, mctl_present, zoneid)) { 3173 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3174 } 3175 3176 return; 3177 } 3178 3179 CONN_INC_REF(connp); 3180 first_connp = connp; 3181 3182 /* 3183 * XXX: Fix the multiple protocol listeners case. We should not 3184 * be walking the conn->next list here. 3185 */ 3186 if (one_only) { 3187 /* 3188 * Only send message to one tunnel driver by immediately 3189 * terminating the loop. 
3190 */ 3191 connp = NULL; 3192 } else { 3193 connp = connp->conn_next; 3194 3195 } 3196 for (;;) { 3197 while (connp != NULL) { 3198 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3199 flags, zoneid)) 3200 break; 3201 connp = connp->conn_next; 3202 } 3203 3204 /* 3205 * Just copy the data part alone. The mctl part is 3206 * needed just for verifying policy and it is never 3207 * sent up. 3208 */ 3209 if (connp == NULL || connp->conn_upq == NULL || 3210 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3211 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3212 /* 3213 * No more intested clients or memory 3214 * allocation failed 3215 */ 3216 connp = first_connp; 3217 break; 3218 } 3219 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3220 CONN_INC_REF(connp); 3221 mutex_exit(&connfp->connf_lock); 3222 rq = connp->conn_rq; 3223 /* 3224 * For link-local always add ifindex so that transport can set 3225 * sin6_scope_id. Avoid it for ICMP error fanout. 3226 */ 3227 if ((connp->conn_ipv6_recvpktinfo || 3228 IN6_IS_ADDR_LINKLOCAL(&src)) && 3229 (flags & IP_FF_IP6INFO)) { 3230 /* Add header */ 3231 mp1 = ip_add_info_v6(mp1, inill, &dst); 3232 } 3233 if (mp1 == NULL) { 3234 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3235 } else if (!canputnext(rq)) { 3236 if (flags & IP_FF_RAWIP) { 3237 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3238 } else { 3239 BUMP_MIB(ill->ill_icmp6_mib, 3240 ipv6IfIcmpInOverflows); 3241 } 3242 3243 freemsg(mp1); 3244 } else { 3245 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3246 first_mp1 = ipsec_check_inbound_policy 3247 (first_mp1, connp, NULL, ip6h, 3248 mctl_present); 3249 } 3250 if (first_mp1 != NULL) { 3251 if (mctl_present) 3252 freeb(first_mp1); 3253 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3254 putnext(rq, mp1); 3255 } 3256 } 3257 mutex_enter(&connfp->connf_lock); 3258 /* Follow the next pointer before releasing the conn. 
*/ 3259 next_connp = connp->conn_next; 3260 CONN_DEC_REF(connp); 3261 connp = next_connp; 3262 } 3263 3264 /* Last one. Send it upstream. */ 3265 mutex_exit(&connfp->connf_lock); 3266 3267 /* Initiate IPPF processing */ 3268 if (IP6_IN_IPP(flags)) { 3269 uint_t ifindex; 3270 3271 mutex_enter(&ill->ill_lock); 3272 ifindex = ill->ill_phyint->phyint_ifindex; 3273 mutex_exit(&ill->ill_lock); 3274 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3275 if (mp == NULL) { 3276 CONN_DEC_REF(connp); 3277 if (mctl_present) 3278 freeb(first_mp); 3279 return; 3280 } 3281 } 3282 3283 /* 3284 * For link-local always add ifindex so that transport can set 3285 * sin6_scope_id. Avoid it for ICMP error fanout. 3286 */ 3287 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3288 (flags & IP_FF_IP6INFO)) { 3289 /* Add header */ 3290 mp = ip_add_info_v6(mp, inill, &dst); 3291 if (mp == NULL) { 3292 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3293 CONN_DEC_REF(connp); 3294 if (mctl_present) 3295 freeb(first_mp); 3296 return; 3297 } else if (mctl_present) { 3298 first_mp->b_cont = mp; 3299 } else { 3300 first_mp = mp; 3301 } 3302 } 3303 3304 rq = connp->conn_rq; 3305 if (!canputnext(rq)) { 3306 if (flags & IP_FF_RAWIP) { 3307 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3308 } else { 3309 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3310 } 3311 3312 freemsg(first_mp); 3313 } else { 3314 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3315 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3316 NULL, ip6h, mctl_present); 3317 if (first_mp == NULL) { 3318 CONN_DEC_REF(connp); 3319 return; 3320 } 3321 } 3322 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3323 putnext(rq, mp); 3324 if (mctl_present) 3325 freeb(first_mp); 3326 } 3327 CONN_DEC_REF(connp); 3328 } 3329 3330 /* 3331 * Send an ICMP error after patching up the packet appropriately. Returns 3332 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3333 */ 3334 int 3335 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3336 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3337 boolean_t mctl_present, zoneid_t zoneid) 3338 { 3339 ip6_t *ip6h; 3340 mblk_t *first_mp; 3341 boolean_t secure; 3342 unsigned char db_type; 3343 3344 first_mp = mp; 3345 if (mctl_present) { 3346 mp = mp->b_cont; 3347 secure = ipsec_in_is_secure(first_mp); 3348 ASSERT(mp != NULL); 3349 } else { 3350 /* 3351 * If this is an ICMP error being reported - which goes 3352 * up as M_CTLs, we need to convert them to M_DATA till 3353 * we finish checking with global policy because 3354 * ipsec_check_global_policy() assumes M_DATA as clear 3355 * and M_CTL as secure. 3356 */ 3357 db_type = mp->b_datap->db_type; 3358 mp->b_datap->db_type = M_DATA; 3359 secure = B_FALSE; 3360 } 3361 /* 3362 * We are generating an icmp error for some inbound packet. 3363 * Called from all ip_fanout_(udp, tcp, proto) functions. 3364 * Before we generate an error, check with global policy 3365 * to see whether this is allowed to enter the system. As 3366 * there is no "conn", we are checking with global policy. 
3367 */ 3368 ip6h = (ip6_t *)mp->b_rptr; 3369 if (secure || ipsec_inbound_v6_policy_present) { 3370 first_mp = ipsec_check_global_policy(first_mp, NULL, 3371 NULL, ip6h, mctl_present); 3372 if (first_mp == NULL) 3373 return (0); 3374 } 3375 3376 if (!mctl_present) 3377 mp->b_datap->db_type = db_type; 3378 3379 if (flags & IP_FF_SEND_ICMP) { 3380 if (flags & IP_FF_HDR_COMPLETE) { 3381 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3382 freemsg(first_mp); 3383 return (1); 3384 } 3385 } 3386 switch (icmp_type) { 3387 case ICMP6_DST_UNREACH: 3388 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3389 B_FALSE, B_FALSE); 3390 break; 3391 case ICMP6_PARAM_PROB: 3392 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3393 nexthdr_offset, B_FALSE, B_FALSE); 3394 break; 3395 default: 3396 #ifdef DEBUG 3397 panic("ip_fanout_send_icmp_v6: wrong type"); 3398 /*NOTREACHED*/ 3399 #else 3400 freemsg(first_mp); 3401 break; 3402 #endif 3403 } 3404 } else { 3405 freemsg(first_mp); 3406 return (0); 3407 } 3408 3409 return (1); 3410 } 3411 3412 3413 /* 3414 * Fanout for TCP packets 3415 * The caller puts <fport, lport> in the ports parameter. 3416 */ 3417 static void 3418 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3419 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3420 { 3421 mblk_t *first_mp; 3422 boolean_t secure; 3423 conn_t *connp; 3424 tcph_t *tcph; 3425 boolean_t syn_present = B_FALSE; 3426 3427 first_mp = mp; 3428 if (mctl_present) { 3429 mp = first_mp->b_cont; 3430 secure = ipsec_in_is_secure(first_mp); 3431 ASSERT(mp != NULL); 3432 } else { 3433 secure = B_FALSE; 3434 } 3435 3436 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3437 3438 if (connp == NULL || 3439 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3440 /* 3441 * No hard-bound match. Send Reset. 
3442 */ 3443 dblk_t *dp = mp->b_datap; 3444 uint32_t ill_index; 3445 3446 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3447 3448 /* Initiate IPPf processing, if needed. */ 3449 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3450 ill_index = ill->ill_phyint->phyint_ifindex; 3451 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3452 if (first_mp == NULL) { 3453 if (connp != NULL) 3454 CONN_DEC_REF(connp); 3455 return; 3456 } 3457 } 3458 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3459 tcp_xmit_listeners_reset(first_mp, hdr_len); 3460 if (connp != NULL) 3461 CONN_DEC_REF(connp); 3462 return; 3463 } 3464 3465 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3466 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3467 if (connp->conn_flags & IPCL_TCP) { 3468 squeue_t *sqp; 3469 3470 /* 3471 * For fused tcp loopback, assign the eager's 3472 * squeue to be that of the active connect's. 3473 */ 3474 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3475 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3476 !IP6_IN_IPP(flags)) { 3477 ASSERT(Q_TO_CONN(q) != NULL); 3478 sqp = Q_TO_CONN(q)->conn_sqp; 3479 } else { 3480 sqp = IP_SQUEUE_GET(lbolt); 3481 } 3482 3483 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3484 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3485 3486 /* 3487 * db_cksumstuff is unused in the incoming 3488 * path; Thus store the ifindex here. It will 3489 * be cleared in tcp_conn_create_v6(). 
3490 */ 3491 DB_CKSUMSTUFF(mp) = 3492 (intptr_t)ill->ill_phyint->phyint_ifindex; 3493 syn_present = B_TRUE; 3494 } 3495 } 3496 3497 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3498 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3499 if ((flags & TH_RST) || (flags & TH_URG)) { 3500 CONN_DEC_REF(connp); 3501 freemsg(first_mp); 3502 return; 3503 } 3504 if (flags & TH_ACK) { 3505 tcp_xmit_listeners_reset(first_mp, hdr_len); 3506 CONN_DEC_REF(connp); 3507 return; 3508 } 3509 3510 CONN_DEC_REF(connp); 3511 freemsg(first_mp); 3512 return; 3513 } 3514 3515 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3516 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3517 NULL, ip6h, mctl_present); 3518 if (first_mp == NULL) { 3519 CONN_DEC_REF(connp); 3520 return; 3521 } 3522 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3523 ASSERT(syn_present); 3524 if (mctl_present) { 3525 ASSERT(first_mp != mp); 3526 first_mp->b_datap->db_struioflag |= 3527 STRUIO_POLICY; 3528 } else { 3529 ASSERT(first_mp == mp); 3530 mp->b_datap->db_struioflag &= 3531 ~STRUIO_EAGER; 3532 mp->b_datap->db_struioflag |= 3533 STRUIO_POLICY; 3534 } 3535 } else { 3536 /* 3537 * Discard first_mp early since we're dealing with a 3538 * fully-connected conn_t and tcp doesn't do policy in 3539 * this case. Also, if someone is bound to IPPROTO_TCP 3540 * over raw IP, they don't expect to see a M_CTL. 3541 */ 3542 if (mctl_present) { 3543 freeb(first_mp); 3544 mctl_present = B_FALSE; 3545 } 3546 first_mp = mp; 3547 } 3548 } 3549 3550 /* Initiate IPPF processing */ 3551 if (IP6_IN_IPP(flags)) { 3552 uint_t ifindex; 3553 3554 mutex_enter(&ill->ill_lock); 3555 ifindex = ill->ill_phyint->phyint_ifindex; 3556 mutex_exit(&ill->ill_lock); 3557 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3558 if (mp == NULL) { 3559 CONN_DEC_REF(connp); 3560 if (mctl_present) { 3561 freeb(first_mp); 3562 } 3563 return; 3564 } else if (mctl_present) { 3565 /* 3566 * ip_add_info_v6 might return a new mp. 
3567 */ 3568 ASSERT(first_mp != mp); 3569 first_mp->b_cont = mp; 3570 } else { 3571 first_mp = mp; 3572 } 3573 } 3574 3575 /* 3576 * For link-local always add ifindex so that TCP can bind to that 3577 * interface. Avoid it for ICMP error fanout. 3578 */ 3579 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3580 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3581 (flags & IP_FF_IP6INFO))) { 3582 /* Add header */ 3583 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3584 if (mp == NULL) { 3585 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3586 CONN_DEC_REF(connp); 3587 if (mctl_present) 3588 freeb(first_mp); 3589 return; 3590 } else if (mctl_present) { 3591 ASSERT(first_mp != mp); 3592 first_mp->b_cont = mp; 3593 } else { 3594 first_mp = mp; 3595 } 3596 } 3597 3598 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3599 if (IPCL_IS_TCP(connp)) { 3600 (*ip_input_proc)(connp->conn_sqp, first_mp, 3601 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3602 } else { 3603 putnext(connp->conn_rq, first_mp); 3604 CONN_DEC_REF(connp); 3605 } 3606 } 3607 3608 /* 3609 * Fanout for UDP packets. 3610 * The caller puts <fport, lport> in the ports parameter. 3611 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3612 * 3613 * If SO_REUSEADDR is set all multicast and broadcast packets 3614 * will be delivered to all streams bound to the same port. 3615 * 3616 * Zones notes: 3617 * Multicast packets will be distributed to streams in all zones. 
3618 */ 3619 static void 3620 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3621 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3622 zoneid_t zoneid) 3623 { 3624 uint32_t dstport, srcport; 3625 in6_addr_t dst; 3626 mblk_t *first_mp; 3627 boolean_t secure; 3628 conn_t *connp; 3629 connf_t *connfp; 3630 conn_t *first_conn; 3631 conn_t *next_conn; 3632 mblk_t *mp1, *first_mp1; 3633 in6_addr_t src; 3634 3635 first_mp = mp; 3636 if (mctl_present) { 3637 mp = first_mp->b_cont; 3638 secure = ipsec_in_is_secure(first_mp); 3639 ASSERT(mp != NULL); 3640 } else { 3641 secure = B_FALSE; 3642 } 3643 3644 /* Extract ports in net byte order */ 3645 dstport = htons(ntohl(ports) & 0xFFFF); 3646 srcport = htons(ntohl(ports) >> 16); 3647 dst = ip6h->ip6_dst; 3648 src = ip6h->ip6_src; 3649 3650 /* Attempt to find a client stream based on destination port. */ 3651 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3652 mutex_enter(&connfp->connf_lock); 3653 connp = connfp->connf_head; 3654 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3655 /* 3656 * Not multicast. Send to the one (first) client we find. 
3657 */ 3658 while (connp != NULL) { 3659 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3660 src) && connp->conn_zoneid == zoneid && 3661 conn_wantpacket_v6(connp, ill, ip6h, 3662 flags, zoneid)) { 3663 break; 3664 } 3665 connp = connp->conn_next; 3666 } 3667 if (connp == NULL || connp->conn_upq == NULL) 3668 goto notfound; 3669 3670 /* Found a client */ 3671 CONN_INC_REF(connp); 3672 mutex_exit(&connfp->connf_lock); 3673 3674 if (CONN_UDP_FLOWCTLD(connp)) { 3675 freemsg(first_mp); 3676 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3677 CONN_DEC_REF(connp); 3678 return; 3679 } 3680 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3681 first_mp = ipsec_check_inbound_policy(first_mp, 3682 connp, NULL, ip6h, mctl_present); 3683 if (first_mp == NULL) { 3684 CONN_DEC_REF(connp); 3685 return; 3686 } 3687 } 3688 /* Initiate IPPF processing */ 3689 if (IP6_IN_IPP(flags)) { 3690 uint_t ifindex; 3691 3692 mutex_enter(&ill->ill_lock); 3693 ifindex = ill->ill_phyint->phyint_ifindex; 3694 mutex_exit(&ill->ill_lock); 3695 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3696 if (mp == NULL) { 3697 CONN_DEC_REF(connp); 3698 if (mctl_present) 3699 freeb(first_mp); 3700 return; 3701 } 3702 } 3703 /* 3704 * For link-local always add ifindex so that 3705 * transport can set sin6_scope_id. Avoid it for 3706 * ICMP error fanout. 
3707 */ 3708 if ((connp->conn_ipv6_recvpktinfo || 3709 IN6_IS_ADDR_LINKLOCAL(&src)) && 3710 (flags & IP_FF_IP6INFO)) { 3711 /* Add header */ 3712 mp = ip_add_info_v6(mp, inill, &dst); 3713 if (mp == NULL) { 3714 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3715 CONN_DEC_REF(connp); 3716 if (mctl_present) 3717 freeb(first_mp); 3718 return; 3719 } else if (mctl_present) { 3720 first_mp->b_cont = mp; 3721 } else { 3722 first_mp = mp; 3723 } 3724 } 3725 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3726 3727 /* Send it upstream */ 3728 CONN_UDP_RECV(connp, mp); 3729 3730 IP6_STAT(ip6_udp_fannorm); 3731 CONN_DEC_REF(connp); 3732 if (mctl_present) 3733 freeb(first_mp); 3734 return; 3735 } 3736 3737 /* 3738 * The code is fine but we shouldn't be walking the conn_next 3739 * list in IPv6 (its a classifier private data struct). Maybe create 3740 * a classifier API to put a REF_HOLD on all matching conn in the 3741 * list and return an array. 3742 */ 3743 while (connp != NULL) { 3744 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3745 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) 3746 break; 3747 connp = connp->conn_next; 3748 } 3749 3750 if (connp == NULL || connp->conn_upq == NULL) 3751 goto notfound; 3752 3753 first_conn = connp; 3754 3755 CONN_INC_REF(connp); 3756 connp = connp->conn_next; 3757 for (;;) { 3758 while (connp != NULL) { 3759 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3760 src) && conn_wantpacket_v6(connp, ill, ip6h, 3761 flags, zoneid)) 3762 break; 3763 connp = connp->conn_next; 3764 } 3765 /* 3766 * Just copy the data part alone. The mctl part is 3767 * needed just for verifying policy and it is never 3768 * sent up. 3769 */ 3770 if (connp == NULL || 3771 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3772 ((first_mp1 = ip_copymsg(first_mp)) 3773 == NULL))) { 3774 /* 3775 * No more interested clients or memory 3776 * allocation failed 3777 */ 3778 connp = first_conn; 3779 break; 3780 } 3781 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3782 CONN_INC_REF(connp); 3783 mutex_exit(&connfp->connf_lock); 3784 /* 3785 * For link-local always add ifindex so that transport 3786 * can set sin6_scope_id. Avoid it for ICMP error 3787 * fanout. 3788 */ 3789 if ((connp->conn_ipv6_recvpktinfo || 3790 IN6_IS_ADDR_LINKLOCAL(&src)) && 3791 (flags & IP_FF_IP6INFO)) { 3792 /* Add header */ 3793 mp1 = ip_add_info_v6(mp1, inill, &dst); 3794 } 3795 if (mp1 == NULL) { 3796 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3797 goto next_one; 3798 } 3799 if (CONN_UDP_FLOWCTLD(connp)) { 3800 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3801 freemsg(mp1); 3802 goto next_one; 3803 } 3804 3805 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3806 secure) { 3807 first_mp1 = ipsec_check_inbound_policy 3808 (first_mp1, connp, NULL, ip6h, 3809 mctl_present); 3810 } 3811 if (first_mp1 != NULL) { 3812 if (mctl_present) 3813 freeb(first_mp1); 3814 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3815 3816 /* Send it upstream */ 3817 CONN_UDP_RECV(connp, mp1); 3818 } 3819 next_one: 3820 mutex_enter(&connfp->connf_lock); 3821 /* Follow the next pointer before releasing the conn. */ 3822 next_conn = connp->conn_next; 3823 IP6_STAT(ip6_udp_fanmb); 3824 CONN_DEC_REF(connp); 3825 connp = next_conn; 3826 } 3827 3828 /* Last one. Send it upstream. */ 3829 mutex_exit(&connfp->connf_lock); 3830 3831 /* Initiate IPPF processing */ 3832 if (IP6_IN_IPP(flags)) { 3833 uint_t ifindex; 3834 3835 mutex_enter(&ill->ill_lock); 3836 ifindex = ill->ill_phyint->phyint_ifindex; 3837 mutex_exit(&ill->ill_lock); 3838 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3839 if (mp == NULL) { 3840 CONN_DEC_REF(connp); 3841 if (mctl_present) { 3842 freeb(first_mp); 3843 } 3844 return; 3845 } 3846 } 3847 3848 /* 3849 * For link-local always add ifindex so that transport can set 3850 * sin6_scope_id. Avoid it for ICMP error fanout. 
3851 */ 3852 if ((connp->conn_ipv6_recvpktinfo || 3853 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3854 /* Add header */ 3855 mp = ip_add_info_v6(mp, inill, &dst); 3856 if (mp == NULL) { 3857 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3858 CONN_DEC_REF(connp); 3859 if (mctl_present) 3860 freeb(first_mp); 3861 return; 3862 } else if (mctl_present) { 3863 first_mp->b_cont = mp; 3864 } else { 3865 first_mp = mp; 3866 } 3867 } 3868 if (CONN_UDP_FLOWCTLD(connp)) { 3869 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3870 freemsg(mp); 3871 } else { 3872 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3873 first_mp = ipsec_check_inbound_policy(first_mp, 3874 connp, NULL, ip6h, mctl_present); 3875 if (first_mp == NULL) { 3876 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3877 CONN_DEC_REF(connp); 3878 return; 3879 } 3880 } 3881 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3882 3883 /* Send it upstream */ 3884 CONN_UDP_RECV(connp, mp); 3885 } 3886 IP6_STAT(ip6_udp_fanmb); 3887 CONN_DEC_REF(connp); 3888 if (mctl_present) 3889 freeb(first_mp); 3890 return; 3891 3892 notfound: 3893 mutex_exit(&connfp->connf_lock); 3894 /* 3895 * No one bound to this port. Is 3896 * there a client that wants all 3897 * unclaimed datagrams? 
3898 */ 3899 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3900 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3901 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 3902 zoneid); 3903 } else { 3904 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3905 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3906 mctl_present, zoneid)) { 3907 BUMP_MIB(&ip_mib, udpNoPorts); 3908 } 3909 } 3910 } 3911 3912 /* 3913 * int ip_find_hdr_v6() 3914 * 3915 * This routine is used by the upper layer protocols and the IP tunnel 3916 * module to: 3917 * - Set extension header pointers to appropriate locations 3918 * - Determine IPv6 header length and return it 3919 * - Return a pointer to the last nexthdr value 3920 * 3921 * The caller must initialize ipp_fields. 3922 * 3923 * NOTE: If multiple extension headers of the same type are present, 3924 * ip_find_hdr_v6() will set the respective extension header pointers 3925 * to the first one that it encounters in the IPv6 header. It also 3926 * skips fragment headers. This routine deals with malformed packets 3927 * of various sorts in which case the returned length is up to the 3928 * malformed part. 3929 */ 3930 int 3931 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3932 { 3933 uint_t length, ehdrlen; 3934 uint8_t nexthdr; 3935 uint8_t *whereptr, *endptr; 3936 ip6_dest_t *tmpdstopts; 3937 ip6_rthdr_t *tmprthdr; 3938 ip6_hbh_t *tmphopopts; 3939 ip6_frag_t *tmpfraghdr; 3940 3941 length = IPV6_HDR_LEN; 3942 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3943 endptr = mp->b_wptr; 3944 3945 nexthdr = ip6h->ip6_nxt; 3946 while (whereptr < endptr) { 3947 /* Is there enough left for len + nexthdr? 
*/ 3948 if (whereptr + MIN_EHDR_LEN > endptr) 3949 goto done; 3950 3951 switch (nexthdr) { 3952 case IPPROTO_HOPOPTS: 3953 tmphopopts = (ip6_hbh_t *)whereptr; 3954 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3955 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3956 goto done; 3957 nexthdr = tmphopopts->ip6h_nxt; 3958 /* return only 1st hbh */ 3959 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3960 ipp->ipp_fields |= IPPF_HOPOPTS; 3961 ipp->ipp_hopopts = tmphopopts; 3962 ipp->ipp_hopoptslen = ehdrlen; 3963 } 3964 break; 3965 case IPPROTO_DSTOPTS: 3966 tmpdstopts = (ip6_dest_t *)whereptr; 3967 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3968 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3969 goto done; 3970 nexthdr = tmpdstopts->ip6d_nxt; 3971 /* 3972 * ipp_dstopts is set to the destination header after a 3973 * routing header. 3974 * Assume it is a post-rthdr destination header 3975 * and adjust when we find an rthdr. 3976 */ 3977 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3978 ipp->ipp_fields |= IPPF_DSTOPTS; 3979 ipp->ipp_dstopts = tmpdstopts; 3980 ipp->ipp_dstoptslen = ehdrlen; 3981 } 3982 break; 3983 case IPPROTO_ROUTING: 3984 tmprthdr = (ip6_rthdr_t *)whereptr; 3985 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3986 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3987 goto done; 3988 nexthdr = tmprthdr->ip6r_nxt; 3989 /* return only 1st rthdr */ 3990 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3991 ipp->ipp_fields |= IPPF_RTHDR; 3992 ipp->ipp_rthdr = tmprthdr; 3993 ipp->ipp_rthdrlen = ehdrlen; 3994 } 3995 /* 3996 * Make any destination header we've seen be a 3997 * pre-rthdr destination header. 3998 */ 3999 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4000 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4001 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4002 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4003 ipp->ipp_dstopts = NULL; 4004 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4005 ipp->ipp_dstoptslen = 0; 4006 } 4007 break; 4008 case IPPROTO_FRAGMENT: 4009 /* 4010 * Fragment headers are skipped. 
Currently, only 4011 * IP cares for their existence. If anyone other 4012 * than IP ever has the need to know about the 4013 * location of fragment headers, support can be 4014 * added to the ip6_pkt_t at that time. 4015 */ 4016 tmpfraghdr = (ip6_frag_t *)whereptr; 4017 ehdrlen = sizeof (ip6_frag_t); 4018 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4019 goto done; 4020 nexthdr = tmpfraghdr->ip6f_nxt; 4021 break; 4022 case IPPROTO_NONE: 4023 default: 4024 goto done; 4025 } 4026 length += ehdrlen; 4027 whereptr += ehdrlen; 4028 } 4029 done: 4030 if (nexthdrp != NULL) 4031 *nexthdrp = nexthdr; 4032 return (length); 4033 } 4034 4035 int 4036 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4037 { 4038 ire_t *ire; 4039 4040 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4041 ire = ire_lookup_local_v6(zoneid); 4042 if (ire == NULL) { 4043 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4044 return (1); 4045 } 4046 ip6h->ip6_src = ire->ire_addr_v6; 4047 ire_refrele(ire); 4048 } 4049 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4050 ip6h->ip6_hops = ipv6_def_hops; 4051 return (0); 4052 } 4053 4054 /* 4055 * Try to determine where and what are the IPv6 header length and 4056 * pointer to nexthdr value for the upper layer protocol (or an 4057 * unknown next hdr). 4058 * 4059 * Parameters returns a pointer to the nexthdr value; 4060 * Must handle malformed packets of various sorts. 4061 * Function returns failure for malformed cases. 
4062 */ 4063 boolean_t 4064 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4065 uint8_t **nexthdrpp) 4066 { 4067 uint16_t length; 4068 uint_t ehdrlen; 4069 uint8_t *nexthdrp; 4070 uint8_t *whereptr; 4071 uint8_t *endptr; 4072 ip6_dest_t *desthdr; 4073 ip6_rthdr_t *rthdr; 4074 ip6_frag_t *fraghdr; 4075 4076 length = IPV6_HDR_LEN; 4077 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4078 endptr = mp->b_wptr; 4079 4080 nexthdrp = &ip6h->ip6_nxt; 4081 while (whereptr < endptr) { 4082 /* Is there enough left for len + nexthdr? */ 4083 if (whereptr + MIN_EHDR_LEN > endptr) 4084 break; 4085 4086 switch (*nexthdrp) { 4087 case IPPROTO_HOPOPTS: 4088 case IPPROTO_DSTOPTS: 4089 /* Assumes the headers are identical for hbh and dst */ 4090 desthdr = (ip6_dest_t *)whereptr; 4091 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4092 if ((uchar_t *)desthdr + ehdrlen > endptr) 4093 return (B_FALSE); 4094 nexthdrp = &desthdr->ip6d_nxt; 4095 break; 4096 case IPPROTO_ROUTING: 4097 rthdr = (ip6_rthdr_t *)whereptr; 4098 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4099 if ((uchar_t *)rthdr + ehdrlen > endptr) 4100 return (B_FALSE); 4101 nexthdrp = &rthdr->ip6r_nxt; 4102 break; 4103 case IPPROTO_FRAGMENT: 4104 fraghdr = (ip6_frag_t *)whereptr; 4105 ehdrlen = sizeof (ip6_frag_t); 4106 if ((uchar_t *)&fraghdr[1] > endptr) 4107 return (B_FALSE); 4108 nexthdrp = &fraghdr->ip6f_nxt; 4109 break; 4110 case IPPROTO_NONE: 4111 /* No next header means we're finished */ 4112 default: 4113 *hdr_length_ptr = length; 4114 *nexthdrpp = nexthdrp; 4115 return (B_TRUE); 4116 } 4117 length += ehdrlen; 4118 whereptr += ehdrlen; 4119 *hdr_length_ptr = length; 4120 *nexthdrpp = nexthdrp; 4121 } 4122 switch (*nexthdrp) { 4123 case IPPROTO_HOPOPTS: 4124 case IPPROTO_DSTOPTS: 4125 case IPPROTO_ROUTING: 4126 case IPPROTO_FRAGMENT: 4127 /* 4128 * If any know extension headers are still to be processed, 4129 * the packet's malformed (or at least all the IP header(s) are 4130 * not in the same 
mblk - and that should never happen. 4131 */ 4132 return (B_FALSE); 4133 4134 default: 4135 /* 4136 * If we get here, we know that all of the IP headers were in 4137 * the same mblk, even if the ULP header is in the next mblk. 4138 */ 4139 *hdr_length_ptr = length; 4140 *nexthdrpp = nexthdrp; 4141 return (B_TRUE); 4142 } 4143 } 4144 4145 /* 4146 * Return the length of the IPv6 related headers (including extension headers) 4147 * Returns a length even if the packet is malformed. 4148 */ 4149 int 4150 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4151 { 4152 uint16_t hdr_len; 4153 uint8_t *nexthdrp; 4154 4155 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4156 return (hdr_len); 4157 } 4158 4159 /* 4160 * Select an ill for the packet by considering load spreading across 4161 * a different ill in the group if dst_ill is part of some group. 4162 */ 4163 static ill_t * 4164 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4165 { 4166 ill_t *ill; 4167 4168 /* 4169 * We schedule irrespective of whether the source address is 4170 * INADDR_UNSPECIED or not. 4171 */ 4172 ill = illgrp_scheduler(dst_ill); 4173 if (ill == NULL) 4174 return (NULL); 4175 4176 /* 4177 * For groups with names ip_sioctl_groupname ensures that all 4178 * ills are of same type. For groups without names, ifgrp_insert 4179 * ensures this. 4180 */ 4181 ASSERT(dst_ill->ill_type == ill->ill_type); 4182 4183 return (ill); 4184 } 4185 4186 /* 4187 * IPv6 - 4188 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4189 * to send out a packet to a destination address for which we do not have 4190 * specific routing information. 4191 * 4192 * Handle non-multicast packets. If ill is non-NULL the match is done 4193 * for that ill. 4194 * 4195 * When a specific ill is specified (using IPV6_PKTINFO, 4196 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4197 * on routing entries (ftable and ctable) that have a matching 4198 * ire->ire_ipif->ipif_ill. 
 * Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE: These are the scopes of some of the variables that point at IRE,
 *	 which need to be followed while making any future modifications
 *	 to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4232 */ 4233 /* ARGSUSED */ 4234 void 4235 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4236 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4237 { 4238 in6_addr_t v6gw; 4239 in6_addr_t dst; 4240 ire_t *ire = NULL; 4241 ipif_t *src_ipif = NULL; 4242 ill_t *dst_ill = NULL; 4243 ire_t *sire = NULL; 4244 ire_t *save_ire; 4245 mblk_t *dlureq_mp; 4246 ip6_t *ip6h; 4247 int err = 0; 4248 mblk_t *first_mp; 4249 ipsec_out_t *io; 4250 ill_t *attach_ill = NULL; 4251 ushort_t ire_marks = 0; 4252 int match_flags; 4253 boolean_t ip6i_present; 4254 ire_t *first_sire = NULL; 4255 mblk_t *copy_mp = NULL; 4256 mblk_t *xmit_mp = NULL; 4257 in6_addr_t save_dst; 4258 uint32_t multirt_flags = 4259 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4260 boolean_t multirt_is_resolvable; 4261 boolean_t multirt_resolve_next; 4262 boolean_t need_rele = B_FALSE; 4263 boolean_t do_attach_ill = B_FALSE; 4264 boolean_t ip6_asp_table_held = B_FALSE; 4265 4266 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4267 4268 first_mp = mp; 4269 if (mp->b_datap->db_type == M_CTL) { 4270 mp = mp->b_cont; 4271 io = (ipsec_out_t *)first_mp->b_rptr; 4272 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4273 } else { 4274 io = NULL; 4275 } 4276 4277 /* 4278 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4279 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4280 * could be NULL. 4281 * 4282 * This information can appear either in an ip6i_t or an IPSEC_OUT 4283 * message. 4284 */ 4285 ip6h = (ip6_t *)mp->b_rptr; 4286 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4287 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4288 if (!ip6i_present || 4289 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4290 attach_ill = ip_grab_attach_ill(ill, first_mp, 4291 (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : 4292 io->ipsec_out_ill_index), B_TRUE); 4293 /* Failure case frees things for us. 
*/ 4294 if (attach_ill == NULL) 4295 return; 4296 4297 /* 4298 * Check if we need an ire that will not be 4299 * looked up by anybody else i.e. HIDDEN. 4300 */ 4301 if (ill_is_probeonly(attach_ill)) 4302 ire_marks = IRE_MARK_HIDDEN; 4303 } 4304 } 4305 4306 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4307 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4308 goto icmp_err_ret; 4309 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4310 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4311 goto icmp_err_ret; 4312 } 4313 4314 /* 4315 * If this IRE is created for forwarding or it is not for 4316 * TCP traffic, mark it as temporary. 4317 * 4318 * Is it sufficient just to check the next header?? 4319 */ 4320 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4321 ire_marks |= IRE_MARK_TEMPORARY; 4322 4323 /* 4324 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4325 * chain until it gets the most specific information available. 4326 * For example, we know that there is no IRE_CACHE for this dest, 4327 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4328 * ire_ftable_lookup_v6 will look up the gateway, etc. 4329 */ 4330 4331 if (ill == NULL) { 4332 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4333 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE; 4334 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4335 NULL, &sire, zoneid, 0, match_flags); 4336 /* 4337 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4338 * in a NULL ill, but the packet could be a neighbor 4339 * solicitation/advertisment and could have a valid attach_ill. 4340 */ 4341 if (attach_ill != NULL) 4342 ill_refrele(attach_ill); 4343 } else { 4344 if (attach_ill != NULL) { 4345 /* 4346 * attach_ill is set only for communicating with 4347 * on-link hosts. So, don't look for DEFAULT. 4348 * ip_wput_v6 passes the right ill in this case and 4349 * hence we can assert. 
4350 */ 4351 ASSERT(ill == attach_ill); 4352 ill_refrele(attach_ill); 4353 do_attach_ill = B_TRUE; 4354 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4355 } else { 4356 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4357 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4358 } 4359 match_flags |= MATCH_IRE_PARENT; 4360 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, ill->ill_ipif, 4361 &sire, zoneid, 0, match_flags); 4362 } 4363 4364 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4365 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4366 4367 if (zoneid == ALL_ZONES && ire != NULL) { 4368 /* 4369 * In the forwarding case, we can use a route from any zone 4370 * since we won't change the source address. We can easily 4371 * assert that the source address is already set when there's no 4372 * ip6_info header - otherwise we'd have to call pullupmsg(). 4373 */ 4374 ASSERT(ip6i_present || 4375 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4376 zoneid = ire->ire_zoneid; 4377 } 4378 4379 /* 4380 * We enter a loop that will be run only once in most cases. 4381 * The loop is re-entered in the case where the destination 4382 * can be reached through multiple RTF_MULTIRT-flagged routes. 4383 * The intention is to compute multiple routes to a single 4384 * destination in a single ip_newroute_v6 call. 4385 * The information is contained in sire->ire_flags. 4386 */ 4387 do { 4388 multirt_resolve_next = B_FALSE; 4389 4390 if (dst_ill != NULL) { 4391 ill_refrele(dst_ill); 4392 dst_ill = NULL; 4393 } 4394 if (src_ipif != NULL) { 4395 ipif_refrele(src_ipif); 4396 src_ipif = NULL; 4397 } 4398 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4399 ip3dbg(("ip_newroute_v6: starting new resolution " 4400 "with first_mp %p, tag %d\n", 4401 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4402 4403 /* 4404 * We check if there are trailing unresolved routes for 4405 * the destination contained in sire. 
4406 */ 4407 multirt_is_resolvable = 4408 ire_multirt_lookup_v6(&ire, &sire, multirt_flags); 4409 4410 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4411 "ire %p, sire %p\n", 4412 multirt_is_resolvable, (void *)ire, (void *)sire)); 4413 4414 if (!multirt_is_resolvable) { 4415 /* 4416 * No more multirt routes to resolve; give up 4417 * (all routes resolved or no more resolvable 4418 * routes). 4419 */ 4420 if (ire != NULL) { 4421 ire_refrele(ire); 4422 ire = NULL; 4423 } 4424 } else { 4425 ASSERT(sire != NULL); 4426 ASSERT(ire != NULL); 4427 /* 4428 * We simply use first_sire as a flag that 4429 * indicates if a resolvable multirt route has 4430 * already been found during the preceding 4431 * loops. If it is not the case, we may have 4432 * to send an ICMP error to report that the 4433 * destination is unreachable. We do not 4434 * IRE_REFHOLD first_sire. 4435 */ 4436 if (first_sire == NULL) { 4437 first_sire = sire; 4438 } 4439 } 4440 } 4441 if ((ire == NULL) || (ire == sire)) { 4442 /* 4443 * either ire == NULL (the destination cannot be 4444 * resolved) or ire == sire (the gateway cannot be 4445 * resolved). At this point, there are no more routes 4446 * to resolve for the destination, thus we exit. 4447 */ 4448 if (ip_debug > 3) { 4449 /* ip2dbg */ 4450 pr_addr_dbg("ip_newroute_v6: " 4451 "can't resolve %s\n", AF_INET6, v6dstp); 4452 } 4453 ip3dbg(("ip_newroute_v6: " 4454 "ire %p, sire %p, first_sire %p\n", 4455 (void *)ire, (void *)sire, (void *)first_sire)); 4456 4457 if (sire != NULL) { 4458 ire_refrele(sire); 4459 sire = NULL; 4460 } 4461 4462 if (first_sire != NULL) { 4463 /* 4464 * At least one multirt route has been found 4465 * in the same ip_newroute() call; there is no 4466 * need to report an ICMP error. 4467 * first_sire was not IRE_REFHOLDed. 
4468 */ 4469 MULTIRT_DEBUG_UNTAG(first_mp); 4470 freemsg(first_mp); 4471 return; 4472 } 4473 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4474 RTA_DST); 4475 goto icmp_err_ret; 4476 } 4477 4478 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4479 4480 /* 4481 * Verify that the returned IRE does not have either the 4482 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4483 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4484 */ 4485 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4486 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4487 goto icmp_err_ret; 4488 4489 /* 4490 * Increment the ire_ob_pkt_count field for ire if it is an 4491 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4492 * increment the same for the parent IRE, sire, if it is some 4493 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4494 * and HOST_REDIRECT). 4495 */ 4496 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4497 UPDATE_OB_PKT_COUNT(ire); 4498 ire->ire_last_used_time = lbolt; 4499 } 4500 4501 if (sire != NULL) { 4502 mutex_enter(&sire->ire_lock); 4503 v6gw = sire->ire_gateway_addr_v6; 4504 mutex_exit(&sire->ire_lock); 4505 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4506 IRE_INTERFACE)) == 0); 4507 UPDATE_OB_PKT_COUNT(sire); 4508 sire->ire_last_used_time = lbolt; 4509 } else { 4510 v6gw = ipv6_all_zeros; 4511 } 4512 4513 /* 4514 * We have a route to reach the destination. 4515 * 4516 * 1) If the interface is part of ill group, try to get a new 4517 * ill taking load spreading into account. 4518 * 4519 * 2) After selecting the ill, get a source address that might 4520 * create good inbound load spreading and that matches the 4521 * right scope. ipif_select_source_v6 does this for us. 4522 * 4523 * If the application specified the ill (ifindex), we still 4524 * load spread. Only if the packets needs to go out specifically 4525 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4526 * IPV6_BOUND_PIF we don't try to use a different ill for load 4527 * spreading. 4528 */ 4529 if (!do_attach_ill) { 4530 /* 4531 * If the interface belongs to an interface group, 4532 * make sure the next possible interface in the group 4533 * is used. This encourages load spreading among 4534 * peers in an interface group. However, in the case 4535 * of multirouting, load spreading is not used, as we 4536 * actually want to replicate outgoing packets through 4537 * particular interfaces. 4538 * 4539 * Note: While we pick a dst_ill we are really only 4540 * interested in the ill for load spreading. 4541 * The source ipif is determined by source address 4542 * selection below. 4543 */ 4544 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4545 dst_ill = ire->ire_ipif->ipif_ill; 4546 /* For uniformity do a refhold */ 4547 ill_refhold(dst_ill); 4548 } else { 4549 /* 4550 * If we are here trying to create an IRE_CACHE 4551 * for an offlink destination and have the 4552 * IRE_CACHE for the next hop and the latter is 4553 * using virtual IP source address selection i.e 4554 * it's ire->ire_ipif is pointing to a virtual 4555 * network interface (vni) then 4556 * ip_newroute_get_dst_ll() will return the vni 4557 * interface as the dst_ill. Since the vni is 4558 * virtual i.e not associated with any physical 4559 * interface, it cannot be the dst_ill, hence 4560 * in such a case call ip_newroute_get_dst_ll() 4561 * with the stq_ill instead of the ire_ipif ILL. 4562 * The function returns a refheld ill. 
4563 */ 4564 if ((ire->ire_type == IRE_CACHE) && 4565 IS_VNI(ire->ire_ipif->ipif_ill)) 4566 dst_ill = ip_newroute_get_dst_ill_v6( 4567 ire->ire_stq->q_ptr); 4568 else 4569 dst_ill = ip_newroute_get_dst_ill_v6( 4570 ire->ire_ipif->ipif_ill); 4571 } 4572 if (dst_ill == NULL) { 4573 if (ip_debug > 2) { 4574 pr_addr_dbg("ip_newroute_v6 : no dst " 4575 "ill for dst %s\n", 4576 AF_INET6, v6dstp); 4577 } 4578 goto icmp_err_ret; 4579 } else if (dst_ill->ill_group == NULL && ill != NULL && 4580 dst_ill != ill) { 4581 /* 4582 * If "ill" is not part of any group, we should 4583 * have found a route matching "ill" as we 4584 * called ire_ftable_lookup_v6 with 4585 * MATCH_IRE_ILL_GROUP. 4586 * Rather than asserting when there is a 4587 * mismatch, we just drop the packet. 4588 */ 4589 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4590 "dst_ill %s ill %s\n", 4591 dst_ill->ill_name, 4592 ill->ill_name)); 4593 goto icmp_err_ret; 4594 } 4595 } else { 4596 dst_ill = ire->ire_ipif->ipif_ill; 4597 /* For uniformity do refhold */ 4598 ill_refhold(dst_ill); 4599 /* 4600 * We should have found a route matching ill as we 4601 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4602 * Rather than asserting, while there is a mismatch, 4603 * we just drop the packet. 4604 */ 4605 if (dst_ill != ill) { 4606 ip0dbg(("ip_newroute_v6: Packet dropped as " 4607 "IP6I_ATTACH_IF ill is %s, " 4608 "ire->ire_ipif->ipif_ill is %s\n", 4609 ill->ill_name, 4610 dst_ill->ill_name)); 4611 goto icmp_err_ret; 4612 } 4613 } 4614 /* 4615 * Pick a source address which matches the scope of the 4616 * destination address. 4617 * For RTF_SETSRC routes, the source address is imposed by the 4618 * parent ire (sire). 4619 */ 4620 ASSERT(src_ipif == NULL); 4621 if (ire->ire_type == IRE_IF_RESOLVER && 4622 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4623 ip6_asp_can_lookup()) { 4624 /* 4625 * The ire cache entry we're adding is for the 4626 * gateway itself. 
The source address in this case 4627 * is relative to the gateway's address. 4628 */ 4629 ip6_asp_table_held = B_TRUE; 4630 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4631 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4632 if (src_ipif != NULL) 4633 ire_marks |= IRE_MARK_USESRC_CHECK; 4634 } else { 4635 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4636 /* 4637 * Check that the ipif matching the requested 4638 * source address still exists. 4639 */ 4640 src_ipif = ipif_lookup_addr_v6( 4641 &sire->ire_src_addr_v6, NULL, zoneid, 4642 NULL, NULL, NULL, NULL); 4643 } 4644 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4645 ip6_asp_table_held = B_TRUE; 4646 src_ipif = ipif_select_source_v6(dst_ill, 4647 v6dstp, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 4648 zoneid); 4649 if (src_ipif != NULL) 4650 ire_marks |= IRE_MARK_USESRC_CHECK; 4651 } 4652 } 4653 4654 if (src_ipif == NULL) { 4655 if (ip_debug > 2) { 4656 /* ip1dbg */ 4657 pr_addr_dbg("ip_newroute_v6: no src for " 4658 "dst %s\n, ", AF_INET6, v6dstp); 4659 printf("ip_newroute_v6: interface name %s\n", 4660 dst_ill->ill_name); 4661 } 4662 goto icmp_err_ret; 4663 } 4664 4665 if (ip_debug > 3) { 4666 /* ip2dbg */ 4667 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4668 AF_INET6, &v6gw); 4669 } 4670 ip2dbg(("\tire type %s (%d)\n", 4671 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4672 4673 /* 4674 * At this point in ip_newroute_v6(), ire is either the 4675 * IRE_CACHE of the next-hop gateway for an off-subnet 4676 * destination or an IRE_INTERFACE type that should be used 4677 * to resolve an on-subnet destination or an on-subnet 4678 * next-hop gateway. 4679 * 4680 * In the IRE_CACHE case, we have the following : 4681 * 4682 * 1) src_ipif - used for getting a source address. 4683 * 4684 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4685 * means packets using this IRE_CACHE will go out on dst_ill. 
4686 * 4687 * 3) The IRE sire will point to the prefix that is the longest 4688 * matching route for the destination. These prefix types 4689 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4690 * IRE_HOST_REDIRECT. 4691 * 4692 * The newly created IRE_CACHE entry for the off-subnet 4693 * destination is tied to both the prefix route and the 4694 * interface route used to resolve the next-hop gateway 4695 * via the ire_phandle and ire_ihandle fields, respectively. 4696 * 4697 * In the IRE_INTERFACE case, we have the following : 4698 * 4699 * 1) src_ipif - used for getting a source address. 4700 * 4701 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4702 * means packets using the IRE_CACHE that we will build 4703 * here will go out on dst_ill. 4704 * 4705 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4706 * to be created will only be tied to the IRE_INTERFACE that 4707 * was derived from the ire_ihandle field. 4708 * 4709 * If sire is non-NULL, it means the destination is off-link 4710 * and we will first create the IRE_CACHE for the gateway. 4711 * Next time through ip_newroute_v6, we will create the 4712 * IRE_CACHE for the final destination as described above. 4713 */ 4714 save_ire = ire; 4715 switch (ire->ire_type) { 4716 case IRE_CACHE: { 4717 ire_t *ipif_ire; 4718 4719 ASSERT(sire != NULL); 4720 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4721 mutex_enter(&ire->ire_lock); 4722 v6gw = ire->ire_gateway_addr_v6; 4723 mutex_exit(&ire->ire_lock); 4724 } 4725 /* 4726 * We need 3 ire's to create a new cache ire for an 4727 * off-link destination from the cache ire of the 4728 * gateway. 4729 * 4730 * 1. The prefix ire 'sire' 4731 * 2. The cache ire of the gateway 'ire' 4732 * 3. The interface ire 'ipif_ire' 4733 * 4734 * We have (1) and (2). We lookup (3) below. 4735 * 4736 * If there is no interface route to the gateway, 4737 * it is a race condition, where we found the cache 4738 * but the inteface route has been deleted. 
4739 */ 4740 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4741 if (ipif_ire == NULL) { 4742 ip1dbg(("ip_newroute_v6:" 4743 "ire_ihandle_lookup_offlink_v6 failed\n")); 4744 goto icmp_err_ret; 4745 } 4746 /* 4747 * Assume DL_UNITDATA_REQ is same for all physical 4748 * interfaces in the ifgrp. If it isn't, this code will 4749 * have to be seriously rewhacked to allow the 4750 * fastpath probing (such that I cache the link 4751 * header in the IRE_CACHE) to work over ifgrps. 4752 * We have what we need to build an IRE_CACHE. 4753 */ 4754 /* 4755 * Note: the new ire inherits RTF_SETSRC 4756 * and RTF_MULTIRT to propagate these flags from prefix 4757 * to cache. 4758 */ 4759 ire = ire_create_v6( 4760 v6dstp, /* dest address */ 4761 &ipv6_all_ones, /* mask */ 4762 &src_ipif->ipif_v6src_addr, /* source address */ 4763 &v6gw, /* gateway address */ 4764 &save_ire->ire_max_frag, 4765 NULL, /* Fast Path header */ 4766 dst_ill->ill_rq, /* recv-from queue */ 4767 dst_ill->ill_wq, /* send-to queue */ 4768 IRE_CACHE, 4769 NULL, 4770 src_ipif, 4771 &sire->ire_mask_v6, /* Parent mask */ 4772 sire->ire_phandle, /* Parent handle */ 4773 ipif_ire->ire_ihandle, /* Interface handle */ 4774 sire->ire_flags & /* flags if any */ 4775 (RTF_SETSRC | RTF_MULTIRT), 4776 &(sire->ire_uinfo)); 4777 4778 if (ire == NULL) { 4779 ire_refrele(save_ire); 4780 ire_refrele(ipif_ire); 4781 break; 4782 } 4783 ire->ire_marks |= ire_marks; 4784 4785 /* 4786 * Prevent sire and ipif_ire from getting deleted. The 4787 * newly created ire is tied to both of them via the 4788 * phandle and ihandle respectively. 4789 */ 4790 IRB_REFHOLD(sire->ire_bucket); 4791 /* Has it been removed already ? */ 4792 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4793 IRB_REFRELE(sire->ire_bucket); 4794 ire_refrele(ipif_ire); 4795 ire_refrele(save_ire); 4796 break; 4797 } 4798 4799 IRB_REFHOLD(ipif_ire->ire_bucket); 4800 /* Has it been removed already ? 
*/ 4801 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4802 IRB_REFRELE(ipif_ire->ire_bucket); 4803 IRB_REFRELE(sire->ire_bucket); 4804 ire_refrele(ipif_ire); 4805 ire_refrele(save_ire); 4806 break; 4807 } 4808 4809 xmit_mp = first_mp; 4810 if (ire->ire_flags & RTF_MULTIRT) { 4811 copy_mp = copymsg(first_mp); 4812 if (copy_mp != NULL) { 4813 xmit_mp = copy_mp; 4814 MULTIRT_DEBUG_TAG(first_mp); 4815 } 4816 } 4817 ire_add_then_send(q, ire, xmit_mp); 4818 if (ip6_asp_table_held) { 4819 ip6_asp_table_refrele(); 4820 ip6_asp_table_held = B_FALSE; 4821 } 4822 ire_refrele(save_ire); 4823 4824 /* Assert that sire is not deleted yet. */ 4825 ASSERT(sire->ire_ptpn != NULL); 4826 IRB_REFRELE(sire->ire_bucket); 4827 4828 /* Assert that ipif_ire is not deleted yet. */ 4829 ASSERT(ipif_ire->ire_ptpn != NULL); 4830 IRB_REFRELE(ipif_ire->ire_bucket); 4831 ire_refrele(ipif_ire); 4832 4833 if (copy_mp != NULL) { 4834 /* 4835 * Search for the next unresolved 4836 * multirt route. 4837 */ 4838 copy_mp = NULL; 4839 ipif_ire = NULL; 4840 ire = NULL; 4841 /* re-enter the loop */ 4842 multirt_resolve_next = B_TRUE; 4843 continue; 4844 } 4845 ire_refrele(sire); 4846 ill_refrele(dst_ill); 4847 ipif_refrele(src_ipif); 4848 return; 4849 } 4850 case IRE_IF_NORESOLVER: 4851 /* 4852 * We have what we need to build an IRE_CACHE. 4853 * 4854 * Create a new dlureq_mp with the IPv6 gateway 4855 * address in destination address in the DLPI hdr 4856 * if the physical length is exactly 16 bytes. 
4857 */ 4858 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 4859 const in6_addr_t *addr; 4860 4861 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4862 addr = &v6gw; 4863 else 4864 addr = v6dstp; 4865 4866 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 4867 dst_ill->ill_phys_addr_length, 4868 dst_ill->ill_sap, 4869 dst_ill->ill_sap_length); 4870 } else { 4871 dlureq_mp = ire->ire_dlureq_mp; 4872 } 4873 if (dlureq_mp == NULL) 4874 break; 4875 4876 /* 4877 * Note: the new ire inherits sire flags RTF_SETSRC 4878 * and RTF_MULTIRT to propagate those rules from prefix 4879 * to cache. 4880 */ 4881 ire = ire_create_v6( 4882 v6dstp, /* dest address */ 4883 &ipv6_all_ones, /* mask */ 4884 &src_ipif->ipif_v6src_addr, /* source address */ 4885 &v6gw, /* gateway address */ 4886 &save_ire->ire_max_frag, 4887 NULL, /* Fast Path header */ 4888 dst_ill->ill_rq, /* recv-from queue */ 4889 dst_ill->ill_wq, /* send-to queue */ 4890 IRE_CACHE, 4891 dlureq_mp, 4892 src_ipif, 4893 &save_ire->ire_mask_v6, /* Parent mask */ 4894 (sire != NULL) ? /* Parent handle */ 4895 sire->ire_phandle : 0, 4896 save_ire->ire_ihandle, /* Interface handle */ 4897 (sire != NULL) ? /* flags if any */ 4898 sire->ire_flags & 4899 (RTF_SETSRC | RTF_MULTIRT) : 0, 4900 &(save_ire->ire_uinfo)); 4901 4902 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 4903 freeb(dlureq_mp); 4904 4905 if (ire == NULL) { 4906 ire_refrele(save_ire); 4907 break; 4908 } 4909 4910 ire->ire_marks |= ire_marks; 4911 4912 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4913 dst = v6gw; 4914 else 4915 dst = *v6dstp; 4916 err = ndp_noresolver(dst_ill, &dst); 4917 if (err != 0) { 4918 ire_refrele(save_ire); 4919 break; 4920 } 4921 4922 /* Prevent save_ire from getting deleted */ 4923 IRB_REFHOLD(save_ire->ire_bucket); 4924 /* Has it been removed already ? 
*/ 4925 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4926 IRB_REFRELE(save_ire->ire_bucket); 4927 ire_refrele(save_ire); 4928 break; 4929 } 4930 4931 xmit_mp = first_mp; 4932 /* 4933 * In case of MULTIRT, a copy of the current packet 4934 * to send is made to further re-enter the 4935 * loop and attempt another route resolution 4936 */ 4937 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4938 copy_mp = copymsg(first_mp); 4939 if (copy_mp != NULL) { 4940 xmit_mp = copy_mp; 4941 MULTIRT_DEBUG_TAG(first_mp); 4942 } 4943 } 4944 ire_add_then_send(q, ire, xmit_mp); 4945 if (ip6_asp_table_held) { 4946 ip6_asp_table_refrele(); 4947 ip6_asp_table_held = B_FALSE; 4948 } 4949 4950 /* Assert that it is not deleted yet. */ 4951 ASSERT(save_ire->ire_ptpn != NULL); 4952 IRB_REFRELE(save_ire->ire_bucket); 4953 ire_refrele(save_ire); 4954 4955 if (copy_mp != NULL) { 4956 /* 4957 * If we found a (no)resolver, we ignore any 4958 * trailing top priority IRE_CACHE in 4959 * further loops. This ensures that we do not 4960 * omit any (no)resolver despite the priority 4961 * in this call. 4962 * IRE_CACHE, if any, will be processed 4963 * by another thread entering ip_newroute(), 4964 * (on resolver response, for example). 4965 * We use this to force multiple parallel 4966 * resolution as soon as a packet needs to be 4967 * sent. The result is, after one packet 4968 * emission all reachable routes are generally 4969 * resolved. 4970 * Otherwise, complete resolution of MULTIRT 4971 * routes would require several emissions as 4972 * side effect. 4973 */ 4974 multirt_flags &= ~MULTIRT_CACHEGW; 4975 4976 /* 4977 * Search for the next unresolved multirt 4978 * route. 
4979 */ 4980 copy_mp = NULL; 4981 save_ire = NULL; 4982 ire = NULL; 4983 /* re-enter the loop */ 4984 multirt_resolve_next = B_TRUE; 4985 continue; 4986 } 4987 4988 /* Don't need sire anymore */ 4989 if (sire != NULL) 4990 ire_refrele(sire); 4991 ill_refrele(dst_ill); 4992 ipif_refrele(src_ipif); 4993 return; 4994 4995 case IRE_IF_RESOLVER: 4996 /* 4997 * We can't build an IRE_CACHE yet, but at least we 4998 * found a resolver that can help. 4999 */ 5000 dst = *v6dstp; 5001 /* 5002 * To be at this point in the code with a non-zero gw 5003 * means that dst is reachable through a gateway that 5004 * we have never resolved. By changing dst to the gw 5005 * addr we resolve the gateway first. When 5006 * ire_add_then_send() tries to put the IP dg to dst, 5007 * it will reenter ip_newroute() at which time we will 5008 * find the IRE_CACHE for the gw and create another 5009 * IRE_CACHE above (for dst itself). 5010 */ 5011 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5012 save_dst = dst; 5013 dst = v6gw; 5014 v6gw = ipv6_all_zeros; 5015 } 5016 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5017 /* 5018 * Ask the external resolver to do its thing. 
5019 * Make an mblk chain in the following form: 5020 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5021 */ 5022 mblk_t *ire_mp; 5023 mblk_t *areq_mp; 5024 areq_t *areq; 5025 in6_addr_t *addrp; 5026 5027 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5028 if (ip6_asp_table_held) { 5029 ip6_asp_table_refrele(); 5030 ip6_asp_table_held = B_FALSE; 5031 } 5032 ire = ire_create_mp_v6( 5033 &dst, /* dest address */ 5034 &ipv6_all_ones, /* mask */ 5035 &src_ipif->ipif_v6src_addr, 5036 /* source address */ 5037 &v6gw, /* gateway address */ 5038 NULL, /* Fast Path header */ 5039 dst_ill->ill_rq, /* recv-from queue */ 5040 dst_ill->ill_wq, /* send-to queue */ 5041 IRE_CACHE, 5042 NULL, 5043 src_ipif, 5044 &save_ire->ire_mask_v6, 5045 /* Parent mask */ 5046 0, 5047 save_ire->ire_ihandle, 5048 /* Interface handle */ 5049 0, /* flags if any */ 5050 &(save_ire->ire_uinfo)); 5051 5052 ire_refrele(save_ire); 5053 if (ire == NULL) { 5054 ip1dbg(("ip_newroute_v6:" 5055 "ire is NULL\n")); 5056 break; 5057 } 5058 if ((sire != NULL) && 5059 (sire->ire_flags & RTF_MULTIRT)) { 5060 /* 5061 * processing a copy of the packet to 5062 * send for further resolution loops 5063 */ 5064 copy_mp = copymsg(first_mp); 5065 if (copy_mp != NULL) 5066 MULTIRT_DEBUG_TAG(copy_mp); 5067 } 5068 ire->ire_marks |= ire_marks; 5069 ire_mp = ire->ire_mp; 5070 /* 5071 * Now create or find an nce for this interface. 5072 * The hw addr will need to to be set from 5073 * the reply to the AR_ENTRY_QUERY that 5074 * we're about to send. This will be done in 5075 * ire_add_v6(). 5076 */ 5077 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5078 switch (err) { 5079 case 0: 5080 /* 5081 * New cache entry created. 5082 * Break, then ask the external 5083 * resolver. 5084 */ 5085 break; 5086 case EINPROGRESS: 5087 /* 5088 * Resolution in progress; 5089 * packet has been queued by 5090 * ndp_resolver(). 5091 */ 5092 ire_delete(ire); 5093 ire = NULL; 5094 /* 5095 * Check if another multirt 5096 * route must be resolved. 
5097 */ 5098 if (copy_mp != NULL) { 5099 /* 5100 * If we found a resolver, we 5101 * ignore any trailing top 5102 * priority IRE_CACHE in 5103 * further loops. The reason is 5104 * the same as for noresolver. 5105 */ 5106 multirt_flags &= 5107 ~MULTIRT_CACHEGW; 5108 /* 5109 * Search for the next 5110 * unresolved multirt route. 5111 */ 5112 first_mp = copy_mp; 5113 copy_mp = NULL; 5114 mp = first_mp; 5115 if (mp->b_datap->db_type == 5116 M_CTL) { 5117 mp = mp->b_cont; 5118 } 5119 ASSERT(sire != NULL); 5120 dst = save_dst; 5121 /* 5122 * re-enter the loop 5123 */ 5124 multirt_resolve_next = 5125 B_TRUE; 5126 continue; 5127 } 5128 5129 if (sire != NULL) 5130 ire_refrele(sire); 5131 ill_refrele(dst_ill); 5132 ipif_refrele(src_ipif); 5133 return; 5134 default: 5135 /* 5136 * Transient error; packet will be 5137 * freed. 5138 */ 5139 ire_delete(ire); 5140 ire = NULL; 5141 break; 5142 } 5143 if (err != 0) 5144 break; 5145 /* 5146 * Now set up the AR_ENTRY_QUERY and send it. 5147 */ 5148 areq_mp = ill_arp_alloc(dst_ill, 5149 (uchar_t *)&ipv6_areq_template, 5150 (caddr_t)&dst); 5151 if (areq_mp == NULL) { 5152 ip1dbg(("ip_newroute_v6:" 5153 "areq_mp is NULL\n")); 5154 freemsg(ire_mp); 5155 break; 5156 } 5157 areq = (areq_t *)areq_mp->b_rptr; 5158 addrp = (in6_addr_t *)((char *)areq + 5159 areq->areq_target_addr_offset); 5160 *addrp = dst; 5161 addrp = (in6_addr_t *)((char *)areq + 5162 areq->areq_sender_addr_offset); 5163 *addrp = src_ipif->ipif_v6src_addr; 5164 /* 5165 * link the chain, then send up to the resolver. 5166 */ 5167 linkb(areq_mp, ire_mp); 5168 linkb(areq_mp, mp); 5169 ip1dbg(("ip_newroute_v6:" 5170 "putnext to resolver\n")); 5171 putnext(dst_ill->ill_rq, areq_mp); 5172 /* 5173 * Check if another multirt route 5174 * must be resolved. 5175 */ 5176 ire = NULL; 5177 if (copy_mp != NULL) { 5178 /* 5179 * If we find a resolver, we ignore any 5180 * trailing top priority IRE_CACHE in 5181 * further loops. The reason is the 5182 * same as for noresolver. 
5183 */ 5184 multirt_flags &= ~MULTIRT_CACHEGW; 5185 /* 5186 * Search for the next unresolved 5187 * multirt route. 5188 */ 5189 first_mp = copy_mp; 5190 copy_mp = NULL; 5191 mp = first_mp; 5192 if (mp->b_datap->db_type == M_CTL) { 5193 mp = mp->b_cont; 5194 } 5195 ASSERT(sire != NULL); 5196 dst = save_dst; 5197 /* 5198 * re-enter the loop 5199 */ 5200 multirt_resolve_next = B_TRUE; 5201 continue; 5202 } 5203 5204 if (sire != NULL) 5205 ire_refrele(sire); 5206 ill_refrele(dst_ill); 5207 ipif_refrele(src_ipif); 5208 return; 5209 } 5210 /* 5211 * Non-external resolver case. 5212 */ 5213 ire = ire_create_v6( 5214 &dst, /* dest address */ 5215 &ipv6_all_ones, /* mask */ 5216 &src_ipif->ipif_v6src_addr, /* source address */ 5217 &v6gw, /* gateway address */ 5218 &save_ire->ire_max_frag, 5219 NULL, /* Fast Path header */ 5220 dst_ill->ill_rq, /* recv-from queue */ 5221 dst_ill->ill_wq, /* send-to queue */ 5222 IRE_CACHE, 5223 NULL, 5224 src_ipif, 5225 &save_ire->ire_mask_v6, /* Parent mask */ 5226 0, 5227 save_ire->ire_ihandle, /* Interface handle */ 5228 0, /* flags if any */ 5229 &(save_ire->ire_uinfo)); 5230 5231 if (ire == NULL) { 5232 ire_refrele(save_ire); 5233 break; 5234 } 5235 5236 if ((sire != NULL) && 5237 (sire->ire_flags & RTF_MULTIRT)) { 5238 copy_mp = copymsg(first_mp); 5239 if (copy_mp != NULL) 5240 MULTIRT_DEBUG_TAG(copy_mp); 5241 } 5242 5243 ire->ire_marks |= ire_marks; 5244 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5245 switch (err) { 5246 case 0: 5247 /* Prevent save_ire from getting deleted */ 5248 IRB_REFHOLD(save_ire->ire_bucket); 5249 /* Has it been removed already ? */ 5250 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5251 IRB_REFRELE(save_ire->ire_bucket); 5252 ire_refrele(save_ire); 5253 break; 5254 } 5255 5256 /* 5257 * We have a resolved cache entry, 5258 * add in the IRE. 
5259 */ 5260 ire_add_then_send(q, ire, first_mp); 5261 if (ip6_asp_table_held) { 5262 ip6_asp_table_refrele(); 5263 ip6_asp_table_held = B_FALSE; 5264 } 5265 5266 /* Assert that it is not deleted yet. */ 5267 ASSERT(save_ire->ire_ptpn != NULL); 5268 IRB_REFRELE(save_ire->ire_bucket); 5269 ire_refrele(save_ire); 5270 /* 5271 * Check if another multirt route 5272 * must be resolved. 5273 */ 5274 ire = NULL; 5275 if (copy_mp != NULL) { 5276 /* 5277 * If we find a resolver, we ignore any 5278 * trailing top priority IRE_CACHE in 5279 * further loops. The reason is the 5280 * same as for noresolver. 5281 */ 5282 multirt_flags &= ~MULTIRT_CACHEGW; 5283 /* 5284 * Search for the next unresolved 5285 * multirt route. 5286 */ 5287 first_mp = copy_mp; 5288 copy_mp = NULL; 5289 mp = first_mp; 5290 if (mp->b_datap->db_type == M_CTL) { 5291 mp = mp->b_cont; 5292 } 5293 ASSERT(sire != NULL); 5294 dst = save_dst; 5295 /* 5296 * re-enter the loop 5297 */ 5298 multirt_resolve_next = B_TRUE; 5299 continue; 5300 } 5301 5302 if (sire != NULL) 5303 ire_refrele(sire); 5304 ill_refrele(dst_ill); 5305 ipif_refrele(src_ipif); 5306 return; 5307 5308 case EINPROGRESS: 5309 /* 5310 * mp was consumed - presumably queued. 5311 * No need for ire, presumably resolution is 5312 * in progress, and ire will be added when the 5313 * address is resolved. 5314 */ 5315 if (ip6_asp_table_held) { 5316 ip6_asp_table_refrele(); 5317 ip6_asp_table_held = B_FALSE; 5318 } 5319 ASSERT(ire->ire_nce == NULL); 5320 ire_delete(ire); 5321 ire_refrele(save_ire); 5322 /* 5323 * Check if another multirt route 5324 * must be resolved. 5325 */ 5326 ire = NULL; 5327 if (copy_mp != NULL) { 5328 /* 5329 * If we find a resolver, we ignore any 5330 * trailing top priority IRE_CACHE in 5331 * further loops. The reason is the 5332 * same as for noresolver. 5333 */ 5334 multirt_flags &= ~MULTIRT_CACHEGW; 5335 /* 5336 * Search for the next unresolved 5337 * multirt route. 
5338 */ 5339 first_mp = copy_mp; 5340 copy_mp = NULL; 5341 mp = first_mp; 5342 if (mp->b_datap->db_type == M_CTL) { 5343 mp = mp->b_cont; 5344 } 5345 ASSERT(sire != NULL); 5346 dst = save_dst; 5347 /* 5348 * re-enter the loop 5349 */ 5350 multirt_resolve_next = B_TRUE; 5351 continue; 5352 } 5353 if (sire != NULL) 5354 ire_refrele(sire); 5355 ill_refrele(dst_ill); 5356 ipif_refrele(src_ipif); 5357 return; 5358 default: 5359 /* Some transient error */ 5360 ASSERT(ire->ire_nce == NULL); 5361 ire_refrele(save_ire); 5362 break; 5363 } 5364 break; 5365 default: 5366 break; 5367 } 5368 if (ip6_asp_table_held) { 5369 ip6_asp_table_refrele(); 5370 ip6_asp_table_held = B_FALSE; 5371 } 5372 } while (multirt_resolve_next); 5373 5374 err_ret: 5375 ip1dbg(("ip_newroute_v6: dropped\n")); 5376 if (src_ipif != NULL) 5377 ipif_refrele(src_ipif); 5378 if (dst_ill != NULL) { 5379 need_rele = B_TRUE; 5380 ill = dst_ill; 5381 } 5382 if (ill != NULL) { 5383 if (mp->b_prev != NULL) { 5384 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5385 } else { 5386 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5387 } 5388 5389 if (need_rele) 5390 ill_refrele(ill); 5391 } else { 5392 if (mp->b_prev != NULL) { 5393 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5394 } else { 5395 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5396 } 5397 } 5398 /* Did this packet originate externally? 
*/ 5399 if (mp->b_prev) { 5400 mp->b_next = NULL; 5401 mp->b_prev = NULL; 5402 } 5403 if (copy_mp != NULL) { 5404 MULTIRT_DEBUG_UNTAG(copy_mp); 5405 freemsg(copy_mp); 5406 } 5407 MULTIRT_DEBUG_UNTAG(first_mp); 5408 freemsg(first_mp); 5409 if (ire != NULL) 5410 ire_refrele(ire); 5411 if (sire != NULL) 5412 ire_refrele(sire); 5413 return; 5414 5415 icmp_err_ret: 5416 if (ip6_asp_table_held) 5417 ip6_asp_table_refrele(); 5418 if (src_ipif != NULL) 5419 ipif_refrele(src_ipif); 5420 if (dst_ill != NULL) { 5421 need_rele = B_TRUE; 5422 ill = dst_ill; 5423 } 5424 ip1dbg(("ip_newroute_v6: no route\n")); 5425 if (sire != NULL) 5426 ire_refrele(sire); 5427 /* 5428 * We need to set sire to NULL to avoid double freeing if we 5429 * ever goto err_ret from below. 5430 */ 5431 sire = NULL; 5432 ip6h = (ip6_t *)mp->b_rptr; 5433 /* Skip ip6i_t header if present */ 5434 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5435 /* Make sure the IPv6 header is present */ 5436 if ((mp->b_wptr - (uchar_t *)ip6h) < 5437 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5438 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5439 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5440 goto err_ret; 5441 } 5442 } 5443 mp->b_rptr += sizeof (ip6i_t); 5444 ip6h = (ip6_t *)mp->b_rptr; 5445 } 5446 /* Did this packet originate externally? 
*/ 5447 if (mp->b_prev) { 5448 if (ill != NULL) { 5449 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5450 } else { 5451 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5452 } 5453 mp->b_next = NULL; 5454 mp->b_prev = NULL; 5455 q = WR(q); 5456 } else { 5457 if (ill != NULL) { 5458 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5459 } else { 5460 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5461 } 5462 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5463 /* Failed */ 5464 if (copy_mp != NULL) { 5465 MULTIRT_DEBUG_UNTAG(copy_mp); 5466 freemsg(copy_mp); 5467 } 5468 MULTIRT_DEBUG_UNTAG(first_mp); 5469 freemsg(first_mp); 5470 if (ire != NULL) 5471 ire_refrele(ire); 5472 if (need_rele) 5473 ill_refrele(ill); 5474 return; 5475 } 5476 } 5477 5478 if (need_rele) 5479 ill_refrele(ill); 5480 5481 /* 5482 * At this point we will have ire only if RTF_BLACKHOLE 5483 * or RTF_REJECT flags are set on the IRE. It will not 5484 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5485 */ 5486 if (ire != NULL) { 5487 if (ire->ire_flags & RTF_BLACKHOLE) { 5488 ire_refrele(ire); 5489 if (copy_mp != NULL) { 5490 MULTIRT_DEBUG_UNTAG(copy_mp); 5491 freemsg(copy_mp); 5492 } 5493 MULTIRT_DEBUG_UNTAG(first_mp); 5494 freemsg(first_mp); 5495 return; 5496 } 5497 ire_refrele(ire); 5498 } 5499 if (ip_debug > 3) { 5500 /* ip2dbg */ 5501 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5502 AF_INET6, v6dstp); 5503 } 5504 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5505 B_FALSE, B_FALSE); 5506 } 5507 5508 /* 5509 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5510 * we need to send out a packet to a destination address for which we do not 5511 * have specific routing information. It is only used for multicast packets. 5512 * 5513 * If unspec_src we allow creating an IRE with source address zero. 5514 * ire_send_v6() will delete it after the packet is sent. 
5515 */ 5516 void 5517 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5518 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5519 { 5520 ire_t *ire = NULL; 5521 ipif_t *src_ipif = NULL; 5522 int err = 0; 5523 ill_t *dst_ill = NULL; 5524 ire_t *save_ire; 5525 ushort_t ire_marks = 0; 5526 ipsec_out_t *io; 5527 ill_t *attach_ill = NULL; 5528 ill_t *ill; 5529 ip6_t *ip6h; 5530 mblk_t *first_mp; 5531 boolean_t ip6i_present; 5532 ire_t *fire = NULL; 5533 mblk_t *copy_mp = NULL; 5534 boolean_t multirt_resolve_next; 5535 in6_addr_t *v6dstp = &v6dst; 5536 boolean_t ipif_held = B_FALSE; 5537 boolean_t ill_held = B_FALSE; 5538 boolean_t ip6_asp_table_held = B_FALSE; 5539 5540 /* 5541 * This loop is run only once in most cases. 5542 * We loop to resolve further routes only when the destination 5543 * can be reached through multiple RTF_MULTIRT-flagged ires. 5544 */ 5545 do { 5546 multirt_resolve_next = B_FALSE; 5547 if (dst_ill != NULL) { 5548 ill_refrele(dst_ill); 5549 dst_ill = NULL; 5550 } 5551 5552 if (src_ipif != NULL) { 5553 ipif_refrele(src_ipif); 5554 src_ipif = NULL; 5555 } 5556 ASSERT(ipif != NULL); 5557 ill = ipif->ipif_ill; 5558 5559 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5560 if (ip_debug > 2) { 5561 /* ip1dbg */ 5562 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5563 AF_INET6, v6dstp); 5564 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5565 ill->ill_name, ipif->ipif_isv6); 5566 } 5567 5568 first_mp = mp; 5569 if (mp->b_datap->db_type == M_CTL) { 5570 mp = mp->b_cont; 5571 io = (ipsec_out_t *)first_mp->b_rptr; 5572 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5573 } else { 5574 io = NULL; 5575 } 5576 5577 /* 5578 * If the interface is a pt-pt interface we look for an 5579 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5580 * local_address and the pt-pt destination address. 5581 * Otherwise we just match the local address. 
5582 */ 5583 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5584 goto err_ret; 5585 } 5586 /* 5587 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5588 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5589 * as it could be NULL. 5590 * 5591 * This information can appear either in an ip6i_t or an 5592 * IPSEC_OUT message. 5593 */ 5594 ip6h = (ip6_t *)mp->b_rptr; 5595 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5596 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5597 if (!ip6i_present || 5598 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5599 attach_ill = ip_grab_attach_ill(ill, first_mp, 5600 (ip6i_present ? 5601 ((ip6i_t *)ip6h)->ip6i_ifindex : 5602 io->ipsec_out_ill_index), B_TRUE); 5603 /* Failure case frees things for us. */ 5604 if (attach_ill == NULL) 5605 return; 5606 5607 /* 5608 * Check if we need an ire that will not be 5609 * looked up by anybody else i.e. HIDDEN. 5610 */ 5611 if (ill_is_probeonly(attach_ill)) 5612 ire_marks = IRE_MARK_HIDDEN; 5613 } 5614 } 5615 5616 /* 5617 * We check if an IRE_OFFSUBNET for the addr that goes through 5618 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5619 * RTF_MULTIRT flags must be honored. 5620 */ 5621 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5622 ip2dbg(("ip_newroute_ipif_v6: " 5623 "ipif_lookup_multi_ire_v6(" 5624 "ipif %p, dst %08x) = fire %p\n", 5625 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5626 (void *)fire)); 5627 5628 /* 5629 * If the application specified the ill (ifindex), we still 5630 * load spread. Only if the packets needs to go out specifically 5631 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5632 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5633 * multirouting, then we don't try to use a different ill for 5634 * load spreading. 
5635 */ 5636 if (attach_ill == NULL) { 5637 /* 5638 * If the interface belongs to an interface group, 5639 * make sure the next possible interface in the group 5640 * is used. This encourages load spreading among peers 5641 * in an interface group. 5642 * 5643 * Note: While we pick a dst_ill we are really only 5644 * interested in the ill for load spreading. The source 5645 * ipif is determined by source address selection below. 5646 */ 5647 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5648 dst_ill = ipif->ipif_ill; 5649 /* For uniformity do a refhold */ 5650 ill_refhold(dst_ill); 5651 } else { 5652 /* refheld by ip_newroute_get_dst_ill_v6 */ 5653 dst_ill = 5654 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5655 } 5656 if (dst_ill == NULL) { 5657 if (ip_debug > 2) { 5658 pr_addr_dbg("ip_newroute_ipif_v6: " 5659 "no dst ill for dst %s\n", 5660 AF_INET6, v6dstp); 5661 } 5662 goto err_ret; 5663 } 5664 } else { 5665 dst_ill = ipif->ipif_ill; 5666 /* 5667 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5668 * and IPV6_BOUND_PIF case. 5669 */ 5670 ASSERT(dst_ill == attach_ill); 5671 /* attach_ill is already refheld */ 5672 } 5673 /* 5674 * Pick a source address which matches the scope of the 5675 * destination address. 5676 * For RTF_SETSRC routes, the source address is imposed by the 5677 * parent ire (fire). 5678 */ 5679 ASSERT(src_ipif == NULL); 5680 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5681 /* 5682 * Check that the ipif matching the requested source 5683 * address still exists. 
5684 */ 5685 src_ipif = 5686 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5687 NULL, zoneid, NULL, NULL, NULL, NULL); 5688 } 5689 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5690 ip6_asp_table_held = B_TRUE; 5691 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5692 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5693 } 5694 5695 if (src_ipif == NULL) { 5696 if (!unspec_src) { 5697 if (ip_debug > 2) { 5698 /* ip1dbg */ 5699 pr_addr_dbg("ip_newroute_ipif_v6: " 5700 "no src for dst %s\n,", 5701 AF_INET6, v6dstp); 5702 printf(" through interface %s\n", 5703 dst_ill->ill_name); 5704 } 5705 goto err_ret; 5706 } 5707 /* Use any ipif for source */ 5708 for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; 5709 src_ipif = src_ipif->ipif_next) { 5710 if ((src_ipif->ipif_flags & IPIF_UP) && 5711 IN6_IS_ADDR_UNSPECIFIED( 5712 &src_ipif->ipif_v6src_addr)) 5713 break; 5714 } 5715 if (src_ipif == NULL) { 5716 if (ip_debug > 2) { 5717 /* ip1dbg */ 5718 pr_addr_dbg("ip_newroute_ipif_v6: " 5719 "no src for dst %s\n ", 5720 AF_INET6, v6dstp); 5721 printf("ip_newroute_ipif_v6: if %s" 5722 "(UNSPEC_SRC)\n", 5723 dst_ill->ill_name); 5724 } 5725 goto err_ret; 5726 } 5727 src_ipif = ipif; 5728 ipif_refhold(src_ipif); 5729 } 5730 ire = ipif_to_ire_v6(ipif); 5731 if (ire == NULL) { 5732 if (ip_debug > 2) { 5733 /* ip1dbg */ 5734 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5735 AF_INET6, &ipif->ipif_v6lcl_addr); 5736 printf("ip_newroute_ipif_v6: " 5737 "if %s\n", dst_ill->ill_name); 5738 } 5739 goto err_ret; 5740 } 5741 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5742 goto err_ret; 5743 5744 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5745 5746 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5747 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5748 if (ip_debug > 2) { 5749 /* ip1dbg */ 5750 pr_addr_dbg(" address %s\n", 5751 AF_INET6, &ire->ire_src_addr_v6); 5752 } 5753 save_ire = ire; 5754 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5755 (void *)ire, (void 
*)ipif)); 5756 5757 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5758 /* 5759 * an IRE_OFFSUBET was looked up 5760 * on that interface. 5761 * this ire has RTF_MULTIRT flag, 5762 * so the resolution loop 5763 * will be re-entered to resolve 5764 * additional routes on other 5765 * interfaces. For that purpose, 5766 * a copy of the packet is 5767 * made at this point. 5768 */ 5769 fire->ire_last_used_time = lbolt; 5770 copy_mp = copymsg(first_mp); 5771 if (copy_mp) { 5772 MULTIRT_DEBUG_TAG(copy_mp); 5773 } 5774 } 5775 5776 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5777 switch (ire->ire_type) { 5778 case IRE_IF_NORESOLVER: { 5779 /* We have what we need to build an IRE_CACHE. */ 5780 mblk_t *dlureq_mp; 5781 5782 /* 5783 * Create a new dlureq_mp with the 5784 * IPv6 gateway address in destination address in the 5785 * DLPI hdr if the physical length is exactly 16 bytes. 5786 */ 5787 ASSERT(dst_ill->ill_isv6); 5788 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5789 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5790 dst_ill->ill_phys_addr_length, 5791 dst_ill->ill_sap, 5792 dst_ill->ill_sap_length); 5793 } else { 5794 dlureq_mp = ire->ire_dlureq_mp; 5795 } 5796 5797 if (dlureq_mp == NULL) 5798 break; 5799 /* 5800 * The newly created ire will inherit the flags of the 5801 * parent ire, if any. 5802 */ 5803 ire = ire_create_v6( 5804 v6dstp, /* dest address */ 5805 &ipv6_all_ones, /* mask */ 5806 &src_ipif->ipif_v6src_addr, /* source address */ 5807 NULL, /* gateway address */ 5808 &save_ire->ire_max_frag, 5809 NULL, /* Fast Path header */ 5810 dst_ill->ill_rq, /* recv-from queue */ 5811 dst_ill->ill_wq, /* send-to queue */ 5812 IRE_CACHE, 5813 dlureq_mp, 5814 src_ipif, 5815 NULL, 5816 (fire != NULL) ? /* Parent handle */ 5817 fire->ire_phandle : 0, 5818 save_ire->ire_ihandle, /* Interface handle */ 5819 (fire != NULL) ? 
5820 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5821 0, 5822 &ire_uinfo_null); 5823 5824 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5825 freeb(dlureq_mp); 5826 5827 if (ire == NULL) { 5828 ire_refrele(save_ire); 5829 break; 5830 } 5831 5832 ire->ire_marks |= ire_marks; 5833 5834 err = ndp_noresolver(dst_ill, v6dstp); 5835 if (err != 0) { 5836 ire_refrele(save_ire); 5837 break; 5838 } 5839 5840 /* Prevent save_ire from getting deleted */ 5841 IRB_REFHOLD(save_ire->ire_bucket); 5842 /* Has it been removed already ? */ 5843 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5844 IRB_REFRELE(save_ire->ire_bucket); 5845 ire_refrele(save_ire); 5846 break; 5847 } 5848 5849 ire_add_then_send(q, ire, first_mp); 5850 if (ip6_asp_table_held) { 5851 ip6_asp_table_refrele(); 5852 ip6_asp_table_held = B_FALSE; 5853 } 5854 5855 /* Assert that it is not deleted yet. */ 5856 ASSERT(save_ire->ire_ptpn != NULL); 5857 IRB_REFRELE(save_ire->ire_bucket); 5858 ire_refrele(save_ire); 5859 if (fire != NULL) { 5860 ire_refrele(fire); 5861 fire = NULL; 5862 } 5863 5864 /* 5865 * The resolution loop is re-entered if we 5866 * actually are in a multirouting case. 5867 */ 5868 if (copy_mp != NULL) { 5869 boolean_t need_resolve = 5870 ire_multirt_need_resolve_v6(v6dstp); 5871 if (!need_resolve) { 5872 MULTIRT_DEBUG_UNTAG(copy_mp); 5873 freemsg(copy_mp); 5874 copy_mp = NULL; 5875 } else { 5876 /* 5877 * ipif_lookup_group_v6() calls 5878 * ire_lookup_multi_v6() that uses 5879 * ire_ftable_lookup_v6() to find 5880 * an IRE_INTERFACE for the group. 5881 * In the multirt case, 5882 * ire_lookup_multi_v6() then invokes 5883 * ire_multirt_lookup_v6() to find 5884 * the next resolvable ire. 5885 * As a result, we obtain a new 5886 * interface, derived from the 5887 * next ire. 
5888 */ 5889 if (ipif_held) { 5890 ipif_refrele(ipif); 5891 ipif_held = B_FALSE; 5892 } 5893 ipif = ipif_lookup_group_v6(v6dstp, 5894 zoneid); 5895 ip2dbg(("ip_newroute_ipif: " 5896 "multirt dst %08x, ipif %p\n", 5897 ntohl(V4_PART_OF_V6((*v6dstp))), 5898 (void *)ipif)); 5899 if (ipif != NULL) { 5900 ipif_held = B_TRUE; 5901 mp = copy_mp; 5902 copy_mp = NULL; 5903 multirt_resolve_next = 5904 B_TRUE; 5905 continue; 5906 } else { 5907 freemsg(copy_mp); 5908 } 5909 } 5910 } 5911 ill_refrele(dst_ill); 5912 if (ipif_held) { 5913 ipif_refrele(ipif); 5914 ipif_held = B_FALSE; 5915 } 5916 if (src_ipif != NULL) 5917 ipif_refrele(src_ipif); 5918 return; 5919 } 5920 case IRE_IF_RESOLVER: { 5921 5922 ASSERT(dst_ill->ill_isv6); 5923 5924 /* 5925 * We obtain a partial IRE_CACHE which we will pass 5926 * along with the resolver query. When the response 5927 * comes back it will be there ready for us to add. 5928 */ 5929 /* 5930 * the newly created ire will inherit the flags of the 5931 * parent ire, if any. 5932 */ 5933 ire = ire_create_v6( 5934 v6dstp, /* dest address */ 5935 &ipv6_all_ones, /* mask */ 5936 &src_ipif->ipif_v6src_addr, /* source address */ 5937 NULL, /* gateway address */ 5938 &save_ire->ire_max_frag, 5939 NULL, /* Fast Path header */ 5940 dst_ill->ill_rq, /* recv-from queue */ 5941 dst_ill->ill_wq, /* send-to queue */ 5942 IRE_CACHE, 5943 NULL, 5944 src_ipif, 5945 NULL, 5946 (fire != NULL) ? /* Parent handle */ 5947 fire->ire_phandle : 0, 5948 save_ire->ire_ihandle, /* Interface handle */ 5949 (fire != NULL) ? 
5950 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5951 0, 5952 &ire_uinfo_null); 5953 5954 if (ire == NULL) { 5955 ire_refrele(save_ire); 5956 break; 5957 } 5958 5959 ire->ire_marks |= ire_marks; 5960 5961 /* Resolve and add ire to the ctable */ 5962 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5963 switch (err) { 5964 case 0: 5965 /* Prevent save_ire from getting deleted */ 5966 IRB_REFHOLD(save_ire->ire_bucket); 5967 /* Has it been removed already ? */ 5968 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5969 IRB_REFRELE(save_ire->ire_bucket); 5970 ire_refrele(save_ire); 5971 break; 5972 } 5973 /* 5974 * We have a resolved cache entry, 5975 * add in the IRE. 5976 */ 5977 ire_add_then_send(q, ire, first_mp); 5978 if (ip6_asp_table_held) { 5979 ip6_asp_table_refrele(); 5980 ip6_asp_table_held = B_FALSE; 5981 } 5982 5983 /* Assert that it is not deleted yet. */ 5984 ASSERT(save_ire->ire_ptpn != NULL); 5985 IRB_REFRELE(save_ire->ire_bucket); 5986 ire_refrele(save_ire); 5987 if (fire != NULL) { 5988 ire_refrele(fire); 5989 fire = NULL; 5990 } 5991 5992 /* 5993 * The resolution loop is re-entered if we 5994 * actually are in a multirouting case. 5995 */ 5996 if (copy_mp != NULL) { 5997 boolean_t need_resolve = 5998 ire_multirt_need_resolve_v6(v6dstp); 5999 if (!need_resolve) { 6000 MULTIRT_DEBUG_UNTAG(copy_mp); 6001 freemsg(copy_mp); 6002 copy_mp = NULL; 6003 } else { 6004 /* 6005 * ipif_lookup_group_v6() calls 6006 * ire_lookup_multi_v6() that 6007 * uses ire_ftable_lookup_v6() 6008 * to find an IRE_INTERFACE for 6009 * the group. In the multirt 6010 * case, ire_lookup_multi_v6() 6011 * then invokes 6012 * ire_multirt_lookup_v6() to 6013 * find the next resolvable ire. 6014 * As a result, we obtain a new 6015 * interface, derived from the 6016 * next ire. 
6017 */ 6018 if (ipif_held) { 6019 ipif_refrele(ipif); 6020 ipif_held = B_FALSE; 6021 } 6022 ipif = ipif_lookup_group_v6( 6023 v6dstp, zoneid); 6024 ip2dbg(("ip_newroute_ipif: " 6025 "multirt dst %08x, " 6026 "ipif %p\n", 6027 ntohl(V4_PART_OF_V6( 6028 (*v6dstp))), 6029 (void *)ipif)); 6030 if (ipif != NULL) { 6031 ipif_held = B_TRUE; 6032 mp = copy_mp; 6033 copy_mp = NULL; 6034 multirt_resolve_next = 6035 B_TRUE; 6036 continue; 6037 } else { 6038 freemsg(copy_mp); 6039 } 6040 } 6041 } 6042 ill_refrele(dst_ill); 6043 if (ipif_held) { 6044 ipif_refrele(ipif); 6045 ipif_held = B_FALSE; 6046 } 6047 if (src_ipif != NULL) 6048 ipif_refrele(src_ipif); 6049 return; 6050 6051 case EINPROGRESS: 6052 /* 6053 * mp was consumed - presumably queued. 6054 * No need for ire, presumably resolution is 6055 * in progress, and ire will be added when the 6056 * address is resolved. 6057 */ 6058 if (ip6_asp_table_held) { 6059 ip6_asp_table_refrele(); 6060 ip6_asp_table_held = B_FALSE; 6061 } 6062 ire_delete(ire); 6063 ire_refrele(save_ire); 6064 if (fire != NULL) { 6065 ire_refrele(fire); 6066 fire = NULL; 6067 } 6068 6069 /* 6070 * The resolution loop is re-entered if we 6071 * actually are in a multirouting case. 6072 */ 6073 if (copy_mp != NULL) { 6074 boolean_t need_resolve = 6075 ire_multirt_need_resolve_v6(v6dstp); 6076 if (!need_resolve) { 6077 MULTIRT_DEBUG_UNTAG(copy_mp); 6078 freemsg(copy_mp); 6079 copy_mp = NULL; 6080 } else { 6081 /* 6082 * ipif_lookup_group_v6() calls 6083 * ire_lookup_multi_v6() that 6084 * uses ire_ftable_lookup_v6() 6085 * to find an IRE_INTERFACE for 6086 * the group. In the multirt 6087 * case, ire_lookup_multi_v6() 6088 * then invokes 6089 * ire_multirt_lookup_v6() to 6090 * find the next resolvable ire. 6091 * As a result, we obtain a new 6092 * interface, derived from the 6093 * next ire. 
6094 */ 6095 if (ipif_held) { 6096 ipif_refrele(ipif); 6097 ipif_held = B_FALSE; 6098 } 6099 ipif = ipif_lookup_group_v6( 6100 v6dstp, zoneid); 6101 ip2dbg(("ip_newroute_ipif: " 6102 "multirt dst %08x, " 6103 "ipif %p\n", 6104 ntohl(V4_PART_OF_V6( 6105 (*v6dstp))), 6106 (void *)ipif)); 6107 if (ipif != NULL) { 6108 ipif_held = B_TRUE; 6109 mp = copy_mp; 6110 copy_mp = NULL; 6111 multirt_resolve_next = 6112 B_TRUE; 6113 continue; 6114 } else { 6115 freemsg(copy_mp); 6116 } 6117 } 6118 } 6119 ill_refrele(dst_ill); 6120 if (ipif_held) { 6121 ipif_refrele(ipif); 6122 ipif_held = B_FALSE; 6123 } 6124 if (src_ipif != NULL) 6125 ipif_refrele(src_ipif); 6126 return; 6127 default: 6128 /* Some transient error */ 6129 ire_refrele(save_ire); 6130 break; 6131 } 6132 break; 6133 } 6134 default: 6135 break; 6136 } 6137 if (ip6_asp_table_held) { 6138 ip6_asp_table_refrele(); 6139 ip6_asp_table_held = B_FALSE; 6140 } 6141 } while (multirt_resolve_next); 6142 6143 err_ret: 6144 if (ip6_asp_table_held) 6145 ip6_asp_table_refrele(); 6146 if (ire != NULL) 6147 ire_refrele(ire); 6148 if (fire != NULL) 6149 ire_refrele(fire); 6150 if (ipif != NULL && ipif_held) 6151 ipif_refrele(ipif); 6152 if (src_ipif != NULL) 6153 ipif_refrele(src_ipif); 6154 /* Multicast - no point in trying to generate ICMP error */ 6155 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6156 if (dst_ill != NULL) { 6157 ill = dst_ill; 6158 ill_held = B_TRUE; 6159 } 6160 if (mp->b_prev || mp->b_next) { 6161 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6162 } else { 6163 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6164 } 6165 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6166 mp->b_next = NULL; 6167 mp->b_prev = NULL; 6168 freemsg(first_mp); 6169 if (ill_held) 6170 ill_refrele(ill); 6171 } 6172 6173 /* 6174 * Parse and process any hop-by-hop or destination options. 6175 * 6176 * Assumes that q is an ill read queue so that ICMP errors for link-local 6177 * destinations are sent out the correct interface. 
6178 * 6179 * Returns -1 if there was an error and mp has been consumed. 6180 * Returns 0 if no special action is needed. 6181 * Returns 1 if the packet contained a router alert option for this node 6182 * which is verified to be "interesting/known" for our implementation. 6183 * 6184 * XXX Note: In future as more hbh or dest options are defined, 6185 * it may be better to have different routines for hbh and dest 6186 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6187 * may have same value in different namespaces. Or is it same namespace ?? 6188 * Current code checks for each opt_type (other than pads) if it is in 6189 * the expected nexthdr (hbh or dest) 6190 */ 6191 static int 6192 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6193 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6194 { 6195 uint8_t opt_type; 6196 uint_t optused; 6197 int ret = 0; 6198 mblk_t *first_mp; 6199 6200 first_mp = mp; 6201 if (mp->b_datap->db_type == M_CTL) { 6202 mp = mp->b_cont; 6203 } 6204 6205 while (optlen != 0) { 6206 opt_type = *optptr; 6207 if (opt_type == IP6OPT_PAD1) { 6208 optused = 1; 6209 } else { 6210 if (optlen < 2) 6211 goto bad_opt; 6212 switch (opt_type) { 6213 case IP6OPT_PADN: 6214 /* 6215 * Note:We don't verify that (N-2) pad octets 6216 * are zero as required by spec. Adhere to 6217 * "be liberal in what you accept..." part of 6218 * implementation philosophy (RFC791,RFC1122) 6219 */ 6220 optused = 2 + optptr[1]; 6221 if (optused > optlen) 6222 goto bad_opt; 6223 break; 6224 6225 case IP6OPT_JUMBO: 6226 if (hdr_type != IPPROTO_HOPOPTS) 6227 goto opt_error; 6228 goto opt_error; /* XXX Not implemented! 
*/ 6229 6230 case IP6OPT_ROUTER_ALERT: { 6231 struct ip6_opt_router *or; 6232 6233 if (hdr_type != IPPROTO_HOPOPTS) 6234 goto opt_error; 6235 optused = 2 + optptr[1]; 6236 if (optused > optlen) 6237 goto bad_opt; 6238 or = (struct ip6_opt_router *)optptr; 6239 /* Check total length and alignment */ 6240 if (optused != sizeof (*or) || 6241 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6242 goto opt_error; 6243 /* Check value */ 6244 switch (*((uint16_t *)or->ip6or_value)) { 6245 case IP6_ALERT_MLD: 6246 case IP6_ALERT_RSVP: 6247 ret = 1; 6248 } 6249 break; 6250 } 6251 case IP6OPT_HOME_ADDRESS: { 6252 /* 6253 * Minimal support for the home address option 6254 * (which is required by all IPv6 nodes). 6255 * Implement by just swapping the home address 6256 * and source address. 6257 * XXX Note: this has IPsec implications since 6258 * AH needs to take this into account. 6259 * Also, when IPsec is used we need to ensure 6260 * that this is only processed once 6261 * in the received packet (to avoid swapping 6262 * back and forth). 6263 * NOTE:This option processing is considered 6264 * to be unsafe and prone to a denial of 6265 * service attack. 6266 * The current processing is not safe even with 6267 * IPsec secured IP packets. Since the home 6268 * address option processing requirement still 6269 * is in the IETF draft and in the process of 6270 * being redefined for its usage, it has been 6271 * decided to turn off the option by default. 6272 * If this section of code needs to be executed, 6273 * ndd variable ip6_ignore_home_address_opt 6274 * should be set to 0 at the user's own risk. 6275 */ 6276 struct ip6_opt_home_address *oh; 6277 in6_addr_t tmp; 6278 6279 if (ipv6_ignore_home_address_opt) 6280 goto opt_error; 6281 6282 if (hdr_type != IPPROTO_DSTOPTS) 6283 goto opt_error; 6284 optused = 2 + optptr[1]; 6285 if (optused > optlen) 6286 goto bad_opt; 6287 6288 /* 6289 * We did this dest. opt the first time 6290 * around (i.e. before AH processing). 
6291 * If we've done AH... stop now. 6292 */ 6293 if (first_mp != mp) { 6294 ipsec_in_t *ii; 6295 6296 ii = (ipsec_in_t *)first_mp->b_rptr; 6297 if (ii->ipsec_in_ah_sa != NULL) 6298 break; 6299 } 6300 6301 oh = (struct ip6_opt_home_address *)optptr; 6302 /* Check total length and alignment */ 6303 if (optused < sizeof (*oh) || 6304 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6305 goto opt_error; 6306 /* Swap ip6_src and the home address */ 6307 tmp = ip6h->ip6_src; 6308 /* XXX Note: only 8 byte alignment option */ 6309 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6310 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6311 break; 6312 } 6313 6314 case IP6OPT_TUNNEL_LIMIT: 6315 if (hdr_type != IPPROTO_DSTOPTS) { 6316 goto opt_error; 6317 } 6318 optused = 2 + optptr[1]; 6319 if (optused > optlen) { 6320 goto bad_opt; 6321 } 6322 if (optused != 3) { 6323 goto opt_error; 6324 } 6325 break; 6326 6327 default: 6328 opt_error: 6329 ip1dbg(("ip_process_options_v6: bad opt 0x%x\n", 6330 opt_type)); 6331 switch (IP6OPT_TYPE(opt_type)) { 6332 case IP6OPT_TYPE_SKIP: 6333 optused = 2 + optptr[1]; 6334 if (optused > optlen) 6335 goto bad_opt; 6336 break; 6337 case IP6OPT_TYPE_DISCARD: 6338 freemsg(first_mp); 6339 return (-1); 6340 case IP6OPT_TYPE_ICMP: 6341 icmp_param_problem_v6(WR(q), first_mp, 6342 ICMP6_PARAMPROB_OPTION, 6343 (uint32_t)(optptr - 6344 (uint8_t *)ip6h), 6345 B_FALSE, B_FALSE); 6346 return (-1); 6347 case IP6OPT_TYPE_FORCEICMP: 6348 icmp_param_problem_v6(WR(q), first_mp, 6349 ICMP6_PARAMPROB_OPTION, 6350 (uint32_t)(optptr - 6351 (uint8_t *)ip6h), 6352 B_FALSE, B_TRUE); 6353 return (-1); 6354 } 6355 } 6356 } 6357 optlen -= optused; 6358 optptr += optused; 6359 } 6360 return (ret); 6361 6362 bad_opt: 6363 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6364 (uint32_t)(optptr - (uint8_t *)ip6h), 6365 B_FALSE, B_FALSE); 6366 return (-1); 6367 } 6368 6369 /* 6370 * Process a routing header that is not yet empty. 6371 * Only handles type 0 routing headers. 
6372 */ 6373 static void 6374 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6375 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6376 { 6377 ip6_rthdr0_t *rthdr; 6378 uint_t ehdrlen; 6379 uint_t numaddr; 6380 in6_addr_t *addrptr; 6381 in6_addr_t tmp; 6382 6383 ASSERT(rth->ip6r_segleft != 0); 6384 6385 if (!ipv6_forward_src_routed) { 6386 /* XXX Check for source routed out same interface? */ 6387 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6388 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6389 freemsg(hada_mp); 6390 freemsg(mp); 6391 return; 6392 } 6393 6394 if (rth->ip6r_type != 0) { 6395 if (hada_mp != NULL) 6396 goto hada_drop; 6397 icmp_param_problem_v6(WR(q), mp, 6398 ICMP6_PARAMPROB_HEADER, 6399 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6400 B_FALSE, B_FALSE); 6401 return; 6402 } 6403 rthdr = (ip6_rthdr0_t *)rth; 6404 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6405 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6406 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6407 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6408 if (rthdr->ip6r0_len & 0x1) { 6409 /* An odd length is impossible */ 6410 if (hada_mp != NULL) 6411 goto hada_drop; 6412 icmp_param_problem_v6(WR(q), mp, 6413 ICMP6_PARAMPROB_HEADER, 6414 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6415 B_FALSE, B_FALSE); 6416 return; 6417 } 6418 numaddr = rthdr->ip6r0_len / 2; 6419 if (rthdr->ip6r0_segleft > numaddr) { 6420 /* segleft exceeds number of addresses in routing header */ 6421 if (hada_mp != NULL) 6422 goto hada_drop; 6423 icmp_param_problem_v6(WR(q), mp, 6424 ICMP6_PARAMPROB_HEADER, 6425 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6426 (uchar_t *)ip6h), 6427 B_FALSE, B_FALSE); 6428 return; 6429 } 6430 addrptr += (numaddr - rthdr->ip6r0_segleft); 6431 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6432 IN6_IS_ADDR_MULTICAST(addrptr)) { 6433 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6434 freemsg(hada_mp); 6435 
freemsg(mp); 6436 return; 6437 } 6438 /* Swap */ 6439 tmp = *addrptr; 6440 *addrptr = ip6h->ip6_dst; 6441 ip6h->ip6_dst = tmp; 6442 rthdr->ip6r0_segleft--; 6443 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6444 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6445 if (hada_mp != NULL) 6446 goto hada_drop; 6447 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6448 B_FALSE, B_FALSE); 6449 return; 6450 } 6451 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6452 return; 6453 hada_drop: 6454 /* IPsec kstats: bean counter? */ 6455 freemsg(hada_mp); 6456 freemsg(mp); 6457 } 6458 6459 /* 6460 * Read side put procedure for IPv6 module. 6461 */ 6462 static void 6463 ip_rput_v6(queue_t *q, mblk_t *mp) 6464 { 6465 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6466 ip6_t *ip6h; 6467 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6468 ill_t *ill; 6469 struct iocblk *iocp; 6470 uint_t flags = 0; 6471 6472 ill = (ill_t *)q->q_ptr; 6473 if (ill->ill_state_flags & ILL_CONDEMNED) { 6474 union DL_primitives *dl; 6475 6476 dl = (union DL_primitives *)mp->b_rptr; 6477 /* 6478 * Things are opening or closing - only accept DLPI 6479 * ack messages. If the stream is closing and ip_wsrv 6480 * has completed, ip_close is out of the qwait, but has 6481 * not yet completed qprocsoff. Don't proceed any further 6482 * because the ill has been cleaned up and things hanging 6483 * off the ill have been freed. 6484 */ 6485 if ((mp->b_datap->db_type != M_PCPROTO) || 6486 (dl->dl_primitive == DL_UNITDATA_IND)) { 6487 inet_freemsg(mp); 6488 return; 6489 } 6490 } 6491 6492 switch (mp->b_datap->db_type) { 6493 case M_DATA: 6494 break; 6495 6496 case M_PROTO: 6497 case M_PCPROTO: 6498 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6499 DL_UNITDATA_IND) { 6500 /* Go handle anything other than data elsewhere. 
*/ 6501 ip_rput_dlpi(q, mp); 6502 return; 6503 } 6504 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6505 ll_multicast = dlur->dl_group_address; 6506 #undef dlur 6507 /* Ditch the DLPI header. */ 6508 mp1 = mp; 6509 mp = mp->b_cont; 6510 freeb(mp1); 6511 break; 6512 case M_BREAK: 6513 panic("ip_rput_v6: got an M_BREAK"); 6514 /*NOTREACHED*/ 6515 case M_IOCACK: 6516 iocp = (struct iocblk *)mp->b_rptr; 6517 switch (iocp->ioc_cmd) { 6518 case DL_IOC_HDR_INFO: 6519 ill = (ill_t *)q->q_ptr; 6520 ill_fastpath_ack(ill, mp); 6521 return; 6522 case SIOCSTUNPARAM: 6523 case SIOCGTUNPARAM: 6524 case OSIOCSTUNPARAM: 6525 case OSIOCGTUNPARAM: 6526 /* Go through qwriter */ 6527 break; 6528 default: 6529 putnext(q, mp); 6530 return; 6531 } 6532 /* FALLTHRU */ 6533 case M_ERROR: 6534 case M_HANGUP: 6535 mutex_enter(&ill->ill_lock); 6536 if (ill->ill_state_flags & ILL_CONDEMNED) { 6537 mutex_exit(&ill->ill_lock); 6538 freemsg(mp); 6539 return; 6540 } 6541 ill_refhold_locked(ill); 6542 mutex_exit(&ill->ill_lock); 6543 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6544 return; 6545 case M_CTL: { 6546 if ((MBLKL(mp) > sizeof (int)) && 6547 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6548 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6549 mctl_present = B_TRUE; 6550 break; 6551 } 6552 putnext(q, mp); 6553 return; 6554 } 6555 case M_IOCNAK: 6556 iocp = (struct iocblk *)mp->b_rptr; 6557 switch (iocp->ioc_cmd) { 6558 case DL_IOC_HDR_INFO: 6559 case SIOCSTUNPARAM: 6560 case SIOCGTUNPARAM: 6561 case OSIOCSTUNPARAM: 6562 case OSIOCGTUNPARAM: 6563 mutex_enter(&ill->ill_lock); 6564 if (ill->ill_state_flags & ILL_CONDEMNED) { 6565 mutex_exit(&ill->ill_lock); 6566 freemsg(mp); 6567 return; 6568 } 6569 ill_refhold_locked(ill); 6570 mutex_exit(&ill->ill_lock); 6571 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6572 B_FALSE); 6573 return; 6574 default: 6575 break; 6576 } 6577 /* FALLTHRU */ 6578 default: 6579 putnext(q, mp); 6580 return; 6581 } 6582 6583 
BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6584 /* 6585 * if db_ref > 1 then copymsg and free original. Packet may be 6586 * changed and do not want other entity who has a reference to this 6587 * message to trip over the changes. This is a blind change because 6588 * trying to catch all places that might change packet is too 6589 * difficult (since it may be a module above this one). 6590 */ 6591 if (mp->b_datap->db_ref > 1) { 6592 mblk_t *mp1; 6593 6594 mp1 = copymsg(mp); 6595 freemsg(mp); 6596 if (mp1 == NULL) { 6597 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6598 return; 6599 } 6600 mp = mp1; 6601 } 6602 first_mp = mp; 6603 if (mctl_present) { 6604 hada_mp = first_mp; 6605 mp = first_mp->b_cont; 6606 } 6607 6608 ip6h = (ip6_t *)mp->b_rptr; 6609 6610 /* check for alignment and full IPv6 header */ 6611 if (!OK_32PTR((uchar_t *)ip6h) || 6612 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6613 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6614 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6615 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6616 freemsg(first_mp); 6617 return; 6618 } 6619 ip6h = (ip6_t *)mp->b_rptr; 6620 } 6621 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6622 IPV6_DEFAULT_VERS_AND_FLOW) { 6623 /* 6624 * It may be a bit too expensive to do this mapped address 6625 * check here, but in the interest of robustness, it seems 6626 * like the correct place. 6627 * TODO: Avoid this check for e.g. 
connected TCP sockets 6628 */ 6629 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6630 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6631 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6632 freemsg(first_mp); 6633 return; 6634 } 6635 6636 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6637 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6638 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6639 freemsg(first_mp); 6640 return; 6641 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6642 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6643 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6644 freemsg(first_mp); 6645 return; 6646 } 6647 6648 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6649 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6650 } else { 6651 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6652 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6653 freemsg(first_mp); 6654 } 6655 } 6656 6657 /* 6658 * Walk through the IPv6 packet in mp and see if there's an AH header 6659 * in it. See if the AH header needs to get done before other headers in 6660 * the packet. (Worker function for ipsec_early_ah_v6().) 6661 */ 6662 #define IPSEC_HDR_DONT_PROCESS 0 6663 #define IPSEC_HDR_PROCESS 1 6664 #define IPSEC_MEMORY_ERROR 2 6665 static int 6666 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6667 { 6668 uint_t length; 6669 uint_t ehdrlen; 6670 uint8_t *whereptr; 6671 uint8_t *endptr; 6672 uint8_t *nexthdrp; 6673 ip6_dest_t *desthdr; 6674 ip6_rthdr_t *rthdr; 6675 ip6_t *ip6h; 6676 6677 /* 6678 * For now just pullup everything. In general, the less pullups, 6679 * the better, but there's so much squirrelling through anyway, 6680 * it's just easier this way. 
6681 */ 6682 if (!pullupmsg(mp, -1)) { 6683 return (IPSEC_MEMORY_ERROR); 6684 } 6685 6686 ip6h = (ip6_t *)mp->b_rptr; 6687 length = IPV6_HDR_LEN; 6688 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6689 endptr = mp->b_wptr; 6690 6691 /* 6692 * We can't just use the argument nexthdr in the place 6693 * of nexthdrp becaue we don't dereference nexthdrp 6694 * till we confirm whether it is a valid address. 6695 */ 6696 nexthdrp = &ip6h->ip6_nxt; 6697 while (whereptr < endptr) { 6698 /* Is there enough left for len + nexthdr? */ 6699 if (whereptr + MIN_EHDR_LEN > endptr) 6700 return (IPSEC_MEMORY_ERROR); 6701 6702 switch (*nexthdrp) { 6703 case IPPROTO_HOPOPTS: 6704 case IPPROTO_DSTOPTS: 6705 /* Assumes the headers are identical for hbh and dst */ 6706 desthdr = (ip6_dest_t *)whereptr; 6707 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6708 if ((uchar_t *)desthdr + ehdrlen > endptr) 6709 return (IPSEC_MEMORY_ERROR); 6710 /* 6711 * Return DONT_PROCESS because of potential Mobile IPv6 6712 * cruft for destination options. 6713 */ 6714 if (*nexthdrp == IPPROTO_DSTOPTS) 6715 return (IPSEC_HDR_DONT_PROCESS); 6716 nexthdrp = &desthdr->ip6d_nxt; 6717 break; 6718 case IPPROTO_ROUTING: 6719 rthdr = (ip6_rthdr_t *)whereptr; 6720 6721 /* 6722 * If there's more hops left on the routing header, 6723 * return now with DON'T PROCESS. 
6724 */ 6725 if (rthdr->ip6r_segleft > 0) 6726 return (IPSEC_HDR_DONT_PROCESS); 6727 6728 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6729 if ((uchar_t *)rthdr + ehdrlen > endptr) 6730 return (IPSEC_MEMORY_ERROR); 6731 nexthdrp = &rthdr->ip6r_nxt; 6732 break; 6733 case IPPROTO_FRAGMENT: 6734 /* Wait for reassembly */ 6735 return (IPSEC_HDR_DONT_PROCESS); 6736 case IPPROTO_AH: 6737 *nexthdr = IPPROTO_AH; 6738 return (IPSEC_HDR_PROCESS); 6739 case IPPROTO_NONE: 6740 /* No next header means we're finished */ 6741 default: 6742 return (IPSEC_HDR_DONT_PROCESS); 6743 } 6744 length += ehdrlen; 6745 whereptr += ehdrlen; 6746 } 6747 panic("ipsec_needs_processing_v6"); 6748 /*NOTREACHED*/ 6749 } 6750 6751 /* 6752 * Path for AH if options are present. If this is the first time we are 6753 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6754 * Otherwise, just fanout. Return value answers the boolean question: 6755 * "Did I consume the mblk you sent me?" 6756 * 6757 * Sometimes AH needs to be done before other IPv6 headers for security 6758 * reasons. This function (and its ipsec_needs_processing_v6() above) 6759 * indicates if that is so, and fans out to the appropriate IPsec protocol 6760 * for the datagram passed in. 6761 */ 6762 static boolean_t 6763 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6764 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6765 { 6766 mblk_t *mp; 6767 uint8_t nexthdr; 6768 ipsec_in_t *ii = NULL; 6769 ah_t *ah; 6770 ipsec_status_t ipsec_rc; 6771 6772 ASSERT((hada_mp == NULL) || (!mctl_present)); 6773 6774 switch (ipsec_needs_processing_v6( 6775 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6776 case IPSEC_MEMORY_ERROR: 6777 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6778 freemsg(hada_mp); 6779 freemsg(first_mp); 6780 return (B_TRUE); 6781 case IPSEC_HDR_DONT_PROCESS: 6782 return (B_FALSE); 6783 } 6784 6785 /* Default means send it to AH! 
*/ 6786 ASSERT(nexthdr == IPPROTO_AH); 6787 if (!mctl_present) { 6788 mp = first_mp; 6789 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 6790 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6791 "allocation failure.\n")); 6792 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6793 freemsg(hada_mp); 6794 freemsg(mp); 6795 return (B_TRUE); 6796 } 6797 /* 6798 * Store the ill_index so that when we come back 6799 * from IPSEC we ride on the same queue. 6800 */ 6801 ii = (ipsec_in_t *)first_mp->b_rptr; 6802 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6803 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 6804 first_mp->b_cont = mp; 6805 } 6806 /* 6807 * Cache hardware acceleration info. 6808 */ 6809 if (hada_mp != NULL) { 6810 ASSERT(ii != NULL); 6811 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6812 "caching data attr.\n")); 6813 ii->ipsec_in_accelerated = B_TRUE; 6814 ii->ipsec_in_da = hada_mp; 6815 } 6816 6817 if (!ipsec_loaded()) { 6818 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 6819 return (B_TRUE); 6820 } 6821 6822 ah = ipsec_inbound_ah_sa(first_mp); 6823 if (ah == NULL) 6824 return (B_TRUE); 6825 ASSERT(ii->ipsec_in_ah_sa != NULL); 6826 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6827 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6828 6829 switch (ipsec_rc) { 6830 case IPSEC_STATUS_SUCCESS: 6831 /* we're done with IPsec processing, send it up */ 6832 ip_fanout_proto_again(first_mp, ill, ill, ire); 6833 break; 6834 case IPSEC_STATUS_FAILED: 6835 BUMP_MIB(&ip6_mib, ipv6InDiscards); 6836 break; 6837 case IPSEC_STATUS_PENDING: 6838 /* no action needed */ 6839 break; 6840 } 6841 return (B_TRUE); 6842 } 6843 6844 /* 6845 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6846 * ip_rput_v6 has already verified alignment, the min length, the version, 6847 * and db_ref = 1. 6848 * 6849 * The ill passed in (the arg named inill) is the ill that the packet 6850 * actually arrived on. 
We need to remember this when saving the 6851 * input interface index into potential IPV6_PKTINFO data in 6852 * ip_add_info_v6(). 6853 */ 6854 void 6855 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6856 uint_t flags, mblk_t *hada_mp) 6857 { 6858 ire_t *ire = NULL; 6859 queue_t *rq; 6860 ill_t *ill = inill; 6861 ipif_t *ipif; 6862 uint8_t *whereptr; 6863 uint8_t nexthdr; 6864 uint16_t remlen; 6865 uint_t prev_nexthdr_offset; 6866 uint_t used; 6867 size_t pkt_len; 6868 uint16_t ip6_len; 6869 uint_t hdr_len; 6870 boolean_t mctl_present; 6871 mblk_t *first_mp; 6872 mblk_t *first_mp1; 6873 boolean_t no_forward; 6874 ip6_hbh_t *hbhhdr; 6875 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6876 conn_t *connp; 6877 ilm_t *ilm; 6878 uint32_t ports; 6879 uint_t ipif_id = 0; 6880 zoneid_t zoneid = GLOBAL_ZONEID; 6881 uint16_t hck_flags, reass_hck_flags; 6882 uint32_t reass_sum; 6883 boolean_t cksum_err; 6884 mblk_t *mp1; 6885 6886 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6887 6888 if (hada_mp != NULL) { 6889 /* 6890 * It's an IPsec accelerated packet. 6891 * Keep a pointer to the data attributes around until 6892 * we allocate the ipsecinfo structure. 6893 */ 6894 IPSECHW_DEBUG(IPSECHW_PKT, 6895 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6896 hada_mp->b_cont = NULL; 6897 /* 6898 * Since it is accelerated, it came directly from 6899 * the ill. 6900 */ 6901 ASSERT(mctl_present == B_FALSE); 6902 ASSERT(mp->b_datap->db_type != M_CTL); 6903 } 6904 6905 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6906 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6907 6908 if (mp->b_cont == NULL) 6909 pkt_len = mp->b_wptr - mp->b_rptr; 6910 else 6911 pkt_len = msgdsize(mp); 6912 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6913 6914 /* 6915 * Check for bogus (too short packet) and packet which 6916 * was padded by the link layer. 
6917 */ 6918 if (ip6_len != pkt_len) { 6919 ssize_t diff; 6920 6921 if (ip6_len > pkt_len) { 6922 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 6923 ip6_len, pkt_len)); 6924 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 6925 freemsg(hada_mp); 6926 freemsg(first_mp); 6927 return; 6928 } 6929 diff = (ssize_t)(pkt_len - ip6_len); 6930 6931 if (!adjmsg(mp, -diff)) { 6932 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6933 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6934 freemsg(hada_mp); 6935 freemsg(first_mp); 6936 return; 6937 } 6938 pkt_len -= diff; 6939 } 6940 6941 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6942 hck_flags = DB_CKSUMFLAGS(mp); 6943 else 6944 hck_flags = 0; 6945 6946 /* Clear checksum flags in case we need to forward */ 6947 DB_CKSUMFLAGS(mp) = 0; 6948 reass_sum = reass_hck_flags = 0; 6949 6950 nexthdr = ip6h->ip6_nxt; 6951 6952 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6953 (uchar_t *)ip6h); 6954 whereptr = (uint8_t *)&ip6h[1]; 6955 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6956 6957 /* Process hop by hop header options */ 6958 if (nexthdr == IPPROTO_HOPOPTS) { 6959 uint_t ehdrlen; 6960 uint8_t *optptr; 6961 6962 if (remlen < MIN_EHDR_LEN) 6963 goto pkt_too_short; 6964 if (mp->b_cont != NULL && 6965 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6966 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6967 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6968 freemsg(hada_mp); 6969 freemsg(first_mp); 6970 return; 6971 } 6972 ip6h = (ip6_t *)mp->b_rptr; 6973 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6974 } 6975 hbhhdr = (ip6_hbh_t *)whereptr; 6976 nexthdr = hbhhdr->ip6h_nxt; 6977 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6978 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6979 6980 if (remlen < ehdrlen) 6981 goto pkt_too_short; 6982 if (mp->b_cont != NULL && 6983 whereptr + ehdrlen > mp->b_wptr) { 6984 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6985 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 6986 freemsg(hada_mp); 6987 freemsg(first_mp); 6988 return; 6989 } 6990 ip6h = (ip6_t *)mp->b_rptr; 6991 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6992 hbhhdr = (ip6_hbh_t *)whereptr; 6993 } 6994 6995 optptr = whereptr + 2; 6996 whereptr += ehdrlen; 6997 remlen -= ehdrlen; 6998 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 6999 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7000 case -1: 7001 /* 7002 * Packet has been consumed and any 7003 * needed ICMP messages sent. 7004 */ 7005 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7006 freemsg(hada_mp); 7007 return; 7008 case 0: 7009 /* no action needed */ 7010 break; 7011 case 1: 7012 /* Known router alert */ 7013 goto ipv6forus; 7014 } 7015 } 7016 7017 /* 7018 * On incoming v6 multicast packets we will bypass the ire table, 7019 * and assume that the read queue corresponds to the targetted 7020 * interface. 7021 * 7022 * The effect of this is the same as the IPv4 original code, but is 7023 * much cleaner I think. See ip_rput for how that was done. 7024 */ 7025 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7026 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7027 /* 7028 * XXX TODO Give to mrouted to for multicast forwarding. 
7029 */ 7030 ILM_WALKER_HOLD(ill); 7031 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7032 ILM_WALKER_RELE(ill); 7033 if (ilm == NULL) { 7034 if (ip_debug > 3) { 7035 /* ip2dbg */ 7036 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7037 " which is not for us: %s\n", AF_INET6, 7038 &ip6h->ip6_dst); 7039 } 7040 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7041 freemsg(hada_mp); 7042 freemsg(first_mp); 7043 return; 7044 } 7045 if (ip_debug > 3) { 7046 /* ip2dbg */ 7047 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7048 AF_INET6, &ip6h->ip6_dst); 7049 } 7050 rq = ill->ill_rq; 7051 zoneid = GLOBAL_ZONEID; 7052 goto ipv6forus; 7053 } 7054 7055 ipif = ill->ill_ipif; 7056 7057 /* 7058 * If a packet was received on an interface that is a 6to4 tunnel, 7059 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7060 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7061 * the 6to4 prefix of the address configured on the receiving interface. 7062 * Otherwise, the packet was delivered to this interface in error and 7063 * the packet must be dropped. 7064 */ 7065 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7066 7067 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7068 &ip6h->ip6_dst)) { 7069 if (ip_debug > 2) { 7070 /* ip1dbg */ 7071 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7072 "addressed packet which is not for us: " 7073 "%s\n", AF_INET6, &ip6h->ip6_dst); 7074 } 7075 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7076 freemsg(first_mp); 7077 return; 7078 } 7079 } 7080 7081 /* 7082 * Find an ire that matches destination. For link-local addresses 7083 * we have to match the ill. 7084 * TBD for site local addresses. 
7085 */ 7086 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7087 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7088 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, 7089 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7090 } else { 7091 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 7092 } 7093 if (ire == NULL) { 7094 /* 7095 * No matching IRE found. Mark this packet as having 7096 * originated externally. 7097 */ 7098 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7099 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7100 if (!(ill->ill_flags & ILLF_ROUTER)) 7101 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7102 freemsg(hada_mp); 7103 freemsg(first_mp); 7104 return; 7105 } 7106 if (ip6h->ip6_hops <= 1) { 7107 if (hada_mp != NULL) 7108 goto hada_drop; 7109 icmp_time_exceeded_v6(WR(q), first_mp, 7110 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7111 return; 7112 } 7113 /* 7114 * Per RFC 3513 section 2.5.2, we must not forward packets with 7115 * an unspecified source address. 7116 */ 7117 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7118 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7119 freemsg(hada_mp); 7120 freemsg(first_mp); 7121 return; 7122 } 7123 mp->b_prev = (mblk_t *)(uintptr_t) 7124 ill->ill_phyint->phyint_ifindex; 7125 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7126 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7127 ALL_ZONES); 7128 return; 7129 } 7130 ipif_id = ire->ire_ipif->ipif_seqid; 7131 /* we have a matching IRE */ 7132 if (ire->ire_stq != NULL) { 7133 ill_group_t *ill_group; 7134 ill_group_t *ire_group; 7135 7136 /* 7137 * To be quicker, we may wish not to chase pointers 7138 * (ire->ire_ipif->ipif_ill...) and instead store the 7139 * forwarding policy in the ire. An unfortunate side- 7140 * effect of this would be requiring an ire flush whenever 7141 * the ILLF_ROUTER flag changes. For now, chase pointers 7142 * once and store in the boolean no_forward. 
7143 * 7144 * This appears twice to keep it out of the non-forwarding, 7145 * yes-it's-for-us-on-the-right-interface case. 7146 */ 7147 no_forward = ((ill->ill_flags & 7148 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7149 7150 7151 ASSERT(first_mp == mp); 7152 /* 7153 * This ire has a send-to queue - forward the packet. 7154 */ 7155 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7156 freemsg(hada_mp); 7157 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7158 if (no_forward) 7159 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7160 freemsg(mp); 7161 ire_refrele(ire); 7162 return; 7163 } 7164 if (ip6h->ip6_hops <= 1) { 7165 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7166 icmp_time_exceeded_v6(WR(q), mp, 7167 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7168 ire_refrele(ire); 7169 return; 7170 } 7171 /* 7172 * Per RFC 3513 section 2.5.2, we must not forward packets with 7173 * an unspecified source address. 7174 */ 7175 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7176 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7177 freemsg(hada_mp); 7178 freemsg(mp); 7179 ire_refrele(ire); 7180 return; 7181 } 7182 if (pkt_len > ire->ire_max_frag) { 7183 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7184 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7185 ll_multicast, B_TRUE); 7186 ire_refrele(ire); 7187 return; 7188 } 7189 7190 /* 7191 * Check to see if we're forwarding the packet to a 7192 * different link from which it came. If so, check the 7193 * source and destination addresses since routers must not 7194 * forward any packets with link-local source or 7195 * destination addresses to other links. Otherwise (if 7196 * we're forwarding onto the same link), conditionally send 7197 * a redirect message. 
7198 */ 7199 ill_group = ill->ill_group; 7200 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7201 if (ire->ire_rfq != q && (ill_group == NULL || 7202 ill_group != ire_group)) { 7203 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7204 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7205 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7206 freemsg(mp); 7207 ire_refrele(ire); 7208 return; 7209 } 7210 /* TBD add site-local check at site boundary? */ 7211 } else if (ipv6_send_redirects) { 7212 in6_addr_t *v6targ; 7213 in6_addr_t gw_addr_v6; 7214 ire_t *src_ire_v6 = NULL; 7215 7216 /* 7217 * Don't send a redirect when forwarding a source 7218 * routed packet. 7219 */ 7220 if (ip_source_routed_v6(ip6h, mp)) 7221 goto forward; 7222 7223 mutex_enter(&ire->ire_lock); 7224 gw_addr_v6 = ire->ire_gateway_addr_v6; 7225 mutex_exit(&ire->ire_lock); 7226 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7227 v6targ = &gw_addr_v6; 7228 /* 7229 * We won't send redirects to a router 7230 * that doesn't have a link local 7231 * address, but will forward. 7232 */ 7233 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7234 BUMP_MIB(ill->ill_ip6_mib, 7235 ipv6InAddrErrors); 7236 goto forward; 7237 } 7238 } else { 7239 v6targ = &ip6h->ip6_dst; 7240 } 7241 7242 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7243 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7244 ALL_ZONES, 0, MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7245 7246 if (src_ire_v6 != NULL) { 7247 /* 7248 * The source is directly connected. 
7249 */ 7250 mp1 = copymsg(mp); 7251 if (mp1 != NULL) { 7252 icmp_send_redirect_v6(WR(q), 7253 mp1, v6targ, &ip6h->ip6_dst, 7254 ill, B_FALSE); 7255 } 7256 ire_refrele(src_ire_v6); 7257 } 7258 } 7259 7260 forward: 7261 /* Hoplimit verified above */ 7262 ip6h->ip6_hops--; 7263 UPDATE_IB_PKT_COUNT(ire); 7264 ire->ire_last_used_time = lbolt; 7265 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7266 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7267 IRE_REFRELE(ire); 7268 return; 7269 } 7270 rq = ire->ire_rfq; 7271 7272 /* 7273 * Need to put on correct queue for reassembly to find it. 7274 * No need to use put() since reassembly has its own locks. 7275 * Note: multicast packets and packets destined to addresses 7276 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7277 * the arriving ill. 7278 */ 7279 if (rq != q) { 7280 boolean_t check_multi = B_TRUE; 7281 ill_group_t *ill_group = NULL; 7282 ill_group_t *ire_group = NULL; 7283 ill_t *ire_ill = NULL; 7284 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7285 7286 /* 7287 * To be quicker, we may wish not to chase pointers 7288 * (ire->ire_ipif->ipif_ill...) and instead store the 7289 * forwarding policy in the ire. An unfortunate side- 7290 * effect of this would be requiring an ire flush whenever 7291 * the ILLF_ROUTER flag changes. For now, chase pointers 7292 * once and store in the boolean no_forward. 7293 */ 7294 no_forward = ((ill->ill_flags & 7295 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7296 7297 ill_group = ill->ill_group; 7298 if (rq != NULL) { 7299 ire_ill = (ill_t *)(rq->q_ptr); 7300 ire_group = ire_ill->ill_group; 7301 } 7302 7303 /* 7304 * If it's part of the same IPMP group, or if it's a legal 7305 * address on the 'usesrc' interface, then bypass strict 7306 * checks. 
7307 */ 7308 if (ill_group != NULL && ill_group == ire_group) { 7309 check_multi = B_FALSE; 7310 } else if (ill_ifindex != 0 && ire_ill != NULL && 7311 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7312 check_multi = B_FALSE; 7313 } 7314 7315 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7316 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7317 /* 7318 * This packet came in on an interface other than the 7319 * one associated with the destination address 7320 * and we are strict about matches. 7321 * 7322 * As long as the ills belong to the same group, 7323 * we don't consider them to arriving on the wrong 7324 * interface. Thus, when the switch is doing inbound 7325 * load spreading, we won't drop packets when we 7326 * are doing strict multihoming checks. 7327 */ 7328 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7329 freemsg(hada_mp); 7330 freemsg(first_mp); 7331 ire_refrele(ire); 7332 return; 7333 } 7334 7335 if (rq != NULL) 7336 q = rq; 7337 7338 ill = (ill_t *)q->q_ptr; 7339 ASSERT(ill); 7340 } 7341 7342 zoneid = ire->ire_zoneid; 7343 UPDATE_IB_PKT_COUNT(ire); 7344 ire->ire_last_used_time = lbolt; 7345 /* Don't use the ire after this point. */ 7346 ire_refrele(ire); 7347 ipv6forus: 7348 /* 7349 * Looks like this packet is for us one way or another. 7350 * This is where we'll process destination headers etc. 
7351 */ 7352 for (; ; ) { 7353 switch (nexthdr) { 7354 case IPPROTO_TCP: { 7355 uint16_t *up; 7356 uint32_t sum; 7357 int offset; 7358 7359 hdr_len = pkt_len - remlen; 7360 7361 if (hada_mp != NULL) { 7362 ip0dbg(("tcp hada drop\n")); 7363 goto hada_drop; 7364 } 7365 7366 7367 /* TCP needs all of the TCP header */ 7368 if (remlen < TCP_MIN_HEADER_LENGTH) 7369 goto pkt_too_short; 7370 if (mp->b_cont != NULL && 7371 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7372 if (!pullupmsg(mp, 7373 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7374 BUMP_MIB(ill->ill_ip6_mib, 7375 ipv6InDiscards); 7376 freemsg(first_mp); 7377 return; 7378 } 7379 hck_flags = 0; 7380 ip6h = (ip6_t *)mp->b_rptr; 7381 whereptr = (uint8_t *)ip6h + hdr_len; 7382 } 7383 /* 7384 * Extract the offset field from the TCP header. 7385 */ 7386 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7387 if (offset != 5) { 7388 if (offset < 5) { 7389 ip1dbg(("ip_rput_data_v6: short " 7390 "TCP data offset")); 7391 BUMP_MIB(ill->ill_ip6_mib, 7392 ipv6InDiscards); 7393 freemsg(first_mp); 7394 return; 7395 } 7396 /* 7397 * There must be TCP options. 7398 * Make sure we can grab them. 7399 */ 7400 offset <<= 2; 7401 if (remlen < offset) 7402 goto pkt_too_short; 7403 if (mp->b_cont != NULL && 7404 whereptr + offset > mp->b_wptr) { 7405 if (!pullupmsg(mp, 7406 hdr_len + offset)) { 7407 BUMP_MIB(ill->ill_ip6_mib, 7408 ipv6InDiscards); 7409 freemsg(first_mp); 7410 return; 7411 } 7412 hck_flags = 0; 7413 ip6h = (ip6_t *)mp->b_rptr; 7414 whereptr = (uint8_t *)ip6h + hdr_len; 7415 } 7416 } 7417 7418 up = (uint16_t *)&ip6h->ip6_src; 7419 /* 7420 * TCP checksum calculation. 
First sum up the 7421 * pseudo-header fields: 7422 * - Source IPv6 address 7423 * - Destination IPv6 address 7424 * - TCP payload length 7425 * - TCP protocol ID 7426 */ 7427 sum = htons(IPPROTO_TCP + remlen) + 7428 up[0] + up[1] + up[2] + up[3] + 7429 up[4] + up[5] + up[6] + up[7] + 7430 up[8] + up[9] + up[10] + up[11] + 7431 up[12] + up[13] + up[14] + up[15]; 7432 7433 /* Fold initial sum */ 7434 sum = (sum & 0xffff) + (sum >> 16); 7435 7436 mp1 = mp->b_cont; 7437 7438 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7439 IP6_STAT(ip6_in_sw_cksum); 7440 7441 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7442 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7443 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7444 mp, mp1, cksum_err); 7445 7446 if (cksum_err) { 7447 BUMP_MIB(&ip_mib, tcpInErrs); 7448 7449 if (hck_flags & HCK_FULLCKSUM) 7450 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7451 else if (hck_flags & HCK_PARTIALCKSUM) 7452 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7453 else 7454 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7455 7456 freemsg(first_mp); 7457 return; 7458 } 7459 tcp_fanout: 7460 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7461 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7462 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7463 return; 7464 } 7465 case IPPROTO_SCTP: 7466 { 7467 sctp_hdr_t *sctph; 7468 uint32_t calcsum, pktsum; 7469 uint_t hdr_len = pkt_len - remlen; 7470 7471 /* SCTP needs all of the SCTP header */ 7472 if (remlen < sizeof (*sctph)) { 7473 goto pkt_too_short; 7474 } 7475 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7476 ASSERT(mp->b_cont != NULL); 7477 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7478 BUMP_MIB(ill->ill_ip6_mib, 7479 ipv6InDiscards); 7480 freemsg(mp); 7481 return; 7482 } 7483 ip6h = (ip6_t *)mp->b_rptr; 7484 whereptr = (uint8_t *)ip6h + hdr_len; 7485 } 7486 7487 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7488 /* checksum */ 7489 pktsum = sctph->sh_chksum; 7490 sctph->sh_chksum = 0; 7491 calcsum = sctp_cksum(mp, 
hdr_len); 7492 if (calcsum != pktsum) { 7493 BUMP_MIB(&sctp_mib, sctpChecksumError); 7494 freemsg(mp); 7495 return; 7496 } 7497 sctph->sh_chksum = pktsum; 7498 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7499 if ((connp = sctp_find_conn(&ip6h->ip6_src, 7500 &ip6h->ip6_dst, ports, ipif_id, zoneid)) == NULL) { 7501 ip_fanout_sctp_raw(first_mp, ill, 7502 (ipha_t *)ip6h, B_FALSE, ports, 7503 mctl_present, 7504 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7505 B_TRUE, ipif_id, zoneid); 7506 return; 7507 } 7508 BUMP_MIB(&ip_mib, ipInDelivers); 7509 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7510 B_FALSE, mctl_present); 7511 return; 7512 } 7513 case IPPROTO_UDP: { 7514 uint16_t *up; 7515 uint32_t sum; 7516 7517 hdr_len = pkt_len - remlen; 7518 7519 if (hada_mp != NULL) { 7520 ip0dbg(("udp hada drop\n")); 7521 goto hada_drop; 7522 } 7523 7524 /* Verify that at least the ports are present */ 7525 if (remlen < UDPH_SIZE) 7526 goto pkt_too_short; 7527 if (mp->b_cont != NULL && 7528 whereptr + UDPH_SIZE > mp->b_wptr) { 7529 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7530 BUMP_MIB(ill->ill_ip6_mib, 7531 ipv6InDiscards); 7532 freemsg(first_mp); 7533 return; 7534 } 7535 hck_flags = 0; 7536 ip6h = (ip6_t *)mp->b_rptr; 7537 whereptr = (uint8_t *)ip6h + hdr_len; 7538 } 7539 7540 /* 7541 * Before going through the regular checksum 7542 * calculation, make sure the received checksum 7543 * is non-zero. RFC 2460 says, a 0x0000 checksum 7544 * in a UDP packet (within IPv6 packet) is invalid 7545 * and should be replaced by 0xffff. This makes 7546 * sense as regular checksum calculation will 7547 * pass for both the cases i.e. 0x0000 and 0xffff. 7548 * Removing one of the case makes error detection 7549 * stronger. 
7550 */ 7551 7552 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7553 /* 0x0000 checksum is invalid */ 7554 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7555 "checksum value 0x0000\n")); 7556 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7557 freemsg(first_mp); 7558 return; 7559 } 7560 7561 up = (uint16_t *)&ip6h->ip6_src; 7562 7563 /* 7564 * UDP checksum calculation. First sum up the 7565 * pseudo-header fields: 7566 * - Source IPv6 address 7567 * - Destination IPv6 address 7568 * - UDP payload length 7569 * - UDP protocol ID 7570 */ 7571 7572 sum = htons(IPPROTO_UDP + remlen) + 7573 up[0] + up[1] + up[2] + up[3] + 7574 up[4] + up[5] + up[6] + up[7] + 7575 up[8] + up[9] + up[10] + up[11] + 7576 up[12] + up[13] + up[14] + up[15]; 7577 7578 /* Fold initial sum */ 7579 sum = (sum & 0xffff) + (sum >> 16); 7580 7581 if (reass_hck_flags != 0) { 7582 hck_flags = reass_hck_flags; 7583 7584 IP_CKSUM_RECV_REASS(hck_flags, 7585 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7586 sum, reass_sum, cksum_err); 7587 } else { 7588 mp1 = mp->b_cont; 7589 7590 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7591 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7592 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7593 mp, mp1, cksum_err); 7594 } 7595 7596 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7597 IP6_STAT(ip6_in_sw_cksum); 7598 7599 if (cksum_err) { 7600 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7601 7602 if (hck_flags & HCK_FULLCKSUM) 7603 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7604 else if (hck_flags & HCK_PARTIALCKSUM) 7605 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7606 else 7607 IP6_STAT(ip6_udp_in_sw_cksum_err); 7608 7609 freemsg(first_mp); 7610 return; 7611 } 7612 goto udp_fanout; 7613 } 7614 case IPPROTO_ICMPV6: { 7615 uint16_t *up; 7616 uint32_t sum; 7617 uint_t hdr_len = pkt_len - remlen; 7618 7619 if (hada_mp != NULL) { 7620 ip0dbg(("icmp hada drop\n")); 7621 goto hada_drop; 7622 } 7623 7624 up = (uint16_t *)&ip6h->ip6_src; 7625 sum = htons(IPPROTO_ICMPV6 + remlen) + 7626 
up[0] + up[1] + up[2] + up[3] + 7627 up[4] + up[5] + up[6] + up[7] + 7628 up[8] + up[9] + up[10] + up[11] + 7629 up[12] + up[13] + up[14] + up[15]; 7630 sum = (sum & 0xffff) + (sum >> 16); 7631 sum = IP_CSUM(mp, hdr_len, sum); 7632 if (sum != 0) { 7633 /* IPv6 ICMP checksum failed */ 7634 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7635 "failed %x\n", 7636 sum)); 7637 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7638 BUMP_MIB(ill->ill_icmp6_mib, 7639 ipv6IfIcmpInErrors); 7640 freemsg(first_mp); 7641 return; 7642 } 7643 7644 icmp_fanout: 7645 /* Check variable for testing applications */ 7646 if (ipv6_drop_inbound_icmpv6) { 7647 freemsg(first_mp); 7648 return; 7649 } 7650 /* 7651 * Assume that there is always at least one conn for 7652 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7653 * where there is no conn. 7654 */ 7655 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7656 ASSERT(!(ill->ill_phyint->phyint_flags & 7657 PHYI_LOOPBACK)); 7658 /* 7659 * In the multicast case, applications may have 7660 * joined the group from different zones, so we 7661 * need to deliver the packet to each of them. 7662 * Loop through the multicast memberships 7663 * structures (ilm) on the receive ill and send 7664 * a copy of the packet up each matching one. 
7665 */ 7666 ILM_WALKER_HOLD(ill); 7667 for (ilm = ill->ill_ilm; ilm != NULL; 7668 ilm = ilm->ilm_next) { 7669 if (ilm->ilm_flags & ILM_DELETED) 7670 continue; 7671 if (!IN6_ARE_ADDR_EQUAL( 7672 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7673 continue; 7674 if (!ipif_lookup_zoneid(ill, 7675 ilm->ilm_zoneid, IPIF_UP, NULL)) 7676 continue; 7677 7678 first_mp1 = ip_copymsg(first_mp); 7679 if (first_mp1 == NULL) 7680 continue; 7681 icmp_inbound_v6(q, first_mp1, ill, 7682 hdr_len, mctl_present, 0, 7683 ilm->ilm_zoneid); 7684 } 7685 ILM_WALKER_RELE(ill); 7686 } else { 7687 first_mp1 = ip_copymsg(first_mp); 7688 if (first_mp1 != NULL) 7689 icmp_inbound_v6(q, first_mp1, ill, 7690 hdr_len, mctl_present, 0, zoneid); 7691 } 7692 } 7693 /* FALLTHRU */ 7694 default: { 7695 /* 7696 * Handle protocols with which IPv6 is less intimate. 7697 */ 7698 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7699 7700 if (hada_mp != NULL) { 7701 ip0dbg(("default hada drop\n")); 7702 goto hada_drop; 7703 } 7704 7705 /* 7706 * Enable sending ICMP for "Unknown" nexthdr 7707 * case. i.e. where we did not FALLTHRU from 7708 * IPPROTO_ICMPV6 processing case above. 7709 * If we did FALLTHRU, then the packet has already been 7710 * processed for IPPF, don't process it again in 7711 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7712 * flags 7713 */ 7714 if (nexthdr != IPPROTO_ICMPV6) 7715 proto_flags |= IP_FF_SEND_ICMP; 7716 else 7717 proto_flags |= IP6_NO_IPPOLICY; 7718 7719 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7720 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7721 mctl_present, zoneid); 7722 return; 7723 } 7724 7725 case IPPROTO_DSTOPTS: { 7726 uint_t ehdrlen; 7727 uint8_t *optptr; 7728 ip6_dest_t *desthdr; 7729 7730 /* Check if AH is present. */ 7731 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7732 ire, hada_mp, zoneid)) { 7733 ip0dbg(("dst early hada drop\n")); 7734 return; 7735 } 7736 7737 /* 7738 * Reinitialize pointers, as ipsec_early_ah_v6() does 7739 * complete pullups. 
We don't have to do more pullups 7740 * as a result. 7741 */ 7742 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7743 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7744 ip6h = (ip6_t *)mp->b_rptr; 7745 7746 if (remlen < MIN_EHDR_LEN) 7747 goto pkt_too_short; 7748 7749 desthdr = (ip6_dest_t *)whereptr; 7750 nexthdr = desthdr->ip6d_nxt; 7751 prev_nexthdr_offset = (uint_t)(whereptr - 7752 (uint8_t *)ip6h); 7753 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7754 if (remlen < ehdrlen) 7755 goto pkt_too_short; 7756 optptr = whereptr + 2; 7757 /* 7758 * Note: XXX This code does not seem to make 7759 * distinction between Destination Options Header 7760 * being before/after Routing Header which can 7761 * happen if we are at the end of source route. 7762 * This may become significant in future. 7763 * (No real significant Destination Options are 7764 * defined/implemented yet ). 7765 */ 7766 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7767 ehdrlen - 2, IPPROTO_DSTOPTS)) { 7768 case -1: 7769 /* 7770 * Packet has been consumed and any needed 7771 * ICMP errors sent. 
7772 */ 7773 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7774 freemsg(hada_mp); 7775 return; 7776 case 0: 7777 /* No action needed continue */ 7778 break; 7779 case 1: 7780 /* 7781 * Unnexpected return value 7782 * (Router alert is a Hop-by-Hop option) 7783 */ 7784 #ifdef DEBUG 7785 panic("ip_rput_data_v6: router " 7786 "alert hbh opt indication in dest opt"); 7787 /*NOTREACHED*/ 7788 #else 7789 freemsg(hada_mp); 7790 freemsg(first_mp); 7791 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7792 return; 7793 #endif 7794 } 7795 used = ehdrlen; 7796 break; 7797 } 7798 case IPPROTO_FRAGMENT: { 7799 ip6_frag_t *fraghdr; 7800 size_t no_frag_hdr_len; 7801 7802 if (hada_mp != NULL) { 7803 ip0dbg(("frag hada drop\n")); 7804 goto hada_drop; 7805 } 7806 7807 ASSERT(first_mp == mp); 7808 if (remlen < sizeof (ip6_frag_t)) 7809 goto pkt_too_short; 7810 7811 if (mp->b_cont != NULL && 7812 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7813 if (!pullupmsg(mp, 7814 pkt_len - remlen + sizeof (ip6_frag_t))) { 7815 BUMP_MIB(ill->ill_ip6_mib, 7816 ipv6InDiscards); 7817 freemsg(mp); 7818 return; 7819 } 7820 hck_flags = 0; 7821 ip6h = (ip6_t *)mp->b_rptr; 7822 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7823 } 7824 7825 fraghdr = (ip6_frag_t *)whereptr; 7826 used = (uint_t)sizeof (ip6_frag_t); 7827 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 7828 7829 /* 7830 * Invoke the CGTP (multirouting) filtering module to 7831 * process the incoming packet. Packets identified as 7832 * duplicates must be discarded. Filtering is active 7833 * only if the the ip_cgtp_filter ndd variable is 7834 * non-zero. 
7835 */ 7836 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 7837 int cgtp_flt_pkt = 7838 ip_cgtp_filter_ops->cfo_filter_v6( 7839 inill->ill_rq, ip6h, fraghdr); 7840 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7841 freemsg(mp); 7842 return; 7843 } 7844 } 7845 7846 /* Restore the flags */ 7847 DB_CKSUMFLAGS(mp) = hck_flags; 7848 7849 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 7850 remlen - used, &prev_nexthdr_offset, 7851 &reass_sum, &reass_hck_flags); 7852 if (mp == NULL) { 7853 /* Reassembly is still pending */ 7854 return; 7855 } 7856 /* The first mblk are the headers before the frag hdr */ 7857 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 7858 7859 first_mp = mp; /* mp has most likely changed! */ 7860 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7861 ip6h = (ip6_t *)mp->b_rptr; 7862 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7863 whereptr = mp->b_rptr + no_frag_hdr_len; 7864 remlen = ntohs(ip6h->ip6_plen) + 7865 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7866 pkt_len = msgdsize(mp); 7867 used = 0; 7868 break; 7869 } 7870 case IPPROTO_HOPOPTS: 7871 if (hada_mp != NULL) { 7872 ip0dbg(("hop hada drop\n")); 7873 goto hada_drop; 7874 } 7875 /* 7876 * Illegal header sequence. 7877 * (Hop-by-hop headers are processed above 7878 * and required to immediately follow IPv6 header) 7879 */ 7880 icmp_param_problem_v6(WR(q), first_mp, 7881 ICMP6_PARAMPROB_NEXTHEADER, 7882 prev_nexthdr_offset, 7883 B_FALSE, B_FALSE); 7884 return; 7885 7886 case IPPROTO_ROUTING: { 7887 uint_t ehdrlen; 7888 ip6_rthdr_t *rthdr; 7889 7890 /* Check if AH is present. */ 7891 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7892 ire, hada_mp, zoneid)) { 7893 ip0dbg(("routing hada drop\n")); 7894 return; 7895 } 7896 7897 /* 7898 * Reinitialize pointers, as ipsec_early_ah_v6() does 7899 * complete pullups. We don't have to do more pullups 7900 * as a result. 
7901 */ 7902 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7903 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7904 ip6h = (ip6_t *)mp->b_rptr; 7905 7906 if (remlen < MIN_EHDR_LEN) 7907 goto pkt_too_short; 7908 rthdr = (ip6_rthdr_t *)whereptr; 7909 nexthdr = rthdr->ip6r_nxt; 7910 prev_nexthdr_offset = (uint_t)(whereptr - 7911 (uint8_t *)ip6h); 7912 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7913 if (remlen < ehdrlen) 7914 goto pkt_too_short; 7915 if (rthdr->ip6r_segleft != 0) { 7916 /* Not end of source route */ 7917 if (ll_multicast) { 7918 BUMP_MIB(ill->ill_ip6_mib, 7919 ipv6ForwProhibits); 7920 freemsg(hada_mp); 7921 freemsg(mp); 7922 return; 7923 } 7924 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7925 flags, hada_mp); 7926 return; 7927 } 7928 used = ehdrlen; 7929 break; 7930 } 7931 case IPPROTO_AH: 7932 case IPPROTO_ESP: { 7933 /* 7934 * Fast path for AH/ESP. If this is the first time 7935 * we are sending a datagram to AH/ESP, allocate 7936 * a IPSEC_IN message and prepend it. Otherwise, 7937 * just fanout. 7938 */ 7939 7940 ipsec_in_t *ii; 7941 int ipsec_rc; 7942 7943 if (!mctl_present) { 7944 ASSERT(first_mp == mp); 7945 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 7946 NULL) { 7947 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 7948 "allocation failure.\n")); 7949 BUMP_MIB(ill->ill_ip6_mib, 7950 ipv6InDiscards); 7951 freemsg(mp); 7952 return; 7953 } 7954 /* 7955 * Store the ill_index so that when we come back 7956 * from IPSEC we ride on the same queue. 7957 */ 7958 ii = (ipsec_in_t *)first_mp->b_rptr; 7959 ii->ipsec_in_ill_index = 7960 ill->ill_phyint->phyint_ifindex; 7961 ii->ipsec_in_rill_index = 7962 ii->ipsec_in_ill_index; 7963 first_mp->b_cont = mp; 7964 /* 7965 * Cache hardware acceleration info. 
7966 */ 7967 if (hada_mp != NULL) { 7968 IPSECHW_DEBUG(IPSECHW_PKT, 7969 ("ip_rput_data_v6: " 7970 "caching data attr.\n")); 7971 ii->ipsec_in_accelerated = B_TRUE; 7972 ii->ipsec_in_da = hada_mp; 7973 hada_mp = NULL; 7974 } 7975 } else { 7976 ii = (ipsec_in_t *)first_mp->b_rptr; 7977 } 7978 7979 if (!ipsec_loaded()) { 7980 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 7981 ire->ire_zoneid); 7982 return; 7983 } 7984 7985 /* select inbound SA and have IPsec process the pkt */ 7986 if (nexthdr == IPPROTO_ESP) { 7987 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 7988 if (esph == NULL) 7989 return; 7990 ASSERT(ii->ipsec_in_esp_sa != NULL); 7991 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 7992 NULL); 7993 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 7994 first_mp, esph); 7995 } else { 7996 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 7997 if (ah == NULL) 7998 return; 7999 ASSERT(ii->ipsec_in_ah_sa != NULL); 8000 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8001 NULL); 8002 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8003 first_mp, ah); 8004 } 8005 8006 switch (ipsec_rc) { 8007 case IPSEC_STATUS_SUCCESS: 8008 break; 8009 case IPSEC_STATUS_FAILED: 8010 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8011 /* FALLTHRU */ 8012 case IPSEC_STATUS_PENDING: 8013 return; 8014 } 8015 /* we're done with IPsec processing, send it up */ 8016 ip_fanout_proto_again(first_mp, ill, inill, ire); 8017 return; 8018 } 8019 case IPPROTO_NONE: 8020 /* All processing is done. Count as "delivered". 
*/ 8021 freemsg(hada_mp); 8022 freemsg(first_mp); 8023 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8024 return; 8025 } 8026 whereptr += used; 8027 ASSERT(remlen >= used); 8028 remlen -= used; 8029 } 8030 /* NOTREACHED */ 8031 8032 pkt_too_short: 8033 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8034 ip6_len, pkt_len, remlen)); 8035 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8036 freemsg(hada_mp); 8037 freemsg(first_mp); 8038 return; 8039 udp_fanout: 8040 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8041 connp = NULL; 8042 } else { 8043 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8044 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8045 CONN_DEC_REF(connp); 8046 connp = NULL; 8047 } 8048 } 8049 8050 if (connp == NULL) { 8051 uint32_t ports; 8052 8053 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8054 UDP_PORTS_OFFSET); 8055 IP6_STAT(ip6_udp_slow_path); 8056 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8057 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8058 zoneid); 8059 return; 8060 } 8061 8062 if (CONN_UDP_FLOWCTLD(connp)) { 8063 freemsg(first_mp); 8064 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8065 CONN_DEC_REF(connp); 8066 return; 8067 } 8068 8069 /* Initiate IPPF processing */ 8070 if (IP6_IN_IPP(flags)) { 8071 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8072 if (mp == NULL) { 8073 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8074 CONN_DEC_REF(connp); 8075 return; 8076 } 8077 } 8078 8079 if (connp->conn_ipv6_recvpktinfo || 8080 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8081 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8082 if (mp == NULL) { 8083 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8084 CONN_DEC_REF(connp); 8085 return; 8086 } 8087 } 8088 8089 IP6_STAT(ip6_udp_fast_path); 8090 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8091 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8092 8093 /* Send it upstream */ 8094 CONN_UDP_RECV(connp, mp); 8095 8096 CONN_DEC_REF(connp); 
8097 freemsg(hada_mp); 8098 return; 8099 8100 hada_drop: 8101 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8102 /* IPsec kstats: bump counter here */ 8103 freemsg(hada_mp); 8104 freemsg(first_mp); 8105 } 8106 8107 /* 8108 * Reassemble fragment. 8109 * When it returns a completed message the first mblk will only contain 8110 * the headers prior to the fragment header. 8111 * 8112 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8113 * of the preceding header. This is needed to patch the previous header's 8114 * nexthdr field when reassembly completes. 8115 */ 8116 static mblk_t * 8117 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8118 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8119 uint32_t *cksum_val, uint16_t *cksum_flags) 8120 { 8121 ill_t *ill = (ill_t *)q->q_ptr; 8122 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8123 uint16_t offset; 8124 boolean_t more_frags; 8125 uint8_t nexthdr = fraghdr->ip6f_nxt; 8126 in6_addr_t *v6dst_ptr; 8127 in6_addr_t *v6src_ptr; 8128 uint_t end; 8129 uint_t hdr_length; 8130 size_t count; 8131 ipf_t *ipf; 8132 ipf_t **ipfp; 8133 ipfb_t *ipfb; 8134 mblk_t *mp1; 8135 uint8_t ecn_info = 0; 8136 size_t msg_len; 8137 mblk_t *tail_mp; 8138 mblk_t *t_mp; 8139 boolean_t pruned = B_FALSE; 8140 uint32_t sum_val; 8141 uint16_t sum_flags; 8142 8143 8144 if (cksum_val != NULL) 8145 *cksum_val = 0; 8146 if (cksum_flags != NULL) 8147 *cksum_flags = 0; 8148 8149 /* 8150 * We utilize hardware computed checksum info only for UDP since 8151 * IP fragmentation is a normal occurence for the protocol. In 8152 * addition, checksum offload support for IP fragments carrying 8153 * UDP payload is commonly implemented across network adapters. 
8154 */ 8155 ASSERT(ill != NULL); 8156 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8157 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8158 mblk_t *mp1 = mp->b_cont; 8159 int32_t len; 8160 8161 /* Record checksum information from the packet */ 8162 sum_val = (uint32_t)DB_CKSUM16(mp); 8163 sum_flags = DB_CKSUMFLAGS(mp); 8164 8165 /* fragmented payload offset from beginning of mblk */ 8166 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8167 8168 if ((sum_flags & HCK_PARTIALCKSUM) && 8169 (mp1 == NULL || mp1->b_cont == NULL) && 8170 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8171 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8172 uint32_t adj; 8173 /* 8174 * Partial checksum has been calculated by hardware 8175 * and attached to the packet; in addition, any 8176 * prepended extraneous data is even byte aligned. 8177 * If any such data exists, we adjust the checksum; 8178 * this would also handle any postpended data. 8179 */ 8180 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8181 mp, mp1, len, adj); 8182 8183 /* One's complement subtract extraneous checksum */ 8184 if (adj >= sum_val) 8185 sum_val = ~(adj - sum_val) & 0xFFFF; 8186 else 8187 sum_val -= adj; 8188 } 8189 } else { 8190 sum_val = 0; 8191 sum_flags = 0; 8192 } 8193 8194 /* Clear hardware checksumming flag */ 8195 DB_CKSUMFLAGS(mp) = 0; 8196 8197 /* 8198 * Note: Fragment offset in header is in 8-octet units. 8199 * Clearing least significant 3 bits not only extracts 8200 * it but also gets it in units of octets. 8201 */ 8202 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8203 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8204 8205 /* 8206 * Is the more frags flag on and the payload length not a multiple 8207 * of eight? 
8208 */ 8209 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8210 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8211 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8212 (uint32_t)((char *)&ip6h->ip6_plen - 8213 (char *)ip6h), B_FALSE, B_FALSE); 8214 return (NULL); 8215 } 8216 8217 v6src_ptr = &ip6h->ip6_src; 8218 v6dst_ptr = &ip6h->ip6_dst; 8219 end = remlen; 8220 8221 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8222 end += offset; 8223 8224 /* 8225 * Would fragment cause reassembled packet to have a payload length 8226 * greater than IP_MAXPACKET - the max payload size? 8227 */ 8228 if (end > IP_MAXPACKET) { 8229 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8230 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8231 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8232 (char *)ip6h), B_FALSE, B_FALSE); 8233 return (NULL); 8234 } 8235 8236 /* 8237 * This packet just has one fragment. Reassembly not 8238 * needed. 8239 */ 8240 if (!more_frags && offset == 0) { 8241 goto reass_done; 8242 } 8243 8244 /* 8245 * Drop the fragmented as early as possible, if 8246 * we don't have resource(s) to re-assemble. 8247 */ 8248 if (ip_reass_queue_bytes == 0) { 8249 freemsg(mp); 8250 return (NULL); 8251 } 8252 8253 /* Record the ECN field info. */ 8254 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8255 /* 8256 * If this is not the first fragment, dump the unfragmentable 8257 * portion of the packet. 8258 */ 8259 if (offset) 8260 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8261 8262 /* 8263 * Fragmentation reassembly. Each ILL has a hash table for 8264 * queueing packets undergoing reassembly for all IPIFs 8265 * associated with the ILL. The hash is based on the packet 8266 * IP ident field. The ILL frag hash table was allocated 8267 * as a timer block at the time the ILL was created. Whenever 8268 * there is anything on the reassembly queue, the timer will 8269 * be running. 
8270 */ 8271 msg_len = MBLKSIZE(mp); 8272 tail_mp = mp; 8273 while (tail_mp->b_cont != NULL) { 8274 tail_mp = tail_mp->b_cont; 8275 msg_len += MBLKSIZE(tail_mp); 8276 } 8277 /* 8278 * If the reassembly list for this ILL will get too big 8279 * prune it. 8280 */ 8281 8282 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8283 ip_reass_queue_bytes) { 8284 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8285 : (ip_reass_queue_bytes - msg_len)); 8286 pruned = B_TRUE; 8287 } 8288 8289 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8290 mutex_enter(&ipfb->ipfb_lock); 8291 8292 ipfp = &ipfb->ipfb_ipf; 8293 /* Try to find an existing fragment queue for this packet. */ 8294 for (;;) { 8295 ipf = ipfp[0]; 8296 if (ipf) { 8297 /* 8298 * It has to match on ident, source address, and 8299 * dest address. 8300 */ 8301 if (ipf->ipf_ident == ident && 8302 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8303 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8304 8305 /* 8306 * If we have received too many 8307 * duplicate fragments for this packet 8308 * free it. 8309 */ 8310 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8311 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8312 freemsg(mp); 8313 mutex_exit(&ipfb->ipfb_lock); 8314 return (NULL); 8315 } 8316 8317 break; 8318 } 8319 ipfp = &ipf->ipf_hash_next; 8320 continue; 8321 } 8322 8323 8324 /* 8325 * If we pruned the list, do we want to store this new 8326 * fragment?. We apply an optimization here based on the 8327 * fact that most fragments will be received in order. 8328 * So if the offset of this incoming fragment is zero, 8329 * it is the first fragment of a new packet. We will 8330 * keep it. Otherwise drop the fragment, as we have 8331 * probably pruned the packet already (since the 8332 * packet cannot be found). 8333 */ 8334 8335 if (pruned && offset != 0) { 8336 mutex_exit(&ipfb->ipfb_lock); 8337 freemsg(mp); 8338 return (NULL); 8339 } 8340 8341 /* New guy. Allocate a frag message. 
*/ 8342 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8343 if (!mp1) { 8344 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8345 freemsg(mp); 8346 partial_reass_done: 8347 mutex_exit(&ipfb->ipfb_lock); 8348 return (NULL); 8349 } 8350 8351 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8352 /* 8353 * Too many fragmented packets in this hash bucket. 8354 * Free the oldest. 8355 */ 8356 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8357 } 8358 8359 mp1->b_cont = mp; 8360 8361 /* Initialize the fragment header. */ 8362 ipf = (ipf_t *)mp1->b_rptr; 8363 ipf->ipf_mp = mp1; 8364 ipf->ipf_ptphn = ipfp; 8365 ipfp[0] = ipf; 8366 ipf->ipf_hash_next = NULL; 8367 ipf->ipf_ident = ident; 8368 ipf->ipf_v6src = *v6src_ptr; 8369 ipf->ipf_v6dst = *v6dst_ptr; 8370 /* Record reassembly start time. */ 8371 ipf->ipf_timestamp = gethrestime_sec(); 8372 /* Record ipf generation and account for frag header */ 8373 ipf->ipf_gen = ill->ill_ipf_gen++; 8374 ipf->ipf_count = MBLKSIZE(mp1); 8375 ipf->ipf_protocol = nexthdr; 8376 ipf->ipf_nf_hdr_len = 0; 8377 ipf->ipf_prev_nexthdr_offset = 0; 8378 ipf->ipf_last_frag_seen = B_FALSE; 8379 ipf->ipf_ecn = ecn_info; 8380 ipf->ipf_num_dups = 0; 8381 ipfb->ipfb_frag_pkts++; 8382 ipf->ipf_checksum = 0; 8383 ipf->ipf_checksum_flags = 0; 8384 8385 /* Store checksum value in fragment header */ 8386 if (sum_flags != 0) { 8387 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8388 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8389 ipf->ipf_checksum = sum_val; 8390 ipf->ipf_checksum_flags = sum_flags; 8391 } 8392 8393 /* 8394 * We handle reassembly two ways. In the easy case, 8395 * where all the fragments show up in order, we do 8396 * minimal bookkeeping, and just clip new pieces on 8397 * the end. If we ever see a hole, then we go off 8398 * to ip_reassemble which has to mark the pieces and 8399 * keep track of the number of holes, etc. Obviously, 8400 * the point of having both mechanisms is so we can 8401 * handle the easy case as efficiently as possible. 
8402 */ 8403 if (offset == 0) { 8404 /* Easy case, in-order reassembly so far. */ 8405 /* Update the byte count */ 8406 ipf->ipf_count += msg_len; 8407 ipf->ipf_tail_mp = tail_mp; 8408 /* 8409 * Keep track of next expected offset in 8410 * ipf_end. 8411 */ 8412 ipf->ipf_end = end; 8413 ipf->ipf_nf_hdr_len = hdr_length; 8414 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8415 } else { 8416 /* Hard case, hole at the beginning. */ 8417 ipf->ipf_tail_mp = NULL; 8418 /* 8419 * ipf_end == 0 means that we have given up 8420 * on easy reassembly. 8421 */ 8422 ipf->ipf_end = 0; 8423 8424 /* Forget checksum offload from now on */ 8425 ipf->ipf_checksum_flags = 0; 8426 8427 /* 8428 * ipf_hole_cnt is set by ip_reassemble. 8429 * ipf_count is updated by ip_reassemble. 8430 * No need to check for return value here 8431 * as we don't expect reassembly to complete or 8432 * fail for the first fragment itself. 8433 */ 8434 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8435 msg_len); 8436 } 8437 /* Update per ipfb and ill byte counts */ 8438 ipfb->ipfb_count += ipf->ipf_count; 8439 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8440 ill->ill_frag_count += ipf->ipf_count; 8441 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8442 /* If the frag timer wasn't already going, start it. */ 8443 mutex_enter(&ill->ill_lock); 8444 ill_frag_timer_start(ill); 8445 mutex_exit(&ill->ill_lock); 8446 goto partial_reass_done; 8447 } 8448 8449 /* 8450 * If the packet's flag has changed (it could be coming up 8451 * from an interface different than the previous, therefore 8452 * possibly different checksum capability), then forget about 8453 * any stored checksum states. Otherwise add the value to 8454 * the existing one stored in the fragment header. 
8455 */ 8456 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8457 sum_val += ipf->ipf_checksum; 8458 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8459 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8460 ipf->ipf_checksum = sum_val; 8461 } else if (ipf->ipf_checksum_flags != 0) { 8462 /* Forget checksum offload from now on */ 8463 ipf->ipf_checksum_flags = 0; 8464 } 8465 8466 /* 8467 * We have a new piece of a datagram which is already being 8468 * reassembled. Update the ECN info if all IP fragments 8469 * are ECN capable. If there is one which is not, clear 8470 * all the info. If there is at least one which has CE 8471 * code point, IP needs to report that up to transport. 8472 */ 8473 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8474 if (ecn_info == IPH_ECN_CE) 8475 ipf->ipf_ecn = IPH_ECN_CE; 8476 } else { 8477 ipf->ipf_ecn = IPH_ECN_NECT; 8478 } 8479 8480 if (offset && ipf->ipf_end == offset) { 8481 /* The new fragment fits at the end */ 8482 ipf->ipf_tail_mp->b_cont = mp; 8483 /* Update the byte count */ 8484 ipf->ipf_count += msg_len; 8485 /* Update per ipfb and ill byte counts */ 8486 ipfb->ipfb_count += msg_len; 8487 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8488 ill->ill_frag_count += msg_len; 8489 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8490 if (more_frags) { 8491 /* More to come. */ 8492 ipf->ipf_end = end; 8493 ipf->ipf_tail_mp = tail_mp; 8494 goto partial_reass_done; 8495 } 8496 } else { 8497 /* 8498 * Go do the hard cases. 8499 * Call ip_reassemble(). 
8500 */ 8501 int ret; 8502 8503 if (offset == 0) { 8504 if (ipf->ipf_prev_nexthdr_offset == 0) { 8505 ipf->ipf_nf_hdr_len = hdr_length; 8506 ipf->ipf_prev_nexthdr_offset = 8507 *prev_nexthdr_offset; 8508 } 8509 } 8510 /* Save current byte count */ 8511 count = ipf->ipf_count; 8512 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8513 8514 /* Count of bytes added and subtracted (freeb()ed) */ 8515 count = ipf->ipf_count - count; 8516 if (count) { 8517 /* Update per ipfb and ill byte counts */ 8518 ipfb->ipfb_count += count; 8519 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8520 ill->ill_frag_count += count; 8521 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8522 } 8523 if (ret == IP_REASS_PARTIAL) { 8524 goto partial_reass_done; 8525 } else if (ret == IP_REASS_FAILED) { 8526 /* Reassembly failed. Free up all resources */ 8527 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8528 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8529 IP_REASS_SET_START(t_mp, 0); 8530 IP_REASS_SET_END(t_mp, 0); 8531 } 8532 freemsg(mp); 8533 goto partial_reass_done; 8534 } 8535 8536 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8537 } 8538 /* 8539 * We have completed reassembly. Unhook the frag header from 8540 * the reassembly list. 8541 * 8542 * Grab the unfragmentable header length next header value out 8543 * of the first fragment 8544 */ 8545 ASSERT(ipf->ipf_nf_hdr_len != 0); 8546 hdr_length = ipf->ipf_nf_hdr_len; 8547 8548 /* 8549 * Before we free the frag header, record the ECN info 8550 * to report back to the transport. 
8551 */ 8552 ecn_info = ipf->ipf_ecn; 8553 8554 /* 8555 * Store the nextheader field in the header preceding the fragment 8556 * header 8557 */ 8558 nexthdr = ipf->ipf_protocol; 8559 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8560 ipfp = ipf->ipf_ptphn; 8561 8562 /* We need to supply these to caller */ 8563 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8564 sum_val = ipf->ipf_checksum; 8565 else 8566 sum_val = 0; 8567 8568 mp1 = ipf->ipf_mp; 8569 count = ipf->ipf_count; 8570 ipf = ipf->ipf_hash_next; 8571 if (ipf) 8572 ipf->ipf_ptphn = ipfp; 8573 ipfp[0] = ipf; 8574 ill->ill_frag_count -= count; 8575 ASSERT(ipfb->ipfb_count >= count); 8576 ipfb->ipfb_count -= count; 8577 ipfb->ipfb_frag_pkts--; 8578 mutex_exit(&ipfb->ipfb_lock); 8579 /* Ditch the frag header. */ 8580 mp = mp1->b_cont; 8581 freeb(mp1); 8582 8583 /* 8584 * Make sure the packet is good by doing some sanity 8585 * check. If bad we can silentely drop the packet. 8586 */ 8587 reass_done: 8588 if (hdr_length < sizeof (ip6_frag_t)) { 8589 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8590 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8591 freemsg(mp); 8592 return (NULL); 8593 } 8594 8595 /* 8596 * Remove the fragment header from the initial header by 8597 * splitting the mblk into the non-fragmentable header and 8598 * everthing after the fragment extension header. This has the 8599 * side effect of putting all the headers that need destination 8600 * processing into the b_cont block-- on return this fact is 8601 * used in order to avoid having to look at the extensions 8602 * already processed. 8603 * 8604 * Note that this code assumes that the unfragmentable portion 8605 * of the header is in the first mblk and increments 8606 * the read pointer past it. If this assumption is broken 8607 * this code fails badly. 
8608 */ 8609 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8610 mblk_t *nmp; 8611 8612 if (!(nmp = dupb(mp))) { 8613 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8614 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8615 freemsg(mp); 8616 return (NULL); 8617 } 8618 nmp->b_cont = mp->b_cont; 8619 mp->b_cont = nmp; 8620 nmp->b_rptr += hdr_length; 8621 } 8622 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8623 8624 ip6h = (ip6_t *)mp->b_rptr; 8625 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8626 8627 /* Restore original IP length in header. */ 8628 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8629 /* Record the ECN info. */ 8630 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8631 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8632 8633 /* Reassembly is successful; return checksum information if needed */ 8634 if (cksum_val != NULL) 8635 *cksum_val = sum_val; 8636 if (cksum_flags != NULL) 8637 *cksum_flags = sum_flags; 8638 8639 return (mp); 8640 } 8641 8642 /* 8643 * Walk through the options to see if there is a routing header. 8644 * If present get the destination which is the last address of 8645 * the option. 
8646 */ 8647 in6_addr_t 8648 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8649 { 8650 uint8_t nexthdr; 8651 uint8_t *whereptr; 8652 ip6_hbh_t *hbhhdr; 8653 ip6_dest_t *dsthdr; 8654 ip6_rthdr0_t *rthdr; 8655 ip6_frag_t *fraghdr; 8656 int ehdrlen; 8657 int left; 8658 in6_addr_t *ap, rv; 8659 8660 if (is_fragment != NULL) 8661 *is_fragment = B_FALSE; 8662 8663 rv = ip6h->ip6_dst; 8664 8665 nexthdr = ip6h->ip6_nxt; 8666 whereptr = (uint8_t *)&ip6h[1]; 8667 for (;;) { 8668 8669 ASSERT(nexthdr != IPPROTO_RAW); 8670 switch (nexthdr) { 8671 case IPPROTO_HOPOPTS: 8672 hbhhdr = (ip6_hbh_t *)whereptr; 8673 nexthdr = hbhhdr->ip6h_nxt; 8674 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8675 break; 8676 case IPPROTO_DSTOPTS: 8677 dsthdr = (ip6_dest_t *)whereptr; 8678 nexthdr = dsthdr->ip6d_nxt; 8679 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8680 break; 8681 case IPPROTO_ROUTING: 8682 rthdr = (ip6_rthdr0_t *)whereptr; 8683 nexthdr = rthdr->ip6r0_nxt; 8684 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8685 8686 left = rthdr->ip6r0_segleft; 8687 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8688 rv = *(ap + left - 1); 8689 /* 8690 * If the caller doesn't care whether the packet 8691 * is a fragment or not, we can stop here since 8692 * we have our destination. 8693 */ 8694 if (is_fragment == NULL) 8695 goto done; 8696 break; 8697 case IPPROTO_FRAGMENT: 8698 fraghdr = (ip6_frag_t *)whereptr; 8699 nexthdr = fraghdr->ip6f_nxt; 8700 ehdrlen = sizeof (ip6_frag_t); 8701 if (is_fragment != NULL) 8702 *is_fragment = B_TRUE; 8703 goto done; 8704 default : 8705 goto done; 8706 } 8707 whereptr += ehdrlen; 8708 } 8709 8710 done: 8711 return (rv); 8712 } 8713 8714 /* 8715 * ip_source_routed_v6: 8716 * This function is called by redirect code in ip_rput_data_v6 to 8717 * know whether this packet is source routed through this node i.e 8718 * whether this node (router) is part of the journey. 
This 8719 * function is called under two cases : 8720 * 8721 * case 1 : Routing header was processed by this node and 8722 * ip_process_rthdr replaced ip6_dst with the next hop 8723 * and we are forwarding the packet to the next hop. 8724 * 8725 * case 2 : Routing header was not processed by this node and we 8726 * are just forwarding the packet. 8727 * 8728 * For case (1) we don't want to send redirects. For case(2) we 8729 * want to send redirects. 8730 */ 8731 static boolean_t 8732 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8733 { 8734 uint8_t nexthdr; 8735 in6_addr_t *addrptr; 8736 ip6_rthdr0_t *rthdr; 8737 uint8_t numaddr; 8738 ip6_hbh_t *hbhhdr; 8739 uint_t ehdrlen; 8740 uint8_t *byteptr; 8741 8742 ip2dbg(("ip_source_routed_v6\n")); 8743 nexthdr = ip6h->ip6_nxt; 8744 ehdrlen = IPV6_HDR_LEN; 8745 8746 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8747 while (nexthdr == IPPROTO_HOPOPTS || 8748 nexthdr == IPPROTO_DSTOPTS) { 8749 byteptr = (uint8_t *)ip6h + ehdrlen; 8750 /* 8751 * Check if we have already processed 8752 * packets or we are just a forwarding 8753 * router which only pulled up msgs up 8754 * to IPV6HDR and one HBH ext header 8755 */ 8756 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8757 ip2dbg(("ip_source_routed_v6: Extension" 8758 " headers not processed\n")); 8759 return (B_FALSE); 8760 } 8761 hbhhdr = (ip6_hbh_t *)byteptr; 8762 nexthdr = hbhhdr->ip6h_nxt; 8763 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8764 } 8765 switch (nexthdr) { 8766 case IPPROTO_ROUTING: 8767 byteptr = (uint8_t *)ip6h + ehdrlen; 8768 /* 8769 * If for some reason, we haven't pulled up 8770 * the routing hdr data mblk, then we must 8771 * not have processed it at all. So for sure 8772 * we are not part of the source routed journey. 
8773 */ 8774 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8775 ip2dbg(("ip_source_routed_v6: Routing" 8776 " header not processed\n")); 8777 return (B_FALSE); 8778 } 8779 rthdr = (ip6_rthdr0_t *)byteptr; 8780 /* 8781 * Either we are an intermediate router or the 8782 * last hop before destination and we have 8783 * already processed the routing header. 8784 * If segment_left is greater than or equal to zero, 8785 * then we must be the (numaddr - segleft) entry 8786 * of the routing header. Although ip6r0_segleft 8787 * is a unit8_t variable, we still check for zero 8788 * or greater value, if in case the data type 8789 * is changed someday in future. 8790 */ 8791 if (rthdr->ip6r0_segleft > 0 || 8792 rthdr->ip6r0_segleft == 0) { 8793 ire_t *ire = NULL; 8794 8795 numaddr = rthdr->ip6r0_len / 2; 8796 addrptr = (in6_addr_t *)((char *)rthdr + 8797 sizeof (*rthdr)); 8798 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8799 if (addrptr != NULL) { 8800 ire = ire_ctable_lookup_v6(addrptr, NULL, 8801 IRE_LOCAL, NULL, ALL_ZONES, MATCH_IRE_TYPE); 8802 if (ire != NULL) { 8803 ire_refrele(ire); 8804 return (B_TRUE); 8805 } 8806 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8807 } 8808 } 8809 /* FALLTHRU */ 8810 default: 8811 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8812 return (B_FALSE); 8813 } 8814 } 8815 8816 /* 8817 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8818 * Assumes that the following set of headers appear in the first 8819 * mblk: 8820 * ip6i_t (if present) CAN also appear as a separate mblk. 8821 * ip6_t 8822 * Any extension headers 8823 * TCP/UDP/SCTP header (if present) 8824 * The routine can handle an ICMPv6 header that is not in the first mblk. 8825 * 8826 * The order to determine the outgoing interface is as follows: 8827 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 8828 * 2. If conn_nofailover_ill is set then use that ill. 8829 * 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8830 * 4. 
If q is an ill queue and (link local or multicast destination) then 8831 * use that ill. 8832 * 5. If IPV6_BOUND_IF has been set use that ill. 8833 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8834 * look for the best IRE match for the unspecified group to determine 8835 * the ill. 8836 * 7. For unicast: Just do an IRE lookup for the best match. 8837 */ 8838 void 8839 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8840 { 8841 conn_t *connp = NULL; 8842 queue_t *q = (queue_t *)arg2; 8843 ire_t *ire = NULL; 8844 ire_t *sctp_ire = NULL; 8845 ip6_t *ip6h; 8846 in6_addr_t *v6dstp; 8847 ill_t *ill = NULL; 8848 ipif_t *ipif; 8849 ip6i_t *ip6i; 8850 int cksum_request; /* -1 => normal. */ 8851 /* 1 => Skip TCP/UDP/SCTP checksum */ 8852 /* Otherwise contains insert offset for checksum */ 8853 int unspec_src; 8854 boolean_t do_outrequests; /* Increment OutRequests? */ 8855 mib2_ipv6IfStatsEntry_t *mibptr; 8856 int match_flags = MATCH_IRE_ILL_GROUP; 8857 boolean_t attach_if = B_FALSE; 8858 mblk_t *first_mp; 8859 boolean_t mctl_present; 8860 ipsec_out_t *io; 8861 boolean_t drop_if_delayed = B_FALSE; 8862 boolean_t multirt_need_resolve = B_FALSE; 8863 mblk_t *copy_mp = NULL; 8864 int err; 8865 int ip6i_flags = 0; 8866 zoneid_t zoneid; 8867 ill_t *saved_ill = NULL; 8868 boolean_t conn_lock_held; 8869 boolean_t need_decref = B_FALSE; 8870 8871 /* 8872 * Highest bit in version field is Reachability Confirmation bit 8873 * used by NUD in ip_xmit_v6(). 8874 */ 8875 #ifdef _BIG_ENDIAN 8876 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8877 #else 8878 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 8879 #endif 8880 8881 /* 8882 * M_CTL comes from 5 places 8883 * 8884 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 8885 * both V4 and V6 datagrams. 8886 * 8887 * 2) AH/ESP sends down M_CTL after doing their job with both 8888 * V4 and V6 datagrams. 
8889 * 8890 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 8891 * attached. 8892 * 8893 * 4) Notifications from an external resolver (for XRESOLV ifs) 8894 * 8895 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 8896 * IPsec hardware acceleration support. 8897 * 8898 * We need to handle (1)'s IPv6 case and (3) here. For the 8899 * IPv4 case in (1), and (2), IPSEC processing has already 8900 * started. The code in ip_wput() already knows how to handle 8901 * continuing IPSEC processing (for IPv4 and IPv6). All other 8902 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 8903 * for handling. 8904 */ 8905 first_mp = mp; 8906 mctl_present = B_FALSE; 8907 io = NULL; 8908 8909 /* Multidata transmit? */ 8910 if (DB_TYPE(mp) == M_MULTIDATA) { 8911 /* 8912 * We should never get here, since all Multidata messages 8913 * originating from tcp should have been directed over to 8914 * tcp_multisend() in the first place. 8915 */ 8916 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 8917 freemsg(mp); 8918 return; 8919 } else if (DB_TYPE(mp) == M_CTL) { 8920 uint32_t mctltype = 0; 8921 uint32_t mlen = MBLKL(first_mp); 8922 8923 mp = mp->b_cont; 8924 mctl_present = B_TRUE; 8925 io = (ipsec_out_t *)first_mp->b_rptr; 8926 8927 /* 8928 * Validate this M_CTL message. The only three types of 8929 * M_CTL messages we expect to see in this code path are 8930 * ipsec_out_t or ipsec_in_t structures (allocated as 8931 * ipsec_info_t unions), or ipsec_ctl_t structures. 8932 * The ipsec_out_type and ipsec_in_type overlap in the two 8933 * data structures, and they are either set to IPSEC_OUT 8934 * or IPSEC_IN depending on which data structure it is. 8935 * ipsec_ctl_t is an IPSEC_CTL. 8936 * 8937 * All other M_CTL messages are sent to ip_wput_nondata() 8938 * for handling. 
8939 */ 8940 if (mlen >= sizeof (io->ipsec_out_type)) 8941 mctltype = io->ipsec_out_type; 8942 8943 if ((mlen == sizeof (ipsec_ctl_t)) && 8944 (mctltype == IPSEC_CTL)) { 8945 ip_output(Q_TO_CONN(q), first_mp, q, caller); 8946 return; 8947 } 8948 8949 if ((mlen < sizeof (ipsec_info_t)) || 8950 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 8951 mp == NULL) { 8952 ip_wput_nondata(NULL, q, first_mp, NULL); 8953 return; 8954 } 8955 /* NDP callbacks have q_next non-NULL. That's case #3. */ 8956 if (q->q_next == NULL) { 8957 ip6h = (ip6_t *)mp->b_rptr; 8958 /* 8959 * For a freshly-generated TCP dgram that needs IPV6 8960 * processing, don't call ip_wput immediately. We can 8961 * tell this by the ipsec_out_proc_begin. In-progress 8962 * IPSEC_OUT messages have proc_begin set to TRUE, 8963 * and we want to send all IPSEC_IN messages to 8964 * ip_wput() for IPsec processing or finishing. 8965 */ 8966 if (mctltype == IPSEC_IN || 8967 IPVER(ip6h) != IPV6_VERSION || 8968 io->ipsec_out_proc_begin) { 8969 mibptr = &ip6_mib; 8970 goto notv6; 8971 } 8972 } 8973 } else if (DB_TYPE(mp) != M_DATA) { 8974 ip_wput_nondata(NULL, q, mp, NULL); 8975 return; 8976 } 8977 8978 ip6h = (ip6_t *)mp->b_rptr; 8979 8980 if (IPVER(ip6h) != IPV6_VERSION) { 8981 mibptr = &ip6_mib; 8982 goto notv6; 8983 } 8984 8985 if (q->q_next != NULL) { 8986 ill = (ill_t *)q->q_ptr; 8987 /* 8988 * We don't know if this ill will be used for IPv6 8989 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 8990 * ipif_set_values() sets the ill_isv6 flag to true if 8991 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 8992 * just drop the packet. 
8993 */ 8994 if (!ill->ill_isv6) { 8995 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 8996 "ILLF_IPV6 was set\n")); 8997 freemsg(first_mp); 8998 return; 8999 } 9000 /* For uniformity do a refhold */ 9001 mutex_enter(&ill->ill_lock); 9002 if (!ILL_CAN_LOOKUP(ill)) { 9003 mutex_exit(&ill->ill_lock); 9004 freemsg(first_mp); 9005 return; 9006 } 9007 ill_refhold_locked(ill); 9008 mutex_exit(&ill->ill_lock); 9009 mibptr = ill->ill_ip6_mib; 9010 /* 9011 * ill_ip6_mib is allocated by ipif_set_values() when 9012 * ill_isv6 is set. Thus if ill_isv6 is true, 9013 * ill_ip6_mib had better not be NULL. 9014 */ 9015 ASSERT(mibptr != NULL); 9016 unspec_src = 0; 9017 BUMP_MIB(mibptr, ipv6OutRequests); 9018 do_outrequests = B_FALSE; 9019 } else { 9020 connp = (conn_t *)arg; 9021 ASSERT(connp != NULL); 9022 9023 /* is queue flow controlled? */ 9024 if ((q->q_first || connp->conn_draining) && 9025 (caller == IP_WPUT)) { 9026 /* 9027 * 1) TCP sends down M_CTL for detached connections. 9028 * 2) AH/ESP sends down M_CTL. 9029 * 9030 * We don't flow control either of the above. Only 9031 * UDP and others are flow controlled for which we 9032 * can't have a M_CTL. 9033 */ 9034 ASSERT(first_mp == mp); 9035 (void) putq(q, mp); 9036 return; 9037 } 9038 mibptr = &ip6_mib; 9039 unspec_src = connp->conn_unspec_src; 9040 do_outrequests = B_TRUE; 9041 if (mp->b_flag & MSGHASREF) { 9042 mp->b_flag &= ~MSGHASREF; 9043 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9044 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9045 need_decref = B_TRUE; 9046 } 9047 9048 /* 9049 * If there is a policy, try to attach an ipsec_out in 9050 * the front. At the end, first_mp either points to a 9051 * M_DATA message or IPSEC_OUT message linked to a 9052 * M_DATA message. We have to do it now as we might 9053 * lose the "conn" if we go through ip_newroute. 
9054 */ 9055 if (!mctl_present && 9056 (connp->conn_out_enforce_policy || 9057 connp->conn_latch != NULL)) { 9058 ASSERT(first_mp == mp); 9059 /* XXX Any better way to get the protocol fast ? */ 9060 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9061 connp->conn_ulp)) == NULL)) { 9062 if (need_decref) 9063 CONN_DEC_REF(connp); 9064 return; 9065 } else { 9066 ASSERT(mp->b_datap->db_type == M_CTL); 9067 first_mp = mp; 9068 mp = mp->b_cont; 9069 mctl_present = B_TRUE; 9070 io = (ipsec_out_t *)first_mp->b_rptr; 9071 } 9072 } 9073 } 9074 9075 /* check for alignment and full IPv6 header */ 9076 if (!OK_32PTR((uchar_t *)ip6h) || 9077 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9078 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9079 if (do_outrequests) 9080 BUMP_MIB(mibptr, ipv6OutRequests); 9081 BUMP_MIB(mibptr, ipv6OutDiscards); 9082 freemsg(first_mp); 9083 if (ill != NULL) 9084 ill_refrele(ill); 9085 if (need_decref) 9086 CONN_DEC_REF(connp); 9087 return; 9088 } 9089 v6dstp = &ip6h->ip6_dst; 9090 cksum_request = -1; 9091 ip6i = NULL; 9092 9093 /* 9094 * Once neighbor discovery has completed, ndp_process() will provide 9095 * locally generated packets for which processing can be reattempted. 9096 * In these cases, connp is NULL and the original zone is part of a 9097 * prepended ipsec_out_t. 9098 */ 9099 if (io != NULL) { 9100 zoneid = io->ipsec_out_zoneid; 9101 ASSERT(zoneid != ALL_ZONES); 9102 } else { 9103 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9104 } 9105 9106 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9107 /* 9108 * This is an ip6i_t header followed by an ip6_hdr. 9109 * Check which fields are set. 9110 * 9111 * When the packet comes from a transport we should have 9112 * all needed headers in the first mblk. However, when 9113 * going through ip_newroute*_v6 the ip6i might be in 9114 * a separate mblk when we return here. 
In that case 9115 * we pullup everything to ensure that extension and transport 9116 * headers "stay" in the first mblk. 9117 */ 9118 ip6i = (ip6i_t *)ip6h; 9119 ip6i_flags = ip6i->ip6i_flags; 9120 9121 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9122 ((mp->b_wptr - (uchar_t *)ip6i) >= 9123 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9124 9125 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9126 if (!pullupmsg(mp, -1)) { 9127 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9128 if (do_outrequests) 9129 BUMP_MIB(mibptr, ipv6OutRequests); 9130 BUMP_MIB(mibptr, ipv6OutDiscards); 9131 freemsg(first_mp); 9132 if (ill != NULL) 9133 ill_refrele(ill); 9134 if (need_decref) 9135 CONN_DEC_REF(connp); 9136 return; 9137 } 9138 ip6h = (ip6_t *)mp->b_rptr; 9139 v6dstp = &ip6h->ip6_dst; 9140 ip6i = (ip6i_t *)ip6h; 9141 } 9142 ip6h = (ip6_t *)&ip6i[1]; 9143 9144 /* 9145 * Advance rptr past the ip6i_t to get ready for 9146 * transmitting the packet. However, if the packet gets 9147 * passed to ip_newroute*_v6 then rptr is moved back so 9148 * that the ip6i_t header can be inspected when the 9149 * packet comes back here after passing through 9150 * ire_add_then_send. 9151 */ 9152 mp->b_rptr = (uchar_t *)ip6h; 9153 9154 /* 9155 * IP6I_ATTACH_IF is set in this function when we had a 9156 * conn and it was either bound to the IPFF_NOFAILOVER address 9157 * or IPV6_BOUND_PIF was set. These options override other 9158 * options that set the ifindex. We come here with 9159 * IP6I_ATTACH_IF set when we can't find the ire and 9160 * ip_newroute_v6 is feeding the packet for second time. 
9161 */ 9162 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9163 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9164 ASSERT(ip6i->ip6i_ifindex != 0); 9165 if (ill != NULL) 9166 ill_refrele(ill); 9167 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9168 NULL, NULL, NULL, NULL); 9169 if (ill == NULL) { 9170 if (do_outrequests) 9171 BUMP_MIB(mibptr, ipv6OutRequests); 9172 BUMP_MIB(mibptr, ipv6OutDiscards); 9173 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9174 ip6i->ip6i_ifindex)); 9175 if (need_decref) 9176 CONN_DEC_REF(connp); 9177 freemsg(first_mp); 9178 return; 9179 } 9180 mibptr = ill->ill_ip6_mib; 9181 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9182 /* 9183 * Preserve the index so that when we return 9184 * from IPSEC processing, we know where to 9185 * send the packet. 9186 */ 9187 if (mctl_present) { 9188 ASSERT(io != NULL); 9189 io->ipsec_out_ill_index = 9190 ip6i->ip6i_ifindex; 9191 } 9192 } 9193 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9194 /* 9195 * This is a multipathing probe packet that has 9196 * been delayed in ND resolution. 
Drop the 9197 * packet for the reasons mentioned in 9198 * nce_queue_mp() 9199 */ 9200 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9201 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9202 freemsg(first_mp); 9203 ill_refrele(ill); 9204 if (need_decref) 9205 CONN_DEC_REF(connp); 9206 return; 9207 } 9208 } 9209 } 9210 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9211 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9212 9213 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9214 if (secpolicy_net_rawaccess(cr) != 0) { 9215 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9216 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9217 NULL, zoneid, 9218 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9219 if (ire == NULL) { 9220 if (do_outrequests) 9221 BUMP_MIB(mibptr, 9222 ipv6OutRequests); 9223 BUMP_MIB(mibptr, ipv6OutDiscards); 9224 ip1dbg(("ip_wput_v6: bad source " 9225 "addr\n")); 9226 freemsg(first_mp); 9227 if (ill != NULL) 9228 ill_refrele(ill); 9229 if (need_decref) 9230 CONN_DEC_REF(connp); 9231 return; 9232 } 9233 ire_refrele(ire); 9234 } 9235 /* No need to verify again when using ip_newroute */ 9236 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9237 } 9238 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9239 /* 9240 * Make sure they match since ip_newroute*_v6 etc might 9241 * (unknown to them) inspect ip6i_nexthop when 9242 * they think they access ip6_dst. 9243 */ 9244 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9245 } 9246 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9247 cksum_request = 1; 9248 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9249 cksum_request = ip6i->ip6i_checksum_off; 9250 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9251 unspec_src = 1; 9252 9253 if (do_outrequests && ill != NULL) { 9254 BUMP_MIB(mibptr, ipv6OutRequests); 9255 do_outrequests = B_FALSE; 9256 } 9257 /* 9258 * Store ip6i_t info that we need after we come back 9259 * from IPSEC processing. 
9260 */ 9261 if (mctl_present) { 9262 ASSERT(io != NULL); 9263 io->ipsec_out_unspec_src = unspec_src; 9264 } 9265 } 9266 if (connp != NULL && connp->conn_dontroute) 9267 ip6h->ip6_hops = 1; 9268 9269 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9270 goto ipv6multicast; 9271 9272 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9273 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9274 ill_t *conn_outgoing_pill; 9275 9276 conn_outgoing_pill = conn_get_held_ill(connp, 9277 &connp->conn_outgoing_pill, &err); 9278 if (err == ILL_LOOKUP_FAILED) { 9279 if (ill != NULL) 9280 ill_refrele(ill); 9281 if (need_decref) 9282 CONN_DEC_REF(connp); 9283 freemsg(first_mp); 9284 return; 9285 } 9286 if (conn_outgoing_pill != NULL) { 9287 if (ill != NULL) 9288 ill_refrele(ill); 9289 ill = conn_outgoing_pill; 9290 attach_if = B_TRUE; 9291 match_flags = MATCH_IRE_ILL; 9292 mibptr = ill->ill_ip6_mib; 9293 9294 /* 9295 * Check if we need an ire that will not be 9296 * looked up by anybody else i.e. HIDDEN. 9297 */ 9298 if (ill_is_probeonly(ill)) 9299 match_flags |= MATCH_IRE_MARK_HIDDEN; 9300 goto send_from_ill; 9301 } 9302 } 9303 9304 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9305 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9306 ill_t *conn_nofailover_ill; 9307 9308 conn_nofailover_ill = conn_get_held_ill(connp, 9309 &connp->conn_nofailover_ill, &err); 9310 if (err == ILL_LOOKUP_FAILED) { 9311 if (ill != NULL) 9312 ill_refrele(ill); 9313 if (need_decref) 9314 CONN_DEC_REF(connp); 9315 freemsg(first_mp); 9316 return; 9317 } 9318 if (conn_nofailover_ill != NULL) { 9319 if (ill != NULL) 9320 ill_refrele(ill); 9321 ill = conn_nofailover_ill; 9322 attach_if = B_TRUE; 9323 /* 9324 * Assumes that ipc_nofailover_ill is used only for 9325 * multipathing probe packets. These packets are better 9326 * dropped, if they are delayed in ND resolution, for 9327 * the reasons described in nce_queue_mp(). 
9328 * IP6I_DROP_IFDELAYED will be set later on in this 9329 * function for this packet. 9330 */ 9331 drop_if_delayed = B_TRUE; 9332 match_flags = MATCH_IRE_ILL; 9333 mibptr = ill->ill_ip6_mib; 9334 9335 /* 9336 * Check if we need an ire that will not be 9337 * looked up by anybody else i.e. HIDDEN. 9338 */ 9339 if (ill_is_probeonly(ill)) 9340 match_flags |= MATCH_IRE_MARK_HIDDEN; 9341 goto send_from_ill; 9342 } 9343 } 9344 9345 /* 9346 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9347 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9348 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9349 */ 9350 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9351 ASSERT(ip6i->ip6i_ifindex != 0); 9352 attach_if = B_TRUE; 9353 ASSERT(ill != NULL); 9354 match_flags = MATCH_IRE_ILL; 9355 9356 /* 9357 * Check if we need an ire that will not be 9358 * looked up by anybody else i.e. HIDDEN. 9359 */ 9360 if (ill_is_probeonly(ill)) 9361 match_flags |= MATCH_IRE_MARK_HIDDEN; 9362 goto send_from_ill; 9363 } 9364 9365 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9366 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9367 ASSERT(ill != NULL); 9368 goto send_from_ill; 9369 } 9370 9371 /* 9372 * 4. If q is an ill queue and (link local or multicast destination) 9373 * then use that ill. 9374 */ 9375 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9376 goto send_from_ill; 9377 } 9378 9379 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9380 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9381 ill_t *conn_outgoing_ill; 9382 9383 conn_outgoing_ill = conn_get_held_ill(connp, 9384 &connp->conn_outgoing_ill, &err); 9385 if (err == ILL_LOOKUP_FAILED) { 9386 if (ill != NULL) 9387 ill_refrele(ill); 9388 if (need_decref) 9389 CONN_DEC_REF(connp); 9390 freemsg(first_mp); 9391 return; 9392 } 9393 if (ill != NULL) 9394 ill_refrele(ill); 9395 ill = conn_outgoing_ill; 9396 mibptr = ill->ill_ip6_mib; 9397 goto send_from_ill; 9398 } 9399 9400 /* 9401 * 6. For unicast: Just do an IRE lookup for the best match. 9402 * If we get here for a link-local address it is rather random 9403 * what interface we pick on a multihomed host. 9404 * *If* there is an IRE_CACHE (and the link-local address 9405 * isn't duplicated on multi links) this will find the IRE_CACHE. 9406 * Otherwise it will use one of the matching IRE_INTERFACE routes 9407 * for the link-local prefix. Hence, applications 9408 * *should* be encouraged to specify an outgoing interface when sending 9409 * to a link local address. 9410 */ 9411 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9412 !connp->conn_fully_bound)) { 9413 /* 9414 * We cache IRE_CACHEs to avoid lookups. We don't do 9415 * this for the tcp global queue and listen end point 9416 * as it does not really have a real destination to 9417 * talk to. 9418 */ 9419 ire = ire_cache_lookup_v6(v6dstp, zoneid); 9420 } else { 9421 /* 9422 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9423 * grab a lock here to check for CONDEMNED as it is okay 9424 * to send a packet or two with the IRE_CACHE that is going 9425 * away. 9426 */ 9427 mutex_enter(&connp->conn_lock); 9428 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9429 if (ire != NULL && 9430 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9431 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9432 9433 IRE_REFHOLD(ire); 9434 mutex_exit(&connp->conn_lock); 9435 9436 } else { 9437 boolean_t cached = B_FALSE; 9438 9439 connp->conn_ire_cache = NULL; 9440 mutex_exit(&connp->conn_lock); 9441 /* Release the old ire */ 9442 if (ire != NULL && sctp_ire == NULL) 9443 IRE_REFRELE_NOTR(ire); 9444 9445 ire = (ire_t *)ire_cache_lookup_v6(v6dstp, zoneid); 9446 if (ire != NULL) { 9447 IRE_REFHOLD_NOTR(ire); 9448 9449 mutex_enter(&connp->conn_lock); 9450 if (!(connp->conn_state_flags & CONN_CLOSING) && 9451 (connp->conn_ire_cache == NULL)) { 9452 rw_enter(&ire->ire_bucket->irb_lock, 9453 RW_READER); 9454 if (!(ire->ire_marks & 9455 IRE_MARK_CONDEMNED)) { 9456 connp->conn_ire_cache = ire; 9457 cached = B_TRUE; 9458 } 9459 rw_exit(&ire->ire_bucket->irb_lock); 9460 } 9461 mutex_exit(&connp->conn_lock); 9462 9463 /* 9464 * We can continue to use the ire but since it 9465 * was not cached, we should drop the extra 9466 * reference. 9467 */ 9468 if (!cached) 9469 IRE_REFRELE_NOTR(ire); 9470 } 9471 } 9472 } 9473 9474 if (ire != NULL) { 9475 if (do_outrequests) { 9476 /* Handle IRE_LOCAL's that might appear here */ 9477 if (ire->ire_type == IRE_CACHE) { 9478 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9479 ill_ip6_mib; 9480 } else { 9481 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9482 } 9483 BUMP_MIB(mibptr, ipv6OutRequests); 9484 } 9485 ASSERT(!attach_if); 9486 9487 /* 9488 * Check if the ire has the RTF_MULTIRT flag, inherited 9489 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9490 */ 9491 if (ire->ire_flags & RTF_MULTIRT) { 9492 /* 9493 * Force hop limit of multirouted packets if required. 9494 * The hop limit of such packets is bounded by the 9495 * ip_multirt_ttl ndd variable. 9496 * NDP packets must have a hop limit of 255; don't 9497 * change the hop limit in that case. 
9498 */ 9499 if ((ip_multirt_ttl > 0) && 9500 (ip6h->ip6_hops > ip_multirt_ttl) && 9501 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9502 if (ip_debug > 3) { 9503 ip2dbg(("ip_wput_v6: forcing multirt " 9504 "hop limit to %d (was %d) ", 9505 ip_multirt_ttl, ip6h->ip6_hops)); 9506 pr_addr_dbg("v6dst %s\n", AF_INET6, 9507 &ire->ire_addr_v6); 9508 } 9509 ip6h->ip6_hops = ip_multirt_ttl; 9510 } 9511 9512 /* 9513 * We look at this point if there are pending 9514 * unresolved routes. ire_multirt_need_resolve_v6() 9515 * checks in O(n) that all IRE_OFFSUBNET ire 9516 * entries for the packet's destination and 9517 * flagged RTF_MULTIRT are currently resolved. 9518 * If some remain unresolved, we do a copy 9519 * of the current message. It will be used 9520 * to initiate additional route resolutions. 9521 */ 9522 multirt_need_resolve = 9523 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9524 ip2dbg(("ip_wput_v6: ire %p, " 9525 "multirt_need_resolve %d, first_mp %p\n", 9526 (void *)ire, multirt_need_resolve, 9527 (void *)first_mp)); 9528 if (multirt_need_resolve) { 9529 copy_mp = copymsg(first_mp); 9530 if (copy_mp != NULL) { 9531 MULTIRT_DEBUG_TAG(copy_mp); 9532 } 9533 } 9534 } 9535 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9536 connp, caller, 0, ip6i_flags); 9537 if (need_decref) { 9538 CONN_DEC_REF(connp); 9539 connp = NULL; 9540 } 9541 IRE_REFRELE(ire); 9542 9543 /* 9544 * Try to resolve another multiroute if 9545 * ire_multirt_need_resolve_v6() deemed it necessary. 9546 * copy_mp will be consumed (sent or freed) by 9547 * ip_newroute_v6(). 9548 */ 9549 if (copy_mp != NULL) { 9550 if (mctl_present) { 9551 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9552 } else { 9553 ip6h = (ip6_t *)copy_mp->b_rptr; 9554 } 9555 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9556 &ip6h->ip6_src, NULL, zoneid); 9557 } 9558 if (ill != NULL) 9559 ill_refrele(ill); 9560 return; 9561 } 9562 9563 /* 9564 * No full IRE for this destination. 
Send it to 9565 * ip_newroute_v6 to see if anything else matches. 9566 * Mark this packet as having originated on this 9567 * machine. 9568 * Update rptr if there was an ip6i_t header. 9569 */ 9570 mp->b_prev = NULL; 9571 mp->b_next = NULL; 9572 if (ip6i != NULL) 9573 mp->b_rptr -= sizeof (ip6i_t); 9574 9575 if (unspec_src) { 9576 if (ip6i == NULL) { 9577 /* 9578 * Add ip6i_t header to carry unspec_src 9579 * until the packet comes back in ip_wput_v6. 9580 */ 9581 mp = ip_add_info_v6(mp, NULL, v6dstp); 9582 if (mp == NULL) { 9583 if (do_outrequests) 9584 BUMP_MIB(mibptr, ipv6OutRequests); 9585 BUMP_MIB(mibptr, ipv6OutDiscards); 9586 if (mctl_present) 9587 freeb(first_mp); 9588 if (ill != NULL) 9589 ill_refrele(ill); 9590 if (need_decref) 9591 CONN_DEC_REF(connp); 9592 return; 9593 } 9594 ip6i = (ip6i_t *)mp->b_rptr; 9595 9596 if (mctl_present) { 9597 ASSERT(first_mp != mp); 9598 first_mp->b_cont = mp; 9599 } else { 9600 first_mp = mp; 9601 } 9602 9603 if ((mp->b_wptr - (uchar_t *)ip6i) == 9604 sizeof (ip6i_t)) { 9605 /* 9606 * ndp_resolver called from ip_newroute_v6 9607 * expects pulled up message. 
9608 */ 9609 if (!pullupmsg(mp, -1)) { 9610 ip1dbg(("ip_wput_v6: pullupmsg" 9611 " failed\n")); 9612 if (do_outrequests) { 9613 BUMP_MIB(mibptr, 9614 ipv6OutRequests); 9615 } 9616 BUMP_MIB(mibptr, ipv6OutDiscards); 9617 freemsg(first_mp); 9618 if (ill != NULL) 9619 ill_refrele(ill); 9620 if (need_decref) 9621 CONN_DEC_REF(connp); 9622 return; 9623 } 9624 ip6i = (ip6i_t *)mp->b_rptr; 9625 } 9626 ip6h = (ip6_t *)&ip6i[1]; 9627 v6dstp = &ip6h->ip6_dst; 9628 } 9629 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9630 if (mctl_present) { 9631 ASSERT(io != NULL); 9632 io->ipsec_out_unspec_src = unspec_src; 9633 } 9634 } 9635 if (do_outrequests) 9636 BUMP_MIB(mibptr, ipv6OutRequests); 9637 if (need_decref) 9638 CONN_DEC_REF(connp); 9639 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9640 if (ill != NULL) 9641 ill_refrele(ill); 9642 return; 9643 9644 9645 /* 9646 * Handle multicast packets with or without an conn. 9647 * Assumes that the transports set ip6_hops taking 9648 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9649 * into account. 9650 */ 9651 ipv6multicast: 9652 ip2dbg(("ip_wput_v6: multicast\n")); 9653 9654 /* 9655 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9656 * 2. If conn_nofailover_ill is set then use that ill. 9657 * 9658 * Hold the conn_lock till we refhold the ill of interest that is 9659 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9660 * while holding any locks, postpone the refrele until after the 9661 * conn_lock is dropped. 
9662 */ 9663 if (connp != NULL) { 9664 mutex_enter(&connp->conn_lock); 9665 conn_lock_held = B_TRUE; 9666 } else { 9667 conn_lock_held = B_FALSE; 9668 } 9669 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9670 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9671 if (err == ILL_LOOKUP_FAILED) { 9672 ip1dbg(("ip_output_v6: multicast" 9673 " conn_outgoing_pill no ipif\n")); 9674 multicast_discard: 9675 ASSERT(saved_ill == NULL); 9676 if (conn_lock_held) 9677 mutex_exit(&connp->conn_lock); 9678 if (ill != NULL) 9679 ill_refrele(ill); 9680 freemsg(first_mp); 9681 if (do_outrequests) 9682 BUMP_MIB(mibptr, ipv6OutDiscards); 9683 if (need_decref) 9684 CONN_DEC_REF(connp); 9685 return; 9686 } 9687 saved_ill = ill; 9688 ill = connp->conn_outgoing_pill; 9689 attach_if = B_TRUE; 9690 match_flags = MATCH_IRE_ILL; 9691 mibptr = ill->ill_ip6_mib; 9692 9693 /* 9694 * Check if we need an ire that will not be 9695 * looked up by anybody else i.e. HIDDEN. 9696 */ 9697 if (ill_is_probeonly(ill)) 9698 match_flags |= MATCH_IRE_MARK_HIDDEN; 9699 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9700 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9701 if (err == ILL_LOOKUP_FAILED) { 9702 ip1dbg(("ip_output_v6: multicast" 9703 " conn_nofailover_ill no ipif\n")); 9704 goto multicast_discard; 9705 } 9706 saved_ill = ill; 9707 ill = connp->conn_nofailover_ill; 9708 attach_if = B_TRUE; 9709 match_flags = MATCH_IRE_ILL; 9710 9711 /* 9712 * Check if we need an ire that will not be 9713 * looked up by anybody else i.e. HIDDEN. 9714 */ 9715 if (ill_is_probeonly(ill)) 9716 match_flags |= MATCH_IRE_MARK_HIDDEN; 9717 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9718 /* 9719 * Redo 1. If we did not find an IRE_CACHE the first time, 9720 * we should have an ip6i_t with IP6I_ATTACH_IF if 9721 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9722 * used on this endpoint. 
9723 */ 9724 ASSERT(ip6i->ip6i_ifindex != 0); 9725 attach_if = B_TRUE; 9726 ASSERT(ill != NULL); 9727 match_flags = MATCH_IRE_ILL; 9728 9729 /* 9730 * Check if we need an ire that will not be 9731 * looked up by anybody else i.e. HIDDEN. 9732 */ 9733 if (ill_is_probeonly(ill)) 9734 match_flags |= MATCH_IRE_MARK_HIDDEN; 9735 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9736 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9737 9738 ASSERT(ill != NULL); 9739 } else if (ill != NULL) { 9740 /* 9741 * 4. If q is an ill queue and (link local or multicast 9742 * destination) then use that ill. 9743 * We don't need the ipif initialization here. 9744 * This useless assert below is just to prevent lint from 9745 * reporting a null body if statement. 9746 */ 9747 ASSERT(ill != NULL); 9748 } else if (connp != NULL) { 9749 /* 9750 * 5. If IPV6_BOUND_IF has been set use that ill. 9751 * 9752 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 9753 * Otherwise look for the best IRE match for the unspecified 9754 * group to determine the ill. 9755 * 9756 * conn_multicast_ill is used for only IPv6 packets. 9757 * conn_multicast_ipif is used for only IPv4 packets. 9758 * Thus a PF_INET6 socket send both IPv4 and IPv6 9759 * multicast packets using different IP*_MULTICAST_IF 9760 * interfaces. 
9761 */ 9762 if (connp->conn_outgoing_ill != NULL) { 9763 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9764 if (err == ILL_LOOKUP_FAILED) { 9765 ip1dbg(("ip_output_v6: multicast" 9766 " conn_outgoing_ill no ipif\n")); 9767 goto multicast_discard; 9768 } 9769 ill = connp->conn_outgoing_ill; 9770 } else if (connp->conn_multicast_ill != NULL) { 9771 err = ill_check_and_refhold(connp->conn_multicast_ill); 9772 if (err == ILL_LOOKUP_FAILED) { 9773 ip1dbg(("ip_output_v6: multicast" 9774 " conn_multicast_ill no ipif\n")); 9775 goto multicast_discard; 9776 } 9777 ill = connp->conn_multicast_ill; 9778 } else { 9779 mutex_exit(&connp->conn_lock); 9780 conn_lock_held = B_FALSE; 9781 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 9782 if (ipif == NULL) { 9783 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9784 goto multicast_discard; 9785 } 9786 /* 9787 * We have a ref to this ipif, so we can safely 9788 * access ipif_ill. 9789 */ 9790 ill = ipif->ipif_ill; 9791 mutex_enter(&ill->ill_lock); 9792 if (!ILL_CAN_LOOKUP(ill)) { 9793 mutex_exit(&ill->ill_lock); 9794 ipif_refrele(ipif); 9795 ill = NULL; 9796 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9797 goto multicast_discard; 9798 } 9799 ill_refhold_locked(ill); 9800 mutex_exit(&ill->ill_lock); 9801 ipif_refrele(ipif); 9802 /* 9803 * Save binding until IPV6_MULTICAST_IF 9804 * changes it 9805 */ 9806 mutex_enter(&connp->conn_lock); 9807 connp->conn_multicast_ill = ill; 9808 connp->conn_orig_multicast_ifindex = 9809 ill->ill_phyint->phyint_ifindex; 9810 mutex_exit(&connp->conn_lock); 9811 } 9812 } 9813 if (conn_lock_held) 9814 mutex_exit(&connp->conn_lock); 9815 9816 if (saved_ill != NULL) 9817 ill_refrele(saved_ill); 9818 9819 ASSERT(ill != NULL); 9820 /* 9821 * For multicast loopback interfaces replace the multicast address 9822 * with a unicast address for the ire lookup. 
9823 */ 9824 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 9825 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9826 9827 mibptr = ill->ill_ip6_mib; 9828 if (do_outrequests) { 9829 BUMP_MIB(mibptr, ipv6OutRequests); 9830 do_outrequests = B_FALSE; 9831 } 9832 BUMP_MIB(mibptr, ipv6OutMcastPkts); 9833 9834 /* 9835 * As we may lose the conn by the time we reach ip_wput_ire_v6 9836 * we copy conn_multicast_loop and conn_dontroute on to an 9837 * ipsec_out. In case if this datagram goes out secure, 9838 * we need the ill_index also. Copy that also into the 9839 * ipsec_out. 9840 */ 9841 if (mctl_present) { 9842 io = (ipsec_out_t *)first_mp->b_rptr; 9843 ASSERT(first_mp->b_datap->db_type == M_CTL); 9844 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9845 } else { 9846 ASSERT(mp == first_mp); 9847 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 9848 BUMP_MIB(mibptr, ipv6OutDiscards); 9849 freemsg(mp); 9850 if (ill != NULL) 9851 ill_refrele(ill); 9852 if (need_decref) 9853 CONN_DEC_REF(connp); 9854 return; 9855 } 9856 io = (ipsec_out_t *)first_mp->b_rptr; 9857 /* This is not a secure packet */ 9858 io->ipsec_out_secure = B_FALSE; 9859 io->ipsec_out_use_global_policy = B_TRUE; 9860 io->ipsec_out_zoneid = 9861 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 9862 first_mp->b_cont = mp; 9863 mctl_present = B_TRUE; 9864 } 9865 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9866 io->ipsec_out_unspec_src = unspec_src; 9867 if (connp != NULL) 9868 io->ipsec_out_dontroute = connp->conn_dontroute; 9869 9870 send_from_ill: 9871 ASSERT(ill != NULL); 9872 ASSERT(mibptr == ill->ill_ip6_mib); 9873 if (do_outrequests) { 9874 BUMP_MIB(mibptr, ipv6OutRequests); 9875 do_outrequests = B_FALSE; 9876 } 9877 9878 if (io != NULL) 9879 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9880 9881 /* 9882 * When a specific ill is specified (using IPV6_PKTINFO, 9883 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9884 * on routing entries (ftable and ctable) that have a matching 9885 * ire->ire_ipif->ipif_ill. Thus this can only be used 9886 * for destinations that are on-link for the specific ill 9887 * and that can appear on multiple links. Thus it is useful 9888 * for multicast destinations, link-local destinations, and 9889 * at some point perhaps for site-local destinations (if the 9890 * node sits at a site boundary). 9891 * We create the cache entries in the regular ctable since 9892 * it can not "confuse" things for other destinations. 9893 * table. 9894 * 9895 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9896 * It is used only when ire_cache_lookup is used above. 9897 */ 9898 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9899 zoneid, match_flags); 9900 if (ire != NULL) { 9901 /* 9902 * Check if the ire has the RTF_MULTIRT flag, inherited 9903 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9904 */ 9905 if (ire->ire_flags & RTF_MULTIRT) { 9906 /* 9907 * Force hop limit of multirouted packets if required. 9908 * The hop limit of such packets is bounded by the 9909 * ip_multirt_ttl ndd variable. 9910 * NDP packets must have a hop limit of 255; don't 9911 * change the hop limit in that case. 
9912 */ 9913 if ((ip_multirt_ttl > 0) && 9914 (ip6h->ip6_hops > ip_multirt_ttl) && 9915 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9916 if (ip_debug > 3) { 9917 ip2dbg(("ip_wput_v6: forcing multirt " 9918 "hop limit to %d (was %d) ", 9919 ip_multirt_ttl, ip6h->ip6_hops)); 9920 pr_addr_dbg("v6dst %s\n", AF_INET6, 9921 &ire->ire_addr_v6); 9922 } 9923 ip6h->ip6_hops = ip_multirt_ttl; 9924 } 9925 9926 /* 9927 * We look at this point if there are pending 9928 * unresolved routes. ire_multirt_need_resolve_v6() 9929 * checks in O(n) that all IRE_OFFSUBNET ire 9930 * entries for the packet's destination and 9931 * flagged RTF_MULTIRT are currently resolved. 9932 * If some remain unresolved, we make a copy 9933 * of the current message. It will be used 9934 * to initiate additional route resolutions. 9935 */ 9936 multirt_need_resolve = 9937 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9938 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9939 "multirt_need_resolve %d, first_mp %p\n", 9940 (void *)ire, multirt_need_resolve, 9941 (void *)first_mp)); 9942 if (multirt_need_resolve) { 9943 copy_mp = copymsg(first_mp); 9944 if (copy_mp != NULL) { 9945 MULTIRT_DEBUG_TAG(copy_mp); 9946 } 9947 } 9948 } 9949 9950 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9951 ill->ill_name, (void *)ire, 9952 ill->ill_phyint->phyint_ifindex)); 9953 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9954 connp, caller, 9955 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 9956 ip6i_flags); 9957 ire_refrele(ire); 9958 if (need_decref) { 9959 CONN_DEC_REF(connp); 9960 connp = NULL; 9961 } 9962 9963 /* 9964 * Try to resolve another multiroute if 9965 * ire_multirt_need_resolve_v6() deemed it necessary. 9966 * copy_mp will be consumed (sent or freed) by 9967 * ip_newroute_[ipif_]v6(). 
9968 */ 9969 if (copy_mp != NULL) { 9970 if (mctl_present) { 9971 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9972 } else { 9973 ip6h = (ip6_t *)copy_mp->b_rptr; 9974 } 9975 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 9976 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 9977 zoneid); 9978 if (ipif == NULL) { 9979 ip1dbg(("ip_wput_v6: No ipif for " 9980 "multicast\n")); 9981 MULTIRT_DEBUG_UNTAG(copy_mp); 9982 freemsg(copy_mp); 9983 return; 9984 } 9985 ip_newroute_ipif_v6(q, copy_mp, ipif, 9986 ip6h->ip6_dst, unspec_src, zoneid); 9987 ipif_refrele(ipif); 9988 } else { 9989 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9990 &ip6h->ip6_src, ill, zoneid); 9991 } 9992 } 9993 if (ill != NULL) 9994 ill_refrele(ill); 9995 return; 9996 } 9997 if (need_decref) { 9998 CONN_DEC_REF(connp); 9999 connp = NULL; 10000 } 10001 10002 /* Update rptr if there was an ip6i_t header. */ 10003 if (ip6i != NULL) 10004 mp->b_rptr -= sizeof (ip6i_t); 10005 if (unspec_src || attach_if) { 10006 if (ip6i == NULL) { 10007 /* 10008 * Add ip6i_t header to carry unspec_src 10009 * or attach_if until the packet comes back in 10010 * ip_wput_v6. 10011 */ 10012 if (mctl_present) { 10013 first_mp->b_cont = 10014 ip_add_info_v6(mp, NULL, v6dstp); 10015 mp = first_mp->b_cont; 10016 if (mp == NULL) 10017 freeb(first_mp); 10018 } else { 10019 first_mp = mp = ip_add_info_v6(mp, NULL, 10020 v6dstp); 10021 } 10022 if (mp == NULL) { 10023 BUMP_MIB(mibptr, ipv6OutDiscards); 10024 if (ill != NULL) 10025 ill_refrele(ill); 10026 return; 10027 } 10028 ip6i = (ip6i_t *)mp->b_rptr; 10029 if ((mp->b_wptr - (uchar_t *)ip6i) == 10030 sizeof (ip6i_t)) { 10031 /* 10032 * ndp_resolver called from ip_newroute_v6 10033 * expects a pulled up message. 
10034 */ 10035 if (!pullupmsg(mp, -1)) { 10036 ip1dbg(("ip_wput_v6: pullupmsg" 10037 " failed\n")); 10038 BUMP_MIB(mibptr, ipv6OutDiscards); 10039 freemsg(first_mp); 10040 return; 10041 } 10042 ip6i = (ip6i_t *)mp->b_rptr; 10043 } 10044 ip6h = (ip6_t *)&ip6i[1]; 10045 v6dstp = &ip6h->ip6_dst; 10046 } 10047 if (unspec_src) 10048 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10049 if (attach_if) { 10050 /* 10051 * Bind to nofailover/BOUND_PIF overrides ifindex. 10052 */ 10053 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10054 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10055 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10056 if (drop_if_delayed) { 10057 /* This is a multipathing probe packet */ 10058 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10059 } 10060 } 10061 if (mctl_present) { 10062 ASSERT(io != NULL); 10063 io->ipsec_out_unspec_src = unspec_src; 10064 } 10065 } 10066 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10067 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10068 unspec_src, zoneid); 10069 } else { 10070 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10071 zoneid); 10072 } 10073 if (ill != NULL) 10074 ill_refrele(ill); 10075 return; 10076 10077 notv6: 10078 /* 10079 * XXX implement a IPv4 and IPv6 packet counter per conn and 10080 * switch when ratio exceeds e.g. 
10:1 10081 */ 10082 if (q->q_next == NULL) { 10083 connp = Q_TO_CONN(q); 10084 10085 if (IPCL_IS_TCP(connp)) { 10086 /* change conn_send for the tcp_v4_connections */ 10087 connp->conn_send = ip_output; 10088 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10089 /* The 'q' is the default SCTP queue */ 10090 connp = (conn_t *)arg; 10091 } else { 10092 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10093 } 10094 } 10095 BUMP_MIB(mibptr, ipv6OutIPv4); 10096 (void) ip_output(connp, first_mp, q, caller); 10097 if (ill != NULL) 10098 ill_refrele(ill); 10099 } 10100 10101 static void 10102 ip_wput_v6(queue_t *q, mblk_t *mp) 10103 { 10104 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10105 } 10106 10107 static void 10108 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10109 { 10110 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10111 io->ipsec_out_attach_if = B_TRUE; 10112 io->ipsec_out_ill_index = attach_index; 10113 } 10114 10115 /* 10116 * NULL send-to queue - packet is to be delivered locally. 10117 */ 10118 void 10119 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10120 ire_t *ire, int fanout_flags) 10121 { 10122 uint32_t ports; 10123 mblk_t *mp = first_mp, *first_mp1; 10124 boolean_t mctl_present; 10125 uint8_t nexthdr; 10126 uint16_t hdr_length; 10127 ipsec_out_t *io; 10128 mib2_ipv6IfStatsEntry_t *mibptr; 10129 ilm_t *ilm; 10130 uint_t nexthdr_offset; 10131 10132 nexthdr = ip6h->ip6_nxt; 10133 mibptr = ill->ill_ip6_mib; 10134 10135 /* Fastpath */ 10136 switch (nexthdr) { 10137 case IPPROTO_TCP: 10138 case IPPROTO_UDP: 10139 case IPPROTO_ICMPV6: 10140 case IPPROTO_SCTP: 10141 hdr_length = IPV6_HDR_LEN; 10142 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10143 (uchar_t *)ip6h); 10144 break; 10145 default: { 10146 uint8_t *nexthdrp; 10147 10148 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10149 &hdr_length, &nexthdrp)) { 10150 /* Malformed packet */ 10151 BUMP_MIB(mibptr, ipv6OutDiscards); 10152 freemsg(first_mp); 10153 return; 10154 } 10155 nexthdr 
= *nexthdrp; 10156 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10157 break; 10158 } 10159 } 10160 10161 if (DB_TYPE(mp) == M_CTL) { 10162 io = (ipsec_out_t *)mp->b_rptr; 10163 if (!io->ipsec_out_secure) { 10164 mp = mp->b_cont; 10165 freeb(first_mp); 10166 first_mp = mp; 10167 mctl_present = B_FALSE; 10168 } else { 10169 mctl_present = B_TRUE; 10170 mp = first_mp->b_cont; 10171 ipsec_out_to_in(first_mp); 10172 } 10173 } else { 10174 mctl_present = B_FALSE; 10175 } 10176 10177 UPDATE_OB_PKT_COUNT(ire); 10178 ire->ire_last_used_time = lbolt; 10179 10180 /* 10181 * Remove reacability confirmation bit from version field 10182 * before looping back the packet. 10183 */ 10184 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10185 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10186 } 10187 10188 switch (nexthdr) { 10189 case IPPROTO_TCP: 10190 if (DB_TYPE(mp) == M_DATA) { 10191 /* 10192 * M_DATA mblk, so init mblk (chain) for 10193 * no struio(). 10194 */ 10195 mblk_t *mp1 = mp; 10196 10197 do { 10198 mp1->b_datap->db_struioflag = 0; 10199 } while ((mp1 = mp1->b_cont) != NULL); 10200 } 10201 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10202 TCP_PORTS_OFFSET); 10203 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10204 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10205 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10206 hdr_length, mctl_present, ire->ire_zoneid); 10207 return; 10208 10209 case IPPROTO_UDP: 10210 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10211 UDP_PORTS_OFFSET); 10212 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10213 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10214 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10215 return; 10216 10217 case IPPROTO_SCTP: 10218 { 10219 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10220 10221 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10222 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10223 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10224 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10225 ire->ire_zoneid); 10226 
return; 10227 } 10228 case IPPROTO_ICMPV6: { 10229 icmp6_t *icmp6; 10230 10231 /* check for full IPv6+ICMPv6 header */ 10232 if ((mp->b_wptr - mp->b_rptr) < 10233 (hdr_length + ICMP6_MINLEN)) { 10234 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10235 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10236 " failed\n")); 10237 BUMP_MIB(mibptr, ipv6OutDiscards); 10238 freemsg(first_mp); 10239 return; 10240 } 10241 ip6h = (ip6_t *)mp->b_rptr; 10242 } 10243 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10244 10245 /* Update output mib stats */ 10246 icmp_update_out_mib_v6(ill, icmp6); 10247 10248 /* Check variable for testing applications */ 10249 if (ipv6_drop_inbound_icmpv6) { 10250 freemsg(first_mp); 10251 return; 10252 } 10253 /* 10254 * Assume that there is always at least one conn for 10255 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10256 * where there is no conn. 10257 */ 10258 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10259 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10260 /* 10261 * In the multicast case, applications may have 10262 * joined the group from different zones, so we 10263 * need to deliver the packet to each of them. 10264 * Loop through the multicast memberships 10265 * structures (ilm) on the receive ill and send 10266 * a copy of the packet up each matching one. 10267 * However, we don't do this for multicasts sent 10268 * on the loopback interface (PHYI_LOOPBACK flag 10269 * set) as they must stay in the sender's zone. 
10270 */ 10271 ILM_WALKER_HOLD(ill); 10272 for (ilm = ill->ill_ilm; ilm != NULL; 10273 ilm = ilm->ilm_next) { 10274 if (ilm->ilm_flags & ILM_DELETED) 10275 continue; 10276 if (!IN6_ARE_ADDR_EQUAL( 10277 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10278 continue; 10279 if ((fanout_flags & 10280 IP_FF_NO_MCAST_LOOP) && 10281 ilm->ilm_zoneid == ire->ire_zoneid) 10282 continue; 10283 if (!ipif_lookup_zoneid(ill, 10284 ilm->ilm_zoneid, IPIF_UP, NULL)) 10285 continue; 10286 10287 first_mp1 = ip_copymsg(first_mp); 10288 if (first_mp1 == NULL) 10289 continue; 10290 icmp_inbound_v6(q, first_mp1, ill, 10291 hdr_length, mctl_present, 10292 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10293 } 10294 ILM_WALKER_RELE(ill); 10295 } else { 10296 first_mp1 = ip_copymsg(first_mp); 10297 if (first_mp1 != NULL) 10298 icmp_inbound_v6(q, first_mp1, ill, 10299 hdr_length, mctl_present, 10300 IP6_NO_IPPOLICY, ire->ire_zoneid); 10301 } 10302 } 10303 /* FALLTHRU */ 10304 default: { 10305 /* 10306 * Handle protocols with which IPv6 is less intimate. 10307 */ 10308 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10309 10310 /* 10311 * Enable sending ICMP for "Unknown" nexthdr 10312 * case. i.e. where we did not FALLTHRU from 10313 * IPPROTO_ICMPV6 processing case above. 10314 */ 10315 if (nexthdr != IPPROTO_ICMPV6) 10316 fanout_flags |= IP_FF_SEND_ICMP; 10317 /* 10318 * Note: There can be more than one stream bound 10319 * to a particular protocol. When this is the case, 10320 * each one gets a copy of any incoming packets. 10321 */ 10322 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10323 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10324 mctl_present, ire->ire_zoneid); 10325 return; 10326 } 10327 } 10328 } 10329 10330 /* 10331 * Send packet using IRE. 10332 * Checksumming is controlled by cksum_request: 10333 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10334 * 1 => Skip TCP/UDP/SCTP checksum 10335 * Otherwise => checksum_request contains insert offset for checksum 10336 * 10337 * Assumes that the following set of headers appear in the first 10338 * mblk: 10339 * ip6_t 10340 * Any extension headers 10341 * TCP/UDP/SCTP header (if present) 10342 * The routine can handle an ICMPv6 header that is not in the first mblk. 10343 * 10344 * NOTE : This function does not ire_refrele the ire passed in as the 10345 * argument unlike ip_wput_ire where the REFRELE is done. 10346 * Refer to ip_wput_ire for more on this. 10347 */ 10348 static void 10349 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10350 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10351 { 10352 ip6_t *ip6h; 10353 uint8_t nexthdr; 10354 uint16_t hdr_length; 10355 uint_t reachable = 0x0; 10356 ill_t *ill; 10357 mib2_ipv6IfStatsEntry_t *mibptr; 10358 mblk_t *first_mp; 10359 boolean_t mctl_present; 10360 ipsec_out_t *io; 10361 boolean_t conn_dontroute; /* conn value for multicast */ 10362 boolean_t conn_multicast_loop; /* conn value for multicast */ 10363 boolean_t multicast_forward; /* Should we forward ? */ 10364 int max_frag; 10365 zoneid_t zoneid; 10366 10367 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10368 ill = ire_to_ill(ire); 10369 first_mp = mp; 10370 multicast_forward = B_FALSE; 10371 10372 if (mp->b_datap->db_type != M_CTL) { 10373 ip6h = (ip6_t *)first_mp->b_rptr; 10374 } else { 10375 io = (ipsec_out_t *)first_mp->b_rptr; 10376 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10377 /* 10378 * Grab the zone id now because the M_CTL can be discarded by 10379 * ip_wput_ire_parse_ipsec_out() below. 10380 */ 10381 zoneid = io->ipsec_out_zoneid; 10382 ASSERT(zoneid != ALL_ZONES); 10383 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10384 /* 10385 * For the multicast case, ipsec_out carries conn_dontroute and 10386 * conn_multicast_loop as conn may not be available here. 
We 10387 * need this for multicast loopback and forwarding which is done 10388 * later in the code. 10389 */ 10390 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10391 conn_dontroute = io->ipsec_out_dontroute; 10392 conn_multicast_loop = io->ipsec_out_multicast_loop; 10393 /* 10394 * If conn_dontroute is not set or conn_multicast_loop 10395 * is set, we need to do forwarding/loopback. For 10396 * datagrams from ip_wput_multicast, conn_dontroute is 10397 * set to B_TRUE and conn_multicast_loop is set to 10398 * B_FALSE so that we neither do forwarding nor 10399 * loopback. 10400 */ 10401 if (!conn_dontroute || conn_multicast_loop) 10402 multicast_forward = B_TRUE; 10403 } 10404 } 10405 10406 /* 10407 * If the sender didn't supply the hop limit and there is a default 10408 * unicast hop limit associated with the output interface, we use 10409 * that if the packet is unicast. Interface specific unicast hop 10410 * limits as set via the SIOCSLIFLNKINFO ioctl. 10411 */ 10412 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10413 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10414 ip6h->ip6_hops = ill->ill_max_hops; 10415 } 10416 10417 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid) { 10418 /* 10419 * When a zone sends a packet to another zone, we try to deliver 10420 * the packet under the same conditions as if the destination 10421 * was a real node on the network. To do so, we look for a 10422 * matching route in the forwarding table. 10423 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10424 * ip_newroute_v6() does. 
10425 */ 10426 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10427 NULL, NULL, zoneid, 0, (MATCH_IRE_RECURSIVE | 10428 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10429 if (src_ire != NULL && 10430 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10431 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10432 !unspec_src) { 10433 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10434 } 10435 ire_refrele(src_ire); 10436 } else { 10437 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10438 if (src_ire != NULL) { 10439 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10440 ire_refrele(src_ire); 10441 freemsg(first_mp); 10442 return; 10443 } 10444 ire_refrele(src_ire); 10445 } 10446 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10447 /* Failed */ 10448 freemsg(first_mp); 10449 return; 10450 } 10451 icmp_unreachable_v6(q, first_mp, 10452 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10453 return; 10454 } 10455 } 10456 10457 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10458 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10459 connp, unspec_src); 10460 if (mp == NULL) { 10461 return; 10462 } 10463 } 10464 10465 first_mp = mp; 10466 if (mp->b_datap->db_type == M_CTL) { 10467 io = (ipsec_out_t *)mp->b_rptr; 10468 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10469 mp = mp->b_cont; 10470 mctl_present = B_TRUE; 10471 } else { 10472 mctl_present = B_FALSE; 10473 } 10474 10475 ip6h = (ip6_t *)mp->b_rptr; 10476 nexthdr = ip6h->ip6_nxt; 10477 mibptr = ill->ill_ip6_mib; 10478 10479 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10480 /* 10481 * The ire_src_addr_v6 always contains a useable source address 10482 * for the destination (based on source address selection rules 10483 * with respect to address scope as well as deprecated vs. 10484 * preferred addresses). 
10485 */ 10486 ip6h->ip6_src = ire->ire_src_addr_v6; 10487 } 10488 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10489 if ((connp != NULL && connp->conn_multicast_loop) || 10490 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10491 ilm_t *ilm; 10492 10493 ILM_WALKER_HOLD(ill); 10494 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10495 ILM_WALKER_RELE(ill); 10496 if (ilm != NULL) { 10497 mblk_t *nmp; 10498 int fanout_flags = 0; 10499 10500 if (connp != NULL && 10501 !connp->conn_multicast_loop) { 10502 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10503 } 10504 ip1dbg(("ip_wput_ire_v6: " 10505 "Loopback multicast\n")); 10506 nmp = ip_copymsg(first_mp); 10507 if (nmp != NULL) { 10508 ip6_t *nip6h; 10509 10510 if (mctl_present) { 10511 nip6h = (ip6_t *) 10512 nmp->b_cont->b_rptr; 10513 } else { 10514 nip6h = (ip6_t *)nmp->b_rptr; 10515 } 10516 /* 10517 * Deliver locally and to every local 10518 * zone, except the sending zone when 10519 * IPV6_MULTICAST_LOOP is disabled. 10520 */ 10521 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10522 ire, fanout_flags); 10523 } else { 10524 BUMP_MIB(mibptr, ipv6OutDiscards); 10525 ip1dbg(("ip_wput_ire_v6: " 10526 "copymsg failed\n")); 10527 } 10528 } 10529 } 10530 if (ip6h->ip6_hops == 0 || 10531 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10532 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10533 /* 10534 * Local multicast or just loopback on loopback 10535 * interface. 10536 */ 10537 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10538 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10539 freemsg(first_mp); 10540 return; 10541 } 10542 } 10543 10544 if (ire->ire_stq != NULL) { 10545 uint32_t sum; 10546 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10547 ill_phyint->phyint_ifindex; 10548 queue_t *dev_q = ire->ire_stq->q_next; 10549 10550 /* 10551 * non-NULL send-to queue - packet is to be sent 10552 * out an interface. 10553 */ 10554 10555 /* Driver is flow-controlling? 
*/ 10556 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10557 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10558 /* 10559 * Queue packet if we have an conn to give back 10560 * pressure. We can't queue packets intended for 10561 * hardware acceleration since we've tossed that 10562 * state already. If the packet is being fed back 10563 * from ire_send_v6, we don't know the position in 10564 * the queue to enqueue the packet and we discard 10565 * the packet. 10566 */ 10567 ASSERT(mp == first_mp); 10568 if (ip_output_queue && connp != NULL && 10569 !mctl_present && caller != IRE_SEND) { 10570 if (caller == IP_WSRV) { 10571 connp->conn_did_putbq = 1; 10572 (void) putbq(connp->conn_wq, mp); 10573 conn_drain_insert(connp); 10574 /* 10575 * caller == IP_WSRV implies we are 10576 * the service thread, and the 10577 * queue is already noenabled. 10578 * The check for canput and 10579 * the putbq is not atomic. 10580 * So we need to check again. 10581 */ 10582 if (canput(dev_q)) 10583 connp->conn_did_putbq = 0; 10584 } else { 10585 (void) putq(connp->conn_wq, mp); 10586 } 10587 return; 10588 } 10589 BUMP_MIB(mibptr, ipv6OutDiscards); 10590 freemsg(mp); 10591 return; 10592 } 10593 10594 /* 10595 * Look for reachability confirmations from the transport. 
10596 */ 10597 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10598 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10599 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10600 if (mctl_present) 10601 io->ipsec_out_reachable = B_TRUE; 10602 } 10603 /* Fastpath */ 10604 switch (nexthdr) { 10605 case IPPROTO_TCP: 10606 case IPPROTO_UDP: 10607 case IPPROTO_ICMPV6: 10608 case IPPROTO_SCTP: 10609 hdr_length = IPV6_HDR_LEN; 10610 break; 10611 default: { 10612 uint8_t *nexthdrp; 10613 10614 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10615 &hdr_length, &nexthdrp)) { 10616 /* Malformed packet */ 10617 BUMP_MIB(mibptr, ipv6OutDiscards); 10618 freemsg(first_mp); 10619 return; 10620 } 10621 nexthdr = *nexthdrp; 10622 break; 10623 } 10624 } 10625 10626 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10627 uint16_t *up; 10628 uint16_t *insp; 10629 10630 /* 10631 * The packet header is processed once for all, even 10632 * in the multirouting case. We disable hardware 10633 * checksum if the packet is multirouted, as it will be 10634 * replicated via several interfaces, and not all of 10635 * them may have this capability. 10636 */ 10637 if (cksum_request == 1 && 10638 !(ire->ire_flags & RTF_MULTIRT)) { 10639 /* Skip the transport checksum */ 10640 goto cksum_done; 10641 } 10642 /* 10643 * Do user-configured raw checksum. 
10644 * Compute checksum and insert at offset "cksum_request" 10645 */ 10646 10647 /* check for enough headers for checksum */ 10648 cksum_request += hdr_length; /* offset from rptr */ 10649 if ((mp->b_wptr - mp->b_rptr) < 10650 (cksum_request + sizeof (int16_t))) { 10651 if (!pullupmsg(mp, 10652 cksum_request + sizeof (int16_t))) { 10653 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10654 " failed\n")); 10655 BUMP_MIB(mibptr, ipv6OutDiscards); 10656 freemsg(first_mp); 10657 return; 10658 } 10659 ip6h = (ip6_t *)mp->b_rptr; 10660 } 10661 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10662 ASSERT(((uintptr_t)insp & 0x1) == 0); 10663 up = (uint16_t *)&ip6h->ip6_src; 10664 /* 10665 * icmp has placed length and routing 10666 * header adjustment in *insp. 10667 */ 10668 sum = htons(nexthdr) + 10669 up[0] + up[1] + up[2] + up[3] + 10670 up[4] + up[5] + up[6] + up[7] + 10671 up[8] + up[9] + up[10] + up[11] + 10672 up[12] + up[13] + up[14] + up[15]; 10673 sum = (sum & 0xffff) + (sum >> 16); 10674 *insp = IP_CSUM(mp, hdr_length, sum); 10675 if (*insp == 0) 10676 *insp = 0xFFFF; 10677 } else if (nexthdr == IPPROTO_TCP) { 10678 uint16_t *up; 10679 10680 /* 10681 * Check for full IPv6 header + enough TCP header 10682 * to get at the checksum field. 10683 */ 10684 if ((mp->b_wptr - mp->b_rptr) < 10685 (hdr_length + TCP_CHECKSUM_OFFSET + 10686 TCP_CHECKSUM_SIZE)) { 10687 if (!pullupmsg(mp, hdr_length + 10688 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10689 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10690 " failed\n")); 10691 BUMP_MIB(mibptr, ipv6OutDiscards); 10692 freemsg(first_mp); 10693 return; 10694 } 10695 ip6h = (ip6_t *)mp->b_rptr; 10696 } 10697 10698 up = (uint16_t *)&ip6h->ip6_src; 10699 /* 10700 * Note: The TCP module has stored the length value 10701 * into the tcp checksum field, so we don't 10702 * need to explicitly sum it in here. 
10703 */ 10704 sum = up[0] + up[1] + up[2] + up[3] + 10705 up[4] + up[5] + up[6] + up[7] + 10706 up[8] + up[9] + up[10] + up[11] + 10707 up[12] + up[13] + up[14] + up[15]; 10708 10709 /* Fold the initial sum */ 10710 sum = (sum & 0xffff) + (sum >> 16); 10711 10712 up = (uint16_t *)(((uchar_t *)ip6h) + 10713 hdr_length + TCP_CHECKSUM_OFFSET); 10714 10715 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10716 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10717 ire->ire_max_frag, mctl_present, sum); 10718 10719 /* Software checksum? */ 10720 if (DB_CKSUMFLAGS(mp) == 0) { 10721 IP6_STAT(ip6_out_sw_cksum); 10722 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 10723 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10724 hdr_length); 10725 } 10726 } else if (nexthdr == IPPROTO_UDP) { 10727 uint16_t *up; 10728 10729 /* 10730 * check for full IPv6 header + enough UDP header 10731 * to get at the UDP checksum field 10732 */ 10733 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10734 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10735 if (!pullupmsg(mp, hdr_length + 10736 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10737 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10738 " failed\n")); 10739 BUMP_MIB(mibptr, ipv6OutDiscards); 10740 freemsg(first_mp); 10741 return; 10742 } 10743 ip6h = (ip6_t *)mp->b_rptr; 10744 } 10745 up = (uint16_t *)&ip6h->ip6_src; 10746 /* 10747 * Note: The UDP module has stored the length value 10748 * into the udp checksum field, so we don't 10749 * need to explicitly sum it in here. 
10750 */ 10751 sum = up[0] + up[1] + up[2] + up[3] + 10752 up[4] + up[5] + up[6] + up[7] + 10753 up[8] + up[9] + up[10] + up[11] + 10754 up[12] + up[13] + up[14] + up[15]; 10755 10756 /* Fold the initial sum */ 10757 sum = (sum & 0xffff) + (sum >> 16); 10758 10759 up = (uint16_t *)(((uchar_t *)ip6h) + 10760 hdr_length + UDP_CHECKSUM_OFFSET); 10761 10762 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10763 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10764 ire->ire_max_frag, mctl_present, sum); 10765 10766 /* Software checksum? */ 10767 if (DB_CKSUMFLAGS(mp) == 0) { 10768 IP6_STAT(ip6_out_sw_cksum); 10769 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 10770 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10771 hdr_length); 10772 } 10773 } else if (nexthdr == IPPROTO_ICMPV6) { 10774 uint16_t *up; 10775 icmp6_t *icmp6; 10776 10777 /* check for full IPv6+ICMPv6 header */ 10778 if ((mp->b_wptr - mp->b_rptr) < 10779 (hdr_length + ICMP6_MINLEN)) { 10780 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10781 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10782 " failed\n")); 10783 BUMP_MIB(mibptr, ipv6OutDiscards); 10784 freemsg(first_mp); 10785 return; 10786 } 10787 ip6h = (ip6_t *)mp->b_rptr; 10788 } 10789 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10790 up = (uint16_t *)&ip6h->ip6_src; 10791 /* 10792 * icmp has placed length and routing 10793 * header adjustment in icmp6_cksum. 
10794 */ 10795 sum = htons(IPPROTO_ICMPV6) + 10796 up[0] + up[1] + up[2] + up[3] + 10797 up[4] + up[5] + up[6] + up[7] + 10798 up[8] + up[9] + up[10] + up[11] + 10799 up[12] + up[13] + up[14] + up[15]; 10800 sum = (sum & 0xffff) + (sum >> 16); 10801 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10802 if (icmp6->icmp6_cksum == 0) 10803 icmp6->icmp6_cksum = 0xFFFF; 10804 10805 /* Update output mib stats */ 10806 icmp_update_out_mib_v6(ill, icmp6); 10807 } else if (nexthdr == IPPROTO_SCTP) { 10808 sctp_hdr_t *sctph; 10809 10810 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10811 if (!pullupmsg(mp, hdr_length + 10812 sizeof (*sctph))) { 10813 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10814 " failed\n")); 10815 BUMP_MIB(ill->ill_ip6_mib, 10816 ipv6OutDiscards); 10817 freemsg(mp); 10818 return; 10819 } 10820 ip6h = (ip6_t *)mp->b_rptr; 10821 } 10822 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10823 sctph->sh_chksum = 0; 10824 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10825 } 10826 10827 cksum_done: 10828 /* 10829 * We force the insertion of a fragment header using the 10830 * IPH_FRAG_HDR flag in two cases: 10831 * - after reception of an ICMPv6 "packet too big" message 10832 * with a MTU < 1280 (cf. RFC 2460 section 5) 10833 * - for multirouted IPv6 packets, so that the receiver can 10834 * discard duplicates according to their fragment identifier 10835 * 10836 * Two flags modifed from the API can modify this behavior. 10837 * The first is IPV6_USE_MIN_MTU. With this API the user 10838 * can specify how to manage PMTUD for unicast and multicast. 10839 * 10840 * IPV6_DONTFRAG disallows fragmentation. 
10841 */ 10842 max_frag = ire->ire_max_frag; 10843 switch (IP6I_USE_MIN_MTU_API(flags)) { 10844 case IPV6_USE_MIN_MTU_DEFAULT: 10845 case IPV6_USE_MIN_MTU_UNICAST: 10846 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10847 max_frag = IPV6_MIN_MTU; 10848 } 10849 break; 10850 10851 case IPV6_USE_MIN_MTU_NEVER: 10852 max_frag = IPV6_MIN_MTU; 10853 break; 10854 } 10855 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10856 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10857 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10858 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10859 max_frag, B_FALSE, B_TRUE); 10860 return; 10861 } 10862 10863 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10864 (mp->b_cont ? msgdsize(mp) : 10865 mp->b_wptr - (uchar_t *)ip6h)) { 10866 ip0dbg(("Packet length mismatch: %d, %ld\n", 10867 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10868 msgdsize(mp))); 10869 freemsg(first_mp); 10870 return; 10871 } 10872 /* Do IPSEC processing first */ 10873 if (mctl_present) { 10874 if (attach_index != 0) 10875 ipsec_out_attach_if(io, attach_index); 10876 ipsec_out_process(q, first_mp, ire, ill_index); 10877 return; 10878 } 10879 ASSERT(mp->b_prev == NULL); 10880 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10881 ntohs(ip6h->ip6_plen) + 10882 IPV6_HDR_LEN, max_frag)); 10883 ASSERT(mp == first_mp); 10884 /* Initiate IPPF processing */ 10885 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 10886 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10887 if (mp == NULL) { 10888 return; 10889 } 10890 } 10891 ip_wput_frag_v6(mp, ire, reachable, connp, 10892 caller, max_frag); 10893 return; 10894 } 10895 /* Do IPSEC processing first */ 10896 if (mctl_present) { 10897 int extra_len = ipsec_out_extra_length(first_mp); 10898 10899 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 10900 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 10901 /* 10902 * IPsec headers will push the packet over the 10903 * MTU limit. 
Issue an ICMPv6 Packet Too Big 10904 * message for this packet if the upper-layer 10905 * that issued this packet will be able to 10906 * react to the icmp_pkt2big_v6() that we'll 10907 * generate. 10908 */ 10909 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10910 max_frag, B_FALSE, B_TRUE); 10911 return; 10912 } 10913 if (attach_index != 0) 10914 ipsec_out_attach_if(io, attach_index); 10915 ipsec_out_process(q, first_mp, ire, ill_index); 10916 return; 10917 } 10918 /* 10919 * XXX multicast: add ip_mforward_v6() here. 10920 * Check conn_dontroute 10921 */ 10922 #ifdef lint 10923 /* 10924 * XXX The only purpose of this statement is to avoid lint 10925 * errors. See the above "XXX multicast". When that gets 10926 * fixed, remove this whole #ifdef lint section. 10927 */ 10928 ip3dbg(("multicast forward is %s.\n", 10929 (multicast_forward ? "TRUE" : "FALSE"))); 10930 #endif 10931 10932 UPDATE_OB_PKT_COUNT(ire); 10933 ire->ire_last_used_time = lbolt; 10934 ASSERT(mp == first_mp); 10935 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 10936 } else { 10937 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 10938 } 10939 } 10940 10941 /* 10942 * Outbound IPv6 fragmentation routine using MDT. 10943 */ 10944 static void 10945 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 10946 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 10947 { 10948 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 10949 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 10950 mblk_t *hdr_mp, *md_mp = NULL; 10951 int i1; 10952 multidata_t *mmd; 10953 unsigned char *hdr_ptr, *pld_ptr; 10954 ip_pdescinfo_t pdi; 10955 uint32_t ident; 10956 size_t len; 10957 uint16_t offset; 10958 queue_t *stq = ire->ire_stq; 10959 ill_t *ill = (ill_t *)stq->q_ptr; 10960 10961 ASSERT(DB_TYPE(mp) == M_DATA); 10962 ASSERT(MBLKL(mp) > unfragmentable_len); 10963 10964 /* 10965 * Move read ptr past unfragmentable portion, we don't want this part 10966 * of the data in our fragments. 
10967 */ 10968 mp->b_rptr += unfragmentable_len; 10969 10970 /* Calculate how many packets we will send out */ 10971 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 10972 pkts = (i1 + max_chunk - 1) / max_chunk; 10973 ASSERT(pkts > 1); 10974 10975 /* Allocate a message block which will hold all the IP Headers. */ 10976 wroff = ip_wroff_extra; 10977 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 10978 10979 i1 = pkts * hdr_chunk_len; 10980 /* 10981 * Create the header buffer, Multidata and destination address 10982 * and SAP attribute that should be associated with it. 10983 */ 10984 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 10985 ((hdr_mp->b_wptr += i1), 10986 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 10987 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 10988 freemsg(mp); 10989 if (md_mp == NULL) { 10990 freemsg(hdr_mp); 10991 } else { 10992 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 10993 freemsg(md_mp); 10994 } 10995 IP6_STAT(ip6_frag_mdt_allocfail); 10996 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10997 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 10998 return; 10999 } 11000 IP6_STAT(ip6_frag_mdt_allocd); 11001 11002 /* 11003 * Add a payload buffer to the Multidata; this operation must not 11004 * fail, or otherwise our logic in this routine is broken. There 11005 * is no memory allocation done by the routine, so any returned 11006 * failure simply tells us that we've done something wrong. 11007 * 11008 * A failure tells us that either we're adding the same payload 11009 * buffer more than once, or we're trying to add more buffers than 11010 * allowed. None of the above cases should happen, and we panic 11011 * because either there's horrible heap corruption, and/or 11012 * programming mistake. 
11013 */ 11014 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11015 goto pbuf_panic; 11016 } 11017 11018 hdr_ptr = hdr_mp->b_rptr; 11019 pld_ptr = mp->b_rptr; 11020 11021 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11022 11023 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11024 11025 /* 11026 * len is the total length of the fragmentable data in this 11027 * datagram. For each fragment sent, we will decrement len 11028 * by the amount of fragmentable data sent in that fragment 11029 * until len reaches zero. 11030 */ 11031 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11032 11033 offset = 0; 11034 prev_nexthdr_offset += wroff; 11035 11036 while (len != 0) { 11037 size_t mlen; 11038 ip6_t *fip6h; 11039 ip6_frag_t *fraghdr; 11040 int error; 11041 11042 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11043 mlen = MIN(len, max_chunk); 11044 len -= mlen; 11045 11046 fip6h = (ip6_t *)(hdr_ptr + wroff); 11047 ASSERT(OK_32PTR(fip6h)); 11048 bcopy(ip6h, fip6h, unfragmentable_len); 11049 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11050 11051 fip6h->ip6_plen = htons((uint16_t)(mlen + 11052 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11053 11054 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11055 unfragmentable_len); 11056 fraghdr->ip6f_nxt = nexthdr; 11057 fraghdr->ip6f_reserved = 0; 11058 fraghdr->ip6f_offlg = htons(offset) | 11059 ((len != 0) ? IP6F_MORE_FRAG : 0); 11060 fraghdr->ip6f_ident = ident; 11061 11062 /* 11063 * Record offset and size of header and data of the next packet 11064 * in the multidata message. 
11065 */ 11066 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11067 unfragmentable_len + sizeof (ip6_frag_t), 0); 11068 PDESC_PLD_INIT(&pdi); 11069 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11070 ASSERT(i1 > 0); 11071 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11072 if (i1 == mlen) { 11073 pld_ptr += mlen; 11074 } else { 11075 i1 = mlen - i1; 11076 mp = mp->b_cont; 11077 ASSERT(mp != NULL); 11078 ASSERT(MBLKL(mp) >= i1); 11079 /* 11080 * Attach the next payload message block to the 11081 * multidata message. 11082 */ 11083 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11084 goto pbuf_panic; 11085 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11086 pld_ptr = mp->b_rptr + i1; 11087 } 11088 11089 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11090 KM_NOSLEEP)) == NULL) { 11091 /* 11092 * Any failure other than ENOMEM indicates that we 11093 * have passed in invalid pdesc info or parameters 11094 * to mmd_addpdesc, which must not happen. 11095 * 11096 * EINVAL is a result of failure on boundary checks 11097 * against the pdesc info contents. It should not 11098 * happen, and we panic because either there's 11099 * horrible heap corruption, and/or programming 11100 * mistake. 11101 */ 11102 if (error != ENOMEM) { 11103 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11104 "pdesc logic error detected for " 11105 "mmd %p pinfo %p (%d)\n", 11106 (void *)mmd, (void *)&pdi, error); 11107 /* NOTREACHED */ 11108 } 11109 IP6_STAT(ip6_frag_mdt_addpdescfail); 11110 /* Free unattached payload message blocks as well */ 11111 md_mp->b_cont = mp->b_cont; 11112 goto free_mmd; 11113 } 11114 11115 /* Advance fragment offset. */ 11116 offset += mlen; 11117 11118 /* Advance to location for next header in the buffer. */ 11119 hdr_ptr += hdr_chunk_len; 11120 11121 /* Did we reach the next payload message block? */ 11122 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11123 mp = mp->b_cont; 11124 /* 11125 * Attach the next message block with payload 11126 * data to the multidata message. 
11127 */ 11128 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11129 goto pbuf_panic; 11130 pld_ptr = mp->b_rptr; 11131 } 11132 } 11133 11134 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11135 ASSERT(mp->b_wptr == pld_ptr); 11136 11137 /* Update IP statistics */ 11138 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11139 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11140 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11141 11142 ire->ire_ob_pkt_count += pkts; 11143 if (ire->ire_ipif != NULL) 11144 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11145 11146 ire->ire_last_used_time = lbolt; 11147 /* Send it down */ 11148 putnext(stq, md_mp); 11149 return; 11150 11151 pbuf_panic: 11152 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11153 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11154 pbuf_idx); 11155 /* NOTREACHED */ 11156 } 11157 11158 /* 11159 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11160 * We have not optimized this in terms of number of mblks 11161 * allocated. For instance, for each fragment sent we always allocate a 11162 * mblk to hold the IPv6 header and fragment header. 11163 * 11164 * Assumes that all the extension headers are contained in the first mblk. 11165 * 11166 * The fragment header is inserted after an hop-by-hop options header 11167 * and after [an optional destinations header followed by] a routing header. 11168 * 11169 * NOTE : This function does not ire_refrele the ire passed in as 11170 * the argument. 
11171 */ 11172 void 11173 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11174 int caller, int max_frag) 11175 { 11176 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11177 ip6_t *fip6h; 11178 mblk_t *hmp; 11179 mblk_t *hmp0; 11180 mblk_t *dmp; 11181 ip6_frag_t *fraghdr; 11182 size_t unfragmentable_len; 11183 size_t len; 11184 size_t mlen; 11185 size_t max_chunk; 11186 uint32_t ident; 11187 uint16_t off_flags; 11188 uint16_t offset = 0; 11189 ill_t *ill; 11190 uint8_t nexthdr; 11191 uint_t prev_nexthdr_offset; 11192 uint8_t *ptr; 11193 11194 ASSERT(ire->ire_type == IRE_CACHE); 11195 ill = (ill_t *)ire->ire_stq->q_ptr; 11196 11197 /* 11198 * Determine the length of the unfragmentable portion of this 11199 * datagram. This consists of the IPv6 header, a potential 11200 * hop-by-hop options header, a potential pre-routing-header 11201 * destination options header, and a potential routing header. 11202 */ 11203 nexthdr = ip6h->ip6_nxt; 11204 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11205 ptr = (uint8_t *)&ip6h[1]; 11206 11207 if (nexthdr == IPPROTO_HOPOPTS) { 11208 ip6_hbh_t *hbh_hdr; 11209 uint_t hdr_len; 11210 11211 hbh_hdr = (ip6_hbh_t *)ptr; 11212 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11213 nexthdr = hbh_hdr->ip6h_nxt; 11214 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11215 - (uint8_t *)ip6h; 11216 ptr += hdr_len; 11217 } 11218 if (nexthdr == IPPROTO_DSTOPTS) { 11219 ip6_dest_t *dest_hdr; 11220 uint_t hdr_len; 11221 11222 dest_hdr = (ip6_dest_t *)ptr; 11223 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11224 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11225 nexthdr = dest_hdr->ip6d_nxt; 11226 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11227 - (uint8_t *)ip6h; 11228 ptr += hdr_len; 11229 } 11230 } 11231 if (nexthdr == IPPROTO_ROUTING) { 11232 ip6_rthdr_t *rthdr; 11233 uint_t hdr_len; 11234 11235 rthdr = (ip6_rthdr_t *)ptr; 11236 nexthdr = rthdr->ip6r_nxt; 11237 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11238 - (uint8_t *)ip6h; 11239 hdr_len = 8 * (rthdr->ip6r_len + 1); 11240 ptr += hdr_len; 11241 } 11242 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11243 11244 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11245 sizeof (ip6_frag_t)) & ~7; 11246 11247 /* Check if we can use MDT to send out the frags. */ 11248 ASSERT(!IRE_IS_LOCAL(ire)); 11249 if (ip_multidata_outbound && reachable == 0 && 11250 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11251 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11252 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11253 nexthdr, prev_nexthdr_offset); 11254 return; 11255 } 11256 11257 /* 11258 * Allocate an mblk with enough room for the link-layer 11259 * header, the unfragmentable part of the datagram, and the 11260 * fragment header. This (or a copy) will be used as the 11261 * first mblk for each fragment we send. 11262 */ 11263 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11264 BPRI_HI); 11265 if (hmp == NULL) { 11266 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11267 freemsg(mp); 11268 return; 11269 } 11270 hmp->b_rptr += ip_wroff_extra; 11271 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11272 11273 fip6h = (ip6_t *)hmp->b_rptr; 11274 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11275 11276 bcopy(ip6h, fip6h, unfragmentable_len); 11277 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11278 11279 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11280 11281 fraghdr->ip6f_nxt = nexthdr; 11282 fraghdr->ip6f_reserved = 0; 11283 fraghdr->ip6f_offlg = 0; 11284 fraghdr->ip6f_ident = htonl(ident); 11285 11286 /* 11287 * len is the total length of the fragmentable data in this 11288 * datagram. For each fragment sent, we will decrement len 11289 * by the amount of fragmentable data sent in that fragment 11290 * until len reaches zero. 
11291 */ 11292 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11293 11294 /* 11295 * Move read ptr past unfragmentable portion, we don't want this part 11296 * of the data in our fragments. 11297 */ 11298 mp->b_rptr += unfragmentable_len; 11299 11300 while (len != 0) { 11301 mlen = MIN(len, max_chunk); 11302 len -= mlen; 11303 if (len != 0) { 11304 /* Not last */ 11305 hmp0 = copyb(hmp); 11306 if (hmp0 == NULL) { 11307 freeb(hmp); 11308 freemsg(mp); 11309 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11310 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11311 return; 11312 } 11313 off_flags = IP6F_MORE_FRAG; 11314 } else { 11315 /* Last fragment */ 11316 hmp0 = hmp; 11317 hmp = NULL; 11318 off_flags = 0; 11319 } 11320 fip6h = (ip6_t *)(hmp0->b_rptr); 11321 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11322 11323 fip6h->ip6_plen = htons((uint16_t)(mlen + 11324 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11325 /* 11326 * Note: Optimization alert. 11327 * In IPv6 (and IPv4) protocol header, Fragment Offset 11328 * ("offset") is 13 bits wide and in 8-octet units. 11329 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11330 * it occupies the most significant 13 bits. 11331 * (least significant 13 bits in IPv4). 11332 * We do not do any shifts here. Not shifting is same effect 11333 * as taking offset value in octet units, dividing by 8 and 11334 * then shifting 3 bits left to line it up in place in proper 11335 * place protocol header. 
11336 */ 11337 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11338 11339 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11340 /* mp has already been freed by ip_carve_mp() */ 11341 if (hmp != NULL) 11342 freeb(hmp); 11343 freeb(hmp0); 11344 ip1dbg(("ip_carve_mp: failed\n")); 11345 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11346 return; 11347 } 11348 hmp0->b_cont = dmp; 11349 /* Get the priority marking, if any */ 11350 hmp0->b_band = dmp->b_band; 11351 UPDATE_OB_PKT_COUNT(ire); 11352 ire->ire_last_used_time = lbolt; 11353 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11354 caller, NULL); 11355 reachable = 0; /* No need to redo state machine in loop */ 11356 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11357 offset += mlen; 11358 } 11359 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11360 } 11361 11362 /* 11363 * Determine if the ill and multicast aspects of that packets 11364 * "matches" the conn. 11365 */ 11366 boolean_t 11367 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11368 zoneid_t zoneid) 11369 { 11370 ill_t *in_ill; 11371 boolean_t wantpacket = B_TRUE; 11372 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11373 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11374 11375 /* 11376 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11377 * unicast and multicast reception to conn_incoming_ill. 11378 * conn_wantpacket_v6 is called both for unicast and 11379 * multicast. 11380 * 11381 * 1) The unicast copy of the packet can come anywhere in 11382 * the ill group if it is part of the group. Thus, we 11383 * need to check to see whether the ill group matches 11384 * if in_ill is part of a group. 11385 * 11386 * 2) ip_rput does not suppress duplicate multicast packets. 
11387 * If there are two interfaces in a ill group and we have 11388 * 2 applications (conns) joined a multicast group G on 11389 * both the interfaces, ilm_lookup_ill filter in ip_rput 11390 * will give us two packets because we join G on both the 11391 * interfaces rather than nominating just one interface 11392 * for receiving multicast like broadcast above. So, 11393 * we have to call ilg_lookup_ill to filter out duplicate 11394 * copies, if ill is part of a group, to supress duplicates. 11395 */ 11396 in_ill = connp->conn_incoming_ill; 11397 if (in_ill != NULL) { 11398 mutex_enter(&connp->conn_lock); 11399 in_ill = connp->conn_incoming_ill; 11400 mutex_enter(&ill->ill_lock); 11401 /* 11402 * No IPMP, and the packet did not arrive on conn_incoming_ill 11403 * OR, IPMP in use and the packet arrived on an IPMP group 11404 * different from the conn_incoming_ill's IPMP group. 11405 * Reject the packet. 11406 */ 11407 if ((in_ill->ill_group == NULL && in_ill != ill) || 11408 (in_ill->ill_group != NULL && 11409 in_ill->ill_group != ill->ill_group)) { 11410 wantpacket = B_FALSE; 11411 } 11412 mutex_exit(&ill->ill_lock); 11413 mutex_exit(&connp->conn_lock); 11414 if (!wantpacket) 11415 return (B_FALSE); 11416 } 11417 11418 if (connp->conn_multi_router) 11419 return (B_TRUE); 11420 11421 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11422 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11423 /* 11424 * Unicast case: we match the conn only if it's in the specified 11425 * zone. 11426 */ 11427 return (connp->conn_zoneid == zoneid); 11428 } 11429 11430 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11431 connp->conn_zoneid == zoneid) { 11432 /* 11433 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11434 * disabled, therefore we don't dispatch the multicast packet to 11435 * the sending zone. 
11436 */ 11437 return (B_FALSE); 11438 } 11439 11440 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11441 connp->conn_zoneid != zoneid) { 11442 /* 11443 * Multicast packet on the loopback interface: we only match 11444 * conns who joined the group in the specified zone. 11445 */ 11446 return (B_FALSE); 11447 } 11448 11449 mutex_enter(&connp->conn_lock); 11450 wantpacket = 11451 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11452 mutex_exit(&connp->conn_lock); 11453 11454 return (wantpacket); 11455 } 11456 11457 11458 /* 11459 * Transmit a packet and update any NUD state based on the flags 11460 * XXX need to "recover" any ip6i_t when doing putq! 11461 * 11462 * NOTE : This function does not ire_refrele the ire passed in as the 11463 * argument. 11464 */ 11465 void 11466 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11467 int caller, ipsec_out_t *io) 11468 { 11469 mblk_t *mp1; 11470 nce_t *nce = ire->ire_nce; 11471 ill_t *ill; 11472 uint64_t delta; 11473 ip6_t *ip6h; 11474 queue_t *stq = ire->ire_stq; 11475 ire_t *ire1 = NULL; 11476 ire_t *save_ire = ire; 11477 boolean_t multirt_send = B_FALSE; 11478 mblk_t *next_mp = NULL; 11479 11480 ip6h = (ip6_t *)mp->b_rptr; 11481 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11482 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11483 ASSERT(nce != NULL); 11484 ASSERT(mp->b_datap->db_type == M_DATA); 11485 ASSERT(stq != NULL); 11486 11487 ill = ire_to_ill(ire); 11488 if (!ill) { 11489 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11490 freemsg(mp); 11491 return; 11492 } 11493 11494 /* 11495 * If a packet is to be sent out an interface that is a 6to4 11496 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11497 * destination, must be checked to have a 6to4 prefix 11498 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11499 * address configured on the sending interface. 
Otherwise, 11500 * the packet was delivered to this interface in error and the 11501 * packet must be dropped. 11502 */ 11503 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11504 ipif_t *ipif = ill->ill_ipif; 11505 11506 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11507 &ip6h->ip6_dst)) { 11508 if (ip_debug > 2) { 11509 /* ip1dbg */ 11510 pr_addr_dbg("ip_xmit_v6: attempting to " 11511 "send 6to4 addressed IPv6 " 11512 "destination (%s) out the wrong " 11513 "interface.\n", AF_INET6, 11514 &ip6h->ip6_dst); 11515 } 11516 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11517 freemsg(mp); 11518 return; 11519 } 11520 } 11521 11522 /* Flow-control check has been done in ip_wput_ire_v6 */ 11523 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11524 caller == IP_WSRV || canput(stq->q_next)) { 11525 uint32_t ill_index; 11526 11527 /* 11528 * In most cases, the emission loop below is entered only 11529 * once. Only in the case where the ire holds the 11530 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11531 * flagged ires in the bucket, and send the packet 11532 * through all crossed RTF_MULTIRT routes. 11533 */ 11534 if (ire->ire_flags & RTF_MULTIRT) { 11535 /* 11536 * Multirouting case. The bucket where ire is stored 11537 * probably holds other RTF_MULTIRT flagged ires 11538 * to the destination. In this call to ip_xmit_v6, 11539 * we attempt to send the packet through all 11540 * those ires. Thus, we first ensure that ire is the 11541 * first RTF_MULTIRT ire in the bucket, 11542 * before walking the ire list. 11543 */ 11544 ire_t *first_ire; 11545 irb_t *irb = ire->ire_bucket; 11546 ASSERT(irb != NULL); 11547 multirt_send = B_TRUE; 11548 11549 /* Make sure we do not omit any multiroute ire. 
*/ 11550 IRB_REFHOLD(irb); 11551 for (first_ire = irb->irb_ire; 11552 first_ire != NULL; 11553 first_ire = first_ire->ire_next) { 11554 if ((first_ire->ire_flags & RTF_MULTIRT) && 11555 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11556 &ire->ire_addr_v6)) && 11557 !(first_ire->ire_marks & 11558 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11559 break; 11560 } 11561 11562 if ((first_ire != NULL) && (first_ire != ire)) { 11563 IRE_REFHOLD(first_ire); 11564 /* ire will be released by the caller */ 11565 ire = first_ire; 11566 nce = ire->ire_nce; 11567 stq = ire->ire_stq; 11568 ill = ire_to_ill(ire); 11569 } 11570 IRB_REFRELE(irb); 11571 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11572 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11573 ILL_MDT_USABLE(ill)) { 11574 /* 11575 * This tcp connection was marked as MDT-capable, but 11576 * it has been turned off due changes in the interface. 11577 * Now that the interface support is back, turn it on 11578 * by notifying tcp. We don't directly modify tcp_mdt, 11579 * since we leave all the details to the tcp code that 11580 * knows better. 11581 */ 11582 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11583 11584 if (mdimp == NULL) { 11585 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11586 "connp %p (ENOMEM)\n", (void *)connp)); 11587 } else { 11588 CONN_INC_REF(connp); 11589 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11590 connp, SQTAG_TCP_INPUT_MCTL); 11591 } 11592 } 11593 11594 do { 11595 boolean_t qos_done = B_FALSE; 11596 11597 if (multirt_send) { 11598 irb_t *irb; 11599 /* 11600 * We are in a multiple send case, need to get 11601 * the next ire and make a duplicate of the 11602 * packet. ire1 holds here the next ire to 11603 * process in the bucket. If multirouting is 11604 * expected, any non-RTF_MULTIRT ire that has 11605 * the right destination address is ignored. 
11606 */ 11607 irb = ire->ire_bucket; 11608 ASSERT(irb != NULL); 11609 11610 IRB_REFHOLD(irb); 11611 for (ire1 = ire->ire_next; 11612 ire1 != NULL; 11613 ire1 = ire1->ire_next) { 11614 if (!(ire1->ire_flags & RTF_MULTIRT)) 11615 continue; 11616 if (!IN6_ARE_ADDR_EQUAL( 11617 &ire1->ire_addr_v6, 11618 &ire->ire_addr_v6)) 11619 continue; 11620 if (ire1->ire_marks & 11621 (IRE_MARK_CONDEMNED| 11622 IRE_MARK_HIDDEN)) 11623 continue; 11624 11625 /* Got one */ 11626 if (ire1 != save_ire) { 11627 IRE_REFHOLD(ire1); 11628 } 11629 break; 11630 } 11631 IRB_REFRELE(irb); 11632 11633 if (ire1 != NULL) { 11634 next_mp = copyb(mp); 11635 if ((next_mp == NULL) || 11636 ((mp->b_cont != NULL) && 11637 ((next_mp->b_cont = 11638 dupmsg(mp->b_cont)) == 11639 NULL))) { 11640 freemsg(next_mp); 11641 next_mp = NULL; 11642 ire_refrele(ire1); 11643 ire1 = NULL; 11644 } 11645 } 11646 11647 /* Last multiroute ire; don't loop anymore. */ 11648 if (ire1 == NULL) { 11649 multirt_send = B_FALSE; 11650 } 11651 } 11652 11653 ill_index = 11654 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11655 11656 /* 11657 * Check for fastpath, we need to hold nce_lock to 11658 * prevent fastpath update from chaining nce_fp_mp. 11659 */ 11660 mutex_enter(&nce->nce_lock); 11661 if ((mp1 = nce->nce_fp_mp) != NULL) { 11662 uint32_t hlen; 11663 uchar_t *rptr; 11664 11665 /* Initiate IPPF processing */ 11666 if (IP6_OUT_IPP(flags)) { 11667 /* 11668 * We have to release the nce lock since 11669 * IPPF components use 11670 * ill_lookup_on_ifindex(), 11671 * which takes the ill_g_lock and the 11672 * ill_lock locks. 
11673 */ 11674 mutex_exit(&nce->nce_lock); 11675 ip_process(IPP_LOCAL_OUT, &mp, 11676 ill_index); 11677 if (mp == NULL) { 11678 BUMP_MIB( 11679 ill->ill_ip6_mib, 11680 ipv6OutDiscards); 11681 if (next_mp != NULL) 11682 freemsg(next_mp); 11683 if (ire != save_ire) { 11684 ire_refrele(ire); 11685 } 11686 return; 11687 } 11688 mutex_enter(&nce->nce_lock); 11689 if ((mp1 = nce->nce_fp_mp) == NULL) { 11690 /* 11691 * Probably disappeared during 11692 * IPQoS processing. 11693 */ 11694 qos_done = B_TRUE; 11695 goto prepend_unitdata; 11696 } 11697 } 11698 hlen = MBLKL(mp1); 11699 rptr = mp->b_rptr - hlen; 11700 /* 11701 * make sure there is room for the fastpath 11702 * datalink header 11703 */ 11704 if (rptr < mp->b_datap->db_base) { 11705 mp1 = copyb(mp1); 11706 if (mp1 == NULL) { 11707 mutex_exit(&nce->nce_lock); 11708 BUMP_MIB(ill->ill_ip6_mib, 11709 ipv6OutDiscards); 11710 freemsg(mp); 11711 if (next_mp != NULL) 11712 freemsg(next_mp); 11713 if (ire != save_ire) { 11714 ire_refrele(ire); 11715 } 11716 return; 11717 } 11718 mp1->b_cont = mp; 11719 11720 /* Get the priority marking, if any */ 11721 mp1->b_band = mp->b_band; 11722 mp = mp1; 11723 } else { 11724 mp->b_rptr = rptr; 11725 /* 11726 * fastpath - pre-pend datalink 11727 * header 11728 */ 11729 bcopy(mp1->b_rptr, rptr, hlen); 11730 } 11731 11732 mutex_exit(&nce->nce_lock); 11733 11734 } else { 11735 prepend_unitdata: 11736 mutex_exit(&nce->nce_lock); 11737 mp1 = nce->nce_res_mp; 11738 if (mp1 == NULL) { 11739 ip1dbg(("ip_xmit_v6: No resolution " 11740 "block ire = %p\n", (void *)ire)); 11741 freemsg(mp); 11742 if (next_mp != NULL) 11743 freemsg(next_mp); 11744 if (ire != save_ire) { 11745 ire_refrele(ire); 11746 } 11747 return; 11748 } 11749 /* 11750 * Prepend the DL_UNITDATA_REQ. 
11751 */ 11752 mp1 = copyb(mp1); 11753 if (mp1 == NULL) { 11754 BUMP_MIB(ill->ill_ip6_mib, 11755 ipv6OutDiscards); 11756 freemsg(mp); 11757 if (next_mp != NULL) 11758 freemsg(next_mp); 11759 if (ire != save_ire) { 11760 ire_refrele(ire); 11761 } 11762 return; 11763 } 11764 mp1->b_cont = mp; 11765 mp = mp1; 11766 /* 11767 * Initiate IPPF processing, if it is 11768 * already done, bypass. 11769 */ 11770 if (!qos_done && IP6_OUT_IPP(flags)) { 11771 ip_process(IPP_LOCAL_OUT, &mp, 11772 ill_index); 11773 if (mp == NULL) { 11774 BUMP_MIB(ill->ill_ip6_mib, 11775 ipv6OutDiscards); 11776 if (next_mp != NULL) 11777 freemsg(next_mp); 11778 if (ire != save_ire) { 11779 ire_refrele(ire); 11780 } 11781 return; 11782 } 11783 } 11784 } 11785 11786 /* 11787 * Update ire counters; for save_ire, this has been 11788 * done by the caller. 11789 */ 11790 if (ire != save_ire) { 11791 UPDATE_OB_PKT_COUNT(ire); 11792 ire->ire_last_used_time = lbolt; 11793 } 11794 11795 /* 11796 * Send it down. XXX Do we want to flow control AH/ESP 11797 * packets that carry TCP payloads? We don't flow 11798 * control TCP packets, but we should also not 11799 * flow-control TCP packets that have been protected. 11800 * We don't have an easy way to find out if an AH/ESP 11801 * packet was originally TCP or not currently. 11802 */ 11803 if (io == NULL) { 11804 putnext(stq, mp); 11805 } else { 11806 /* 11807 * Safety Pup says: make sure this is 11808 * going to the right interface! 11809 */ 11810 if (io->ipsec_out_capab_ill_index != 11811 ill_index) { 11812 /* IPsec kstats: bump lose counter */ 11813 freemsg(mp1); 11814 } else { 11815 ipsec_hw_putnext(stq, mp); 11816 } 11817 } 11818 11819 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11820 if (ire != save_ire) { 11821 ire_refrele(ire); 11822 } 11823 if (multirt_send) { 11824 ASSERT(ire1 != NULL); 11825 /* 11826 * Proceed with the next RTF_MULTIRT 11827 * ire, also set up the send-to queue 11828 * accordingly. 
11829 */ 11830 ire = ire1; 11831 ire1 = NULL; 11832 stq = ire->ire_stq; 11833 nce = ire->ire_nce; 11834 ill = ire_to_ill(ire); 11835 mp = next_mp; 11836 next_mp = NULL; 11837 continue; 11838 } 11839 ASSERT(next_mp == NULL); 11840 ASSERT(ire1 == NULL); 11841 return; 11842 } 11843 11844 ASSERT(nce->nce_state != ND_INCOMPLETE); 11845 11846 /* 11847 * Check for upper layer advice 11848 */ 11849 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11850 /* 11851 * It should be o.k. to check the state without 11852 * a lock here, at most we lose an advice. 11853 */ 11854 nce->nce_last = TICK_TO_MSEC(lbolt64); 11855 if (nce->nce_state != ND_REACHABLE) { 11856 11857 mutex_enter(&nce->nce_lock); 11858 nce->nce_state = ND_REACHABLE; 11859 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 11860 mutex_exit(&nce->nce_lock); 11861 (void) untimeout(nce->nce_timeout_id); 11862 if (ip_debug > 2) { 11863 /* ip1dbg */ 11864 pr_addr_dbg("ip_xmit_v6: state" 11865 " for %s changed to" 11866 " REACHABLE\n", AF_INET6, 11867 &ire->ire_addr_v6); 11868 } 11869 } 11870 if (ire != save_ire) { 11871 ire_refrele(ire); 11872 } 11873 if (multirt_send) { 11874 ASSERT(ire1 != NULL); 11875 /* 11876 * Proceed with the next RTF_MULTIRT 11877 * ire, also set up the send-to queue 11878 * accordingly. 11879 */ 11880 ire = ire1; 11881 ire1 = NULL; 11882 stq = ire->ire_stq; 11883 nce = ire->ire_nce; 11884 ill = ire_to_ill(ire); 11885 mp = next_mp; 11886 next_mp = NULL; 11887 continue; 11888 } 11889 ASSERT(next_mp == NULL); 11890 ASSERT(ire1 == NULL); 11891 return; 11892 } 11893 11894 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 11895 ip1dbg(("ip_xmit_v6: delta = %" PRId64 11896 " ill_reachable_time = %d \n", delta, 11897 ill->ill_reachable_time)); 11898 if (delta > (uint64_t)ill->ill_reachable_time) { 11899 nce = ire->ire_nce; 11900 mutex_enter(&nce->nce_lock); 11901 switch (nce->nce_state) { 11902 case ND_REACHABLE: 11903 case ND_STALE: 11904 /* 11905 * ND_REACHABLE is identical to 11906 * ND_STALE in this specific case. 
If 11907 * reachable time has expired for this 11908 * neighbor (delta is greater than 11909 * reachable time), conceptually, the 11910 * neighbor cache is no longer in 11911 * REACHABLE state, but already in 11912 * STALE state. So the correct 11913 * transition here is to ND_DELAY. 11914 */ 11915 nce->nce_state = ND_DELAY; 11916 mutex_exit(&nce->nce_lock); 11917 NDP_RESTART_TIMER(nce, 11918 delay_first_probe_time); 11919 if (ip_debug > 3) { 11920 /* ip2dbg */ 11921 pr_addr_dbg("ip_xmit_v6: state" 11922 " for %s changed to" 11923 " DELAY\n", AF_INET6, 11924 &ire->ire_addr_v6); 11925 } 11926 break; 11927 case ND_DELAY: 11928 case ND_PROBE: 11929 mutex_exit(&nce->nce_lock); 11930 /* Timers have already started */ 11931 break; 11932 case ND_UNREACHABLE: 11933 /* 11934 * ndp timer has detected that this nce 11935 * is unreachable and initiated deleting 11936 * this nce and all its associated IREs. 11937 * This is a race where we found the 11938 * ire before it was deleted and have 11939 * just sent out a packet using this 11940 * unreachable nce. 11941 */ 11942 mutex_exit(&nce->nce_lock); 11943 break; 11944 default: 11945 ASSERT(0); 11946 } 11947 } 11948 11949 if (multirt_send) { 11950 ASSERT(ire1 != NULL); 11951 /* 11952 * Proceed with the next RTF_MULTIRT ire, 11953 * Also set up the send-to queue accordingly. 11954 */ 11955 if (ire != save_ire) { 11956 ire_refrele(ire); 11957 } 11958 ire = ire1; 11959 ire1 = NULL; 11960 stq = ire->ire_stq; 11961 nce = ire->ire_nce; 11962 ill = ire_to_ill(ire); 11963 mp = next_mp; 11964 next_mp = NULL; 11965 } 11966 } while (multirt_send); 11967 /* 11968 * In the multirouting case, release the last ire used for 11969 * emission. save_ire will be released by the caller. 11970 */ 11971 if (ire != save_ire) { 11972 ire_refrele(ire); 11973 } 11974 } else { 11975 /* 11976 * Queue packet if we have an conn to give back pressure. 
11977 * We can't queue packets intended for hardware acceleration 11978 * since we've tossed that state already. If the packet is 11979 * being fed back from ire_send_v6, we don't know the 11980 * position in the queue to enqueue the packet and we discard 11981 * the packet. 11982 */ 11983 if (ip_output_queue && (connp != NULL) && (io == NULL) && 11984 (caller != IRE_SEND)) { 11985 if (caller == IP_WSRV) { 11986 connp->conn_did_putbq = 1; 11987 (void) putbq(connp->conn_wq, mp); 11988 conn_drain_insert(connp); 11989 /* 11990 * caller == IP_WSRV implies we are 11991 * the service thread, and the 11992 * queue is already noenabled. 11993 * The check for canput and 11994 * the putbq is not atomic. 11995 * So we need to check again. 11996 */ 11997 if (canput(stq->q_next)) 11998 connp->conn_did_putbq = 0; 11999 } else { 12000 (void) putq(connp->conn_wq, mp); 12001 } 12002 return; 12003 } 12004 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12005 freemsg(mp); 12006 return; 12007 } 12008 } 12009 12010 /* 12011 * pr_addr_dbg function provides the needed buffer space to call 12012 * inet_ntop() function's 3rd argument. This function should be 12013 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12014 * stack buffer space in it's own stack frame. This function uses 12015 * a buffer from it's own stack and prints the information. 12016 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12017 * 12018 * Note: This function can call inet_ntop() once. 12019 */ 12020 void 12021 pr_addr_dbg(char *fmt1, int af, const void *addr) 12022 { 12023 char buf[INET6_ADDRSTRLEN]; 12024 12025 if (fmt1 == NULL) { 12026 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12027 return; 12028 } 12029 12030 /* 12031 * This does not compare debug level and just prints 12032 * out. Thus it is the responsibility of the caller 12033 * to check the appropriate debug-level before calling 12034 * this function. 
12035 */ 12036 if (ip_debug > 0) { 12037 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12038 } 12039 12040 12041 } 12042 12043 12044 /* 12045 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12046 * if needed and extension headers) that will be needed based on the 12047 * ip6_pkt_t structure passed by the caller. 12048 * 12049 * The returned length does not include the length of the upper level 12050 * protocol (ULP) header. 12051 */ 12052 int 12053 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12054 { 12055 int len; 12056 12057 len = IPV6_HDR_LEN; 12058 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12059 len += sizeof (ip6i_t); 12060 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12061 ASSERT(ipp->ipp_hopoptslen != 0); 12062 len += ipp->ipp_hopoptslen; 12063 } 12064 if (ipp->ipp_fields & IPPF_RTHDR) { 12065 ASSERT(ipp->ipp_rthdrlen != 0); 12066 len += ipp->ipp_rthdrlen; 12067 } 12068 /* 12069 * En-route destination options 12070 * Only do them if there's a routing header as well 12071 */ 12072 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12073 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12074 ASSERT(ipp->ipp_rtdstoptslen != 0); 12075 len += ipp->ipp_rtdstoptslen; 12076 } 12077 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12078 ASSERT(ipp->ipp_dstoptslen != 0); 12079 len += ipp->ipp_dstoptslen; 12080 } 12081 return (len); 12082 } 12083 12084 /* 12085 * All-purpose routine to build a header chain of an IPv6 header 12086 * followed by any required extension headers and a proto header, 12087 * preceeded (where necessary) by an ip6i_t private header. 12088 * 12089 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12090 * will be filled in appropriately. 12091 * Thus the caller must fill in the rest of the IPv6 header, such as 12092 * traffic class/flowid, source address (if not set here), hoplimit (if not 12093 * set here) and destination address. 12094 * 12095 * The extension headers and ip6i_t header will all be fully filled in. 
12096 */ 12097 void 12098 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12099 ip6_pkt_t *ipp, uint8_t protocol) 12100 { 12101 uint8_t *nxthdr_ptr; 12102 uint8_t *cp; 12103 ip6i_t *ip6i; 12104 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12105 12106 /* 12107 * If sending private ip6i_t header down (checksum info, nexthop, 12108 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12109 * then fill it in. (The checksum info will be filled in by icmp). 12110 */ 12111 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12112 ip6i = (ip6i_t *)ip6h; 12113 ip6h = (ip6_t *)&ip6i[1]; 12114 12115 ip6i->ip6i_flags = 0; 12116 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12117 if (ipp->ipp_fields & IPPF_IFINDEX || 12118 ipp->ipp_fields & IPPF_SCOPE_ID) { 12119 ASSERT(ipp->ipp_ifindex != 0); 12120 ip6i->ip6i_flags |= IP6I_IFINDEX; 12121 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12122 } 12123 if (ipp->ipp_fields & IPPF_ADDR) { 12124 /* 12125 * Enable per-packet source address verification if 12126 * IPV6_PKTINFO specified the source address. 12127 * ip6_src is set in the transport's _wput function. 12128 */ 12129 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12130 &ipp->ipp_addr)); 12131 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12132 } 12133 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12134 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12135 /* 12136 * We need to set this flag so that IP doesn't 12137 * rewrite the IPv6 header's hoplimit with the 12138 * current default value. 
12139 */ 12140 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12141 } 12142 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12143 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12144 &ipp->ipp_nexthop)); 12145 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12146 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12147 } 12148 /* 12149 * tell IP this is an ip6i_t private header 12150 */ 12151 ip6i->ip6i_nxt = IPPROTO_RAW; 12152 } 12153 /* Initialize IPv6 header */ 12154 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12155 if (ipp->ipp_fields & IPPF_TCLASS) { 12156 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12157 (ipp->ipp_tclass << 20); 12158 } 12159 if (ipp->ipp_fields & IPPF_ADDR) 12160 ip6h->ip6_src = ipp->ipp_addr; 12161 12162 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12163 cp = (uint8_t *)&ip6h[1]; 12164 /* 12165 * Here's where we have to start stringing together 12166 * any extension headers in the right order: 12167 * Hop-by-hop, destination, routing, and final destination opts. 12168 */ 12169 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12170 /* Hop-by-hop options */ 12171 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12172 12173 *nxthdr_ptr = IPPROTO_HOPOPTS; 12174 nxthdr_ptr = &hbh->ip6h_nxt; 12175 12176 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12177 cp += ipp->ipp_hopoptslen; 12178 } 12179 /* 12180 * En-route destination options 12181 * Only do them if there's a routing header as well 12182 */ 12183 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12184 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12185 ip6_dest_t *dst = (ip6_dest_t *)cp; 12186 12187 *nxthdr_ptr = IPPROTO_DSTOPTS; 12188 nxthdr_ptr = &dst->ip6d_nxt; 12189 12190 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12191 cp += ipp->ipp_rtdstoptslen; 12192 } 12193 /* 12194 * Routing header next 12195 */ 12196 if (ipp->ipp_fields & IPPF_RTHDR) { 12197 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12198 12199 *nxthdr_ptr = IPPROTO_ROUTING; 12200 nxthdr_ptr = &rt->ip6r_nxt; 12201 12202 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12203 cp += ipp->ipp_rthdrlen; 12204 } 
12205 /* 12206 * Do ultimate destination options 12207 */ 12208 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12209 ip6_dest_t *dest = (ip6_dest_t *)cp; 12210 12211 *nxthdr_ptr = IPPROTO_DSTOPTS; 12212 nxthdr_ptr = &dest->ip6d_nxt; 12213 12214 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12215 cp += ipp->ipp_dstoptslen; 12216 } 12217 /* 12218 * Now set the last header pointer to the proto passed in 12219 */ 12220 *nxthdr_ptr = protocol; 12221 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12222 } 12223 12224 /* 12225 * Return a pointer to the routing header extension header 12226 * in the IPv6 header(s) chain passed in. 12227 * If none found, return NULL 12228 * Assumes that all extension headers are in same mblk as the v6 header 12229 */ 12230 ip6_rthdr_t * 12231 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12232 { 12233 ip6_dest_t *desthdr; 12234 ip6_frag_t *fraghdr; 12235 uint_t hdrlen; 12236 uint8_t nexthdr; 12237 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12238 12239 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12240 return ((ip6_rthdr_t *)ptr); 12241 12242 /* 12243 * The routing header will precede all extension headers 12244 * other than the hop-by-hop and destination options 12245 * extension headers, so if we see anything other than those, 12246 * we're done and didn't find it. 12247 * We could see a destination options header alone but no 12248 * routing header, in which case we'll return NULL as soon as 12249 * we see anything after that. 12250 * Hop-by-hop and destination option headers are identical, 12251 * so we can use either one we want as a template. 12252 */ 12253 nexthdr = ip6h->ip6_nxt; 12254 while (ptr < endptr) { 12255 /* Is there enough left for len + nexthdr? 
*/ 12256 if (ptr + MIN_EHDR_LEN > endptr) 12257 return (NULL); 12258 12259 switch (nexthdr) { 12260 case IPPROTO_HOPOPTS: 12261 case IPPROTO_DSTOPTS: 12262 /* Assumes the headers are identical for hbh and dst */ 12263 desthdr = (ip6_dest_t *)ptr; 12264 hdrlen = 8 * (desthdr->ip6d_len + 1); 12265 nexthdr = desthdr->ip6d_nxt; 12266 break; 12267 12268 case IPPROTO_ROUTING: 12269 return ((ip6_rthdr_t *)ptr); 12270 12271 case IPPROTO_FRAGMENT: 12272 fraghdr = (ip6_frag_t *)ptr; 12273 hdrlen = sizeof (ip6_frag_t); 12274 nexthdr = fraghdr->ip6f_nxt; 12275 break; 12276 12277 default: 12278 return (NULL); 12279 } 12280 ptr += hdrlen; 12281 } 12282 return (NULL); 12283 } 12284 12285 /* 12286 * Called for source-routed packets originating on this node. 12287 * Manipulates the original routing header by moving every entry up 12288 * one slot, placing the first entry in the v6 header's v6_dst field, 12289 * and placing the ultimate destination in the routing header's last 12290 * slot. 12291 * 12292 * Returns the checksum diference between the ultimate destination 12293 * (last hop in the routing header when the packet is sent) and 12294 * the first hop (ip6_dst when the packet is sent) 12295 */ 12296 uint32_t 12297 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12298 { 12299 uint_t numaddr; 12300 uint_t i; 12301 in6_addr_t *addrptr; 12302 in6_addr_t tmp; 12303 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12304 uint32_t cksm; 12305 uint32_t addrsum = 0; 12306 uint16_t *ptr; 12307 12308 /* 12309 * Perform any processing needed for source routing. 12310 * We know that all extension headers will be in the same mblk 12311 * as the IPv6 header. 12312 */ 12313 12314 /* 12315 * If no segments left in header, or the header length field is zero, 12316 * don't move hop addresses around; 12317 * Checksum difference is zero. 
12318 */ 12319 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12320 return (0); 12321 12322 ptr = (uint16_t *)&ip6h->ip6_dst; 12323 cksm = 0; 12324 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12325 cksm += ptr[i]; 12326 } 12327 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12328 12329 /* 12330 * Here's where the fun begins - we have to 12331 * move all addresses up one spot, take the 12332 * first hop and make it our first ip6_dst, 12333 * and place the ultimate destination in the 12334 * newly-opened last slot. 12335 */ 12336 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12337 numaddr = rthdr->ip6r0_len / 2; 12338 tmp = *addrptr; 12339 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12340 *addrptr = addrptr[1]; 12341 } 12342 *addrptr = ip6h->ip6_dst; 12343 ip6h->ip6_dst = tmp; 12344 12345 /* 12346 * From the checksummed ultimate destination subtract the checksummed 12347 * current ip6_dst (the first hop address). Return that number. 12348 * (In the v4 case, the second part of this is done in each routine 12349 * that calls ip_massage_options(). We do it all in this one place 12350 * for v6). 12351 */ 12352 ptr = (uint16_t *)&ip6h->ip6_dst; 12353 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12354 addrsum += ptr[i]; 12355 } 12356 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12357 if ((int)cksm < 0) 12358 cksm--; 12359 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12360 12361 return (cksm); 12362 } 12363 12364 /* 12365 * See if the upper-level protocol indicated by 'proto' will be able 12366 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12367 * ICMP6_PACKET_TOO_BIG (IPv6). 12368 */ 12369 static boolean_t 12370 ip_ulp_cando_pkt2big(int proto) 12371 { 12372 /* 12373 * For now, only TCP can handle this. 12374 * Tunnels may be able to also, but since tun isn't working over 12375 * IPv6 yet, don't worry about it for now. 
12376 */ 12377 return (proto == IPPROTO_TCP); 12378 } 12379 12380 12381 /* 12382 * Propagate a multicast group membership operation (join/leave) (*fn) on 12383 * all interfaces crossed by the related multirt routes. 12384 * The call is considered successful if the operation succeeds 12385 * on at least one interface. 12386 * The function is called if the destination address in the packet to send 12387 * is multirouted. 12388 */ 12389 int 12390 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12391 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12392 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12393 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12394 { 12395 ire_t *ire_gw; 12396 irb_t *irb; 12397 int index, error = 0; 12398 opt_restart_t *or; 12399 12400 irb = ire->ire_bucket; 12401 ASSERT(irb != NULL); 12402 12403 ASSERT(DB_TYPE(first_mp) == M_CTL); 12404 or = (opt_restart_t *)first_mp->b_rptr; 12405 12406 IRB_REFHOLD(irb); 12407 for (; ire != NULL; ire = ire->ire_next) { 12408 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12409 continue; 12410 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12411 continue; 12412 12413 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12414 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, 12415 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12416 /* No resolver exists for the gateway; skip this ire. */ 12417 if (ire_gw == NULL) 12418 continue; 12419 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12420 /* 12421 * A resolver exists: we can get the interface on which we have 12422 * to apply the operation. 
12423 */ 12424 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12425 first_mp); 12426 if (error == 0) 12427 or->or_private = CGTP_MCAST_SUCCESS; 12428 12429 if (ip_debug > 0) { 12430 ulong_t off; 12431 char *ksym; 12432 12433 ksym = kobj_getsymname((uintptr_t)fn, &off); 12434 ip2dbg(("ip_multirt_apply_membership_v6: " 12435 "called %s, multirt group 0x%08x via itf 0x%08x, " 12436 "error %d [success %u]\n", 12437 ksym ? ksym : "?", 12438 ntohl(V4_PART_OF_V6((*v6grp))), 12439 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12440 error, or->or_private)); 12441 } 12442 12443 ire_refrele(ire_gw); 12444 if (error == EINPROGRESS) { 12445 IRB_REFRELE(irb); 12446 return (error); 12447 } 12448 } 12449 IRB_REFRELE(irb); 12450 /* 12451 * Consider the call as successful if we succeeded on at least 12452 * one interface. Otherwise, return the last encountered error. 12453 */ 12454 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12455 } 12456 12457 void 12458 ip6_kstat_init(void) 12459 { 12460 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12461 "net", KSTAT_TYPE_NAMED, 12462 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12463 KSTAT_FLAG_VIRTUAL)) != NULL) { 12464 ip6_kstat->ks_data = &ip6_statistics; 12465 kstat_install(ip6_kstat); 12466 } 12467 } 12468 12469 /* 12470 * The following two functions set and get the value for the 12471 * IPV6_SRC_PREFERENCES socket option. 12472 */ 12473 int 12474 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12475 { 12476 /* 12477 * We only support preferences that are covered by 12478 * IPV6_PREFER_SRC_MASK. 12479 */ 12480 if (prefs & ~IPV6_PREFER_SRC_MASK) 12481 return (EINVAL); 12482 12483 /* 12484 * Look for conflicting preferences or default preferences. If 12485 * both bits of a related pair are clear, the application wants the 12486 * system's default value for that pair. Both bits in a pair can't 12487 * be set. 
12488 */ 12489 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12490 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12491 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12492 IPV6_PREFER_SRC_MIPMASK) { 12493 return (EINVAL); 12494 } 12495 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12496 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12497 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12498 IPV6_PREFER_SRC_TMPMASK) { 12499 return (EINVAL); 12500 } 12501 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12502 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12503 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12504 IPV6_PREFER_SRC_CGAMASK) { 12505 return (EINVAL); 12506 } 12507 12508 connp->conn_src_preferences = prefs; 12509 return (0); 12510 } 12511 12512 size_t 12513 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12514 { 12515 *val = connp->conn_src_preferences; 12516 return (sizeof (connp->conn_src_preferences)); 12517 } 12518 12519 int 12520 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12521 { 12522 ill_t *ill; 12523 ire_t *ire; 12524 int error; 12525 12526 /* 12527 * Verify the source address and ifindex. Privileged users can use 12528 * any source address. For ancillary data the source address is 12529 * checked in ip_wput_v6. 12530 */ 12531 if (pkti->ipi6_ifindex != 0) { 12532 ASSERT(connp != NULL); 12533 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12534 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12535 if (ill == NULL) { 12536 /* 12537 * We just want to know if the interface exists, we 12538 * don't really care about the ill pointer itself. 
12539 */ 12540 if (error != EINPROGRESS) 12541 return (error); 12542 error = 0; /* Ensure we don't use it below */ 12543 } else { 12544 ill_refrele(ill); 12545 } 12546 } 12547 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12548 secpolicy_net_rawaccess(cr) != 0) { 12549 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12550 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12551 connp->conn_zoneid, MATCH_IRE_TYPE); 12552 if (ire != NULL) 12553 ire_refrele(ire); 12554 else 12555 return (ENXIO); 12556 } 12557 return (0); 12558 } 12559 12560 /* 12561 * Get the size of the IP options (including the IP headers size) 12562 * without including the AH header's size. If till_ah is B_FALSE, 12563 * and if AH header is present, dest options beyond AH header will 12564 * also be included in the returned size. 12565 */ 12566 int 12567 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12568 { 12569 ip6_t *ip6h; 12570 uint8_t nexthdr; 12571 uint8_t *whereptr; 12572 ip6_hbh_t *hbhhdr; 12573 ip6_dest_t *dsthdr; 12574 ip6_rthdr_t *rthdr; 12575 int ehdrlen; 12576 int size; 12577 ah_t *ah; 12578 12579 ip6h = (ip6_t *)mp->b_rptr; 12580 size = IPV6_HDR_LEN; 12581 nexthdr = ip6h->ip6_nxt; 12582 whereptr = (uint8_t *)&ip6h[1]; 12583 for (;;) { 12584 /* Assume IP has already stripped it */ 12585 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12586 switch (nexthdr) { 12587 case IPPROTO_HOPOPTS: 12588 hbhhdr = (ip6_hbh_t *)whereptr; 12589 nexthdr = hbhhdr->ip6h_nxt; 12590 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12591 break; 12592 case IPPROTO_DSTOPTS: 12593 dsthdr = (ip6_dest_t *)whereptr; 12594 nexthdr = dsthdr->ip6d_nxt; 12595 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12596 break; 12597 case IPPROTO_ROUTING: 12598 rthdr = (ip6_rthdr_t *)whereptr; 12599 nexthdr = rthdr->ip6r_nxt; 12600 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12601 break; 12602 default : 12603 if (till_ah) { 12604 ASSERT(nexthdr == IPPROTO_AH); 12605 return (size); 12606 } 12607 /* 12608 * If we don't have a AH header 
to traverse, 12609 * return now. This happens normally for 12610 * outbound datagrams where we have not inserted 12611 * the AH header. 12612 */ 12613 if (nexthdr != IPPROTO_AH) { 12614 return (size); 12615 } 12616 12617 /* 12618 * We don't include the AH header's size 12619 * to be symmetrical with other cases where 12620 * we either don't have a AH header (outbound) 12621 * or peek into the AH header yet (inbound and 12622 * not pulled up yet). 12623 */ 12624 ah = (ah_t *)whereptr; 12625 nexthdr = ah->ah_nexthdr; 12626 ehdrlen = (ah->ah_length << 2) + 8; 12627 12628 if (nexthdr == IPPROTO_DSTOPTS) { 12629 if (whereptr + ehdrlen >= mp->b_wptr) { 12630 /* 12631 * The destination options header 12632 * is not part of the first mblk. 12633 */ 12634 whereptr = mp->b_cont->b_rptr; 12635 } else { 12636 whereptr += ehdrlen; 12637 } 12638 12639 dsthdr = (ip6_dest_t *)whereptr; 12640 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12641 size += ehdrlen; 12642 } 12643 return (size); 12644 } 12645 whereptr += ehdrlen; 12646 size += ehdrlen; 12647 } 12648 } 12649