1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/tiuser.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_arp.h> 60 #include <net/if_types.h> 61 #include <net/route.h> 62 #include <net/if_dl.h> 63 #include <sys/sockio.h> 64 #include <netinet/in.h> 65 #include <netinet/ip6.h> 66 #include <netinet/icmp6.h> 67 #include <netinet/sctp.h> 68 69 #include <inet/common.h> 70 #include <inet/mi.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 #include <inet/snmpcom.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/multidata.h> 98 #include <sys/pattr.h> 99 #include <inet/ipclassifier.h> 100 #include <inet/ipsecah.h> 101 #include <inet/udp_impl.h> 102 #include <sys/squeue.h> 103 104 extern squeue_func_t ip_input_proc; 105 106 /* 107 * IP 
statistics.
 *
 * IP6_STAT(x) bumps the named 64-bit counter in ip6_statistics by one;
 * IP6_STAT_UPDATE(x, n) adds n to it.  Neither macro does any locking.
 */
#define	IP6_STAT(x)		(ip6_statistics.x.value.ui64++)
#define	IP6_STAT_UPDATE(x, n)	(ip6_statistics.x.value.ui64 += (n))

/*
 * One named kstat per counter.  The initializer of ip6_statistics below is
 * positional, so members here and entries there must stay in the same order.
 */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
	kstat_named_t	ip6_out_sw_cksum;
	kstat_named_t	ip6_in_sw_cksum;
	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_sw_cksum_err;
	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
	kstat_named_t	ip6_udp_in_sw_cksum_err;
	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
	kstat_named_t	ip6_frag_mdt_pkt_out;
	kstat_named_t	ip6_frag_mdt_discarded;
	kstat_named_t	ip6_frag_mdt_allocfail;
	kstat_named_t	ip6_frag_mdt_addpdescfail;
	kstat_named_t	ip6_frag_mdt_allocd;
} ip6_stat_t;

/* The counters themselves: { name, data type } for each ip6_stat_t member. */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",			KSTAT_DATA_UINT64 },
	{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_tcp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_pkt_out",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_discarded",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_addpdescfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocd",		KSTAT_DATA_UINT64 },
};

/* kstat handle for ip6_statistics; set up elsewhere in the file. */
static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipv6IfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known IPv6 addresses.  Each initializer supplies four 32-bit words;
 * the words are spelled per-endianness so that the in-memory byte sequence
 * is the same on either kind of machine.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 -- prefix of the solicited-node multicast addresses */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * True when mp is non-NULL and its first block holds at least two IPv6
 * addresses worth of data.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations for the file-local functions defined below. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static boolean_t	icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int);
static boolean_t ip_ulp_cando_pkt2big(int);

static void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 *
 * Layout implied by the offsets below: the areq_t itself, then the target
 * address, then the sender address, then the name.
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/* Read-side STREAMS queue entry points for IPv6; no read service routine. */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,
	NULL,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/* Write-side STREAMS queue entry points for IPv6; ip_wsrv is the service. */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,
	(pfi_t)ip_wsrv,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
306 * The message has already been checksummed and if needed, 307 * a copy has been made to be sent any interested ICMP client (conn) 308 * Note that this is different than icmp_inbound() which does the fanout 309 * to conn's as well as local processing of the ICMP packets. 310 * 311 * All error messages are passed to the matching transport stream. 312 * 313 * Zones notes: 314 * The packet is only processed in the context of the specified zone: typically 315 * only this zone will reply to an echo request. This means that the caller must 316 * call icmp_inbound_v6() for each relevant zone. 317 */ 318 static void 319 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 320 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 321 { 322 icmp6_t *icmp6; 323 ip6_t *ip6h; 324 boolean_t interested; 325 ip6i_t *ip6i; 326 in6_addr_t origsrc; 327 ire_t *ire; 328 mblk_t *first_mp; 329 ipsec_in_t *ii; 330 331 ASSERT(ill != NULL); 332 first_mp = mp; 333 if (mctl_present) { 334 mp = first_mp->b_cont; 335 ASSERT(mp != NULL); 336 337 ii = (ipsec_in_t *)first_mp->b_rptr; 338 ASSERT(ii->ipsec_in_type == IPSEC_IN); 339 } 340 341 ip6h = (ip6_t *)mp->b_rptr; 342 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 344 345 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 346 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 347 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 348 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 349 freemsg(first_mp); 350 return; 351 } 352 ip6h = (ip6_t *)mp->b_rptr; 353 } 354 if (icmp_accept_clear_messages == 0) { 355 first_mp = ipsec_check_global_policy(first_mp, NULL, 356 NULL, ip6h, mctl_present); 357 if (first_mp == NULL) 358 return; 359 } 360 361 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 362 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 363 icmp6->icmp6_code)); 364 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 365 366 /* Initiate IPPF processing here */ 367 if (IP6_IN_IPP(flags)) { 
368 369 /* 370 * If the ifindex changes due to SIOCSLIFINDEX 371 * packet may return to IP on the wrong ill. 372 */ 373 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 374 if (mp == NULL) { 375 if (mctl_present) { 376 freeb(first_mp); 377 } 378 return; 379 } 380 } 381 382 switch (icmp6->icmp6_type) { 383 case ICMP6_DST_UNREACH: 384 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 385 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 386 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 387 break; 388 389 case ICMP6_TIME_EXCEEDED: 390 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 391 break; 392 393 case ICMP6_PARAM_PROB: 394 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 395 break; 396 397 case ICMP6_PACKET_TOO_BIG: 398 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 399 zoneid); 400 return; 401 case ICMP6_ECHO_REQUEST: 402 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 403 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 404 !ipv6_resp_echo_mcast) 405 break; 406 407 /* 408 * We must have exclusive use of the mblk to convert it to 409 * a response. 410 * If not, we copy it. 411 */ 412 if (mp->b_datap->db_ref > 1) { 413 mblk_t *mp1; 414 415 mp1 = copymsg(mp); 416 freemsg(mp); 417 if (mp1 == NULL) { 418 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 419 if (mctl_present) 420 freeb(first_mp); 421 return; 422 } 423 mp = mp1; 424 ip6h = (ip6_t *)mp->b_rptr; 425 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 426 if (mctl_present) 427 first_mp->b_cont = mp; 428 else 429 first_mp = mp; 430 } 431 432 /* 433 * Turn the echo into an echo reply. 434 * Remove any extension headers (do not reverse a source route) 435 * and clear the flow id (keep traffic class for now). 
436 */ 437 if (hdr_length != IPV6_HDR_LEN) { 438 int i; 439 440 for (i = 0; i < IPV6_HDR_LEN; i++) 441 mp->b_rptr[hdr_length - i - 1] = 442 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 443 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 444 ip6h = (ip6_t *)mp->b_rptr; 445 ip6h->ip6_nxt = IPPROTO_ICMPV6; 446 hdr_length = IPV6_HDR_LEN; 447 } 448 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 449 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 450 451 ip6h->ip6_plen = 452 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 453 origsrc = ip6h->ip6_src; 454 /* 455 * Reverse the source and destination addresses. 456 * If the return address is a multicast, zero out the source 457 * (ip_wput_v6 will set an address). 458 */ 459 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 460 ip6h->ip6_src = ipv6_all_zeros; 461 ip6h->ip6_dst = origsrc; 462 } else { 463 ip6h->ip6_src = ip6h->ip6_dst; 464 ip6h->ip6_dst = origsrc; 465 } 466 467 /* set the hop limit */ 468 ip6h->ip6_hops = ipv6_def_hops; 469 470 /* 471 * Prepare for checksum by putting icmp length in the icmp 472 * checksum field. The checksum is calculated in ip_wput_v6. 473 */ 474 icmp6->icmp6_cksum = ip6h->ip6_plen; 475 /* 476 * ICMP echo replies should go out on the same interface 477 * the request came on as probes used by in.mpathd for 478 * detecting NIC failures are ECHO packets. We turn-off load 479 * spreading by allocating a ip6i and setting ip6i_attach_if 480 * to B_TRUE which is handled both by ip_wput_v6 and 481 * ip_newroute_v6. If we don't turnoff load spreading, 482 * the packets might get dropped if there are no 483 * non-FAILED/INACTIVE interfaces for it to go out on and 484 * in.mpathd would wrongly detect a failure or mis-detect 485 * a NIC failure as a link failure. As load spreading can 486 * happen only if ill_group is not NULL, we do only for 487 * that case and this does not affect the normal case. 488 * 489 * We force this only on echo packets that came from on-link 490 * hosts. 
We restrict this to link-local addresses which 491 * is used by in.mpathd for probing. In the IPv6 case, 492 * default routes typically have an ire_ipif pointer and 493 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 494 * might work. As a default route out of this interface 495 * may not be present, enforcing this packet to go out in 496 * this case may not work. 497 */ 498 if (ill->ill_group != NULL && 499 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 500 /* 501 * If we are sending replies to ourselves, don't 502 * set ATTACH_IF as we may not be able to find 503 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 504 * causes ip_wput_v6 to look for an IRE_LOCAL on 505 * "ill" which it may not find and will try to 506 * create an IRE_CACHE for our local address. Once 507 * we do this, we will try to forward all packets 508 * meant to our LOCAL address. 509 */ 510 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 511 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 512 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 513 if (mp == NULL) { 514 BUMP_MIB(ill->ill_icmp6_mib, 515 ipv6IfIcmpInErrors); 516 if (ire != NULL) 517 ire_refrele(ire); 518 if (mctl_present) 519 freeb(first_mp); 520 return; 521 } else if (mctl_present) { 522 first_mp->b_cont = mp; 523 } else { 524 first_mp = mp; 525 } 526 ip6i = (ip6i_t *)mp->b_rptr; 527 ip6i->ip6i_flags = IP6I_ATTACH_IF; 528 ip6i->ip6i_ifindex = 529 ill->ill_phyint->phyint_ifindex; 530 } 531 if (ire != NULL) 532 ire_refrele(ire); 533 } 534 535 if (!mctl_present) { 536 /* 537 * This packet should go out the same way as it 538 * came in i.e in clear. To make sure that global 539 * policy will not be applied to this in ip_wput, 540 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
541 */ 542 ASSERT(first_mp == mp); 543 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 544 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 545 freemsg(mp); 546 return; 547 } 548 ii = (ipsec_in_t *)first_mp->b_rptr; 549 550 /* This is not a secure packet */ 551 ii->ipsec_in_secure = B_FALSE; 552 first_mp->b_cont = mp; 553 } 554 ii->ipsec_in_zoneid = zoneid; 555 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 556 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 557 return; 558 } 559 put(WR(q), first_mp); 560 return; 561 562 case ICMP6_ECHO_REPLY: 563 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 564 break; 565 566 case ND_ROUTER_SOLICIT: 567 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 568 break; 569 570 case ND_ROUTER_ADVERT: 571 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 572 break; 573 574 case ND_NEIGHBOR_SOLICIT: 575 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 576 if (mctl_present) 577 freeb(first_mp); 578 /* XXX may wish to pass first_mp up to ndp_input someday. */ 579 ndp_input(ill, mp); 580 return; 581 582 case ND_NEIGHBOR_ADVERT: 583 BUMP_MIB(ill->ill_icmp6_mib, 584 ipv6IfIcmpInNeighborAdvertisements); 585 if (mctl_present) 586 freeb(first_mp); 587 /* XXX may wish to pass first_mp up to ndp_input someday. */ 588 ndp_input(ill, mp); 589 return; 590 591 case ND_REDIRECT: { 592 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 593 594 if (ipv6_ignore_redirect) 595 break; 596 597 /* 598 * As there is no upper client to deliver, we don't 599 * need the first_mp any more. 600 */ 601 if (mctl_present) 602 freeb(first_mp); 603 if (!pullupmsg(mp, -1) || 604 !icmp_redirect_ok_v6(ill, mp)) { 605 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 606 break; 607 } 608 icmp_redirect_v6(q, mp, ill); 609 return; 610 } 611 612 /* 613 * The next three icmp messages will be handled by MLD. 614 * Pass all valid MLD packets up to any process(es) 615 * listening on a raw ICMP socket. 
MLD messages are 616 * freed by mld_input function. 617 */ 618 case MLD_LISTENER_QUERY: 619 case MLD_LISTENER_REPORT: 620 case MLD_LISTENER_REDUCTION: 621 if (mctl_present) 622 freeb(first_mp); 623 mld_input(q, mp, ill); 624 return; 625 default: 626 break; 627 } 628 if (interested) { 629 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 630 mctl_present, zoneid); 631 } else { 632 freemsg(first_mp); 633 } 634 } 635 636 /* 637 * Process received IPv6 ICMP Packet too big. 638 * After updating any IRE it does the fanout to any matching transport streams. 639 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 640 */ 641 /* ARGSUSED */ 642 static void 643 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 644 boolean_t mctl_present, zoneid_t zoneid) 645 { 646 ip6_t *ip6h; 647 ip6_t *inner_ip6h; 648 icmp6_t *icmp6; 649 uint16_t hdr_length; 650 uint32_t mtu; 651 ire_t *ire, *first_ire; 652 mblk_t *first_mp; 653 654 first_mp = mp; 655 if (mctl_present) 656 mp = first_mp->b_cont; 657 /* 658 * We must have exclusive use of the mblk to update the MTU 659 * in the packet. 660 * If not, we copy it. 661 * 662 * If there's an M_CTL present, we know that allocated first_mp 663 * earlier in this function, so we know first_mp has refcnt of one. 
664 */ 665 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 666 if (mp->b_datap->db_ref > 1) { 667 mblk_t *mp1; 668 669 mp1 = copymsg(mp); 670 freemsg(mp); 671 if (mp1 == NULL) { 672 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 673 if (mctl_present) 674 freeb(first_mp); 675 return; 676 } 677 mp = mp1; 678 if (mctl_present) 679 first_mp->b_cont = mp; 680 else 681 first_mp = mp; 682 } 683 ip6h = (ip6_t *)mp->b_rptr; 684 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 685 hdr_length = ip_hdr_length_v6(mp, ip6h); 686 else 687 hdr_length = IPV6_HDR_LEN; 688 689 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 690 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 691 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 692 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 693 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 694 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 695 freemsg(first_mp); 696 return; 697 } 698 ip6h = (ip6_t *)mp->b_rptr; 699 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 700 inner_ip6h = (ip6_t *)&icmp6[1]; 701 } 702 703 /* 704 * For link local destinations matching simply on IRE type is not 705 * sufficient. Same link local addresses for different ILL's is 706 * possible. 
707 */ 708 709 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 710 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 711 IRE_CACHE, ill->ill_ipif, ALL_ZONES, 712 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 713 714 if (first_ire == NULL) { 715 if (ip_debug > 2) { 716 /* ip1dbg */ 717 pr_addr_dbg("icmp_inbound_too_big_v6:" 718 "no ire for dst %s\n", AF_INET6, 719 &inner_ip6h->ip6_dst); 720 } 721 freemsg(first_mp); 722 return; 723 } 724 725 mtu = ntohl(icmp6->icmp6_mtu); 726 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 727 for (ire = first_ire; ire != NULL && 728 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 729 ire = ire->ire_next) { 730 mutex_enter(&ire->ire_lock); 731 if (mtu < IPV6_MIN_MTU) { 732 ip1dbg(("Received mtu less than IPv6 " 733 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 734 mtu = IPV6_MIN_MTU; 735 /* 736 * If an mtu less than IPv6 min mtu is received, 737 * we must include a fragment header in 738 * subsequent packets. 739 */ 740 ire->ire_frag_flag |= IPH_FRAG_HDR; 741 } 742 ip1dbg(("Received mtu from router: %d\n", mtu)); 743 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 744 /* Record the new max frag size for the ULP. */ 745 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 746 /* 747 * If we need a fragment header in every packet 748 * (above case or multirouting), make sure the 749 * ULP takes it into account when computing the 750 * payload size. 
751 */ 752 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 753 sizeof (ip6_frag_t)); 754 } else { 755 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 756 } 757 mutex_exit(&ire->ire_lock); 758 } 759 rw_exit(&first_ire->ire_bucket->irb_lock); 760 ire_refrele(first_ire); 761 } else { 762 irb_t *irb = NULL; 763 /* 764 * for non-link local destinations we match only on the IRE type 765 */ 766 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 767 IRE_CACHE, ill->ill_ipif, ALL_ZONES, MATCH_IRE_TYPE); 768 if (ire == NULL) { 769 if (ip_debug > 2) { 770 /* ip1dbg */ 771 pr_addr_dbg("icmp_inbound_too_big_v6:" 772 "no ire for dst %s\n", 773 AF_INET6, &inner_ip6h->ip6_dst); 774 } 775 freemsg(first_mp); 776 return; 777 } 778 irb = ire->ire_bucket; 779 ire_refrele(ire); 780 rw_enter(&irb->irb_lock, RW_READER); 781 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 782 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 783 &inner_ip6h->ip6_dst)) { 784 mtu = ntohl(icmp6->icmp6_mtu); 785 mutex_enter(&ire->ire_lock); 786 if (mtu < IPV6_MIN_MTU) { 787 ip1dbg(("Received mtu less than IPv6" 788 "min mtu %d: %d\n", 789 IPV6_MIN_MTU, mtu)); 790 mtu = IPV6_MIN_MTU; 791 /* 792 * If an mtu less than IPv6 min mtu is 793 * received, we must include a fragment 794 * header in subsequent packets. 795 */ 796 ire->ire_frag_flag |= IPH_FRAG_HDR; 797 } 798 799 ip1dbg(("Received mtu from router: %d\n", mtu)); 800 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 801 /* Record the new max frag size for the ULP. */ 802 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 803 /* 804 * If we need a fragment header in 805 * every packet (above case or 806 * multirouting), make sure the ULP 807 * takes it into account when computing 808 * the payload size. 
809 */ 810 icmp6->icmp6_mtu = 811 htonl(ire->ire_max_frag - 812 sizeof (ip6_frag_t)); 813 } else { 814 icmp6->icmp6_mtu = 815 htonl(ire->ire_max_frag); 816 } 817 mutex_exit(&ire->ire_lock); 818 } 819 } 820 rw_exit(&irb->irb_lock); 821 } 822 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 823 mctl_present, zoneid); 824 } 825 826 static void 827 pkt_too_big(conn_t *connp, void *arg) 828 { 829 mblk_t *mp; 830 831 if (!connp->conn_ipv6_recvpathmtu) 832 return; 833 834 /* create message and drop it on this connections read queue */ 835 if ((mp = dupb((mblk_t *)arg)) == NULL) { 836 return; 837 } 838 mp->b_datap->db_type = M_CTL; 839 840 putnext(connp->conn_rq, mp); 841 } 842 843 /* 844 * Fanout received ICMPv6 error packets to the transports. 845 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 846 */ 847 void 848 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 849 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 850 { 851 uint16_t *up; /* Pointer to ports in ULP header */ 852 uint32_t ports; /* reversed ports for fanout */ 853 ip6_t rip6h; /* With reversed addresses */ 854 uint16_t hdr_length; 855 uint8_t *nexthdrp; 856 uint8_t nexthdr; 857 mblk_t *first_mp; 858 ipsec_in_t *ii; 859 tcpha_t *tcpha; 860 conn_t *connp; 861 862 first_mp = mp; 863 if (mctl_present) { 864 mp = first_mp->b_cont; 865 ASSERT(mp != NULL); 866 867 ii = (ipsec_in_t *)first_mp->b_rptr; 868 ASSERT(ii->ipsec_in_type == IPSEC_IN); 869 } else { 870 ii = NULL; 871 } 872 873 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 874 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 875 876 /* 877 * Need to pullup everything in order to use 878 * ip_hdr_length_nexthdr_v6() 879 */ 880 if (mp->b_cont != NULL) { 881 if (!pullupmsg(mp, -1)) { 882 ip1dbg(("icmp_inbound_error_fanout_v6: " 883 "pullupmsg failed\n")); 884 goto drop_pkt; 885 } 886 ip6h = (ip6_t *)mp->b_rptr; 887 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 888 } 889 890 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 891 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 892 goto drop_pkt; 893 894 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 895 goto drop_pkt; 896 nexthdr = *nexthdrp; 897 898 /* Set message type, must be done after pullups */ 899 mp->b_datap->db_type = M_CTL; 900 901 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 902 /* 903 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 904 * sockets. 905 * 906 * Note I don't like walking every connection to deliver 907 * this information to a set of listeners. A separate 908 * list could be kept to keep the cost of this down. 909 */ 910 ipcl_walk(pkt_too_big, (void *)mp); 911 } 912 913 /* Try to pass the ICMP message to clients who need it */ 914 switch (nexthdr) { 915 case IPPROTO_UDP: { 916 /* 917 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 918 * UDP header to get the port information. 919 */ 920 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 921 mp->b_wptr) { 922 break; 923 } 924 /* 925 * Attempt to find a client stream based on port. 926 * Note that we do a reverse lookup since the header is 927 * in the form we sent it out. 928 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 929 * and we only set the src and dst addresses and nexthdr. 930 */ 931 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 932 rip6h.ip6_src = ip6h->ip6_dst; 933 rip6h.ip6_dst = ip6h->ip6_src; 934 rip6h.ip6_nxt = nexthdr; 935 ((uint16_t *)&ports)[0] = up[1]; 936 ((uint16_t *)&ports)[1] = up[0]; 937 938 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 939 IP6_NO_IPPOLICY, mctl_present, zoneid); 940 return; 941 } 942 case IPPROTO_TCP: { 943 /* 944 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 945 * the TCP header to get the port information. 
946 */ 947 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 948 mp->b_wptr) { 949 break; 950 } 951 952 /* 953 * Attempt to find a client stream based on port. 954 * Note that we do a reverse lookup since the header is 955 * in the form we sent it out. 956 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 957 * we only set the src and dst addresses and nexthdr. 958 */ 959 960 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 961 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 962 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 963 if (connp == NULL) { 964 goto drop_pkt; 965 } 966 967 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 968 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 969 return; 970 971 } 972 case IPPROTO_SCTP: 973 /* 974 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 975 * the SCTP header to get the port information. 976 */ 977 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 978 mp->b_wptr) { 979 break; 980 } 981 982 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 983 ((uint16_t *)&ports)[0] = up[1]; 984 ((uint16_t *)&ports)[1] = up[0]; 985 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 986 IP6_NO_IPPOLICY, 0, zoneid); 987 return; 988 case IPPROTO_ESP: 989 case IPPROTO_AH: { 990 int ipsec_rc; 991 992 /* 993 * We need a IPSEC_IN in the front to fanout to AH/ESP. 994 * We will re-use the IPSEC_IN if it is already present as 995 * AH/ESP will not affect any fields in the IPSEC_IN for 996 * ICMP errors. If there is no IPSEC_IN, allocate a new 997 * one and attach it in the front. 998 */ 999 if (ii != NULL) { 1000 /* 1001 * ip_fanout_proto_again converts the ICMP errors 1002 * that come back from AH/ESP to M_DATA so that 1003 * if it is non-AH/ESP and we do a pullupmsg in 1004 * this function, it would work. Convert it back 1005 * to M_CTL before we send up as this is a ICMP 1006 * error. This could have been generated locally or 1007 * by some router. Validate the inner IPSEC 1008 * headers. 
1009 * 1010 * NOTE : ill_index is used by ip_fanout_proto_again 1011 * to locate the ill. 1012 */ 1013 ASSERT(ill != NULL); 1014 ii->ipsec_in_ill_index = 1015 ill->ill_phyint->phyint_ifindex; 1016 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1017 first_mp->b_cont->b_datap->db_type = M_CTL; 1018 } else { 1019 /* 1020 * IPSEC_IN is not present. We attach a ipsec_in 1021 * message and send up to IPSEC for validating 1022 * and removing the IPSEC headers. Clear 1023 * ipsec_in_secure so that when we return 1024 * from IPSEC, we don't mistakenly think that this 1025 * is a secure packet came from the network. 1026 * 1027 * NOTE : ill_index is used by ip_fanout_proto_again 1028 * to locate the ill. 1029 */ 1030 ASSERT(first_mp == mp); 1031 first_mp = ipsec_in_alloc(B_FALSE); 1032 if (first_mp == NULL) { 1033 freemsg(mp); 1034 BUMP_MIB(&ip_mib, ipInDiscards); 1035 return; 1036 } 1037 ii = (ipsec_in_t *)first_mp->b_rptr; 1038 1039 /* This is not a secure packet */ 1040 ii->ipsec_in_secure = B_FALSE; 1041 first_mp->b_cont = mp; 1042 mp->b_datap->db_type = M_CTL; 1043 ASSERT(ill != NULL); 1044 ii->ipsec_in_ill_index = 1045 ill->ill_phyint->phyint_ifindex; 1046 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1047 } 1048 1049 if (!ipsec_loaded()) { 1050 ip_proto_not_sup(q, first_mp, 0, zoneid); 1051 return; 1052 } 1053 1054 if (nexthdr == IPPROTO_ESP) 1055 ipsec_rc = ipsecesp_icmp_error(first_mp); 1056 else 1057 ipsec_rc = ipsecah_icmp_error(first_mp); 1058 if (ipsec_rc == IPSEC_STATUS_FAILED) 1059 return; 1060 1061 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1062 return; 1063 } 1064 case IPPROTO_ENCAP: 1065 case IPPROTO_IPV6: 1066 if ((uint8_t *)ip6h + hdr_length + 1067 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1068 sizeof (ip6_t)) > mp->b_wptr) 1069 goto drop_pkt; 1070 1071 if (nexthdr == IPPROTO_ENCAP || 1072 !IN6_ARE_ADDR_EQUAL( 1073 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1074 &ip6h->ip6_src) || 1075 !IN6_ARE_ADDR_EQUAL( 1076 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1077 &ip6h->ip6_dst)) { 1078 /* 1079 * For tunnels that have used IPsec protection, 1080 * we need to adjust the MTU to take into account 1081 * the IPsec overhead. 1082 */ 1083 if (ii != NULL) 1084 icmp6->icmp6_mtu = htons( 1085 ntohs(icmp6->icmp6_mtu) - 1086 ipsec_in_extra_length(first_mp)); 1087 } else { 1088 /* 1089 * Self-encapsulated case. As in the ipv4 case, 1090 * we need to strip the 2nd IP header. Since mp 1091 * is already pulled-up, we can simply bcopy 1092 * the 3rd header + data over the 2nd header. 1093 */ 1094 uint16_t unused_len; 1095 ip6_t *inner_ip6h = (ip6_t *) 1096 ((uchar_t *)ip6h + hdr_length); 1097 1098 /* 1099 * Make sure we don't do recursion more than once. 1100 */ 1101 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1102 &unused_len, &nexthdrp) || 1103 *nexthdrp == IPPROTO_IPV6) { 1104 goto drop_pkt; 1105 } 1106 1107 /* 1108 * We are about to modify the packet. Make a copy if 1109 * someone else has a reference to it. 1110 */ 1111 if (DB_REF(mp) > 1) { 1112 mblk_t *mp1; 1113 uint16_t icmp6_offset; 1114 1115 mp1 = copymsg(mp); 1116 if (mp1 == NULL) { 1117 goto drop_pkt; 1118 } 1119 icmp6_offset = (uint16_t) 1120 ((uchar_t *)icmp6 - mp->b_rptr); 1121 freemsg(mp); 1122 mp = mp1; 1123 1124 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1125 ip6h = (ip6_t *)&icmp6[1]; 1126 inner_ip6h = (ip6_t *) 1127 ((uchar_t *)ip6h + hdr_length); 1128 1129 if (mctl_present) 1130 first_mp->b_cont = mp; 1131 else 1132 first_mp = mp; 1133 } 1134 1135 /* 1136 * Need to set db_type back to M_DATA before 1137 * refeeding mp into this function. 
1138 */ 1139 DB_TYPE(mp) = M_DATA; 1140 1141 /* 1142 * Copy the 3rd header + remaining data on top 1143 * of the 2nd header. 1144 */ 1145 bcopy(inner_ip6h, ip6h, 1146 mp->b_wptr - (uchar_t *)inner_ip6h); 1147 1148 /* 1149 * Subtract length of the 2nd header. 1150 */ 1151 mp->b_wptr -= hdr_length; 1152 1153 /* 1154 * Now recurse, and see what I _really_ should be 1155 * doing here. 1156 */ 1157 icmp_inbound_error_fanout_v6(q, first_mp, 1158 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1159 zoneid); 1160 return; 1161 } 1162 /* FALLTHRU */ 1163 default: 1164 /* 1165 * The rip6h header is only used for the lookup and we 1166 * only set the src and dst addresses and nexthdr. 1167 */ 1168 rip6h.ip6_src = ip6h->ip6_dst; 1169 rip6h.ip6_dst = ip6h->ip6_src; 1170 rip6h.ip6_nxt = nexthdr; 1171 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1172 IP6_NO_IPPOLICY, mctl_present, zoneid); 1173 return; 1174 } 1175 /* NOTREACHED */ 1176 drop_pkt: 1177 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1178 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1179 freemsg(first_mp); 1180 } 1181 1182 /* 1183 * Validate the incoming redirect message, if valid redirect 1184 * processing is done later. This is separated from the actual 1185 * redirect processing to avoid becoming single threaded when not 1186 * necessary. (i.e invalid packet) 1187 * Assumes that any AH or ESP headers have already been removed. 1188 * The mp has already been pulled up. 
1189 */ 1190 boolean_t 1191 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1192 { 1193 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1194 nd_redirect_t *rd; 1195 ire_t *ire; 1196 uint16_t len; 1197 uint16_t hdr_length; 1198 1199 ASSERT(mp->b_cont == NULL); 1200 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1201 hdr_length = ip_hdr_length_v6(mp, ip6h); 1202 else 1203 hdr_length = IPV6_HDR_LEN; 1204 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1205 len = mp->b_wptr - mp->b_rptr - hdr_length; 1206 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1207 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1208 (rd->nd_rd_code != 0) || 1209 (len < sizeof (nd_redirect_t)) || 1210 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1211 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1212 return (B_FALSE); 1213 } 1214 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1215 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1216 return (B_FALSE); 1217 } 1218 1219 /* 1220 * Verify that the IP source address of the redirect is 1221 * the same as the current first-hop router for the specified 1222 * ICMP destination address. Just to be cautious, this test 1223 * will be done again before we add the redirect, in case 1224 * router goes away between now and then. 1225 */ 1226 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1227 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, 1228 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1229 if (ire == NULL) 1230 return (B_FALSE); 1231 ire_refrele(ire); 1232 if (len > sizeof (nd_redirect_t)) { 1233 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1234 len - sizeof (nd_redirect_t))) 1235 return (B_FALSE); 1236 } 1237 return (B_TRUE); 1238 } 1239 1240 /* 1241 * Process received IPv6 ICMP Redirect messages. 1242 * Assumes that the icmp packet has already been verfied to be 1243 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1244 */ 1245 /* ARGSUSED */ 1246 static void 1247 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1248 { 1249 ip6_t *ip6h; 1250 uint16_t hdr_length; 1251 nd_redirect_t *rd; 1252 ire_t *ire; 1253 ire_t *prev_ire; 1254 ire_t *redir_ire; 1255 in6_addr_t *src, *dst, *gateway; 1256 nd_opt_hdr_t *opt; 1257 nce_t *nce; 1258 int nce_flags = 0; 1259 int err = 0; 1260 boolean_t redirect_to_router = B_FALSE; 1261 int len; 1262 iulp_t ulp_info = { 0 }; 1263 ill_t *prev_ire_ill; 1264 ipif_t *ipif; 1265 1266 ip6h = (ip6_t *)mp->b_rptr; 1267 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1268 hdr_length = ip_hdr_length_v6(mp, ip6h); 1269 else 1270 hdr_length = IPV6_HDR_LEN; 1271 1272 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1273 src = &ip6h->ip6_src; 1274 dst = &rd->nd_rd_dst; 1275 gateway = &rd->nd_rd_target; 1276 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1277 redirect_to_router = B_TRUE; 1278 nce_flags |= NCE_F_ISROUTER; 1279 } 1280 /* 1281 * Make sure we had a route for the dest in question and that 1282 * route was pointing to the old gateway (the source of the 1283 * redirect packet.) 1284 */ 1285 ipif = ipif_get_next_ipif(NULL, ill); 1286 if (ipif == NULL) { 1287 freemsg(mp); 1288 return; 1289 } 1290 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1291 ALL_ZONES, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1292 ipif_refrele(ipif); 1293 /* 1294 * Check that 1295 * the redirect was not from ourselves 1296 * old gateway is still directly reachable 1297 */ 1298 if (prev_ire == NULL || 1299 prev_ire->ire_type == IRE_LOCAL) { 1300 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1301 goto fail_redirect; 1302 } 1303 prev_ire_ill = ire_to_ill(prev_ire); 1304 ASSERT(prev_ire_ill != NULL); 1305 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1306 nce_flags |= NCE_F_NONUD; 1307 1308 /* 1309 * Should we use the old ULP info to create the new gateway? From 1310 * a user's perspective, we should inherit the info so that it 1311 * is a "smooth" transition. 
If we do not do that, then new 1312 * connections going thru the new gateway will have no route metrics, 1313 * which is counter-intuitive to user. From a network point of 1314 * view, this may or may not make sense even though the new gateway 1315 * is still directly connected to us so the route metrics should not 1316 * change much. 1317 * 1318 * But if the old ire_uinfo is not initialized, we do another 1319 * recursive lookup on the dest using the new gateway. There may 1320 * be a route to that. If so, use it to initialize the redirect 1321 * route. 1322 */ 1323 if (prev_ire->ire_uinfo.iulp_set) { 1324 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1325 } else if (redirect_to_router) { 1326 /* 1327 * Only do the following if the redirection is really to 1328 * a router. 1329 */ 1330 ire_t *tmp_ire; 1331 ire_t *sire; 1332 1333 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1334 ALL_ZONES, 0, 1335 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1336 if (sire != NULL) { 1337 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1338 ASSERT(tmp_ire != NULL); 1339 ire_refrele(tmp_ire); 1340 ire_refrele(sire); 1341 } else if (tmp_ire != NULL) { 1342 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1343 sizeof (iulp_t)); 1344 ire_refrele(tmp_ire); 1345 } 1346 } 1347 1348 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1349 opt = (nd_opt_hdr_t *)&rd[1]; 1350 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1351 if (opt != NULL) { 1352 err = ndp_lookup_then_add(ill, 1353 (uchar_t *)&opt[1], /* Link layer address */ 1354 gateway, 1355 &ipv6_all_ones, /* prefix mask */ 1356 &ipv6_all_zeros, /* Mapping mask */ 1357 0, 1358 nce_flags, 1359 ND_STALE, 1360 &nce); 1361 switch (err) { 1362 case 0: 1363 NCE_REFRELE(nce); 1364 break; 1365 case EEXIST: 1366 /* 1367 * Check to see if link layer address has changed and 1368 * process the nce_state accordingly. 
1369 */ 1370 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1371 NCE_REFRELE(nce); 1372 break; 1373 default: 1374 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1375 err)); 1376 goto fail_redirect; 1377 } 1378 } 1379 if (redirect_to_router) { 1380 /* icmp_redirect_ok_v6() must have already verified this */ 1381 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1382 1383 /* 1384 * Create a Route Association. This will allow us to remember 1385 * a router told us to use the particular gateway. 1386 */ 1387 ire = ire_create_v6( 1388 dst, 1389 &ipv6_all_ones, /* mask */ 1390 &prev_ire->ire_src_addr_v6, /* source addr */ 1391 gateway, /* gateway addr */ 1392 &prev_ire->ire_max_frag, /* max frag */ 1393 NULL, /* Fast Path header */ 1394 NULL, /* no rfq */ 1395 NULL, /* no stq */ 1396 IRE_HOST_REDIRECT, 1397 NULL, 1398 prev_ire->ire_ipif, 1399 NULL, 1400 0, 1401 0, 1402 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1403 &ulp_info); 1404 } else { 1405 /* 1406 * Just create an on link entry, may or may not be a router 1407 * If there is no link layer address option ire_add() won't 1408 * add this. 1409 */ 1410 ire = ire_create_v6( 1411 dst, /* gateway == dst */ 1412 &ipv6_all_ones, /* mask */ 1413 &prev_ire->ire_src_addr_v6, /* source addr */ 1414 &ipv6_all_zeros, /* gateway addr */ 1415 &prev_ire->ire_max_frag, /* max frag */ 1416 NULL, /* Fast Path header */ 1417 prev_ire->ire_rfq, /* ire rfq */ 1418 prev_ire->ire_stq, /* ire stq */ 1419 IRE_CACHE, 1420 NULL, 1421 prev_ire->ire_ipif, 1422 &ipv6_all_ones, 1423 0, 1424 0, 1425 0, 1426 &ulp_info); 1427 } 1428 if (ire == NULL) 1429 goto fail_redirect; 1430 1431 /* 1432 * XXX If there is no nce i.e there is no target link layer address 1433 * option with the redirect message, ire_add will fail. In that 1434 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1435 * to fix this. 
1436 */ 1437 if (ire_add(&ire, NULL, NULL, NULL) == 0) { 1438 1439 /* tell routing sockets that we received a redirect */ 1440 ip_rts_change_v6(RTM_REDIRECT, 1441 &rd->nd_rd_dst, 1442 &rd->nd_rd_target, 1443 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1444 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1445 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1446 1447 /* 1448 * Delete any existing IRE_HOST_REDIRECT for this destination. 1449 * This together with the added IRE has the effect of 1450 * modifying an existing redirect. 1451 */ 1452 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1453 ire->ire_ipif, NULL, ALL_ZONES, 0, 1454 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1455 1456 ire_refrele(ire); /* Held in ire_add_v6 */ 1457 1458 if (redir_ire != NULL) { 1459 ire_delete(redir_ire); 1460 ire_refrele(redir_ire); 1461 } 1462 } 1463 1464 if (prev_ire->ire_type == IRE_CACHE) 1465 ire_delete(prev_ire); 1466 ire_refrele(prev_ire); 1467 prev_ire = NULL; 1468 1469 fail_redirect: 1470 if (prev_ire != NULL) 1471 ire_refrele(prev_ire); 1472 freemsg(mp); 1473 } 1474 1475 static ill_t * 1476 ip_queue_to_ill_v6(queue_t *q) 1477 { 1478 ill_t *ill; 1479 1480 ASSERT(WR(q) == q); 1481 1482 if (q->q_next != NULL) { 1483 ill = (ill_t *)q->q_ptr; 1484 if (ILL_CAN_LOOKUP(ill)) 1485 ill_refhold(ill); 1486 else 1487 ill = NULL; 1488 } else { 1489 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1490 NULL, NULL, NULL, NULL, NULL); 1491 } 1492 if (ill == NULL) 1493 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1494 return (ill); 1495 } 1496 1497 /* 1498 * Assigns an appropriate source address to the packet. 1499 * If origdst is one of our IP addresses that use it as the source. 1500 * If the queue is an ill queue then select a source from that ill. 1501 * Otherwise pick a source based on a route lookup back to the origsrc. 1502 * 1503 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1504 */ 1505 static in6_addr_t * 1506 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1507 in6_addr_t *src) 1508 { 1509 ill_t *ill; 1510 ire_t *ire; 1511 ipif_t *ipif; 1512 zoneid_t zoneid; 1513 1514 ASSERT(!(wq->q_flag & QREADR)); 1515 if (wq->q_next != NULL) { 1516 ill = (ill_t *)wq->q_ptr; 1517 zoneid = GLOBAL_ZONEID; 1518 } else { 1519 ill = NULL; 1520 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1521 } 1522 1523 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1524 NULL, NULL, zoneid, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1525 if (ire != NULL) { 1526 /* Destined to one of our addresses */ 1527 *src = *origdst; 1528 ire_refrele(ire); 1529 return (src); 1530 } 1531 if (ire != NULL) { 1532 ire_refrele(ire); 1533 ire = NULL; 1534 } 1535 if (ill == NULL) { 1536 /* What is the route back to the original source? */ 1537 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1538 NULL, NULL, zoneid, 1539 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1540 if (ire == NULL) { 1541 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1542 return (NULL); 1543 } 1544 /* 1545 * Does not matter whether we use ire_stq or ire_ipif here. 1546 * Just pick an ill for ICMP replies. 1547 */ 1548 ASSERT(ire->ire_ipif != NULL); 1549 ill = ire->ire_ipif->ipif_ill; 1550 ire_refrele(ire); 1551 } 1552 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1553 IPV6_PREFER_SRC_DEFAULT, zoneid); 1554 if (ipif != NULL) { 1555 *src = ipif->ipif_v6src_addr; 1556 ipif_refrele(ipif); 1557 return (src); 1558 } 1559 /* 1560 * Unusual case - can't find a usable source address to reach the 1561 * original source. Use what in the route to the source. 
1562 */ 1563 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1564 NULL, NULL, zoneid, (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1565 if (ire == NULL) { 1566 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1567 return (NULL); 1568 } 1569 ASSERT(ire != NULL); 1570 *src = ire->ire_src_addr_v6; 1571 ire_refrele(ire); 1572 return (src); 1573 } 1574 1575 /* 1576 * Build and ship an IPv6 ICMP message using the packet data in mp, 1577 * and the ICMP header pointed to by "stuff". (May be called as 1578 * writer.) 1579 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1580 * verify that an icmp error packet can be sent. 1581 * 1582 * If q is an ill write side queue (which is the case when packets 1583 * arrive from ip_rput) then ip_wput code will ensure that packets to 1584 * link-local destinations are sent out that ill. 1585 * 1586 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1587 * source address (see above function). 1588 */ 1589 static void 1590 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1591 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1592 { 1593 ip6_t *ip6h; 1594 in6_addr_t v6dst; 1595 size_t len_needed; 1596 size_t msg_len; 1597 mblk_t *mp1; 1598 icmp6_t *icmp6; 1599 ill_t *ill; 1600 in6_addr_t v6src; 1601 mblk_t *ipsec_mp; 1602 ipsec_out_t *io; 1603 1604 ill = ip_queue_to_ill_v6(q); 1605 if (ill == NULL) { 1606 freemsg(mp); 1607 return; 1608 } 1609 1610 if (mctl_present) { 1611 /* 1612 * If it is : 1613 * 1614 * 1) a IPSEC_OUT, then this is caused by outbound 1615 * datagram originating on this host. IPSEC processing 1616 * may or may not have been done. Refer to comments above 1617 * icmp_inbound_error_fanout for details. 1618 * 1619 * 2) a IPSEC_IN if we are generating a icmp_message 1620 * for an incoming datagram destined for us i.e called 1621 * from ip_fanout_send_icmp. 
1622 */ 1623 ipsec_info_t *in; 1624 1625 ipsec_mp = mp; 1626 mp = ipsec_mp->b_cont; 1627 1628 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1629 ip6h = (ip6_t *)mp->b_rptr; 1630 1631 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1632 in->ipsec_info_type == IPSEC_IN); 1633 1634 if (in->ipsec_info_type == IPSEC_IN) { 1635 /* 1636 * Convert the IPSEC_IN to IPSEC_OUT. 1637 */ 1638 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1639 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1640 ill_refrele(ill); 1641 return; 1642 } 1643 } else { 1644 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1645 io = (ipsec_out_t *)in; 1646 /* 1647 * Clear out ipsec_out_proc_begin, so we do a fresh 1648 * ire lookup. 1649 */ 1650 io->ipsec_out_proc_begin = B_FALSE; 1651 } 1652 } else { 1653 /* 1654 * This is in clear. The icmp message we are building 1655 * here should go out in clear. 1656 */ 1657 ipsec_in_t *ii; 1658 ASSERT(mp->b_datap->db_type == M_DATA); 1659 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1660 freemsg(mp); 1661 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1662 ill_refrele(ill); 1663 return; 1664 } 1665 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1666 1667 /* This is not a secure packet */ 1668 ii->ipsec_in_secure = B_FALSE; 1669 ipsec_mp->b_cont = mp; 1670 ip6h = (ip6_t *)mp->b_rptr; 1671 /* 1672 * Convert the IPSEC_IN to IPSEC_OUT. 
1673 */ 1674 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1675 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1676 ill_refrele(ill); 1677 return; 1678 } 1679 } 1680 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1681 1682 if (v6src_ptr != NULL) { 1683 v6src = *v6src_ptr; 1684 } else { 1685 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1686 &v6src) == NULL) { 1687 freemsg(ipsec_mp); 1688 ill_refrele(ill); 1689 return; 1690 } 1691 } 1692 v6dst = ip6h->ip6_src; 1693 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1694 msg_len = msgdsize(mp); 1695 if (msg_len > len_needed) { 1696 if (!adjmsg(mp, len_needed - msg_len)) { 1697 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1698 freemsg(ipsec_mp); 1699 ill_refrele(ill); 1700 return; 1701 } 1702 msg_len = len_needed; 1703 } 1704 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1705 if (mp1 == NULL) { 1706 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1707 freemsg(ipsec_mp); 1708 ill_refrele(ill); 1709 return; 1710 } 1711 ill_refrele(ill); 1712 mp1->b_cont = mp; 1713 mp = mp1; 1714 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1715 io->ipsec_out_type == IPSEC_OUT); 1716 ipsec_mp->b_cont = mp; 1717 1718 /* 1719 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1720 * node generates be accepted in peace by all on-host destinations. 1721 * If we do NOT assume that all on-host destinations trust 1722 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1723 * (Look for ipsec_out_icmp_loopback). 
1724 */ 1725 io->ipsec_out_icmp_loopback = B_TRUE; 1726 1727 ip6h = (ip6_t *)mp->b_rptr; 1728 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1729 1730 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1731 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1732 ip6h->ip6_hops = ipv6_def_hops; 1733 ip6h->ip6_dst = v6dst; 1734 ip6h->ip6_src = v6src; 1735 msg_len += IPV6_HDR_LEN + len; 1736 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1737 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1738 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1739 } 1740 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1741 icmp6 = (icmp6_t *)&ip6h[1]; 1742 bcopy(stuff, (char *)icmp6, len); 1743 /* 1744 * Prepare for checksum by putting icmp length in the icmp 1745 * checksum field. The checksum is calculated in ip_wput_v6. 1746 */ 1747 icmp6->icmp6_cksum = ip6h->ip6_plen; 1748 if (icmp6->icmp6_type == ND_REDIRECT) { 1749 ip6h->ip6_hops = IPV6_MAX_HOPS; 1750 } 1751 /* Send to V6 writeside put routine */ 1752 put(q, ipsec_mp); 1753 } 1754 1755 /* 1756 * Update the output mib when ICMPv6 packets are sent. 
1757 */ 1758 static void 1759 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1760 { 1761 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1762 1763 switch (icmp6->icmp6_type) { 1764 case ICMP6_DST_UNREACH: 1765 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1766 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1767 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1768 break; 1769 1770 case ICMP6_TIME_EXCEEDED: 1771 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1772 break; 1773 1774 case ICMP6_PARAM_PROB: 1775 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1776 break; 1777 1778 case ICMP6_PACKET_TOO_BIG: 1779 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1780 break; 1781 1782 case ICMP6_ECHO_REQUEST: 1783 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1784 break; 1785 1786 case ICMP6_ECHO_REPLY: 1787 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1788 break; 1789 1790 case ND_ROUTER_SOLICIT: 1791 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1792 break; 1793 1794 case ND_ROUTER_ADVERT: 1795 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1796 break; 1797 1798 case ND_NEIGHBOR_SOLICIT: 1799 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1800 break; 1801 1802 case ND_NEIGHBOR_ADVERT: 1803 BUMP_MIB(ill->ill_icmp6_mib, 1804 ipv6IfIcmpOutNeighborAdvertisements); 1805 break; 1806 1807 case ND_REDIRECT: 1808 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1809 break; 1810 1811 case MLD_LISTENER_QUERY: 1812 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1813 break; 1814 1815 case MLD_LISTENER_REPORT: 1816 case MLD_V2_LISTENER_REPORT: 1817 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1818 break; 1819 1820 case MLD_LISTENER_REDUCTION: 1821 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1822 break; 1823 } 1824 } 1825 1826 /* 1827 * Check if it is ok to send an ICMPv6 error packet in 1828 * response to the IP packet in 
mp. 1829 * Free the message and return null if no 1830 * ICMP error packet should be sent. 1831 */ 1832 static mblk_t * 1833 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1834 boolean_t llbcast, boolean_t mcast_ok) 1835 { 1836 ip6_t *ip6h; 1837 1838 if (!mp) 1839 return (NULL); 1840 1841 ip6h = (ip6_t *)mp->b_rptr; 1842 1843 /* Check if source address uniquely identifies the host */ 1844 1845 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1846 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1847 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1848 freemsg(mp); 1849 return (NULL); 1850 } 1851 1852 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1853 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1854 icmp6_t *icmp6; 1855 1856 if (mp->b_wptr - mp->b_rptr < len_needed) { 1857 if (!pullupmsg(mp, len_needed)) { 1858 ill_t *ill; 1859 1860 ill = ip_queue_to_ill_v6(q); 1861 if (ill == NULL) { 1862 BUMP_MIB(&icmp6_mib, 1863 ipv6IfIcmpInErrors); 1864 } else { 1865 BUMP_MIB(ill->ill_icmp6_mib, 1866 ipv6IfIcmpInErrors); 1867 ill_refrele(ill); 1868 } 1869 freemsg(mp); 1870 return (NULL); 1871 } 1872 ip6h = (ip6_t *)mp->b_rptr; 1873 } 1874 icmp6 = (icmp6_t *)&ip6h[1]; 1875 /* Explicitly do not generate errors in response to redirects */ 1876 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1877 icmp6->icmp6_type == ND_REDIRECT) { 1878 freemsg(mp); 1879 return (NULL); 1880 } 1881 } 1882 /* 1883 * Check that the destination is not multicast and that the packet 1884 * was not sent on link layer broadcast or multicast. (Exception 1885 * is Packet too big message as per the draft - when mcast_ok is set.) 1886 */ 1887 if (!mcast_ok && 1888 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1889 freemsg(mp); 1890 return (NULL); 1891 } 1892 if (icmp_err_rate_limit()) { 1893 /* 1894 * Only send ICMP error packets every so often. 1895 * This should be done on a per port/source basis, 1896 * but for now this will suffice. 
1897 */ 1898 freemsg(mp); 1899 return (NULL); 1900 } 1901 return (mp); 1902 } 1903 1904 /* 1905 * Generate an ICMPv6 redirect message. 1906 * Include target link layer address option if it exits. 1907 * Always include redirect header. 1908 */ 1909 static void 1910 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1911 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1912 { 1913 nd_redirect_t *rd; 1914 nd_opt_rd_hdr_t *rdh; 1915 uchar_t *buf; 1916 nce_t *nce = NULL; 1917 nd_opt_hdr_t *opt; 1918 int len; 1919 int ll_opt_len = 0; 1920 int max_redir_hdr_data_len; 1921 int pkt_len; 1922 in6_addr_t *srcp; 1923 1924 /* 1925 * We are called from ip_rput where we could 1926 * not have attached an IPSEC_IN. 1927 */ 1928 ASSERT(mp->b_datap->db_type == M_DATA); 1929 1930 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1931 if (mp == NULL) 1932 return; 1933 nce = ndp_lookup(ill, targetp, B_FALSE); 1934 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1935 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1936 ill->ill_phys_addr_length + 7)/8 * 8; 1937 } 1938 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1939 ASSERT(len % 4 == 0); 1940 buf = kmem_alloc(len, KM_NOSLEEP); 1941 if (buf == NULL) { 1942 if (nce != NULL) 1943 NCE_REFRELE(nce); 1944 freemsg(mp); 1945 return; 1946 } 1947 1948 rd = (nd_redirect_t *)buf; 1949 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1950 rd->nd_rd_code = 0; 1951 rd->nd_rd_reserved = 0; 1952 rd->nd_rd_target = *targetp; 1953 rd->nd_rd_dst = *dest; 1954 1955 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1956 if (nce != NULL && ll_opt_len != 0) { 1957 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1958 opt->nd_opt_len = ll_opt_len/8; 1959 bcopy((char *)nce->nce_res_mp->b_rptr + 1960 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1961 ill->ill_phys_addr_length); 1962 } 1963 if (nce != NULL) 1964 NCE_REFRELE(nce); 1965 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1966 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1967 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1968 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1969 pkt_len = msgdsize(mp); 1970 /* Make sure mp is 8 byte aligned */ 1971 if (pkt_len > max_redir_hdr_data_len) { 1972 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1973 sizeof (nd_opt_rd_hdr_t))/8; 1974 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1975 } else { 1976 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1977 (void) adjmsg(mp, -(pkt_len % 8)); 1978 } 1979 rdh->nd_opt_rh_reserved1 = 0; 1980 rdh->nd_opt_rh_reserved2 = 0; 1981 /* ipif_v6src_addr contains the link-local source address */ 1982 rw_enter(&ill_g_lock, RW_READER); 1983 if (ill->ill_group != NULL) { 1984 /* 1985 * The receiver of the redirect will verify whether it 1986 * had a route through us (srcp that we will use in 1987 * the redirect) or not. As we load spread even link-locals, 1988 * we don't know which source address the receiver of 1989 * redirect has in its route for communicating with us. 1990 * Thus we randomly choose a source here and finally we 1991 * should get to the right one and it will eventually 1992 * accept the redirect from us. We can't call 1993 * ip_lookup_scope_v6 because we don't have the right 1994 * link-local address here. Thus we randomly choose one. 1995 */ 1996 int cnt = ill->ill_group->illgrp_ill_count; 1997 1998 ill = ill->ill_group->illgrp_ill; 1999 cnt = ++icmp_redirect_v6_src_index % cnt; 2000 while (cnt--) 2001 ill = ill->ill_group_next; 2002 srcp = &ill->ill_ipif->ipif_v6src_addr; 2003 } else { 2004 srcp = &ill->ill_ipif->ipif_v6src_addr; 2005 } 2006 rw_exit(&ill_g_lock); 2007 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2008 kmem_free(buf, len); 2009 } 2010 2011 2012 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2013 void 2014 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2015 boolean_t llbcast, boolean_t mcast_ok) 2016 { 2017 icmp6_t icmp6; 2018 boolean_t mctl_present; 2019 mblk_t *first_mp; 2020 2021 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2022 2023 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2024 if (mp == NULL) { 2025 if (mctl_present) 2026 freeb(first_mp); 2027 return; 2028 } 2029 bzero(&icmp6, sizeof (icmp6_t)); 2030 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2031 icmp6.icmp6_code = code; 2032 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2033 } 2034 2035 /* 2036 * Generate an ICMP unreachable message. 2037 */ 2038 void 2039 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2040 boolean_t llbcast, boolean_t mcast_ok) 2041 { 2042 icmp6_t icmp6; 2043 boolean_t mctl_present; 2044 mblk_t *first_mp; 2045 2046 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2047 2048 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2049 if (mp == NULL) { 2050 if (mctl_present) 2051 freeb(first_mp); 2052 return; 2053 } 2054 bzero(&icmp6, sizeof (icmp6_t)); 2055 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2056 icmp6.icmp6_code = code; 2057 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2058 } 2059 2060 /* 2061 * Generate an ICMP pkt too big message. 
2062 */ 2063 static void 2064 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2065 boolean_t llbcast, boolean_t mcast_ok) 2066 { 2067 icmp6_t icmp6; 2068 mblk_t *first_mp; 2069 boolean_t mctl_present; 2070 2071 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2072 2073 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2074 if (mp == NULL) { 2075 if (mctl_present) 2076 freeb(first_mp); 2077 return; 2078 } 2079 bzero(&icmp6, sizeof (icmp6_t)); 2080 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2081 icmp6.icmp6_code = 0; 2082 icmp6.icmp6_mtu = htonl(mtu); 2083 2084 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2085 } 2086 2087 /* 2088 * Generate an ICMP parameter problem message. (May be called as writer.) 2089 * 'offset' is the offset from the beginning of the packet in error. 2090 */ 2091 static void 2092 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2093 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2094 { 2095 icmp6_t icmp6; 2096 boolean_t mctl_present; 2097 mblk_t *first_mp; 2098 2099 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2100 2101 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2102 if (mp == NULL) { 2103 if (mctl_present) 2104 freeb(first_mp); 2105 return; 2106 } 2107 bzero((char *)&icmp6, sizeof (icmp6_t)); 2108 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2109 icmp6.icmp6_code = code; 2110 icmp6.icmp6_pptr = htonl(offset); 2111 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2112 } 2113 2114 /* 2115 * This code will need to take into account the possibility of binding 2116 * to a link local address on a multi-homed host, in which case the 2117 * outgoing interface (from the conn) will need to be used when getting 2118 * an ire for the dst. 
 * Going through proper outgoing interface and
 * choosing the source address corresponding to the outgoing interface
 * is necessary when the destination address is a link-local address and
 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
 * This can happen when active connection is setup; thus ipp pointer
 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL
 * pointer is passed as ipp pointer.
 *
 * The request in 'mp' is a T_bind_req followed by one extra byte that
 * carries the protocol number; the kind of bind is selected by ADDR_length:
 *	0			- receive all packets of the protocol
 *	IPV6_ADDR_LEN		- verify a local address only
 *	sizeof (sin6_t)		- bind to local address and port
 *	sizeof (ipa6_conn_t)	- connect; destination always verified
 *	sizeof (ipa6_conn_x_t)	- connect; ACX_VERIFY_DST selects verification
 *
 * Returns 'mp' converted to a T_BIND_ACK on success, an error ack obtained
 * from mi_tpi_err_ack_alloc() on failure (TSYSERR when a positive errno is
 * available, TBADADDR otherwise), or NULL when the operation returned
 * EINPROGRESS.
 */
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
	ssize_t			len;
	int			protocol;
	struct T_bind_req	*tbr;
	sin6_t			*sin6;
	ipa6_conn_t		*ac6;
	in6_addr_t		*v6srcp;
	in6_addr_t		*v6dstp;
	uint16_t		lport;
	uint16_t		fport;
	uchar_t			*ucp;
	mblk_t			*mp1;
	boolean_t		ire_requested;
	boolean_t		ipsec_policy_set;
	int			error = 0;
	boolean_t		local_bind;
	boolean_t		orig_pkt_isv6 = connp->conn_pkt_isv6;
	ipa6_conn_x_t		*acx6;
	boolean_t		verify_dst;

	ASSERT(connp->conn_af_isv6);
	len = mp->b_wptr - mp->b_rptr;
	/* Need at least the T_bind_req plus the trailing protocol byte. */
	if (len < (sizeof (*tbr) + 1)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "ip_bind_v6: bogus msg, len %ld", len);
		goto bad_addr;
	}
	/* Back up and extract the protocol identifier. */
	mp->b_wptr--;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* Reset the message type in preparation for shipping it back. */
	mp->b_datap->db_type = M_PCPROTO;

	protocol = *mp->b_wptr & 0xFF;
	connp->conn_ulp = (uint8_t)protocol;

	/*
	 * Check for a zero length address. This is from a protocol that
	 * wants to register to receive all packets of its type.
	 */
	if (tbr->ADDR_length == 0) {
		if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
		    protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
		    ipcl_proto_fanout_v6[protocol].connf_head != NULL) {
			/*
			 * TCP, SCTP, AH, and ESP have single protocol fanouts.
			 * Do not allow others to bind to these.
			 */
			goto bad_addr;
		}

		connp->conn_srcv6 = ipv6_all_zeros;
		ipcl_proto_insert_v6(connp, protocol);

		tbr->PRIM_type = T_BIND_ACK;
		return (mp);
	}

	/* Extract the address pointer from the message. */
	ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
	    tbr->ADDR_length);
	if (ucp == NULL) {
		ip1dbg(("ip_bind_v6: no address\n"));
		goto bad_addr;
	}
	if (!OK_32PTR(ucp)) {
		ip1dbg(("ip_bind_v6: unaligned address\n"));
		goto bad_addr;
	}
	mp1 = mp->b_cont;	/* trailing mp if any */
	ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE);
	ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET);

	/*
	 * Decode the address according to its length. v6dstp, fport and
	 * verify_dst are only assigned (and only used) for the two
	 * connect forms, i.e. when local_bind is B_FALSE.
	 */
	switch (tbr->ADDR_length) {
	default:
		ip1dbg(("ip_bind_v6: bad address length %d\n",
		    (int)tbr->ADDR_length));
		goto bad_addr;

	case IPV6_ADDR_LEN:
		/* Verification of local address only */
		v6srcp = (in6_addr_t *)ucp;
		lport = 0;
		local_bind = B_TRUE;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)ucp;
		v6srcp = &sin6->sin6_addr;
		lport = sin6->sin6_port;
		local_bind = B_TRUE;
		break;

	case sizeof (ipa6_conn_t):
		/*
		 * Verify that both the source and destination addresses
		 * are valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		ac6 = (ipa6_conn_t *)ucp;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		/* For raw socket, the local port is not set. */
		lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
		    connp->conn_lport;
		local_bind = B_FALSE;
		/* Always verify destination reachability. */
		verify_dst = B_TRUE;
		break;

	case sizeof (ipa6_conn_x_t):
		/*
		 * Verify that the source address is valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		acx6 = (ipa6_conn_x_t *)ucp;
		ac6 = &acx6->ac6x_conn;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		lport = ac6->ac6_lport;
		local_bind = B_FALSE;
		/*
		 * Client that passed ipa6_conn_x_t to us specifies whether to
		 * verify destination reachability.
		 */
		verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
		break;
	}
	if (local_bind) {
		if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
			/* Bind to IPv4 address */
			ipaddr_t v4src;

			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);

			/*
			 * The last argument (fanout_insert) is false for a
			 * bare IPV6_ADDR_LEN address, which is only an
			 * address verification.
			 */
			error = ip_bind_laddr(connp, mp, v4src, lport,
			    ire_requested, ipsec_policy_set,
			    tbr->ADDR_length != IPV6_ADDR_LEN);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_FALSE;
		} else {
			/*
			 * A v4-mapped source on a v6only conn is rejected;
			 * error stays 0 so that TBADADDR is returned.
			 */
			if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
				error = 0;
				goto bad_addr;
			}
			error = ip_bind_laddr_v6(connp, mp, v6srcp, lport,
			    ire_requested, ipsec_policy_set,
			    (tbr->ADDR_length != IPV6_ADDR_LEN));
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
		if (protocol == IPPROTO_TCP)
			connp->conn_recv = tcp_conn_request;
	} else {
		/*
		 * Bind to local and remote address. Local might be
		 * unspecified in which case it will be extracted from
		 * ire_src_addr_v6
		 */
		if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
			/* Connect to IPv4 address */
			ipaddr_t v4src;
			ipaddr_t v4dst;

			/* Is the source unspecified or mapped? */
			if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
			    !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
				ip1dbg(("ip_bind_v6: "
				    "dst is mapped, but not the src\n"));
				goto bad_addr;
			}
			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
			IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);

			/*
			 * ipsec_policy_set is passed through to
			 * ip_bind_connected(). Destination reachability is
			 * always verified on this path (both trailing
			 * arguments are B_TRUE); verify_dst is not consulted.
			 */
			error = ip_bind_connected(connp, mp, &v4src, lport,
			    v4dst, fport, ire_requested, ipsec_policy_set,
			    B_TRUE, B_TRUE);
			if (error != 0)
				goto bad_addr;
			/* Write the (possibly updated) v4 source back. */
			IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
			connp->conn_pkt_isv6 = B_FALSE;
		} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
			ip1dbg(("ip_bind_v6: "
			    "src is mapped, but not the dst\n"));
			goto bad_addr;
		} else {
			error = ip_bind_connected_v6(connp, mp, v6srcp,
			    lport, v6dstp, ipp, fport, ire_requested,
			    ipsec_policy_set, B_TRUE, verify_dst);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
		if (protocol == IPPROTO_TCP)
			connp->conn_recv = tcp_input;
	}
	/* Update qinfo if v4/v6 changed */
	if ((orig_pkt_isv6 != connp->conn_pkt_isv6) &&
	    !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) {
		if (connp->conn_pkt_isv6)
			ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE);
		else
			ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE);
	}

	/*
	 * Pass the IPSEC headers size in ire_ipsec_overhead.
	 * We can't do this in ip_bind_insert_ire because the policy
	 * may not have been inherited at that point in time and hence
	 * conn_out_enforce_policy may not be set.
	 */
	mp1 = mp->b_cont;
	if (ire_requested && connp->conn_out_enforce_policy &&
	    mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) {
		ire_t *ire = (ire_t *)mp1->b_rptr;
		ASSERT(MBLKL(mp1) >= sizeof (ire_t));
		ire->ire_ipsec_overhead = (conn_ipsec_length(connp));
	}

	/* Send it home. */
	mp->b_datap->db_type = M_PCPROTO;
	tbr->PRIM_type = T_BIND_ACK;
	return (mp);

bad_addr:
	/* EINPROGRESS: nothing to return to the caller now. */
	if (error == EINPROGRESS)
		return (NULL);
	/* A positive errno becomes TSYSERR; 0 or -1 becomes TBADADDR. */
	if (error > 0)
		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
	else
		mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
	return (mp);
}

/*
 * Here address is verified to be a valid local address.
 * If the IRE_DB_REQ_TYPE mp is present, a multicast
 * address is also considered a valid local address.
 * In the case of a multicast address, however, the
 * upper protocol is expected to reset the src address
 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
 * no packets are emitted with multicast address as
 * source address.
 * The addresses valid for bind are:
 *	(1) - in6addr_any
 *	(2) - IP address of an UP interface
 *	(3) - IP address of a DOWN interface
 *	(4) - a multicast address. In this case
 *	the conn will only receive packets destined to
 *	the specified multicast address. Note: the
 *	application still has to issue an
 *	IPV6_JOIN_GROUP socket option.
 *
 * In all the above cases, the bound address must be valid in the current zone.
 * When the address is loopback or multicast, there might be many matching IREs
 * so bind has to look up based on the zone.
 *
 * Returns 0 on success, EADDRNOTAVAIL for an address that is not valid for
 * bind, EINPROGRESS when ipif_lookup_addr_v6() reports it, the value from
 * ipcl_bind_insert_v6() if the fanout insert fails, or -1 if attaching the
 * IRE or the IPsec policy fails. When ipsec_policy_set, the policy mblk in
 * mp->b_cont is freed before returning (except on the EINPROGRESS return).
 */
static int
ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
    uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set,
    boolean_t fanout_insert)
{
	int		error = 0;
	ire_t		*src_ire = NULL;
	ipif_t		*ipif = NULL;
	mblk_t		*policy_mp;
	zoneid_t	zoneid;

	/* policy_mp is only valid (and only used) when ipsec_policy_set. */
	if (ipsec_policy_set)
		policy_mp = mp->b_cont;

	/*
	 * If it was previously connected, conn_fully_bound would have
	 * been set.
	 */
	connp->conn_fully_bound = B_FALSE;

	zoneid = connp->conn_zoneid;

	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		src_ire = ire_route_lookup_v6(v6src, 0, 0,
		    0, NULL, NULL, zoneid, MATCH_IRE_ZONEONLY);
		/*
		 * If an address other than in6addr_any is requested,
		 * we verify that it is a valid address for bind
		 * Note: Following code is in if-else-if form for
		 * readability compared to a condition check.
		 */
		ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
		/* LINTED - statement has no consequent */
		if (IRE_IS_LOCAL(src_ire)) {
			/*
			 * (2) Bind to address of local UP interface
			 */
			ipif = src_ire->ire_ipif;
		} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
			ipif_t	*multi_ipif = NULL;
			ire_t	*save_ire;
			/*
			 * (4) bind to multicast address.
			 * Fake out the IRE returned to upper
			 * layer to be a broadcast IRE in
			 * ip_bind_insert_ire_v6().
			 * Pass other information that matches
			 * the ipif (e.g. the source address).
			 * conn_multicast_ill is only used for
			 * IPv6 packets
			 */
			mutex_enter(&connp->conn_lock);
			if (connp->conn_multicast_ill != NULL) {
				(void) ipif_lookup_zoneid(
				    connp->conn_multicast_ill, zoneid, 0,
				    &multi_ipif);
			} else {
				/*
				 * Look for default like
				 * ip_wput_v6
				 */
				multi_ipif = ipif_lookup_group_v6(
				    &ipv6_unspecified_group, zoneid);
			}
			mutex_exit(&connp->conn_lock);
			/*
			 * Replace src_ire with the IRE of the multicast
			 * ipif; on failure restore the looked-up IRE so it
			 * is released at bad_addr.
			 */
			save_ire = src_ire;
			src_ire = NULL;
			if (multi_ipif == NULL ||
			    !ire_requested || (src_ire =
			    ipif_to_ire_v6(multi_ipif)) ==
			    NULL) {
				src_ire = save_ire;
				error = EADDRNOTAVAIL;
			} else {
				ASSERT(src_ire != NULL);
				if (save_ire != NULL)
					ire_refrele(save_ire);
			}
			if (multi_ipif != NULL)
				ipif_refrele(multi_ipif);
		} else {
			/*
			 * (3) Possibly the address of a DOWN interface.
			 * The protocol byte is put back after b_wptr for
			 * the duration of the lookup, since mp is handed to
			 * ipif_lookup_addr_v6() together with
			 * ip_wput_nondata.
			 */
			*mp->b_wptr++ = (char)connp->conn_ulp;
			ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid,
			    CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error);
			if (ipif == NULL) {
				if (error == EINPROGRESS) {
					if (src_ire != NULL)
						ire_refrele(src_ire);
					return (error);
				}
				/*
				 * Not a valid address for bind
				 */
				error = EADDRNOTAVAIL;
			} else {
				ipif_refrele(ipif);
			}
			/*
			 * Just to keep it consistent with the processing in
			 * ip_bind_v6().
			 */
			mp->b_wptr--;
		}

		if (error != 0) {
			/* Red Alert! Attempting to be a bogon! */
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ip_bind_laddr_v6: bad src"
				    " address %s\n", AF_INET6, v6src);
			}
			goto bad_addr;
		}
	}

	/*
	 * Allow setting new policies. For example, disconnects come
	 * down as ipa_t bind. As we would have set conn_policy_cached
	 * to B_TRUE before, we should set it to B_FALSE, so that policy
	 * can change after the disconnect.
	 */
	connp->conn_policy_cached = B_FALSE;

	/* If not fanout_insert this was just an address verification */
	if (fanout_insert) {
		/*
		 * The addresses have been verified. Time to insert in
		 * the correct fanout list.
		 */
		connp->conn_srcv6 = *v6src;
		connp->conn_remv6 = ipv6_all_zeros;
		connp->conn_lport = lport;
		connp->conn_fport = 0;
		/* *mp->b_wptr is the protocol byte just past the data. */
		error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport);
	}
	if (error == 0) {
		if (ire_requested) {
			if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) {
				error = -1;
				goto bad_addr;
			}
		} else if (ipsec_policy_set) {
			if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
				error = -1;
				goto bad_addr;
			}
		}
	}
bad_addr:
	/* Common exit for both success and failure. */
	if (src_ire != NULL)
		ire_refrele(src_ire);

	if (ipsec_policy_set) {
		ASSERT(policy_mp != NULL);
		freeb(policy_mp);
		/*
		 * As of now assume that nothing else accompanies
		 * IPSEC_POLICY_SET.
		 */
		mp->b_cont = NULL;
	}
	return (error);
}

/*
 * Restart a bind request that was queued via ip6_asp_pending_op(). The
 * request is run through ip_bind_v6() again and, if that yields a reply,
 * the reply is handed to TCP (through the squeue), to UDP, or sent back up
 * the queue for other clients. A NULL reply from ip_bind_v6() means
 * there is nothing to deliver at this point.
 */
/* ARGSUSED */
static void
ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
    void *dummy_arg)
{
	conn_t *connp = NULL;
	t_scalar_t prim;

	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);

	if (CONN_Q(q))
		connp = Q_TO_CONN(q);
	ASSERT(connp != NULL);

	prim = ((union T_primitives *)mp->b_rptr)->type;
	ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ);

	if (IPCL_IS_TCP(connp)) {
		/* Pass sticky_ipp for scope_id and pktinfo */
		mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp);
	} else {
		/* For UDP and ICMP */
		mp = ip_bind_v6(q, mp, connp, NULL);
	}
	if (mp != NULL) {
		if (IPCL_IS_TCP(connp)) {
			/* Hold the conn across the squeue hand-off. */
			CONN_INC_REF(connp);
			squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind,
			    connp, SQTAG_TCP_RPUTOTHER);
		} else if (IPCL_IS_UDP(connp)) {
			udp_resume_bind(connp, mp);
		} else {
			qreply(q, mp);
			CONN_OPER_PENDING_DONE(connp);
		}
	}
}

/*
 * Verify that both the source and destination addresses
 * are valid. If verify_dst, then destination address must also be reachable,
 * i.e. have a route. Protocols like TCP want this. Tunnels do not.
 * It takes ip6_pkt_t * as one of the arguments to determine correct
 * source address when IPV6_PKTINFO or scope_id is set along with a link-local
 * destination address. Note that parameter ipp is only useful for TCP connect
 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
 * non-TCP cases, it is NULL and for all other tcp cases it is not useful.
2610 * 2611 */ 2612 static int 2613 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2614 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2615 boolean_t ire_requested, boolean_t ipsec_policy_set, 2616 boolean_t fanout_insert, boolean_t verify_dst) 2617 { 2618 ire_t *src_ire; 2619 ire_t *dst_ire; 2620 int error = 0; 2621 int protocol; 2622 mblk_t *policy_mp; 2623 ire_t *sire = NULL; 2624 ire_t *md_dst_ire = NULL; 2625 ill_t *md_ill = NULL; 2626 ill_t *dst_ill = NULL; 2627 ipif_t *src_ipif = NULL; 2628 zoneid_t zoneid; 2629 boolean_t ill_held = B_FALSE; 2630 2631 src_ire = dst_ire = NULL; 2632 /* 2633 * NOTE: The protocol is beyond the wptr because that's how 2634 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2635 */ 2636 protocol = *mp->b_wptr & 0xFF; 2637 2638 /* 2639 * If we never got a disconnect before, clear it now. 2640 */ 2641 connp->conn_fully_bound = B_FALSE; 2642 2643 if (ipsec_policy_set) { 2644 policy_mp = mp->b_cont; 2645 } 2646 2647 zoneid = connp->conn_zoneid; 2648 2649 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2650 ipif_t *ipif; 2651 2652 /* 2653 * Use an "emulated" IRE_BROADCAST to tell the transport it 2654 * is a multicast. 2655 * Pass other information that matches 2656 * the ipif (e.g. the source address). 
2657 * 2658 * conn_multicast_ill is only used for IPv6 packets 2659 */ 2660 mutex_enter(&connp->conn_lock); 2661 if (connp->conn_multicast_ill != NULL) { 2662 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2663 zoneid, 0, &ipif); 2664 } else { 2665 /* Look for default like ip_wput_v6 */ 2666 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2667 } 2668 mutex_exit(&connp->conn_lock); 2669 if (ipif == NULL || !ire_requested || 2670 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2671 if (ipif != NULL) 2672 ipif_refrele(ipif); 2673 if (ip_debug > 2) { 2674 /* ip1dbg */ 2675 pr_addr_dbg("ip_bind_connected_v6: bad " 2676 "connected multicast %s\n", AF_INET6, 2677 v6dst); 2678 } 2679 error = ENETUNREACH; 2680 goto bad_addr; 2681 } 2682 if (ipif != NULL) 2683 ipif_refrele(ipif); 2684 } else { 2685 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2686 NULL, &sire, zoneid, 2687 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2688 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE); 2689 /* 2690 * We also prevent ire's with src address INADDR_ANY to 2691 * be used, which are created temporarily for 2692 * sending out packets from endpoints that have 2693 * conn_unspec_src set. 2694 */ 2695 if (dst_ire == NULL || 2696 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2697 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2698 /* 2699 * When verifying destination reachability, we always 2700 * complain. 2701 * 2702 * When not verifying destination reachability but we 2703 * found an IRE, i.e. the destination is reachable, 2704 * then the other tests still apply and we complain. 
2705 */ 2706 if (verify_dst || (dst_ire != NULL)) { 2707 if (ip_debug > 2) { 2708 /* ip1dbg */ 2709 pr_addr_dbg("ip_bind_connected_v6: bad" 2710 " connected dst %s\n", AF_INET6, 2711 v6dst); 2712 } 2713 if (dst_ire == NULL || 2714 !(dst_ire->ire_type & IRE_HOST)) { 2715 error = ENETUNREACH; 2716 } else { 2717 error = EHOSTUNREACH; 2718 } 2719 goto bad_addr; 2720 } 2721 } 2722 } 2723 2724 /* 2725 * If the app does a connect(), it means that it will most likely 2726 * send more than 1 packet to the destination. It makes sense 2727 * to clear the temporary flag. 2728 */ 2729 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2730 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2731 irb_t *irb = dst_ire->ire_bucket; 2732 2733 rw_enter(&irb->irb_lock, RW_WRITER); 2734 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2735 irb->irb_tmp_ire_cnt--; 2736 rw_exit(&irb->irb_lock); 2737 } 2738 2739 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2740 2741 /* 2742 * See if we should notify ULP about MDT; we do this whether or not 2743 * ire_requested is TRUE, in order to handle active connects; MDT 2744 * eligibility tests for passive connects are handled separately 2745 * through tcp_adapt_ire(). We do this before the source address 2746 * selection, because dst_ire may change after a call to 2747 * ipif_select_source_v6(). This is a best-effort check, as the 2748 * packet for this connection may not actually go through 2749 * dst_ire->ire_stq, and the exact IRE can only be known after 2750 * calling ip_newroute_v6(). This is why we further check on the 2751 * IRE during Multidata packet transmission in tcp_multisend(). 
2752 */ 2753 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2754 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2755 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2756 ILL_MDT_CAPABLE(md_ill)) { 2757 md_dst_ire = dst_ire; 2758 IRE_REFHOLD(md_dst_ire); 2759 } 2760 2761 if (dst_ire != NULL && 2762 dst_ire->ire_type == IRE_LOCAL && 2763 dst_ire->ire_zoneid != zoneid) { 2764 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2765 zoneid, 0, 2766 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2767 MATCH_IRE_RJ_BHOLE); 2768 if (src_ire == NULL) { 2769 error = EHOSTUNREACH; 2770 goto bad_addr; 2771 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2772 if (!(src_ire->ire_type & IRE_HOST)) 2773 error = ENETUNREACH; 2774 else 2775 error = EHOSTUNREACH; 2776 goto bad_addr; 2777 } 2778 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2779 src_ipif = src_ire->ire_ipif; 2780 ipif_refhold(src_ipif); 2781 *v6src = src_ipif->ipif_v6lcl_addr; 2782 } 2783 ire_refrele(src_ire); 2784 src_ire = NULL; 2785 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2786 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2787 *v6src = sire->ire_src_addr_v6; 2788 ire_refrele(dst_ire); 2789 dst_ire = sire; 2790 sire = NULL; 2791 } else if (dst_ire->ire_type == IRE_CACHE && 2792 (dst_ire->ire_flags & RTF_SETSRC)) { 2793 ASSERT(dst_ire->ire_zoneid == zoneid); 2794 *v6src = dst_ire->ire_src_addr_v6; 2795 } else { 2796 /* 2797 * Pick a source address so that a proper inbound load 2798 * spreading would happen. Use dst_ill specified by the 2799 * app. when socket option or scopeid is set. 
2800 */ 2801 int err; 2802 2803 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2804 uint_t if_index; 2805 2806 /* 2807 * Scope id or IPV6_PKTINFO 2808 */ 2809 2810 if_index = ipp->ipp_ifindex; 2811 dst_ill = ill_lookup_on_ifindex( 2812 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2813 if (dst_ill == NULL) { 2814 ip1dbg(("ip_bind_connected_v6:" 2815 " bad ifindex %d\n", if_index)); 2816 error = EADDRNOTAVAIL; 2817 goto bad_addr; 2818 } 2819 ill_held = B_TRUE; 2820 } else if (connp->conn_outgoing_ill != NULL) { 2821 /* 2822 * For IPV6_BOUND_IF socket option, 2823 * conn_outgoing_ill should be set 2824 * already in TCP or UDP/ICMP. 2825 */ 2826 dst_ill = conn_get_held_ill(connp, 2827 &connp->conn_outgoing_ill, &err); 2828 if (err == ILL_LOOKUP_FAILED) { 2829 ip1dbg(("ip_bind_connected_v6:" 2830 "no ill for bound_if\n")); 2831 error = EADDRNOTAVAIL; 2832 goto bad_addr; 2833 } 2834 ill_held = B_TRUE; 2835 } else if (dst_ire->ire_stq != NULL) { 2836 /* No need to hold ill here */ 2837 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2838 } else { 2839 /* No need to hold ill here */ 2840 dst_ill = dst_ire->ire_ipif->ipif_ill; 2841 } 2842 if (!ip6_asp_can_lookup()) { 2843 *mp->b_wptr++ = (char)protocol; 2844 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2845 ip_bind_connected_resume_v6); 2846 error = EINPROGRESS; 2847 goto refrele_and_quit; 2848 } 2849 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2850 B_FALSE, connp->conn_src_preferences, zoneid); 2851 ip6_asp_table_refrele(); 2852 if (src_ipif == NULL) { 2853 pr_addr_dbg("ip_bind_connected_v6: " 2854 "no usable source address for " 2855 "connection to %s\n", AF_INET6, v6dst); 2856 error = EADDRNOTAVAIL; 2857 goto bad_addr; 2858 } 2859 *v6src = src_ipif->ipif_v6lcl_addr; 2860 } 2861 } 2862 2863 /* 2864 * We do ire_route_lookup_v6() here (and not an interface lookup) 2865 * as we assert that v6src should only come from an 2866 * UP interface for hard binding. 
2867 */ 2868 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2869 NULL, zoneid, MATCH_IRE_ZONEONLY); 2870 2871 /* src_ire must be a local|loopback */ 2872 if (!IRE_IS_LOCAL(src_ire)) { 2873 if (ip_debug > 2) { 2874 /* ip1dbg */ 2875 pr_addr_dbg("ip_bind_connected_v6: bad " 2876 "connected src %s\n", AF_INET6, v6src); 2877 } 2878 error = EADDRNOTAVAIL; 2879 goto bad_addr; 2880 } 2881 2882 /* 2883 * If the source address is a loopback address, the 2884 * destination had best be local or multicast. 2885 * The transports that can't handle multicast will reject 2886 * those addresses. 2887 */ 2888 if (src_ire->ire_type == IRE_LOOPBACK && 2889 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2890 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2891 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2892 error = -1; 2893 goto bad_addr; 2894 } 2895 /* 2896 * Allow setting new policies. For example, disconnects come 2897 * down as ipa_t bind. As we would have set conn_policy_cached 2898 * to B_TRUE before, we should set it to B_FALSE, so that policy 2899 * can change after the disconnect. 2900 */ 2901 connp->conn_policy_cached = B_FALSE; 2902 2903 /* 2904 * The addresses have been verified. Initialize the conn 2905 * before calling the policy as they expect the conns 2906 * initialized. 2907 */ 2908 connp->conn_srcv6 = *v6src; 2909 connp->conn_remv6 = *v6dst; 2910 connp->conn_lport = lport; 2911 connp->conn_fport = fport; 2912 2913 ASSERT(!(ipsec_policy_set && ire_requested)); 2914 if (ire_requested) { 2915 iulp_t *ulp_info = NULL; 2916 2917 /* 2918 * Note that sire will not be NULL if this is an off-link 2919 * connection and there is not cache for that dest yet. 2920 * 2921 * XXX Because of an existing bug, if there are multiple 2922 * default routes, the IRE returned now may not be the actual 2923 * default route used (default routes are chosen in a 2924 * round robin fashion). 
So if the metrics for different 2925 * default routes are different, we may return the wrong 2926 * metrics. This will not be a problem if the existing 2927 * bug is fixed. 2928 */ 2929 if (sire != NULL) 2930 ulp_info = &(sire->ire_uinfo); 2931 2932 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 2933 error = -1; 2934 goto bad_addr; 2935 } 2936 } else if (ipsec_policy_set) { 2937 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2938 error = -1; 2939 goto bad_addr; 2940 } 2941 } 2942 2943 /* 2944 * Cache IPsec policy in this conn. If we have per-socket policy, 2945 * we'll cache that. If we don't, we'll inherit global policy. 2946 * 2947 * We can't insert until the conn reflects the policy. Note that 2948 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2949 * connections where we don't have a policy. This is to prevent 2950 * global policy lookups in the inbound path. 2951 * 2952 * If we insert before we set conn_policy_cached, 2953 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2954 * because global policy cound be non-empty. We normally call 2955 * ipsec_check_policy() for conn_policy_cached connections only if 2956 * conn_in_enforce_policy is set. But in this case, 2957 * conn_policy_cached can get set anytime since we made the 2958 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2959 * is called, which will make the above assumption false. Thus, we 2960 * need to insert after we set conn_policy_cached. 2961 */ 2962 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2963 goto bad_addr; 2964 2965 /* If not fanout_insert this was just an address verification */ 2966 if (fanout_insert) { 2967 /* 2968 * The addresses have been verified. Time to insert in 2969 * the correct fanout list. 2970 */ 2971 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2972 connp->conn_ports, 2973 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2974 } 2975 if (error == 0) { 2976 connp->conn_fully_bound = B_TRUE; 2977 /* 2978 * Our initial checks for MDT have passed; the IRE is not 2979 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2980 * be supporting MDT. Pass the IRE, IPC and ILL into 2981 * ip_mdinfo_return(), which performs further checks 2982 * against them and upon success, returns the MDT info 2983 * mblk which we will attach to the bind acknowledgment. 2984 */ 2985 if (md_dst_ire != NULL) { 2986 mblk_t *mdinfo_mp; 2987 2988 ASSERT(md_ill != NULL); 2989 ASSERT(md_ill->ill_mdt_capab != NULL); 2990 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2991 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2992 linkb(mp, mdinfo_mp); 2993 } 2994 } 2995 bad_addr: 2996 if (ipsec_policy_set) { 2997 ASSERT(policy_mp != NULL); 2998 freeb(policy_mp); 2999 /* 3000 * As of now assume that nothing else accompanies 3001 * IPSEC_POLICY_SET. 3002 */ 3003 mp->b_cont = NULL; 3004 } 3005 refrele_and_quit: 3006 if (src_ire != NULL) 3007 IRE_REFRELE(src_ire); 3008 if (dst_ire != NULL) 3009 IRE_REFRELE(dst_ire); 3010 if (sire != NULL) 3011 IRE_REFRELE(sire); 3012 if (src_ipif != NULL) 3013 ipif_refrele(src_ipif); 3014 if (md_dst_ire != NULL) 3015 IRE_REFRELE(md_dst_ire); 3016 if (ill_held && dst_ill != NULL) 3017 ill_refrele(dst_ill); 3018 return (error); 3019 } 3020 3021 /* 3022 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3023 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3024 */ 3025 static boolean_t 3026 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3027 iulp_t *ulp_info) 3028 { 3029 mblk_t *mp1; 3030 ire_t *ret_ire; 3031 3032 mp1 = mp->b_cont; 3033 ASSERT(mp1 != NULL); 3034 3035 if (ire != NULL) { 3036 /* 3037 * mp1 initialized above to IRE_DB_REQ_TYPE 3038 * appended mblk. Its <upper protocol>'s 3039 * job to make sure there is room. 
3040 */ 3041 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3042 return (B_FALSE); 3043 3044 mp1->b_datap->db_type = IRE_DB_TYPE; 3045 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3046 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3047 ret_ire = (ire_t *)mp1->b_rptr; 3048 if (IN6_IS_ADDR_MULTICAST(dst) || 3049 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3050 ret_ire->ire_type = IRE_BROADCAST; 3051 ret_ire->ire_addr_v6 = *dst; 3052 } 3053 if (ulp_info != NULL) { 3054 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3055 sizeof (iulp_t)); 3056 } 3057 ret_ire->ire_mp = mp1; 3058 } else { 3059 /* 3060 * No IRE was found. Remove IRE mblk. 3061 */ 3062 mp->b_cont = mp1->b_cont; 3063 freeb(mp1); 3064 } 3065 return (B_TRUE); 3066 } 3067 3068 /* 3069 * Add an ip6i_t header to the front of the mblk. 3070 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3071 * Returns NULL if allocation fails (and frees original message). 3072 * Used in outgoing path when going through ip_newroute_*v6(). 3073 * Used in incoming path to pass ifindex to transports. 
3074 */ 3075 mblk_t * 3076 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3077 { 3078 mblk_t *mp1; 3079 ip6i_t *ip6i; 3080 ip6_t *ip6h; 3081 3082 ip6h = (ip6_t *)mp->b_rptr; 3083 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3084 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3085 mp->b_datap->db_ref > 1) { 3086 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3087 if (mp1 == NULL) { 3088 freemsg(mp); 3089 return (NULL); 3090 } 3091 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3092 mp1->b_cont = mp; 3093 mp = mp1; 3094 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3095 } 3096 mp->b_rptr = (uchar_t *)ip6i; 3097 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3098 ip6i->ip6i_nxt = IPPROTO_RAW; 3099 if (ill != NULL) { 3100 ip6i->ip6i_flags = IP6I_IFINDEX; 3101 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3102 } else { 3103 ip6i->ip6i_flags = 0; 3104 } 3105 ip6i->ip6i_nexthop = *dst; 3106 return (mp); 3107 } 3108 3109 /* 3110 * Handle protocols with which IP is less intimate. There 3111 * can be more than one stream bound to a particular 3112 * protocol. When this is the case, normally each one gets a copy 3113 * of any incoming packets. 3114 * However, if the packet was tunneled and not multicast we only send to it 3115 * the first match. 3116 * 3117 * Zones notes: 3118 * Packets will be distributed to streams in all zones. This is really only 3119 * useful for ICMPv6 as only applications in the global zone can create raw 3120 * sockets for other protocols. 
 *
 * 'mp' is the packet (preceded by an M_CTL when mctl_present), 'ill' is
 * used for MIB accounting and matching, and 'inill' supplies the ifindex
 * placed in the ip6i_t header when one is added. The first matching conn
 * receives the original message; every further match receives a copy.
 * If no conn matches, ip_fanout_send_icmp_v6() is called to report an
 * unrecognized next header at 'nexthdr_offset'.
 */
static void
ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
    ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags,
    boolean_t mctl_present, zoneid_t zoneid)
{
	queue_t		*rq;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	dst = ip6h->ip6_dst;
	in6_addr_t	src = ip6h->ip6_src;
	boolean_t	one_only;
	mblk_t		*first_mp = mp;
	boolean_t	secure;
	conn_t		*connp, *first_connp, *next_connp;
	connf_t		*connfp;

	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/*
	 * If the packet was tunneled and not multicast we only send to it
	 * the first match.
	 */
	one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) &&
	    !IN6_IS_ADDR_MULTICAST(&dst));

	/* Find the first conn bound to this protocol that matches. */
	connfp = &ipcl_proto_fanout_v6[nexthdr];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags,
		    zoneid))
			break;
	}

	if (connp == NULL || connp->conn_upq == NULL) {
		/*
		 * No one bound to this port. Is
		 * there a client that wants all
		 * unclaimed datagrams?
		 */
		mutex_exit(&connfp->connf_lock);
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER,
		    nexthdr_offset, mctl_present, zoneid)) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos);
		}

		return;
	}

	/* Hold the first match; it gets the original message at the end. */
	CONN_INC_REF(connp);
	first_connp = connp;

	/*
	 * XXX: Fix the multiple protocol listeners case. We should not
	 * be walking the conn->next list here.
	 */
	if (one_only) {
		/*
		 * Only send message to one tunnel driver by immediately
		 * terminating the loop.
		 */
		connp = NULL;
	} else {
		connp = connp->conn_next;

	}
	/*
	 * Deliver a copy to every additional matching conn. connf_lock is
	 * held while walking the list and dropped around each delivery;
	 * the conn being delivered to is kept alive by its reference.
	 */
	for (;;) {
		while (connp != NULL) {
			if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill,
			    flags, zoneid))
				break;
			connp = connp->conn_next;
		}

		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL || connp->conn_upq == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_connp;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		rq = connp->conn_rq;
		/*
		 * For link-local always add ifindex so that transport can set
		 * sin6_scope_id. Avoid it for ICMP error fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/*
		 * NOTE(review): when mctl_present, the duplicated M_CTL
		 * (first_mp1) does not appear to be freed on the two
		 * discard paths below, and after ip_add_info_v6() prepends
		 * a new mblk first_mp1->b_cont is not updated - confirm.
		 */
		if (mp1 == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
		} else if (!canputnext(rq)) {
			if (flags & IP_FF_RAWIP) {
				BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows);
			} else {
				BUMP_MIB(ill->ill_icmp6_mib,
				    ipv6IfIcmpInOverflows);
			}

			freemsg(mp1);
		} else {
			if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
				first_mp1 = ipsec_check_inbound_policy
				    (first_mp1, connp, NULL, ip6h,
				    mctl_present);
			}
			if (first_mp1 != NULL) {
				/* Only the data part is sent upstream. */
				if (mctl_present)
					freeb(first_mp1);
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);
				putnext(rq, mp1);
			}
		}
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_connp = connp->conn_next;
		CONN_DEC_REF(connp);
		connp = next_connp;
	}

	/* Last one. Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) &&
	    (flags & IP_FF_IP6INFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/* Relink the (possibly new) head behind the M_CTL. */
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	rq = connp->conn_rq;
	if (!canputnext(rq)) {
		if (flags & IP_FF_RAWIP) {
			BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows);
		} else {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows);
		}

		freemsg(first_mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp, connp,
			    NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);
		putnext(rq, mp);
		/* The M_CTL is never sent upstream. */
		if (mctl_present)
			freeb(first_mp);
	}
	CONN_DEC_REF(connp);
}

/*
 * Send an ICMP error after patching up the packet appropriately. Returns
 * non-zero if the appropriate MIB should be bumped; zero otherwise.
3332 */ 3333 int 3334 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3335 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3336 boolean_t mctl_present, zoneid_t zoneid) 3337 { 3338 ip6_t *ip6h; 3339 mblk_t *first_mp; 3340 boolean_t secure; 3341 unsigned char db_type; 3342 3343 first_mp = mp; 3344 if (mctl_present) { 3345 mp = mp->b_cont; 3346 secure = ipsec_in_is_secure(first_mp); 3347 ASSERT(mp != NULL); 3348 } else { 3349 /* 3350 * If this is an ICMP error being reported - which goes 3351 * up as M_CTLs, we need to convert them to M_DATA till 3352 * we finish checking with global policy because 3353 * ipsec_check_global_policy() assumes M_DATA as clear 3354 * and M_CTL as secure. 3355 */ 3356 db_type = mp->b_datap->db_type; 3357 mp->b_datap->db_type = M_DATA; 3358 secure = B_FALSE; 3359 } 3360 /* 3361 * We are generating an icmp error for some inbound packet. 3362 * Called from all ip_fanout_(udp, tcp, proto) functions. 3363 * Before we generate an error, check with global policy 3364 * to see whether this is allowed to enter the system. As 3365 * there is no "conn", we are checking with global policy. 
3366 */ 3367 ip6h = (ip6_t *)mp->b_rptr; 3368 if (secure || ipsec_inbound_v6_policy_present) { 3369 first_mp = ipsec_check_global_policy(first_mp, NULL, 3370 NULL, ip6h, mctl_present); 3371 if (first_mp == NULL) 3372 return (0); 3373 } 3374 3375 if (!mctl_present) 3376 mp->b_datap->db_type = db_type; 3377 3378 if (flags & IP_FF_SEND_ICMP) { 3379 if (flags & IP_FF_HDR_COMPLETE) { 3380 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3381 freemsg(first_mp); 3382 return (1); 3383 } 3384 } 3385 switch (icmp_type) { 3386 case ICMP6_DST_UNREACH: 3387 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3388 B_FALSE, B_FALSE); 3389 break; 3390 case ICMP6_PARAM_PROB: 3391 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3392 nexthdr_offset, B_FALSE, B_FALSE); 3393 break; 3394 default: 3395 #ifdef DEBUG 3396 panic("ip_fanout_send_icmp_v6: wrong type"); 3397 /*NOTREACHED*/ 3398 #else 3399 freemsg(first_mp); 3400 break; 3401 #endif 3402 } 3403 } else { 3404 freemsg(first_mp); 3405 return (0); 3406 } 3407 3408 return (1); 3409 } 3410 3411 3412 /* 3413 * Fanout for TCP packets 3414 * The caller puts <fport, lport> in the ports parameter. 3415 */ 3416 static void 3417 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3418 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3419 { 3420 mblk_t *first_mp; 3421 boolean_t secure; 3422 conn_t *connp; 3423 tcph_t *tcph; 3424 boolean_t syn_present = B_FALSE; 3425 3426 first_mp = mp; 3427 if (mctl_present) { 3428 mp = first_mp->b_cont; 3429 secure = ipsec_in_is_secure(first_mp); 3430 ASSERT(mp != NULL); 3431 } else { 3432 secure = B_FALSE; 3433 } 3434 3435 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3436 3437 if (connp == NULL || 3438 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3439 /* 3440 * No hard-bound match. Send Reset. 
3441 */ 3442 dblk_t *dp = mp->b_datap; 3443 uint32_t ill_index; 3444 3445 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3446 3447 /* Initiate IPPf processing, if needed. */ 3448 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3449 ill_index = ill->ill_phyint->phyint_ifindex; 3450 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3451 if (first_mp == NULL) { 3452 if (connp != NULL) 3453 CONN_DEC_REF(connp); 3454 return; 3455 } 3456 } 3457 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3458 tcp_xmit_listeners_reset(first_mp, hdr_len); 3459 if (connp != NULL) 3460 CONN_DEC_REF(connp); 3461 return; 3462 } 3463 3464 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3465 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3466 if (connp->conn_flags & IPCL_TCP) { 3467 squeue_t *sqp; 3468 3469 /* 3470 * For fused tcp loopback, assign the eager's 3471 * squeue to be that of the active connect's. 3472 */ 3473 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3474 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3475 !IP6_IN_IPP(flags)) { 3476 ASSERT(Q_TO_CONN(q) != NULL); 3477 sqp = Q_TO_CONN(q)->conn_sqp; 3478 } else { 3479 sqp = IP_SQUEUE_GET(lbolt); 3480 } 3481 3482 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3483 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3484 3485 /* 3486 * db_cksumstuff is unused in the incoming 3487 * path; Thus store the ifindex here. It will 3488 * be cleared in tcp_conn_create_v6(). 
3489 */ 3490 DB_CKSUMSTUFF(mp) = 3491 (intptr_t)ill->ill_phyint->phyint_ifindex; 3492 syn_present = B_TRUE; 3493 } 3494 } 3495 3496 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3497 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3498 if ((flags & TH_RST) || (flags & TH_URG)) { 3499 CONN_DEC_REF(connp); 3500 freemsg(first_mp); 3501 return; 3502 } 3503 if (flags & TH_ACK) { 3504 tcp_xmit_listeners_reset(first_mp, hdr_len); 3505 CONN_DEC_REF(connp); 3506 return; 3507 } 3508 3509 CONN_DEC_REF(connp); 3510 freemsg(first_mp); 3511 return; 3512 } 3513 3514 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3515 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3516 NULL, ip6h, mctl_present); 3517 if (first_mp == NULL) { 3518 CONN_DEC_REF(connp); 3519 return; 3520 } 3521 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3522 ASSERT(syn_present); 3523 if (mctl_present) { 3524 ASSERT(first_mp != mp); 3525 first_mp->b_datap->db_struioflag |= 3526 STRUIO_POLICY; 3527 } else { 3528 ASSERT(first_mp == mp); 3529 mp->b_datap->db_struioflag &= 3530 ~STRUIO_EAGER; 3531 mp->b_datap->db_struioflag |= 3532 STRUIO_POLICY; 3533 } 3534 } else { 3535 /* 3536 * Discard first_mp early since we're dealing with a 3537 * fully-connected conn_t and tcp doesn't do policy in 3538 * this case. Also, if someone is bound to IPPROTO_TCP 3539 * over raw IP, they don't expect to see a M_CTL. 3540 */ 3541 if (mctl_present) { 3542 freeb(first_mp); 3543 mctl_present = B_FALSE; 3544 } 3545 first_mp = mp; 3546 } 3547 } 3548 3549 /* Initiate IPPF processing */ 3550 if (IP6_IN_IPP(flags)) { 3551 uint_t ifindex; 3552 3553 mutex_enter(&ill->ill_lock); 3554 ifindex = ill->ill_phyint->phyint_ifindex; 3555 mutex_exit(&ill->ill_lock); 3556 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3557 if (mp == NULL) { 3558 CONN_DEC_REF(connp); 3559 if (mctl_present) { 3560 freeb(first_mp); 3561 } 3562 return; 3563 } else if (mctl_present) { 3564 /* 3565 * ip_add_info_v6 might return a new mp. 
3566 */ 3567 ASSERT(first_mp != mp); 3568 first_mp->b_cont = mp; 3569 } else { 3570 first_mp = mp; 3571 } 3572 } 3573 3574 /* 3575 * For link-local always add ifindex so that TCP can bind to that 3576 * interface. Avoid it for ICMP error fanout. 3577 */ 3578 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3579 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3580 (flags & IP_FF_IP6INFO))) { 3581 /* Add header */ 3582 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3583 if (mp == NULL) { 3584 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3585 CONN_DEC_REF(connp); 3586 if (mctl_present) 3587 freeb(first_mp); 3588 return; 3589 } else if (mctl_present) { 3590 ASSERT(first_mp != mp); 3591 first_mp->b_cont = mp; 3592 } else { 3593 first_mp = mp; 3594 } 3595 } 3596 3597 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3598 if (IPCL_IS_TCP(connp)) { 3599 (*ip_input_proc)(connp->conn_sqp, first_mp, 3600 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3601 } else { 3602 putnext(connp->conn_rq, first_mp); 3603 CONN_DEC_REF(connp); 3604 } 3605 } 3606 3607 /* 3608 * Fanout for UDP packets. 3609 * The caller puts <fport, lport> in the ports parameter. 3610 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3611 * 3612 * If SO_REUSEADDR is set all multicast and broadcast packets 3613 * will be delivered to all streams bound to the same port. 3614 * 3615 * Zones notes: 3616 * Multicast packets will be distributed to streams in all zones. 
3617 */ 3618 static void 3619 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3620 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3621 zoneid_t zoneid) 3622 { 3623 uint32_t dstport, srcport; 3624 in6_addr_t dst; 3625 mblk_t *first_mp; 3626 boolean_t secure; 3627 conn_t *connp; 3628 connf_t *connfp; 3629 conn_t *first_conn; 3630 conn_t *next_conn; 3631 mblk_t *mp1, *first_mp1; 3632 in6_addr_t src; 3633 3634 first_mp = mp; 3635 if (mctl_present) { 3636 mp = first_mp->b_cont; 3637 secure = ipsec_in_is_secure(first_mp); 3638 ASSERT(mp != NULL); 3639 } else { 3640 secure = B_FALSE; 3641 } 3642 3643 /* Extract ports in net byte order */ 3644 dstport = htons(ntohl(ports) & 0xFFFF); 3645 srcport = htons(ntohl(ports) >> 16); 3646 dst = ip6h->ip6_dst; 3647 src = ip6h->ip6_src; 3648 3649 /* Attempt to find a client stream based on destination port. */ 3650 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3651 mutex_enter(&connfp->connf_lock); 3652 connp = connfp->connf_head; 3653 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3654 /* 3655 * Not multicast. Send to the one (first) client we find. 
3656 */ 3657 while (connp != NULL) { 3658 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3659 src) && connp->conn_zoneid == zoneid && 3660 conn_wantpacket_v6(connp, ill, ip6h, 3661 flags, zoneid)) { 3662 break; 3663 } 3664 connp = connp->conn_next; 3665 } 3666 if (connp == NULL || connp->conn_upq == NULL) 3667 goto notfound; 3668 3669 /* Found a client */ 3670 CONN_INC_REF(connp); 3671 mutex_exit(&connfp->connf_lock); 3672 3673 if (CONN_UDP_FLOWCTLD(connp)) { 3674 freemsg(first_mp); 3675 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3676 CONN_DEC_REF(connp); 3677 return; 3678 } 3679 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3680 first_mp = ipsec_check_inbound_policy(first_mp, 3681 connp, NULL, ip6h, mctl_present); 3682 if (first_mp == NULL) { 3683 CONN_DEC_REF(connp); 3684 return; 3685 } 3686 } 3687 /* Initiate IPPF processing */ 3688 if (IP6_IN_IPP(flags)) { 3689 uint_t ifindex; 3690 3691 mutex_enter(&ill->ill_lock); 3692 ifindex = ill->ill_phyint->phyint_ifindex; 3693 mutex_exit(&ill->ill_lock); 3694 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3695 if (mp == NULL) { 3696 CONN_DEC_REF(connp); 3697 if (mctl_present) 3698 freeb(first_mp); 3699 return; 3700 } 3701 } 3702 /* 3703 * For link-local always add ifindex so that 3704 * transport can set sin6_scope_id. Avoid it for 3705 * ICMP error fanout. 
3706 */ 3707 if ((connp->conn_ipv6_recvpktinfo || 3708 IN6_IS_ADDR_LINKLOCAL(&src)) && 3709 (flags & IP_FF_IP6INFO)) { 3710 /* Add header */ 3711 mp = ip_add_info_v6(mp, inill, &dst); 3712 if (mp == NULL) { 3713 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3714 CONN_DEC_REF(connp); 3715 if (mctl_present) 3716 freeb(first_mp); 3717 return; 3718 } else if (mctl_present) { 3719 first_mp->b_cont = mp; 3720 } else { 3721 first_mp = mp; 3722 } 3723 } 3724 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3725 3726 /* Send it upstream */ 3727 CONN_UDP_RECV(connp, mp); 3728 3729 IP6_STAT(ip6_udp_fannorm); 3730 CONN_DEC_REF(connp); 3731 if (mctl_present) 3732 freeb(first_mp); 3733 return; 3734 } 3735 3736 /* 3737 * The code is fine but we shouldn't be walking the conn_next 3738 * list in IPv6 (its a classifier private data struct). Maybe create 3739 * a classifier API to put a REF_HOLD on all matching conn in the 3740 * list and return an array. 3741 */ 3742 while (connp != NULL) { 3743 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3744 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) 3745 break; 3746 connp = connp->conn_next; 3747 } 3748 3749 if (connp == NULL || connp->conn_upq == NULL) 3750 goto notfound; 3751 3752 first_conn = connp; 3753 3754 CONN_INC_REF(connp); 3755 connp = connp->conn_next; 3756 for (;;) { 3757 while (connp != NULL) { 3758 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3759 src) && conn_wantpacket_v6(connp, ill, ip6h, 3760 flags, zoneid)) 3761 break; 3762 connp = connp->conn_next; 3763 } 3764 /* 3765 * Just copy the data part alone. The mctl part is 3766 * needed just for verifying policy and it is never 3767 * sent up. 3768 */ 3769 if (connp == NULL || 3770 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3771 ((first_mp1 = ip_copymsg(first_mp)) 3772 == NULL))) { 3773 /* 3774 * No more interested clients or memory 3775 * allocation failed 3776 */ 3777 connp = first_conn; 3778 break; 3779 } 3780 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3781 CONN_INC_REF(connp); 3782 mutex_exit(&connfp->connf_lock); 3783 /* 3784 * For link-local always add ifindex so that transport 3785 * can set sin6_scope_id. Avoid it for ICMP error 3786 * fanout. 3787 */ 3788 if ((connp->conn_ipv6_recvpktinfo || 3789 IN6_IS_ADDR_LINKLOCAL(&src)) && 3790 (flags & IP_FF_IP6INFO)) { 3791 /* Add header */ 3792 mp1 = ip_add_info_v6(mp1, inill, &dst); 3793 } 3794 if (mp1 == NULL) { 3795 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3796 goto next_one; 3797 } 3798 if (CONN_UDP_FLOWCTLD(connp)) { 3799 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3800 freemsg(mp1); 3801 goto next_one; 3802 } 3803 3804 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3805 secure) { 3806 first_mp1 = ipsec_check_inbound_policy 3807 (first_mp1, connp, NULL, ip6h, 3808 mctl_present); 3809 } 3810 if (first_mp1 != NULL) { 3811 if (mctl_present) 3812 freeb(first_mp1); 3813 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3814 3815 /* Send it upstream */ 3816 CONN_UDP_RECV(connp, mp1); 3817 } 3818 next_one: 3819 mutex_enter(&connfp->connf_lock); 3820 /* Follow the next pointer before releasing the conn. */ 3821 next_conn = connp->conn_next; 3822 IP6_STAT(ip6_udp_fanmb); 3823 CONN_DEC_REF(connp); 3824 connp = next_conn; 3825 } 3826 3827 /* Last one. Send it upstream. */ 3828 mutex_exit(&connfp->connf_lock); 3829 3830 /* Initiate IPPF processing */ 3831 if (IP6_IN_IPP(flags)) { 3832 uint_t ifindex; 3833 3834 mutex_enter(&ill->ill_lock); 3835 ifindex = ill->ill_phyint->phyint_ifindex; 3836 mutex_exit(&ill->ill_lock); 3837 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3838 if (mp == NULL) { 3839 CONN_DEC_REF(connp); 3840 if (mctl_present) { 3841 freeb(first_mp); 3842 } 3843 return; 3844 } 3845 } 3846 3847 /* 3848 * For link-local always add ifindex so that transport can set 3849 * sin6_scope_id. Avoid it for ICMP error fanout. 
3850 */ 3851 if ((connp->conn_ipv6_recvpktinfo || 3852 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3853 /* Add header */ 3854 mp = ip_add_info_v6(mp, inill, &dst); 3855 if (mp == NULL) { 3856 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3857 CONN_DEC_REF(connp); 3858 if (mctl_present) 3859 freeb(first_mp); 3860 return; 3861 } else if (mctl_present) { 3862 first_mp->b_cont = mp; 3863 } else { 3864 first_mp = mp; 3865 } 3866 } 3867 if (CONN_UDP_FLOWCTLD(connp)) { 3868 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3869 freemsg(mp); 3870 } else { 3871 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3872 first_mp = ipsec_check_inbound_policy(first_mp, 3873 connp, NULL, ip6h, mctl_present); 3874 if (first_mp == NULL) { 3875 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3876 CONN_DEC_REF(connp); 3877 return; 3878 } 3879 } 3880 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3881 3882 /* Send it upstream */ 3883 CONN_UDP_RECV(connp, mp); 3884 } 3885 IP6_STAT(ip6_udp_fanmb); 3886 CONN_DEC_REF(connp); 3887 if (mctl_present) 3888 freeb(first_mp); 3889 return; 3890 3891 notfound: 3892 mutex_exit(&connfp->connf_lock); 3893 /* 3894 * No one bound to this port. Is 3895 * there a client that wants all 3896 * unclaimed datagrams? 
3897 */ 3898 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3899 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3900 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 3901 zoneid); 3902 } else { 3903 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3904 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3905 mctl_present, zoneid)) { 3906 BUMP_MIB(&ip_mib, udpNoPorts); 3907 } 3908 } 3909 } 3910 3911 /* 3912 * int ip_find_hdr_v6() 3913 * 3914 * This routine is used by the upper layer protocols and the IP tunnel 3915 * module to: 3916 * - Set extension header pointers to appropriate locations 3917 * - Determine IPv6 header length and return it 3918 * - Return a pointer to the last nexthdr value 3919 * 3920 * The caller must initialize ipp_fields. 3921 * 3922 * NOTE: If multiple extension headers of the same type are present, 3923 * ip_find_hdr_v6() will set the respective extension header pointers 3924 * to the first one that it encounters in the IPv6 header. It also 3925 * skips fragment headers. This routine deals with malformed packets 3926 * of various sorts in which case the returned length is up to the 3927 * malformed part. 3928 */ 3929 int 3930 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3931 { 3932 uint_t length, ehdrlen; 3933 uint8_t nexthdr; 3934 uint8_t *whereptr, *endptr; 3935 ip6_dest_t *tmpdstopts; 3936 ip6_rthdr_t *tmprthdr; 3937 ip6_hbh_t *tmphopopts; 3938 ip6_frag_t *tmpfraghdr; 3939 3940 length = IPV6_HDR_LEN; 3941 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3942 endptr = mp->b_wptr; 3943 3944 nexthdr = ip6h->ip6_nxt; 3945 while (whereptr < endptr) { 3946 /* Is there enough left for len + nexthdr? 
*/ 3947 if (whereptr + MIN_EHDR_LEN > endptr) 3948 goto done; 3949 3950 switch (nexthdr) { 3951 case IPPROTO_HOPOPTS: 3952 tmphopopts = (ip6_hbh_t *)whereptr; 3953 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3954 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3955 goto done; 3956 nexthdr = tmphopopts->ip6h_nxt; 3957 /* return only 1st hbh */ 3958 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3959 ipp->ipp_fields |= IPPF_HOPOPTS; 3960 ipp->ipp_hopopts = tmphopopts; 3961 ipp->ipp_hopoptslen = ehdrlen; 3962 } 3963 break; 3964 case IPPROTO_DSTOPTS: 3965 tmpdstopts = (ip6_dest_t *)whereptr; 3966 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3967 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3968 goto done; 3969 nexthdr = tmpdstopts->ip6d_nxt; 3970 /* 3971 * ipp_dstopts is set to the destination header after a 3972 * routing header. 3973 * Assume it is a post-rthdr destination header 3974 * and adjust when we find an rthdr. 3975 */ 3976 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3977 ipp->ipp_fields |= IPPF_DSTOPTS; 3978 ipp->ipp_dstopts = tmpdstopts; 3979 ipp->ipp_dstoptslen = ehdrlen; 3980 } 3981 break; 3982 case IPPROTO_ROUTING: 3983 tmprthdr = (ip6_rthdr_t *)whereptr; 3984 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3985 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3986 goto done; 3987 nexthdr = tmprthdr->ip6r_nxt; 3988 /* return only 1st rthdr */ 3989 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3990 ipp->ipp_fields |= IPPF_RTHDR; 3991 ipp->ipp_rthdr = tmprthdr; 3992 ipp->ipp_rthdrlen = ehdrlen; 3993 } 3994 /* 3995 * Make any destination header we've seen be a 3996 * pre-rthdr destination header. 3997 */ 3998 if (ipp->ipp_fields & IPPF_DSTOPTS) { 3999 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4000 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4001 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4002 ipp->ipp_dstopts = NULL; 4003 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4004 ipp->ipp_dstoptslen = 0; 4005 } 4006 break; 4007 case IPPROTO_FRAGMENT: 4008 /* 4009 * Fragment headers are skipped. 
Currently, only 4010 * IP cares for their existence. If anyone other 4011 * than IP ever has the need to know about the 4012 * location of fragment headers, support can be 4013 * added to the ip6_pkt_t at that time. 4014 */ 4015 tmpfraghdr = (ip6_frag_t *)whereptr; 4016 ehdrlen = sizeof (ip6_frag_t); 4017 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4018 goto done; 4019 nexthdr = tmpfraghdr->ip6f_nxt; 4020 break; 4021 case IPPROTO_NONE: 4022 default: 4023 goto done; 4024 } 4025 length += ehdrlen; 4026 whereptr += ehdrlen; 4027 } 4028 done: 4029 if (nexthdrp != NULL) 4030 *nexthdrp = nexthdr; 4031 return (length); 4032 } 4033 4034 int 4035 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4036 { 4037 ire_t *ire; 4038 4039 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4040 ire = ire_lookup_local_v6(zoneid); 4041 if (ire == NULL) { 4042 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4043 return (1); 4044 } 4045 ip6h->ip6_src = ire->ire_addr_v6; 4046 ire_refrele(ire); 4047 } 4048 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4049 ip6h->ip6_hops = ipv6_def_hops; 4050 return (0); 4051 } 4052 4053 /* 4054 * Try to determine where and what are the IPv6 header length and 4055 * pointer to nexthdr value for the upper layer protocol (or an 4056 * unknown next hdr). 4057 * 4058 * Parameters returns a pointer to the nexthdr value; 4059 * Must handle malformed packets of various sorts. 4060 * Function returns failure for malformed cases. 
4061 */ 4062 boolean_t 4063 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4064 uint8_t **nexthdrpp) 4065 { 4066 uint16_t length; 4067 uint_t ehdrlen; 4068 uint8_t *nexthdrp; 4069 uint8_t *whereptr; 4070 uint8_t *endptr; 4071 ip6_dest_t *desthdr; 4072 ip6_rthdr_t *rthdr; 4073 ip6_frag_t *fraghdr; 4074 4075 length = IPV6_HDR_LEN; 4076 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4077 endptr = mp->b_wptr; 4078 4079 nexthdrp = &ip6h->ip6_nxt; 4080 while (whereptr < endptr) { 4081 /* Is there enough left for len + nexthdr? */ 4082 if (whereptr + MIN_EHDR_LEN > endptr) 4083 break; 4084 4085 switch (*nexthdrp) { 4086 case IPPROTO_HOPOPTS: 4087 case IPPROTO_DSTOPTS: 4088 /* Assumes the headers are identical for hbh and dst */ 4089 desthdr = (ip6_dest_t *)whereptr; 4090 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4091 if ((uchar_t *)desthdr + ehdrlen > endptr) 4092 return (B_FALSE); 4093 nexthdrp = &desthdr->ip6d_nxt; 4094 break; 4095 case IPPROTO_ROUTING: 4096 rthdr = (ip6_rthdr_t *)whereptr; 4097 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4098 if ((uchar_t *)rthdr + ehdrlen > endptr) 4099 return (B_FALSE); 4100 nexthdrp = &rthdr->ip6r_nxt; 4101 break; 4102 case IPPROTO_FRAGMENT: 4103 fraghdr = (ip6_frag_t *)whereptr; 4104 ehdrlen = sizeof (ip6_frag_t); 4105 if ((uchar_t *)&fraghdr[1] > endptr) 4106 return (B_FALSE); 4107 nexthdrp = &fraghdr->ip6f_nxt; 4108 break; 4109 case IPPROTO_NONE: 4110 /* No next header means we're finished */ 4111 default: 4112 *hdr_length_ptr = length; 4113 *nexthdrpp = nexthdrp; 4114 return (B_TRUE); 4115 } 4116 length += ehdrlen; 4117 whereptr += ehdrlen; 4118 *hdr_length_ptr = length; 4119 *nexthdrpp = nexthdrp; 4120 } 4121 switch (*nexthdrp) { 4122 case IPPROTO_HOPOPTS: 4123 case IPPROTO_DSTOPTS: 4124 case IPPROTO_ROUTING: 4125 case IPPROTO_FRAGMENT: 4126 /* 4127 * If any know extension headers are still to be processed, 4128 * the packet's malformed (or at least all the IP header(s) are 4129 * not in the same 
mblk - and that should never happen. 4130 */ 4131 return (B_FALSE); 4132 4133 default: 4134 /* 4135 * If we get here, we know that all of the IP headers were in 4136 * the same mblk, even if the ULP header is in the next mblk. 4137 */ 4138 *hdr_length_ptr = length; 4139 *nexthdrpp = nexthdrp; 4140 return (B_TRUE); 4141 } 4142 } 4143 4144 /* 4145 * Return the length of the IPv6 related headers (including extension headers) 4146 * Returns a length even if the packet is malformed. 4147 */ 4148 int 4149 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4150 { 4151 uint16_t hdr_len; 4152 uint8_t *nexthdrp; 4153 4154 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4155 return (hdr_len); 4156 } 4157 4158 /* 4159 * Select an ill for the packet by considering load spreading across 4160 * a different ill in the group if dst_ill is part of some group. 4161 */ 4162 static ill_t * 4163 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4164 { 4165 ill_t *ill; 4166 4167 /* 4168 * We schedule irrespective of whether the source address is 4169 * INADDR_UNSPECIED or not. 4170 */ 4171 ill = illgrp_scheduler(dst_ill); 4172 if (ill == NULL) 4173 return (NULL); 4174 4175 /* 4176 * For groups with names ip_sioctl_groupname ensures that all 4177 * ills are of same type. For groups without names, ifgrp_insert 4178 * ensures this. 4179 */ 4180 ASSERT(dst_ill->ill_type == ill->ill_type); 4181 4182 return (ill); 4183 } 4184 4185 /* 4186 * IPv6 - 4187 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4188 * to send out a packet to a destination address for which we do not have 4189 * specific routing information. 4190 * 4191 * Handle non-multicast packets. If ill is non-NULL the match is done 4192 * for that ill. 4193 * 4194 * When a specific ill is specified (using IPV6_PKTINFO, 4195 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4196 * on routing entries (ftable and ctable) that have a matching 4197 * ire->ire_ipif->ipif_ill. 
Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 * v6srcp may be used in the future. Currently unused.
4231 */ 4232 /* ARGSUSED */ 4233 void 4234 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4235 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4236 { 4237 in6_addr_t v6gw; 4238 in6_addr_t dst; 4239 ire_t *ire = NULL; 4240 ipif_t *src_ipif = NULL; 4241 ill_t *dst_ill = NULL; 4242 ire_t *sire = NULL; 4243 ire_t *save_ire; 4244 mblk_t *dlureq_mp; 4245 ip6_t *ip6h; 4246 int err = 0; 4247 mblk_t *first_mp; 4248 ipsec_out_t *io; 4249 ill_t *attach_ill = NULL; 4250 ushort_t ire_marks = 0; 4251 int match_flags; 4252 boolean_t ip6i_present; 4253 ire_t *first_sire = NULL; 4254 mblk_t *copy_mp = NULL; 4255 mblk_t *xmit_mp = NULL; 4256 in6_addr_t save_dst; 4257 uint32_t multirt_flags = 4258 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4259 boolean_t multirt_is_resolvable; 4260 boolean_t multirt_resolve_next; 4261 boolean_t need_rele = B_FALSE; 4262 boolean_t do_attach_ill = B_FALSE; 4263 boolean_t ip6_asp_table_held = B_FALSE; 4264 4265 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4266 4267 first_mp = mp; 4268 if (mp->b_datap->db_type == M_CTL) { 4269 mp = mp->b_cont; 4270 io = (ipsec_out_t *)first_mp->b_rptr; 4271 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4272 } else { 4273 io = NULL; 4274 } 4275 4276 /* 4277 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4278 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4279 * could be NULL. 4280 * 4281 * This information can appear either in an ip6i_t or an IPSEC_OUT 4282 * message. 4283 */ 4284 ip6h = (ip6_t *)mp->b_rptr; 4285 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4286 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4287 if (!ip6i_present || 4288 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4289 attach_ill = ip_grab_attach_ill(ill, first_mp, 4290 (ip6i_present ? ((ip6i_t *)ip6h)->ip6i_ifindex : 4291 io->ipsec_out_ill_index), B_TRUE); 4292 /* Failure case frees things for us. 
*/ 4293 if (attach_ill == NULL) 4294 return; 4295 4296 /* 4297 * Check if we need an ire that will not be 4298 * looked up by anybody else i.e. HIDDEN. 4299 */ 4300 if (ill_is_probeonly(attach_ill)) 4301 ire_marks = IRE_MARK_HIDDEN; 4302 } 4303 } 4304 4305 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4306 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4307 goto icmp_err_ret; 4308 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4309 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4310 goto icmp_err_ret; 4311 } 4312 4313 /* 4314 * If this IRE is created for forwarding or it is not for 4315 * TCP traffic, mark it as temporary. 4316 * 4317 * Is it sufficient just to check the next header?? 4318 */ 4319 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4320 ire_marks |= IRE_MARK_TEMPORARY; 4321 4322 /* 4323 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4324 * chain until it gets the most specific information available. 4325 * For example, we know that there is no IRE_CACHE for this dest, 4326 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4327 * ire_ftable_lookup_v6 will look up the gateway, etc. 4328 */ 4329 4330 if (ill == NULL) { 4331 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4332 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE; 4333 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4334 NULL, &sire, zoneid, 0, match_flags); 4335 /* 4336 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4337 * in a NULL ill, but the packet could be a neighbor 4338 * solicitation/advertisment and could have a valid attach_ill. 4339 */ 4340 if (attach_ill != NULL) 4341 ill_refrele(attach_ill); 4342 } else { 4343 if (attach_ill != NULL) { 4344 /* 4345 * attach_ill is set only for communicating with 4346 * on-link hosts. So, don't look for DEFAULT. 4347 * ip_wput_v6 passes the right ill in this case and 4348 * hence we can assert. 
4349 */ 4350 ASSERT(ill == attach_ill); 4351 ill_refrele(attach_ill); 4352 do_attach_ill = B_TRUE; 4353 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4354 } else { 4355 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4356 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4357 } 4358 match_flags |= MATCH_IRE_PARENT; 4359 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, ill->ill_ipif, 4360 &sire, zoneid, 0, match_flags); 4361 } 4362 4363 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4364 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4365 4366 if (zoneid == ALL_ZONES && ire != NULL) { 4367 /* 4368 * In the forwarding case, we can use a route from any zone 4369 * since we won't change the source address. We can easily 4370 * assert that the source address is already set when there's no 4371 * ip6_info header - otherwise we'd have to call pullupmsg(). 4372 */ 4373 ASSERT(ip6i_present || 4374 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4375 zoneid = ire->ire_zoneid; 4376 } 4377 4378 /* 4379 * We enter a loop that will be run only once in most cases. 4380 * The loop is re-entered in the case where the destination 4381 * can be reached through multiple RTF_MULTIRT-flagged routes. 4382 * The intention is to compute multiple routes to a single 4383 * destination in a single ip_newroute_v6 call. 4384 * The information is contained in sire->ire_flags. 4385 */ 4386 do { 4387 multirt_resolve_next = B_FALSE; 4388 4389 if (dst_ill != NULL) { 4390 ill_refrele(dst_ill); 4391 dst_ill = NULL; 4392 } 4393 if (src_ipif != NULL) { 4394 ipif_refrele(src_ipif); 4395 src_ipif = NULL; 4396 } 4397 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4398 ip3dbg(("ip_newroute_v6: starting new resolution " 4399 "with first_mp %p, tag %d\n", 4400 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4401 4402 /* 4403 * We check if there are trailing unresolved routes for 4404 * the destination contained in sire. 
4405 */ 4406 multirt_is_resolvable = 4407 ire_multirt_lookup_v6(&ire, &sire, multirt_flags); 4408 4409 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4410 "ire %p, sire %p\n", 4411 multirt_is_resolvable, (void *)ire, (void *)sire)); 4412 4413 if (!multirt_is_resolvable) { 4414 /* 4415 * No more multirt routes to resolve; give up 4416 * (all routes resolved or no more resolvable 4417 * routes). 4418 */ 4419 if (ire != NULL) { 4420 ire_refrele(ire); 4421 ire = NULL; 4422 } 4423 } else { 4424 ASSERT(sire != NULL); 4425 ASSERT(ire != NULL); 4426 /* 4427 * We simply use first_sire as a flag that 4428 * indicates if a resolvable multirt route has 4429 * already been found during the preceding 4430 * loops. If it is not the case, we may have 4431 * to send an ICMP error to report that the 4432 * destination is unreachable. We do not 4433 * IRE_REFHOLD first_sire. 4434 */ 4435 if (first_sire == NULL) { 4436 first_sire = sire; 4437 } 4438 } 4439 } 4440 if ((ire == NULL) || (ire == sire)) { 4441 /* 4442 * either ire == NULL (the destination cannot be 4443 * resolved) or ire == sire (the gateway cannot be 4444 * resolved). At this point, there are no more routes 4445 * to resolve for the destination, thus we exit. 4446 */ 4447 if (ip_debug > 3) { 4448 /* ip2dbg */ 4449 pr_addr_dbg("ip_newroute_v6: " 4450 "can't resolve %s\n", AF_INET6, v6dstp); 4451 } 4452 ip3dbg(("ip_newroute_v6: " 4453 "ire %p, sire %p, first_sire %p\n", 4454 (void *)ire, (void *)sire, (void *)first_sire)); 4455 4456 if (sire != NULL) { 4457 ire_refrele(sire); 4458 sire = NULL; 4459 } 4460 4461 if (first_sire != NULL) { 4462 /* 4463 * At least one multirt route has been found 4464 * in the same ip_newroute() call; there is no 4465 * need to report an ICMP error. 4466 * first_sire was not IRE_REFHOLDed. 
4467 */ 4468 MULTIRT_DEBUG_UNTAG(first_mp); 4469 freemsg(first_mp); 4470 return; 4471 } 4472 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4473 RTA_DST); 4474 goto icmp_err_ret; 4475 } 4476 4477 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4478 4479 /* 4480 * Verify that the returned IRE does not have either the 4481 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4482 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4483 */ 4484 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4485 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4486 goto icmp_err_ret; 4487 4488 /* 4489 * Increment the ire_ob_pkt_count field for ire if it is an 4490 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4491 * increment the same for the parent IRE, sire, if it is some 4492 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4493 * and HOST_REDIRECT). 4494 */ 4495 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4496 UPDATE_OB_PKT_COUNT(ire); 4497 ire->ire_last_used_time = lbolt; 4498 } 4499 4500 if (sire != NULL) { 4501 mutex_enter(&sire->ire_lock); 4502 v6gw = sire->ire_gateway_addr_v6; 4503 mutex_exit(&sire->ire_lock); 4504 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4505 IRE_INTERFACE)) == 0); 4506 UPDATE_OB_PKT_COUNT(sire); 4507 sire->ire_last_used_time = lbolt; 4508 } else { 4509 v6gw = ipv6_all_zeros; 4510 } 4511 4512 /* 4513 * We have a route to reach the destination. 4514 * 4515 * 1) If the interface is part of ill group, try to get a new 4516 * ill taking load spreading into account. 4517 * 4518 * 2) After selecting the ill, get a source address that might 4519 * create good inbound load spreading and that matches the 4520 * right scope. ipif_select_source_v6 does this for us. 4521 * 4522 * If the application specified the ill (ifindex), we still 4523 * load spread. Only if the packets needs to go out specifically 4524 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4525 * IPV6_BOUND_PIF we don't try to use a different ill for load 4526 * spreading. 4527 */ 4528 if (!do_attach_ill) { 4529 /* 4530 * If the interface belongs to an interface group, 4531 * make sure the next possible interface in the group 4532 * is used. This encourages load spreading among 4533 * peers in an interface group. However, in the case 4534 * of multirouting, load spreading is not used, as we 4535 * actually want to replicate outgoing packets through 4536 * particular interfaces. 4537 * 4538 * Note: While we pick a dst_ill we are really only 4539 * interested in the ill for load spreading. 4540 * The source ipif is determined by source address 4541 * selection below. 4542 */ 4543 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4544 dst_ill = ire->ire_ipif->ipif_ill; 4545 /* For uniformity do a refhold */ 4546 ill_refhold(dst_ill); 4547 } else { 4548 /* 4549 * If we are here trying to create an IRE_CACHE 4550 * for an offlink destination and have the 4551 * IRE_CACHE for the next hop and the latter is 4552 * using virtual IP source address selection i.e 4553 * it's ire->ire_ipif is pointing to a virtual 4554 * network interface (vni) then 4555 * ip_newroute_get_dst_ll() will return the vni 4556 * interface as the dst_ill. Since the vni is 4557 * virtual i.e not associated with any physical 4558 * interface, it cannot be the dst_ill, hence 4559 * in such a case call ip_newroute_get_dst_ll() 4560 * with the stq_ill instead of the ire_ipif ILL. 4561 * The function returns a refheld ill. 
4562 */ 4563 if ((ire->ire_type == IRE_CACHE) && 4564 IS_VNI(ire->ire_ipif->ipif_ill)) 4565 dst_ill = ip_newroute_get_dst_ill_v6( 4566 ire->ire_stq->q_ptr); 4567 else 4568 dst_ill = ip_newroute_get_dst_ill_v6( 4569 ire->ire_ipif->ipif_ill); 4570 } 4571 if (dst_ill == NULL) { 4572 if (ip_debug > 2) { 4573 pr_addr_dbg("ip_newroute_v6 : no dst " 4574 "ill for dst %s\n", 4575 AF_INET6, v6dstp); 4576 } 4577 goto icmp_err_ret; 4578 } else if (dst_ill->ill_group == NULL && ill != NULL && 4579 dst_ill != ill) { 4580 /* 4581 * If "ill" is not part of any group, we should 4582 * have found a route matching "ill" as we 4583 * called ire_ftable_lookup_v6 with 4584 * MATCH_IRE_ILL_GROUP. 4585 * Rather than asserting when there is a 4586 * mismatch, we just drop the packet. 4587 */ 4588 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4589 "dst_ill %s ill %s\n", 4590 dst_ill->ill_name, 4591 ill->ill_name)); 4592 goto icmp_err_ret; 4593 } 4594 } else { 4595 dst_ill = ire->ire_ipif->ipif_ill; 4596 /* For uniformity do refhold */ 4597 ill_refhold(dst_ill); 4598 /* 4599 * We should have found a route matching ill as we 4600 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4601 * Rather than asserting, while there is a mismatch, 4602 * we just drop the packet. 4603 */ 4604 if (dst_ill != ill) { 4605 ip0dbg(("ip_newroute_v6: Packet dropped as " 4606 "IP6I_ATTACH_IF ill is %s, " 4607 "ire->ire_ipif->ipif_ill is %s\n", 4608 ill->ill_name, 4609 dst_ill->ill_name)); 4610 goto icmp_err_ret; 4611 } 4612 } 4613 /* 4614 * Pick a source address which matches the scope of the 4615 * destination address. 4616 * For RTF_SETSRC routes, the source address is imposed by the 4617 * parent ire (sire). 4618 */ 4619 ASSERT(src_ipif == NULL); 4620 if (ire->ire_type == IRE_IF_RESOLVER && 4621 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4622 ip6_asp_can_lookup()) { 4623 /* 4624 * The ire cache entry we're adding is for the 4625 * gateway itself. 
The source address in this case 4626 * is relative to the gateway's address. 4627 */ 4628 ip6_asp_table_held = B_TRUE; 4629 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4630 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4631 if (src_ipif != NULL) 4632 ire_marks |= IRE_MARK_USESRC_CHECK; 4633 } else { 4634 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4635 /* 4636 * Check that the ipif matching the requested 4637 * source address still exists. 4638 */ 4639 src_ipif = ipif_lookup_addr_v6( 4640 &sire->ire_src_addr_v6, NULL, zoneid, 4641 NULL, NULL, NULL, NULL); 4642 } 4643 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4644 ip6_asp_table_held = B_TRUE; 4645 src_ipif = ipif_select_source_v6(dst_ill, 4646 v6dstp, B_FALSE, IPV6_PREFER_SRC_DEFAULT, 4647 zoneid); 4648 if (src_ipif != NULL) 4649 ire_marks |= IRE_MARK_USESRC_CHECK; 4650 } 4651 } 4652 4653 if (src_ipif == NULL) { 4654 if (ip_debug > 2) { 4655 /* ip1dbg */ 4656 pr_addr_dbg("ip_newroute_v6: no src for " 4657 "dst %s\n, ", AF_INET6, v6dstp); 4658 printf("ip_newroute_v6: interface name %s\n", 4659 dst_ill->ill_name); 4660 } 4661 goto icmp_err_ret; 4662 } 4663 4664 if (ip_debug > 3) { 4665 /* ip2dbg */ 4666 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4667 AF_INET6, &v6gw); 4668 } 4669 ip2dbg(("\tire type %s (%d)\n", 4670 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4671 4672 /* 4673 * At this point in ip_newroute_v6(), ire is either the 4674 * IRE_CACHE of the next-hop gateway for an off-subnet 4675 * destination or an IRE_INTERFACE type that should be used 4676 * to resolve an on-subnet destination or an on-subnet 4677 * next-hop gateway. 4678 * 4679 * In the IRE_CACHE case, we have the following : 4680 * 4681 * 1) src_ipif - used for getting a source address. 4682 * 4683 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4684 * means packets using this IRE_CACHE will go out on dst_ill. 
4685 * 4686 * 3) The IRE sire will point to the prefix that is the longest 4687 * matching route for the destination. These prefix types 4688 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4689 * IRE_HOST_REDIRECT. 4690 * 4691 * The newly created IRE_CACHE entry for the off-subnet 4692 * destination is tied to both the prefix route and the 4693 * interface route used to resolve the next-hop gateway 4694 * via the ire_phandle and ire_ihandle fields, respectively. 4695 * 4696 * In the IRE_INTERFACE case, we have the following : 4697 * 4698 * 1) src_ipif - used for getting a source address. 4699 * 4700 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4701 * means packets using the IRE_CACHE that we will build 4702 * here will go out on dst_ill. 4703 * 4704 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4705 * to be created will only be tied to the IRE_INTERFACE that 4706 * was derived from the ire_ihandle field. 4707 * 4708 * If sire is non-NULL, it means the destination is off-link 4709 * and we will first create the IRE_CACHE for the gateway. 4710 * Next time through ip_newroute_v6, we will create the 4711 * IRE_CACHE for the final destination as described above. 4712 */ 4713 save_ire = ire; 4714 switch (ire->ire_type) { 4715 case IRE_CACHE: { 4716 ire_t *ipif_ire; 4717 4718 ASSERT(sire != NULL); 4719 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4720 mutex_enter(&ire->ire_lock); 4721 v6gw = ire->ire_gateway_addr_v6; 4722 mutex_exit(&ire->ire_lock); 4723 } 4724 /* 4725 * We need 3 ire's to create a new cache ire for an 4726 * off-link destination from the cache ire of the 4727 * gateway. 4728 * 4729 * 1. The prefix ire 'sire' 4730 * 2. The cache ire of the gateway 'ire' 4731 * 3. The interface ire 'ipif_ire' 4732 * 4733 * We have (1) and (2). We lookup (3) below. 4734 * 4735 * If there is no interface route to the gateway, 4736 * it is a race condition, where we found the cache 4737 * but the inteface route has been deleted. 
4738 */ 4739 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4740 if (ipif_ire == NULL) { 4741 ip1dbg(("ip_newroute_v6:" 4742 "ire_ihandle_lookup_offlink_v6 failed\n")); 4743 goto icmp_err_ret; 4744 } 4745 /* 4746 * Assume DL_UNITDATA_REQ is same for all physical 4747 * interfaces in the ifgrp. If it isn't, this code will 4748 * have to be seriously rewhacked to allow the 4749 * fastpath probing (such that I cache the link 4750 * header in the IRE_CACHE) to work over ifgrps. 4751 * We have what we need to build an IRE_CACHE. 4752 */ 4753 /* 4754 * Note: the new ire inherits RTF_SETSRC 4755 * and RTF_MULTIRT to propagate these flags from prefix 4756 * to cache. 4757 */ 4758 ire = ire_create_v6( 4759 v6dstp, /* dest address */ 4760 &ipv6_all_ones, /* mask */ 4761 &src_ipif->ipif_v6src_addr, /* source address */ 4762 &v6gw, /* gateway address */ 4763 &save_ire->ire_max_frag, 4764 NULL, /* Fast Path header */ 4765 dst_ill->ill_rq, /* recv-from queue */ 4766 dst_ill->ill_wq, /* send-to queue */ 4767 IRE_CACHE, 4768 NULL, 4769 src_ipif, 4770 &sire->ire_mask_v6, /* Parent mask */ 4771 sire->ire_phandle, /* Parent handle */ 4772 ipif_ire->ire_ihandle, /* Interface handle */ 4773 sire->ire_flags & /* flags if any */ 4774 (RTF_SETSRC | RTF_MULTIRT), 4775 &(sire->ire_uinfo)); 4776 4777 if (ire == NULL) { 4778 ire_refrele(save_ire); 4779 ire_refrele(ipif_ire); 4780 break; 4781 } 4782 ire->ire_marks |= ire_marks; 4783 4784 /* 4785 * Prevent sire and ipif_ire from getting deleted. The 4786 * newly created ire is tied to both of them via the 4787 * phandle and ihandle respectively. 4788 */ 4789 IRB_REFHOLD(sire->ire_bucket); 4790 /* Has it been removed already ? */ 4791 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4792 IRB_REFRELE(sire->ire_bucket); 4793 ire_refrele(ipif_ire); 4794 ire_refrele(save_ire); 4795 break; 4796 } 4797 4798 IRB_REFHOLD(ipif_ire->ire_bucket); 4799 /* Has it been removed already ? 
*/ 4800 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4801 IRB_REFRELE(ipif_ire->ire_bucket); 4802 IRB_REFRELE(sire->ire_bucket); 4803 ire_refrele(ipif_ire); 4804 ire_refrele(save_ire); 4805 break; 4806 } 4807 4808 xmit_mp = first_mp; 4809 if (ire->ire_flags & RTF_MULTIRT) { 4810 copy_mp = copymsg(first_mp); 4811 if (copy_mp != NULL) { 4812 xmit_mp = copy_mp; 4813 MULTIRT_DEBUG_TAG(first_mp); 4814 } 4815 } 4816 ire_add_then_send(q, ire, xmit_mp); 4817 if (ip6_asp_table_held) { 4818 ip6_asp_table_refrele(); 4819 ip6_asp_table_held = B_FALSE; 4820 } 4821 ire_refrele(save_ire); 4822 4823 /* Assert that sire is not deleted yet. */ 4824 ASSERT(sire->ire_ptpn != NULL); 4825 IRB_REFRELE(sire->ire_bucket); 4826 4827 /* Assert that ipif_ire is not deleted yet. */ 4828 ASSERT(ipif_ire->ire_ptpn != NULL); 4829 IRB_REFRELE(ipif_ire->ire_bucket); 4830 ire_refrele(ipif_ire); 4831 4832 if (copy_mp != NULL) { 4833 /* 4834 * Search for the next unresolved 4835 * multirt route. 4836 */ 4837 copy_mp = NULL; 4838 ipif_ire = NULL; 4839 ire = NULL; 4840 /* re-enter the loop */ 4841 multirt_resolve_next = B_TRUE; 4842 continue; 4843 } 4844 ire_refrele(sire); 4845 ill_refrele(dst_ill); 4846 ipif_refrele(src_ipif); 4847 return; 4848 } 4849 case IRE_IF_NORESOLVER: 4850 /* 4851 * We have what we need to build an IRE_CACHE. 4852 * 4853 * Create a new dlureq_mp with the IPv6 gateway 4854 * address in destination address in the DLPI hdr 4855 * if the physical length is exactly 16 bytes. 
4856 */ 4857 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 4858 const in6_addr_t *addr; 4859 4860 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4861 addr = &v6gw; 4862 else 4863 addr = v6dstp; 4864 4865 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 4866 dst_ill->ill_phys_addr_length, 4867 dst_ill->ill_sap, 4868 dst_ill->ill_sap_length); 4869 } else { 4870 dlureq_mp = ire->ire_dlureq_mp; 4871 } 4872 if (dlureq_mp == NULL) 4873 break; 4874 4875 /* 4876 * Note: the new ire inherits sire flags RTF_SETSRC 4877 * and RTF_MULTIRT to propagate those rules from prefix 4878 * to cache. 4879 */ 4880 ire = ire_create_v6( 4881 v6dstp, /* dest address */ 4882 &ipv6_all_ones, /* mask */ 4883 &src_ipif->ipif_v6src_addr, /* source address */ 4884 &v6gw, /* gateway address */ 4885 &save_ire->ire_max_frag, 4886 NULL, /* Fast Path header */ 4887 dst_ill->ill_rq, /* recv-from queue */ 4888 dst_ill->ill_wq, /* send-to queue */ 4889 IRE_CACHE, 4890 dlureq_mp, 4891 src_ipif, 4892 &save_ire->ire_mask_v6, /* Parent mask */ 4893 (sire != NULL) ? /* Parent handle */ 4894 sire->ire_phandle : 0, 4895 save_ire->ire_ihandle, /* Interface handle */ 4896 (sire != NULL) ? /* flags if any */ 4897 sire->ire_flags & 4898 (RTF_SETSRC | RTF_MULTIRT) : 0, 4899 &(save_ire->ire_uinfo)); 4900 4901 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 4902 freeb(dlureq_mp); 4903 4904 if (ire == NULL) { 4905 ire_refrele(save_ire); 4906 break; 4907 } 4908 4909 ire->ire_marks |= ire_marks; 4910 4911 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4912 dst = v6gw; 4913 else 4914 dst = *v6dstp; 4915 err = ndp_noresolver(dst_ill, &dst); 4916 if (err != 0) { 4917 ire_refrele(save_ire); 4918 break; 4919 } 4920 4921 /* Prevent save_ire from getting deleted */ 4922 IRB_REFHOLD(save_ire->ire_bucket); 4923 /* Has it been removed already ? 
*/ 4924 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4925 IRB_REFRELE(save_ire->ire_bucket); 4926 ire_refrele(save_ire); 4927 break; 4928 } 4929 4930 xmit_mp = first_mp; 4931 /* 4932 * In case of MULTIRT, a copy of the current packet 4933 * to send is made to further re-enter the 4934 * loop and attempt another route resolution 4935 */ 4936 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4937 copy_mp = copymsg(first_mp); 4938 if (copy_mp != NULL) { 4939 xmit_mp = copy_mp; 4940 MULTIRT_DEBUG_TAG(first_mp); 4941 } 4942 } 4943 ire_add_then_send(q, ire, xmit_mp); 4944 if (ip6_asp_table_held) { 4945 ip6_asp_table_refrele(); 4946 ip6_asp_table_held = B_FALSE; 4947 } 4948 4949 /* Assert that it is not deleted yet. */ 4950 ASSERT(save_ire->ire_ptpn != NULL); 4951 IRB_REFRELE(save_ire->ire_bucket); 4952 ire_refrele(save_ire); 4953 4954 if (copy_mp != NULL) { 4955 /* 4956 * If we found a (no)resolver, we ignore any 4957 * trailing top priority IRE_CACHE in 4958 * further loops. This ensures that we do not 4959 * omit any (no)resolver despite the priority 4960 * in this call. 4961 * IRE_CACHE, if any, will be processed 4962 * by another thread entering ip_newroute(), 4963 * (on resolver response, for example). 4964 * We use this to force multiple parallel 4965 * resolution as soon as a packet needs to be 4966 * sent. The result is, after one packet 4967 * emission all reachable routes are generally 4968 * resolved. 4969 * Otherwise, complete resolution of MULTIRT 4970 * routes would require several emissions as 4971 * side effect. 4972 */ 4973 multirt_flags &= ~MULTIRT_CACHEGW; 4974 4975 /* 4976 * Search for the next unresolved multirt 4977 * route. 
4978 */ 4979 copy_mp = NULL; 4980 save_ire = NULL; 4981 ire = NULL; 4982 /* re-enter the loop */ 4983 multirt_resolve_next = B_TRUE; 4984 continue; 4985 } 4986 4987 /* Don't need sire anymore */ 4988 if (sire != NULL) 4989 ire_refrele(sire); 4990 ill_refrele(dst_ill); 4991 ipif_refrele(src_ipif); 4992 return; 4993 4994 case IRE_IF_RESOLVER: 4995 /* 4996 * We can't build an IRE_CACHE yet, but at least we 4997 * found a resolver that can help. 4998 */ 4999 dst = *v6dstp; 5000 /* 5001 * To be at this point in the code with a non-zero gw 5002 * means that dst is reachable through a gateway that 5003 * we have never resolved. By changing dst to the gw 5004 * addr we resolve the gateway first. When 5005 * ire_add_then_send() tries to put the IP dg to dst, 5006 * it will reenter ip_newroute() at which time we will 5007 * find the IRE_CACHE for the gw and create another 5008 * IRE_CACHE above (for dst itself). 5009 */ 5010 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5011 save_dst = dst; 5012 dst = v6gw; 5013 v6gw = ipv6_all_zeros; 5014 } 5015 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5016 /* 5017 * Ask the external resolver to do its thing. 
5018 * Make an mblk chain in the following form: 5019 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5020 */ 5021 mblk_t *ire_mp; 5022 mblk_t *areq_mp; 5023 areq_t *areq; 5024 in6_addr_t *addrp; 5025 5026 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5027 if (ip6_asp_table_held) { 5028 ip6_asp_table_refrele(); 5029 ip6_asp_table_held = B_FALSE; 5030 } 5031 ire = ire_create_mp_v6( 5032 &dst, /* dest address */ 5033 &ipv6_all_ones, /* mask */ 5034 &src_ipif->ipif_v6src_addr, 5035 /* source address */ 5036 &v6gw, /* gateway address */ 5037 NULL, /* Fast Path header */ 5038 dst_ill->ill_rq, /* recv-from queue */ 5039 dst_ill->ill_wq, /* send-to queue */ 5040 IRE_CACHE, 5041 NULL, 5042 src_ipif, 5043 &save_ire->ire_mask_v6, 5044 /* Parent mask */ 5045 0, 5046 save_ire->ire_ihandle, 5047 /* Interface handle */ 5048 0, /* flags if any */ 5049 &(save_ire->ire_uinfo)); 5050 5051 ire_refrele(save_ire); 5052 if (ire == NULL) { 5053 ip1dbg(("ip_newroute_v6:" 5054 "ire is NULL\n")); 5055 break; 5056 } 5057 if ((sire != NULL) && 5058 (sire->ire_flags & RTF_MULTIRT)) { 5059 /* 5060 * processing a copy of the packet to 5061 * send for further resolution loops 5062 */ 5063 copy_mp = copymsg(first_mp); 5064 if (copy_mp != NULL) 5065 MULTIRT_DEBUG_TAG(copy_mp); 5066 } 5067 ire->ire_marks |= ire_marks; 5068 ire_mp = ire->ire_mp; 5069 /* 5070 * Now create or find an nce for this interface. 5071 * The hw addr will need to to be set from 5072 * the reply to the AR_ENTRY_QUERY that 5073 * we're about to send. This will be done in 5074 * ire_add_v6(). 5075 */ 5076 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5077 switch (err) { 5078 case 0: 5079 /* 5080 * New cache entry created. 5081 * Break, then ask the external 5082 * resolver. 5083 */ 5084 break; 5085 case EINPROGRESS: 5086 /* 5087 * Resolution in progress; 5088 * packet has been queued by 5089 * ndp_resolver(). 5090 */ 5091 ire_delete(ire); 5092 ire = NULL; 5093 /* 5094 * Check if another multirt 5095 * route must be resolved. 
5096 */ 5097 if (copy_mp != NULL) { 5098 /* 5099 * If we found a resolver, we 5100 * ignore any trailing top 5101 * priority IRE_CACHE in 5102 * further loops. The reason is 5103 * the same as for noresolver. 5104 */ 5105 multirt_flags &= 5106 ~MULTIRT_CACHEGW; 5107 /* 5108 * Search for the next 5109 * unresolved multirt route. 5110 */ 5111 first_mp = copy_mp; 5112 copy_mp = NULL; 5113 mp = first_mp; 5114 if (mp->b_datap->db_type == 5115 M_CTL) { 5116 mp = mp->b_cont; 5117 } 5118 ASSERT(sire != NULL); 5119 dst = save_dst; 5120 /* 5121 * re-enter the loop 5122 */ 5123 multirt_resolve_next = 5124 B_TRUE; 5125 continue; 5126 } 5127 5128 if (sire != NULL) 5129 ire_refrele(sire); 5130 ill_refrele(dst_ill); 5131 ipif_refrele(src_ipif); 5132 return; 5133 default: 5134 /* 5135 * Transient error; packet will be 5136 * freed. 5137 */ 5138 ire_delete(ire); 5139 ire = NULL; 5140 break; 5141 } 5142 if (err != 0) 5143 break; 5144 /* 5145 * Now set up the AR_ENTRY_QUERY and send it. 5146 */ 5147 areq_mp = ill_arp_alloc(dst_ill, 5148 (uchar_t *)&ipv6_areq_template, 5149 (caddr_t)&dst); 5150 if (areq_mp == NULL) { 5151 ip1dbg(("ip_newroute_v6:" 5152 "areq_mp is NULL\n")); 5153 freemsg(ire_mp); 5154 break; 5155 } 5156 areq = (areq_t *)areq_mp->b_rptr; 5157 addrp = (in6_addr_t *)((char *)areq + 5158 areq->areq_target_addr_offset); 5159 *addrp = dst; 5160 addrp = (in6_addr_t *)((char *)areq + 5161 areq->areq_sender_addr_offset); 5162 *addrp = src_ipif->ipif_v6src_addr; 5163 /* 5164 * link the chain, then send up to the resolver. 5165 */ 5166 linkb(areq_mp, ire_mp); 5167 linkb(areq_mp, mp); 5168 ip1dbg(("ip_newroute_v6:" 5169 "putnext to resolver\n")); 5170 putnext(dst_ill->ill_rq, areq_mp); 5171 /* 5172 * Check if another multirt route 5173 * must be resolved. 5174 */ 5175 ire = NULL; 5176 if (copy_mp != NULL) { 5177 /* 5178 * If we find a resolver, we ignore any 5179 * trailing top priority IRE_CACHE in 5180 * further loops. The reason is the 5181 * same as for noresolver. 
5182 */ 5183 multirt_flags &= ~MULTIRT_CACHEGW; 5184 /* 5185 * Search for the next unresolved 5186 * multirt route. 5187 */ 5188 first_mp = copy_mp; 5189 copy_mp = NULL; 5190 mp = first_mp; 5191 if (mp->b_datap->db_type == M_CTL) { 5192 mp = mp->b_cont; 5193 } 5194 ASSERT(sire != NULL); 5195 dst = save_dst; 5196 /* 5197 * re-enter the loop 5198 */ 5199 multirt_resolve_next = B_TRUE; 5200 continue; 5201 } 5202 5203 if (sire != NULL) 5204 ire_refrele(sire); 5205 ill_refrele(dst_ill); 5206 ipif_refrele(src_ipif); 5207 return; 5208 } 5209 /* 5210 * Non-external resolver case. 5211 */ 5212 ire = ire_create_v6( 5213 &dst, /* dest address */ 5214 &ipv6_all_ones, /* mask */ 5215 &src_ipif->ipif_v6src_addr, /* source address */ 5216 &v6gw, /* gateway address */ 5217 &save_ire->ire_max_frag, 5218 NULL, /* Fast Path header */ 5219 dst_ill->ill_rq, /* recv-from queue */ 5220 dst_ill->ill_wq, /* send-to queue */ 5221 IRE_CACHE, 5222 NULL, 5223 src_ipif, 5224 &save_ire->ire_mask_v6, /* Parent mask */ 5225 0, 5226 save_ire->ire_ihandle, /* Interface handle */ 5227 0, /* flags if any */ 5228 &(save_ire->ire_uinfo)); 5229 5230 if (ire == NULL) { 5231 ire_refrele(save_ire); 5232 break; 5233 } 5234 5235 if ((sire != NULL) && 5236 (sire->ire_flags & RTF_MULTIRT)) { 5237 copy_mp = copymsg(first_mp); 5238 if (copy_mp != NULL) 5239 MULTIRT_DEBUG_TAG(copy_mp); 5240 } 5241 5242 ire->ire_marks |= ire_marks; 5243 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5244 switch (err) { 5245 case 0: 5246 /* Prevent save_ire from getting deleted */ 5247 IRB_REFHOLD(save_ire->ire_bucket); 5248 /* Has it been removed already ? */ 5249 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5250 IRB_REFRELE(save_ire->ire_bucket); 5251 ire_refrele(save_ire); 5252 break; 5253 } 5254 5255 /* 5256 * We have a resolved cache entry, 5257 * add in the IRE. 
5258 */ 5259 ire_add_then_send(q, ire, first_mp); 5260 if (ip6_asp_table_held) { 5261 ip6_asp_table_refrele(); 5262 ip6_asp_table_held = B_FALSE; 5263 } 5264 5265 /* Assert that it is not deleted yet. */ 5266 ASSERT(save_ire->ire_ptpn != NULL); 5267 IRB_REFRELE(save_ire->ire_bucket); 5268 ire_refrele(save_ire); 5269 /* 5270 * Check if another multirt route 5271 * must be resolved. 5272 */ 5273 ire = NULL; 5274 if (copy_mp != NULL) { 5275 /* 5276 * If we find a resolver, we ignore any 5277 * trailing top priority IRE_CACHE in 5278 * further loops. The reason is the 5279 * same as for noresolver. 5280 */ 5281 multirt_flags &= ~MULTIRT_CACHEGW; 5282 /* 5283 * Search for the next unresolved 5284 * multirt route. 5285 */ 5286 first_mp = copy_mp; 5287 copy_mp = NULL; 5288 mp = first_mp; 5289 if (mp->b_datap->db_type == M_CTL) { 5290 mp = mp->b_cont; 5291 } 5292 ASSERT(sire != NULL); 5293 dst = save_dst; 5294 /* 5295 * re-enter the loop 5296 */ 5297 multirt_resolve_next = B_TRUE; 5298 continue; 5299 } 5300 5301 if (sire != NULL) 5302 ire_refrele(sire); 5303 ill_refrele(dst_ill); 5304 ipif_refrele(src_ipif); 5305 return; 5306 5307 case EINPROGRESS: 5308 /* 5309 * mp was consumed - presumably queued. 5310 * No need for ire, presumably resolution is 5311 * in progress, and ire will be added when the 5312 * address is resolved. 5313 */ 5314 if (ip6_asp_table_held) { 5315 ip6_asp_table_refrele(); 5316 ip6_asp_table_held = B_FALSE; 5317 } 5318 ASSERT(ire->ire_nce == NULL); 5319 ire_delete(ire); 5320 ire_refrele(save_ire); 5321 /* 5322 * Check if another multirt route 5323 * must be resolved. 5324 */ 5325 ire = NULL; 5326 if (copy_mp != NULL) { 5327 /* 5328 * If we find a resolver, we ignore any 5329 * trailing top priority IRE_CACHE in 5330 * further loops. The reason is the 5331 * same as for noresolver. 5332 */ 5333 multirt_flags &= ~MULTIRT_CACHEGW; 5334 /* 5335 * Search for the next unresolved 5336 * multirt route. 
5337 */ 5338 first_mp = copy_mp; 5339 copy_mp = NULL; 5340 mp = first_mp; 5341 if (mp->b_datap->db_type == M_CTL) { 5342 mp = mp->b_cont; 5343 } 5344 ASSERT(sire != NULL); 5345 dst = save_dst; 5346 /* 5347 * re-enter the loop 5348 */ 5349 multirt_resolve_next = B_TRUE; 5350 continue; 5351 } 5352 if (sire != NULL) 5353 ire_refrele(sire); 5354 ill_refrele(dst_ill); 5355 ipif_refrele(src_ipif); 5356 return; 5357 default: 5358 /* Some transient error */ 5359 ASSERT(ire->ire_nce == NULL); 5360 ire_refrele(save_ire); 5361 break; 5362 } 5363 break; 5364 default: 5365 break; 5366 } 5367 if (ip6_asp_table_held) { 5368 ip6_asp_table_refrele(); 5369 ip6_asp_table_held = B_FALSE; 5370 } 5371 } while (multirt_resolve_next); 5372 5373 err_ret: 5374 ip1dbg(("ip_newroute_v6: dropped\n")); 5375 if (src_ipif != NULL) 5376 ipif_refrele(src_ipif); 5377 if (dst_ill != NULL) { 5378 need_rele = B_TRUE; 5379 ill = dst_ill; 5380 } 5381 if (ill != NULL) { 5382 if (mp->b_prev != NULL) { 5383 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5384 } else { 5385 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5386 } 5387 5388 if (need_rele) 5389 ill_refrele(ill); 5390 } else { 5391 if (mp->b_prev != NULL) { 5392 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5393 } else { 5394 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5395 } 5396 } 5397 /* Did this packet originate externally? 
*/ 5398 if (mp->b_prev) { 5399 mp->b_next = NULL; 5400 mp->b_prev = NULL; 5401 } 5402 if (copy_mp != NULL) { 5403 MULTIRT_DEBUG_UNTAG(copy_mp); 5404 freemsg(copy_mp); 5405 } 5406 MULTIRT_DEBUG_UNTAG(first_mp); 5407 freemsg(first_mp); 5408 if (ire != NULL) 5409 ire_refrele(ire); 5410 if (sire != NULL) 5411 ire_refrele(sire); 5412 return; 5413 5414 icmp_err_ret: 5415 if (ip6_asp_table_held) 5416 ip6_asp_table_refrele(); 5417 if (src_ipif != NULL) 5418 ipif_refrele(src_ipif); 5419 if (dst_ill != NULL) { 5420 need_rele = B_TRUE; 5421 ill = dst_ill; 5422 } 5423 ip1dbg(("ip_newroute_v6: no route\n")); 5424 if (sire != NULL) 5425 ire_refrele(sire); 5426 /* 5427 * We need to set sire to NULL to avoid double freeing if we 5428 * ever goto err_ret from below. 5429 */ 5430 sire = NULL; 5431 ip6h = (ip6_t *)mp->b_rptr; 5432 /* Skip ip6i_t header if present */ 5433 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5434 /* Make sure the IPv6 header is present */ 5435 if ((mp->b_wptr - (uchar_t *)ip6h) < 5436 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5437 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5438 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5439 goto err_ret; 5440 } 5441 } 5442 mp->b_rptr += sizeof (ip6i_t); 5443 ip6h = (ip6_t *)mp->b_rptr; 5444 } 5445 /* Did this packet originate externally? 
*/ 5446 if (mp->b_prev) { 5447 if (ill != NULL) { 5448 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5449 } else { 5450 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5451 } 5452 mp->b_next = NULL; 5453 mp->b_prev = NULL; 5454 q = WR(q); 5455 } else { 5456 if (ill != NULL) { 5457 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5458 } else { 5459 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5460 } 5461 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5462 /* Failed */ 5463 if (copy_mp != NULL) { 5464 MULTIRT_DEBUG_UNTAG(copy_mp); 5465 freemsg(copy_mp); 5466 } 5467 MULTIRT_DEBUG_UNTAG(first_mp); 5468 freemsg(first_mp); 5469 if (ire != NULL) 5470 ire_refrele(ire); 5471 if (need_rele) 5472 ill_refrele(ill); 5473 return; 5474 } 5475 } 5476 5477 if (need_rele) 5478 ill_refrele(ill); 5479 5480 /* 5481 * At this point we will have ire only if RTF_BLACKHOLE 5482 * or RTF_REJECT flags are set on the IRE. It will not 5483 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5484 */ 5485 if (ire != NULL) { 5486 if (ire->ire_flags & RTF_BLACKHOLE) { 5487 ire_refrele(ire); 5488 if (copy_mp != NULL) { 5489 MULTIRT_DEBUG_UNTAG(copy_mp); 5490 freemsg(copy_mp); 5491 } 5492 MULTIRT_DEBUG_UNTAG(first_mp); 5493 freemsg(first_mp); 5494 return; 5495 } 5496 ire_refrele(ire); 5497 } 5498 if (ip_debug > 3) { 5499 /* ip2dbg */ 5500 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5501 AF_INET6, v6dstp); 5502 } 5503 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5504 B_FALSE, B_FALSE); 5505 } 5506 5507 /* 5508 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5509 * we need to send out a packet to a destination address for which we do not 5510 * have specific routing information. It is only used for multicast packets. 5511 * 5512 * If unspec_src we allow creating an IRE with source address zero. 5513 * ire_send_v6() will delete it after the packet is sent. 
5514 */ 5515 void 5516 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5517 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5518 { 5519 ire_t *ire = NULL; 5520 ipif_t *src_ipif = NULL; 5521 int err = 0; 5522 ill_t *dst_ill = NULL; 5523 ire_t *save_ire; 5524 ushort_t ire_marks = 0; 5525 ipsec_out_t *io; 5526 ill_t *attach_ill = NULL; 5527 ill_t *ill; 5528 ip6_t *ip6h; 5529 mblk_t *first_mp; 5530 boolean_t ip6i_present; 5531 ire_t *fire = NULL; 5532 mblk_t *copy_mp = NULL; 5533 boolean_t multirt_resolve_next; 5534 in6_addr_t *v6dstp = &v6dst; 5535 boolean_t ipif_held = B_FALSE; 5536 boolean_t ill_held = B_FALSE; 5537 boolean_t ip6_asp_table_held = B_FALSE; 5538 5539 /* 5540 * This loop is run only once in most cases. 5541 * We loop to resolve further routes only when the destination 5542 * can be reached through multiple RTF_MULTIRT-flagged ires. 5543 */ 5544 do { 5545 multirt_resolve_next = B_FALSE; 5546 if (dst_ill != NULL) { 5547 ill_refrele(dst_ill); 5548 dst_ill = NULL; 5549 } 5550 5551 if (src_ipif != NULL) { 5552 ipif_refrele(src_ipif); 5553 src_ipif = NULL; 5554 } 5555 ASSERT(ipif != NULL); 5556 ill = ipif->ipif_ill; 5557 5558 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5559 if (ip_debug > 2) { 5560 /* ip1dbg */ 5561 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5562 AF_INET6, v6dstp); 5563 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5564 ill->ill_name, ipif->ipif_isv6); 5565 } 5566 5567 first_mp = mp; 5568 if (mp->b_datap->db_type == M_CTL) { 5569 mp = mp->b_cont; 5570 io = (ipsec_out_t *)first_mp->b_rptr; 5571 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5572 } else { 5573 io = NULL; 5574 } 5575 5576 /* 5577 * If the interface is a pt-pt interface we look for an 5578 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5579 * local_address and the pt-pt destination address. 5580 * Otherwise we just match the local address. 
5581 */ 5582 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5583 goto err_ret; 5584 } 5585 /* 5586 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5587 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5588 * as it could be NULL. 5589 * 5590 * This information can appear either in an ip6i_t or an 5591 * IPSEC_OUT message. 5592 */ 5593 ip6h = (ip6_t *)mp->b_rptr; 5594 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5595 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5596 if (!ip6i_present || 5597 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5598 attach_ill = ip_grab_attach_ill(ill, first_mp, 5599 (ip6i_present ? 5600 ((ip6i_t *)ip6h)->ip6i_ifindex : 5601 io->ipsec_out_ill_index), B_TRUE); 5602 /* Failure case frees things for us. */ 5603 if (attach_ill == NULL) 5604 return; 5605 5606 /* 5607 * Check if we need an ire that will not be 5608 * looked up by anybody else i.e. HIDDEN. 5609 */ 5610 if (ill_is_probeonly(attach_ill)) 5611 ire_marks = IRE_MARK_HIDDEN; 5612 } 5613 } 5614 5615 /* 5616 * We check if an IRE_OFFSUBNET for the addr that goes through 5617 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5618 * RTF_MULTIRT flags must be honored. 5619 */ 5620 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5621 ip2dbg(("ip_newroute_ipif_v6: " 5622 "ipif_lookup_multi_ire_v6(" 5623 "ipif %p, dst %08x) = fire %p\n", 5624 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5625 (void *)fire)); 5626 5627 /* 5628 * If the application specified the ill (ifindex), we still 5629 * load spread. Only if the packets needs to go out specifically 5630 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5631 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5632 * multirouting, then we don't try to use a different ill for 5633 * load spreading. 
5634 */ 5635 if (attach_ill == NULL) { 5636 /* 5637 * If the interface belongs to an interface group, 5638 * make sure the next possible interface in the group 5639 * is used. This encourages load spreading among peers 5640 * in an interface group. 5641 * 5642 * Note: While we pick a dst_ill we are really only 5643 * interested in the ill for load spreading. The source 5644 * ipif is determined by source address selection below. 5645 */ 5646 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5647 dst_ill = ipif->ipif_ill; 5648 /* For uniformity do a refhold */ 5649 ill_refhold(dst_ill); 5650 } else { 5651 /* refheld by ip_newroute_get_dst_ill_v6 */ 5652 dst_ill = 5653 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5654 } 5655 if (dst_ill == NULL) { 5656 if (ip_debug > 2) { 5657 pr_addr_dbg("ip_newroute_ipif_v6: " 5658 "no dst ill for dst %s\n", 5659 AF_INET6, v6dstp); 5660 } 5661 goto err_ret; 5662 } 5663 } else { 5664 dst_ill = ipif->ipif_ill; 5665 /* 5666 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5667 * and IPV6_BOUND_PIF case. 5668 */ 5669 ASSERT(dst_ill == attach_ill); 5670 /* attach_ill is already refheld */ 5671 } 5672 /* 5673 * Pick a source address which matches the scope of the 5674 * destination address. 5675 * For RTF_SETSRC routes, the source address is imposed by the 5676 * parent ire (fire). 5677 */ 5678 ASSERT(src_ipif == NULL); 5679 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5680 /* 5681 * Check that the ipif matching the requested source 5682 * address still exists. 
5683 */ 5684 src_ipif = 5685 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5686 NULL, zoneid, NULL, NULL, NULL, NULL); 5687 } 5688 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5689 ip6_asp_table_held = B_TRUE; 5690 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5691 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5692 } 5693 5694 if (src_ipif == NULL) { 5695 if (!unspec_src) { 5696 if (ip_debug > 2) { 5697 /* ip1dbg */ 5698 pr_addr_dbg("ip_newroute_ipif_v6: " 5699 "no src for dst %s\n,", 5700 AF_INET6, v6dstp); 5701 printf(" through interface %s\n", 5702 dst_ill->ill_name); 5703 } 5704 goto err_ret; 5705 } 5706 /* Use any ipif for source */ 5707 for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; 5708 src_ipif = src_ipif->ipif_next) { 5709 if ((src_ipif->ipif_flags & IPIF_UP) && 5710 IN6_IS_ADDR_UNSPECIFIED( 5711 &src_ipif->ipif_v6src_addr)) 5712 break; 5713 } 5714 if (src_ipif == NULL) { 5715 if (ip_debug > 2) { 5716 /* ip1dbg */ 5717 pr_addr_dbg("ip_newroute_ipif_v6: " 5718 "no src for dst %s\n ", 5719 AF_INET6, v6dstp); 5720 printf("ip_newroute_ipif_v6: if %s" 5721 "(UNSPEC_SRC)\n", 5722 dst_ill->ill_name); 5723 } 5724 goto err_ret; 5725 } 5726 src_ipif = ipif; 5727 ipif_refhold(src_ipif); 5728 } 5729 ire = ipif_to_ire_v6(ipif); 5730 if (ire == NULL) { 5731 if (ip_debug > 2) { 5732 /* ip1dbg */ 5733 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5734 AF_INET6, &ipif->ipif_v6lcl_addr); 5735 printf("ip_newroute_ipif_v6: " 5736 "if %s\n", dst_ill->ill_name); 5737 } 5738 goto err_ret; 5739 } 5740 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5741 goto err_ret; 5742 5743 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5744 5745 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5746 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5747 if (ip_debug > 2) { 5748 /* ip1dbg */ 5749 pr_addr_dbg(" address %s\n", 5750 AF_INET6, &ire->ire_src_addr_v6); 5751 } 5752 save_ire = ire; 5753 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5754 (void *)ire, (void 
*)ipif)); 5755 5756 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5757 /* 5758 * an IRE_OFFSUBET was looked up 5759 * on that interface. 5760 * this ire has RTF_MULTIRT flag, 5761 * so the resolution loop 5762 * will be re-entered to resolve 5763 * additional routes on other 5764 * interfaces. For that purpose, 5765 * a copy of the packet is 5766 * made at this point. 5767 */ 5768 fire->ire_last_used_time = lbolt; 5769 copy_mp = copymsg(first_mp); 5770 if (copy_mp) { 5771 MULTIRT_DEBUG_TAG(copy_mp); 5772 } 5773 } 5774 5775 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5776 switch (ire->ire_type) { 5777 case IRE_IF_NORESOLVER: { 5778 /* We have what we need to build an IRE_CACHE. */ 5779 mblk_t *dlureq_mp; 5780 5781 /* 5782 * Create a new dlureq_mp with the 5783 * IPv6 gateway address in destination address in the 5784 * DLPI hdr if the physical length is exactly 16 bytes. 5785 */ 5786 ASSERT(dst_ill->ill_isv6); 5787 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5788 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5789 dst_ill->ill_phys_addr_length, 5790 dst_ill->ill_sap, 5791 dst_ill->ill_sap_length); 5792 } else { 5793 dlureq_mp = ire->ire_dlureq_mp; 5794 } 5795 5796 if (dlureq_mp == NULL) 5797 break; 5798 /* 5799 * The newly created ire will inherit the flags of the 5800 * parent ire, if any. 5801 */ 5802 ire = ire_create_v6( 5803 v6dstp, /* dest address */ 5804 &ipv6_all_ones, /* mask */ 5805 &src_ipif->ipif_v6src_addr, /* source address */ 5806 NULL, /* gateway address */ 5807 &save_ire->ire_max_frag, 5808 NULL, /* Fast Path header */ 5809 dst_ill->ill_rq, /* recv-from queue */ 5810 dst_ill->ill_wq, /* send-to queue */ 5811 IRE_CACHE, 5812 dlureq_mp, 5813 src_ipif, 5814 NULL, 5815 (fire != NULL) ? /* Parent handle */ 5816 fire->ire_phandle : 0, 5817 save_ire->ire_ihandle, /* Interface handle */ 5818 (fire != NULL) ? 
5819 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5820 0, 5821 &ire_uinfo_null); 5822 5823 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5824 freeb(dlureq_mp); 5825 5826 if (ire == NULL) { 5827 ire_refrele(save_ire); 5828 break; 5829 } 5830 5831 ire->ire_marks |= ire_marks; 5832 5833 err = ndp_noresolver(dst_ill, v6dstp); 5834 if (err != 0) { 5835 ire_refrele(save_ire); 5836 break; 5837 } 5838 5839 /* Prevent save_ire from getting deleted */ 5840 IRB_REFHOLD(save_ire->ire_bucket); 5841 /* Has it been removed already ? */ 5842 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5843 IRB_REFRELE(save_ire->ire_bucket); 5844 ire_refrele(save_ire); 5845 break; 5846 } 5847 5848 ire_add_then_send(q, ire, first_mp); 5849 if (ip6_asp_table_held) { 5850 ip6_asp_table_refrele(); 5851 ip6_asp_table_held = B_FALSE; 5852 } 5853 5854 /* Assert that it is not deleted yet. */ 5855 ASSERT(save_ire->ire_ptpn != NULL); 5856 IRB_REFRELE(save_ire->ire_bucket); 5857 ire_refrele(save_ire); 5858 if (fire != NULL) { 5859 ire_refrele(fire); 5860 fire = NULL; 5861 } 5862 5863 /* 5864 * The resolution loop is re-entered if we 5865 * actually are in a multirouting case. 5866 */ 5867 if (copy_mp != NULL) { 5868 boolean_t need_resolve = 5869 ire_multirt_need_resolve_v6(v6dstp); 5870 if (!need_resolve) { 5871 MULTIRT_DEBUG_UNTAG(copy_mp); 5872 freemsg(copy_mp); 5873 copy_mp = NULL; 5874 } else { 5875 /* 5876 * ipif_lookup_group_v6() calls 5877 * ire_lookup_multi_v6() that uses 5878 * ire_ftable_lookup_v6() to find 5879 * an IRE_INTERFACE for the group. 5880 * In the multirt case, 5881 * ire_lookup_multi_v6() then invokes 5882 * ire_multirt_lookup_v6() to find 5883 * the next resolvable ire. 5884 * As a result, we obtain a new 5885 * interface, derived from the 5886 * next ire. 
5887 */ 5888 if (ipif_held) { 5889 ipif_refrele(ipif); 5890 ipif_held = B_FALSE; 5891 } 5892 ipif = ipif_lookup_group_v6(v6dstp, 5893 zoneid); 5894 ip2dbg(("ip_newroute_ipif: " 5895 "multirt dst %08x, ipif %p\n", 5896 ntohl(V4_PART_OF_V6((*v6dstp))), 5897 (void *)ipif)); 5898 if (ipif != NULL) { 5899 ipif_held = B_TRUE; 5900 mp = copy_mp; 5901 copy_mp = NULL; 5902 multirt_resolve_next = 5903 B_TRUE; 5904 continue; 5905 } else { 5906 freemsg(copy_mp); 5907 } 5908 } 5909 } 5910 ill_refrele(dst_ill); 5911 if (ipif_held) { 5912 ipif_refrele(ipif); 5913 ipif_held = B_FALSE; 5914 } 5915 if (src_ipif != NULL) 5916 ipif_refrele(src_ipif); 5917 return; 5918 } 5919 case IRE_IF_RESOLVER: { 5920 5921 ASSERT(dst_ill->ill_isv6); 5922 5923 /* 5924 * We obtain a partial IRE_CACHE which we will pass 5925 * along with the resolver query. When the response 5926 * comes back it will be there ready for us to add. 5927 */ 5928 /* 5929 * the newly created ire will inherit the flags of the 5930 * parent ire, if any. 5931 */ 5932 ire = ire_create_v6( 5933 v6dstp, /* dest address */ 5934 &ipv6_all_ones, /* mask */ 5935 &src_ipif->ipif_v6src_addr, /* source address */ 5936 NULL, /* gateway address */ 5937 &save_ire->ire_max_frag, 5938 NULL, /* Fast Path header */ 5939 dst_ill->ill_rq, /* recv-from queue */ 5940 dst_ill->ill_wq, /* send-to queue */ 5941 IRE_CACHE, 5942 NULL, 5943 src_ipif, 5944 NULL, 5945 (fire != NULL) ? /* Parent handle */ 5946 fire->ire_phandle : 0, 5947 save_ire->ire_ihandle, /* Interface handle */ 5948 (fire != NULL) ? 
5949 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5950 0, 5951 &ire_uinfo_null); 5952 5953 if (ire == NULL) { 5954 ire_refrele(save_ire); 5955 break; 5956 } 5957 5958 ire->ire_marks |= ire_marks; 5959 5960 /* Resolve and add ire to the ctable */ 5961 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5962 switch (err) { 5963 case 0: 5964 /* Prevent save_ire from getting deleted */ 5965 IRB_REFHOLD(save_ire->ire_bucket); 5966 /* Has it been removed already ? */ 5967 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5968 IRB_REFRELE(save_ire->ire_bucket); 5969 ire_refrele(save_ire); 5970 break; 5971 } 5972 /* 5973 * We have a resolved cache entry, 5974 * add in the IRE. 5975 */ 5976 ire_add_then_send(q, ire, first_mp); 5977 if (ip6_asp_table_held) { 5978 ip6_asp_table_refrele(); 5979 ip6_asp_table_held = B_FALSE; 5980 } 5981 5982 /* Assert that it is not deleted yet. */ 5983 ASSERT(save_ire->ire_ptpn != NULL); 5984 IRB_REFRELE(save_ire->ire_bucket); 5985 ire_refrele(save_ire); 5986 if (fire != NULL) { 5987 ire_refrele(fire); 5988 fire = NULL; 5989 } 5990 5991 /* 5992 * The resolution loop is re-entered if we 5993 * actually are in a multirouting case. 5994 */ 5995 if (copy_mp != NULL) { 5996 boolean_t need_resolve = 5997 ire_multirt_need_resolve_v6(v6dstp); 5998 if (!need_resolve) { 5999 MULTIRT_DEBUG_UNTAG(copy_mp); 6000 freemsg(copy_mp); 6001 copy_mp = NULL; 6002 } else { 6003 /* 6004 * ipif_lookup_group_v6() calls 6005 * ire_lookup_multi_v6() that 6006 * uses ire_ftable_lookup_v6() 6007 * to find an IRE_INTERFACE for 6008 * the group. In the multirt 6009 * case, ire_lookup_multi_v6() 6010 * then invokes 6011 * ire_multirt_lookup_v6() to 6012 * find the next resolvable ire. 6013 * As a result, we obtain a new 6014 * interface, derived from the 6015 * next ire. 
6016 */ 6017 if (ipif_held) { 6018 ipif_refrele(ipif); 6019 ipif_held = B_FALSE; 6020 } 6021 ipif = ipif_lookup_group_v6( 6022 v6dstp, zoneid); 6023 ip2dbg(("ip_newroute_ipif: " 6024 "multirt dst %08x, " 6025 "ipif %p\n", 6026 ntohl(V4_PART_OF_V6( 6027 (*v6dstp))), 6028 (void *)ipif)); 6029 if (ipif != NULL) { 6030 ipif_held = B_TRUE; 6031 mp = copy_mp; 6032 copy_mp = NULL; 6033 multirt_resolve_next = 6034 B_TRUE; 6035 continue; 6036 } else { 6037 freemsg(copy_mp); 6038 } 6039 } 6040 } 6041 ill_refrele(dst_ill); 6042 if (ipif_held) { 6043 ipif_refrele(ipif); 6044 ipif_held = B_FALSE; 6045 } 6046 if (src_ipif != NULL) 6047 ipif_refrele(src_ipif); 6048 return; 6049 6050 case EINPROGRESS: 6051 /* 6052 * mp was consumed - presumably queued. 6053 * No need for ire, presumably resolution is 6054 * in progress, and ire will be added when the 6055 * address is resolved. 6056 */ 6057 if (ip6_asp_table_held) { 6058 ip6_asp_table_refrele(); 6059 ip6_asp_table_held = B_FALSE; 6060 } 6061 ire_delete(ire); 6062 ire_refrele(save_ire); 6063 if (fire != NULL) { 6064 ire_refrele(fire); 6065 fire = NULL; 6066 } 6067 6068 /* 6069 * The resolution loop is re-entered if we 6070 * actually are in a multirouting case. 6071 */ 6072 if (copy_mp != NULL) { 6073 boolean_t need_resolve = 6074 ire_multirt_need_resolve_v6(v6dstp); 6075 if (!need_resolve) { 6076 MULTIRT_DEBUG_UNTAG(copy_mp); 6077 freemsg(copy_mp); 6078 copy_mp = NULL; 6079 } else { 6080 /* 6081 * ipif_lookup_group_v6() calls 6082 * ire_lookup_multi_v6() that 6083 * uses ire_ftable_lookup_v6() 6084 * to find an IRE_INTERFACE for 6085 * the group. In the multirt 6086 * case, ire_lookup_multi_v6() 6087 * then invokes 6088 * ire_multirt_lookup_v6() to 6089 * find the next resolvable ire. 6090 * As a result, we obtain a new 6091 * interface, derived from the 6092 * next ire. 
6093 */ 6094 if (ipif_held) { 6095 ipif_refrele(ipif); 6096 ipif_held = B_FALSE; 6097 } 6098 ipif = ipif_lookup_group_v6( 6099 v6dstp, zoneid); 6100 ip2dbg(("ip_newroute_ipif: " 6101 "multirt dst %08x, " 6102 "ipif %p\n", 6103 ntohl(V4_PART_OF_V6( 6104 (*v6dstp))), 6105 (void *)ipif)); 6106 if (ipif != NULL) { 6107 ipif_held = B_TRUE; 6108 mp = copy_mp; 6109 copy_mp = NULL; 6110 multirt_resolve_next = 6111 B_TRUE; 6112 continue; 6113 } else { 6114 freemsg(copy_mp); 6115 } 6116 } 6117 } 6118 ill_refrele(dst_ill); 6119 if (ipif_held) { 6120 ipif_refrele(ipif); 6121 ipif_held = B_FALSE; 6122 } 6123 if (src_ipif != NULL) 6124 ipif_refrele(src_ipif); 6125 return; 6126 default: 6127 /* Some transient error */ 6128 ire_refrele(save_ire); 6129 break; 6130 } 6131 break; 6132 } 6133 default: 6134 break; 6135 } 6136 if (ip6_asp_table_held) { 6137 ip6_asp_table_refrele(); 6138 ip6_asp_table_held = B_FALSE; 6139 } 6140 } while (multirt_resolve_next); 6141 6142 err_ret: 6143 if (ip6_asp_table_held) 6144 ip6_asp_table_refrele(); 6145 if (ire != NULL) 6146 ire_refrele(ire); 6147 if (fire != NULL) 6148 ire_refrele(fire); 6149 if (ipif != NULL && ipif_held) 6150 ipif_refrele(ipif); 6151 if (src_ipif != NULL) 6152 ipif_refrele(src_ipif); 6153 /* Multicast - no point in trying to generate ICMP error */ 6154 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6155 if (dst_ill != NULL) { 6156 ill = dst_ill; 6157 ill_held = B_TRUE; 6158 } 6159 if (mp->b_prev || mp->b_next) { 6160 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6161 } else { 6162 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6163 } 6164 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6165 mp->b_next = NULL; 6166 mp->b_prev = NULL; 6167 freemsg(first_mp); 6168 if (ill_held) 6169 ill_refrele(ill); 6170 } 6171 6172 /* 6173 * Parse and process any hop-by-hop or destination options. 6174 * 6175 * Assumes that q is an ill read queue so that ICMP errors for link-local 6176 * destinations are sent out the correct interface. 
6177 * 6178 * Returns -1 if there was an error and mp has been consumed. 6179 * Returns 0 if no special action is needed. 6180 * Returns 1 if the packet contained a router alert option for this node 6181 * which is verified to be "interesting/known" for our implementation. 6182 * 6183 * XXX Note: In future as more hbh or dest options are defined, 6184 * it may be better to have different routines for hbh and dest 6185 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6186 * may have same value in different namespaces. Or is it same namespace ?? 6187 * Current code checks for each opt_type (other than pads) if it is in 6188 * the expected nexthdr (hbh or dest) 6189 */ 6190 static int 6191 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6192 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6193 { 6194 uint8_t opt_type; 6195 uint_t optused; 6196 int ret = 0; 6197 mblk_t *first_mp; 6198 6199 first_mp = mp; 6200 if (mp->b_datap->db_type == M_CTL) { 6201 mp = mp->b_cont; 6202 } 6203 6204 while (optlen != 0) { 6205 opt_type = *optptr; 6206 if (opt_type == IP6OPT_PAD1) { 6207 optused = 1; 6208 } else { 6209 if (optlen < 2) 6210 goto bad_opt; 6211 switch (opt_type) { 6212 case IP6OPT_PADN: 6213 /* 6214 * Note:We don't verify that (N-2) pad octets 6215 * are zero as required by spec. Adhere to 6216 * "be liberal in what you accept..." part of 6217 * implementation philosophy (RFC791,RFC1122) 6218 */ 6219 optused = 2 + optptr[1]; 6220 if (optused > optlen) 6221 goto bad_opt; 6222 break; 6223 6224 case IP6OPT_JUMBO: 6225 if (hdr_type != IPPROTO_HOPOPTS) 6226 goto opt_error; 6227 goto opt_error; /* XXX Not implemented! 
*/ 6228 6229 case IP6OPT_ROUTER_ALERT: { 6230 struct ip6_opt_router *or; 6231 6232 if (hdr_type != IPPROTO_HOPOPTS) 6233 goto opt_error; 6234 optused = 2 + optptr[1]; 6235 if (optused > optlen) 6236 goto bad_opt; 6237 or = (struct ip6_opt_router *)optptr; 6238 /* Check total length and alignment */ 6239 if (optused != sizeof (*or) || 6240 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6241 goto opt_error; 6242 /* Check value */ 6243 switch (*((uint16_t *)or->ip6or_value)) { 6244 case IP6_ALERT_MLD: 6245 case IP6_ALERT_RSVP: 6246 ret = 1; 6247 } 6248 break; 6249 } 6250 case IP6OPT_HOME_ADDRESS: { 6251 /* 6252 * Minimal support for the home address option 6253 * (which is required by all IPv6 nodes). 6254 * Implement by just swapping the home address 6255 * and source address. 6256 * XXX Note: this has IPsec implications since 6257 * AH needs to take this into account. 6258 * Also, when IPsec is used we need to ensure 6259 * that this is only processed once 6260 * in the received packet (to avoid swapping 6261 * back and forth). 6262 * NOTE:This option processing is considered 6263 * to be unsafe and prone to a denial of 6264 * service attack. 6265 * The current processing is not safe even with 6266 * IPsec secured IP packets. Since the home 6267 * address option processing requirement still 6268 * is in the IETF draft and in the process of 6269 * being redefined for its usage, it has been 6270 * decided to turn off the option by default. 6271 * If this section of code needs to be executed, 6272 * ndd variable ip6_ignore_home_address_opt 6273 * should be set to 0 at the user's own risk. 6274 */ 6275 struct ip6_opt_home_address *oh; 6276 in6_addr_t tmp; 6277 6278 if (ipv6_ignore_home_address_opt) 6279 goto opt_error; 6280 6281 if (hdr_type != IPPROTO_DSTOPTS) 6282 goto opt_error; 6283 optused = 2 + optptr[1]; 6284 if (optused > optlen) 6285 goto bad_opt; 6286 6287 /* 6288 * We did this dest. opt the first time 6289 * around (i.e. before AH processing). 
6290 * If we've done AH... stop now. 6291 */ 6292 if (first_mp != mp) { 6293 ipsec_in_t *ii; 6294 6295 ii = (ipsec_in_t *)first_mp->b_rptr; 6296 if (ii->ipsec_in_ah_sa != NULL) 6297 break; 6298 } 6299 6300 oh = (struct ip6_opt_home_address *)optptr; 6301 /* Check total length and alignment */ 6302 if (optused < sizeof (*oh) || 6303 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6304 goto opt_error; 6305 /* Swap ip6_src and the home address */ 6306 tmp = ip6h->ip6_src; 6307 /* XXX Note: only 8 byte alignment option */ 6308 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6309 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6310 break; 6311 } 6312 6313 case IP6OPT_TUNNEL_LIMIT: 6314 if (hdr_type != IPPROTO_DSTOPTS) { 6315 goto opt_error; 6316 } 6317 optused = 2 + optptr[1]; 6318 if (optused > optlen) { 6319 goto bad_opt; 6320 } 6321 if (optused != 3) { 6322 goto opt_error; 6323 } 6324 break; 6325 6326 default: 6327 opt_error: 6328 ip1dbg(("ip_process_options_v6: bad opt 0x%x\n", 6329 opt_type)); 6330 switch (IP6OPT_TYPE(opt_type)) { 6331 case IP6OPT_TYPE_SKIP: 6332 optused = 2 + optptr[1]; 6333 if (optused > optlen) 6334 goto bad_opt; 6335 break; 6336 case IP6OPT_TYPE_DISCARD: 6337 freemsg(first_mp); 6338 return (-1); 6339 case IP6OPT_TYPE_ICMP: 6340 icmp_param_problem_v6(WR(q), first_mp, 6341 ICMP6_PARAMPROB_OPTION, 6342 (uint32_t)(optptr - 6343 (uint8_t *)ip6h), 6344 B_FALSE, B_FALSE); 6345 return (-1); 6346 case IP6OPT_TYPE_FORCEICMP: 6347 icmp_param_problem_v6(WR(q), first_mp, 6348 ICMP6_PARAMPROB_OPTION, 6349 (uint32_t)(optptr - 6350 (uint8_t *)ip6h), 6351 B_FALSE, B_TRUE); 6352 return (-1); 6353 } 6354 } 6355 } 6356 optlen -= optused; 6357 optptr += optused; 6358 } 6359 return (ret); 6360 6361 bad_opt: 6362 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6363 (uint32_t)(optptr - (uint8_t *)ip6h), 6364 B_FALSE, B_FALSE); 6365 return (-1); 6366 } 6367 6368 /* 6369 * Process a routing header that is not yet empty. 6370 * Only handles type 0 routing headers. 
6371 */ 6372 static void 6373 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6374 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6375 { 6376 ip6_rthdr0_t *rthdr; 6377 uint_t ehdrlen; 6378 uint_t numaddr; 6379 in6_addr_t *addrptr; 6380 in6_addr_t tmp; 6381 6382 ASSERT(rth->ip6r_segleft != 0); 6383 6384 if (!ipv6_forward_src_routed) { 6385 /* XXX Check for source routed out same interface? */ 6386 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6387 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6388 freemsg(hada_mp); 6389 freemsg(mp); 6390 return; 6391 } 6392 6393 if (rth->ip6r_type != 0) { 6394 if (hada_mp != NULL) 6395 goto hada_drop; 6396 icmp_param_problem_v6(WR(q), mp, 6397 ICMP6_PARAMPROB_HEADER, 6398 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6399 B_FALSE, B_FALSE); 6400 return; 6401 } 6402 rthdr = (ip6_rthdr0_t *)rth; 6403 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6404 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6405 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6406 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6407 if (rthdr->ip6r0_len & 0x1) { 6408 /* An odd length is impossible */ 6409 if (hada_mp != NULL) 6410 goto hada_drop; 6411 icmp_param_problem_v6(WR(q), mp, 6412 ICMP6_PARAMPROB_HEADER, 6413 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6414 B_FALSE, B_FALSE); 6415 return; 6416 } 6417 numaddr = rthdr->ip6r0_len / 2; 6418 if (rthdr->ip6r0_segleft > numaddr) { 6419 /* segleft exceeds number of addresses in routing header */ 6420 if (hada_mp != NULL) 6421 goto hada_drop; 6422 icmp_param_problem_v6(WR(q), mp, 6423 ICMP6_PARAMPROB_HEADER, 6424 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6425 (uchar_t *)ip6h), 6426 B_FALSE, B_FALSE); 6427 return; 6428 } 6429 addrptr += (numaddr - rthdr->ip6r0_segleft); 6430 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6431 IN6_IS_ADDR_MULTICAST(addrptr)) { 6432 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6433 freemsg(hada_mp); 6434 
freemsg(mp); 6435 return; 6436 } 6437 /* Swap */ 6438 tmp = *addrptr; 6439 *addrptr = ip6h->ip6_dst; 6440 ip6h->ip6_dst = tmp; 6441 rthdr->ip6r0_segleft--; 6442 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6443 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6444 if (hada_mp != NULL) 6445 goto hada_drop; 6446 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6447 B_FALSE, B_FALSE); 6448 return; 6449 } 6450 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6451 return; 6452 hada_drop: 6453 /* IPsec kstats: bean counter? */ 6454 freemsg(hada_mp); 6455 freemsg(mp); 6456 } 6457 6458 /* 6459 * Read side put procedure for IPv6 module. 6460 */ 6461 static void 6462 ip_rput_v6(queue_t *q, mblk_t *mp) 6463 { 6464 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6465 ip6_t *ip6h; 6466 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6467 ill_t *ill; 6468 struct iocblk *iocp; 6469 uint_t flags = 0; 6470 6471 ill = (ill_t *)q->q_ptr; 6472 if (ill->ill_state_flags & ILL_CONDEMNED) { 6473 union DL_primitives *dl; 6474 6475 dl = (union DL_primitives *)mp->b_rptr; 6476 /* 6477 * Things are opening or closing - only accept DLPI 6478 * ack messages. If the stream is closing and ip_wsrv 6479 * has completed, ip_close is out of the qwait, but has 6480 * not yet completed qprocsoff. Don't proceed any further 6481 * because the ill has been cleaned up and things hanging 6482 * off the ill have been freed. 6483 */ 6484 if ((mp->b_datap->db_type != M_PCPROTO) || 6485 (dl->dl_primitive == DL_UNITDATA_IND)) { 6486 inet_freemsg(mp); 6487 return; 6488 } 6489 } 6490 6491 switch (mp->b_datap->db_type) { 6492 case M_DATA: 6493 break; 6494 6495 case M_PROTO: 6496 case M_PCPROTO: 6497 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6498 DL_UNITDATA_IND) { 6499 /* Go handle anything other than data elsewhere. 
*/ 6500 ip_rput_dlpi(q, mp); 6501 return; 6502 } 6503 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6504 ll_multicast = dlur->dl_group_address; 6505 #undef dlur 6506 /* Ditch the DLPI header. */ 6507 mp1 = mp; 6508 mp = mp->b_cont; 6509 freeb(mp1); 6510 break; 6511 case M_BREAK: 6512 panic("ip_rput_v6: got an M_BREAK"); 6513 /*NOTREACHED*/ 6514 case M_IOCACK: 6515 iocp = (struct iocblk *)mp->b_rptr; 6516 switch (iocp->ioc_cmd) { 6517 case DL_IOC_HDR_INFO: 6518 ill = (ill_t *)q->q_ptr; 6519 ill_fastpath_ack(ill, mp); 6520 return; 6521 case SIOCSTUNPARAM: 6522 case SIOCGTUNPARAM: 6523 case OSIOCSTUNPARAM: 6524 case OSIOCGTUNPARAM: 6525 /* Go through qwriter */ 6526 break; 6527 default: 6528 putnext(q, mp); 6529 return; 6530 } 6531 /* FALLTHRU */ 6532 case M_ERROR: 6533 case M_HANGUP: 6534 mutex_enter(&ill->ill_lock); 6535 if (ill->ill_state_flags & ILL_CONDEMNED) { 6536 mutex_exit(&ill->ill_lock); 6537 freemsg(mp); 6538 return; 6539 } 6540 ill_refhold_locked(ill); 6541 mutex_exit(&ill->ill_lock); 6542 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6543 return; 6544 case M_CTL: { 6545 if ((MBLKL(mp) > sizeof (int)) && 6546 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6547 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6548 mctl_present = B_TRUE; 6549 break; 6550 } 6551 putnext(q, mp); 6552 return; 6553 } 6554 case M_IOCNAK: 6555 iocp = (struct iocblk *)mp->b_rptr; 6556 switch (iocp->ioc_cmd) { 6557 case DL_IOC_HDR_INFO: 6558 case SIOCSTUNPARAM: 6559 case SIOCGTUNPARAM: 6560 case OSIOCSTUNPARAM: 6561 case OSIOCGTUNPARAM: 6562 mutex_enter(&ill->ill_lock); 6563 if (ill->ill_state_flags & ILL_CONDEMNED) { 6564 mutex_exit(&ill->ill_lock); 6565 freemsg(mp); 6566 return; 6567 } 6568 ill_refhold_locked(ill); 6569 mutex_exit(&ill->ill_lock); 6570 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6571 B_FALSE); 6572 return; 6573 default: 6574 break; 6575 } 6576 /* FALLTHRU */ 6577 default: 6578 putnext(q, mp); 6579 return; 6580 } 6581 6582 
BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6583 /* 6584 * if db_ref > 1 then copymsg and free original. Packet may be 6585 * changed and do not want other entity who has a reference to this 6586 * message to trip over the changes. This is a blind change because 6587 * trying to catch all places that might change packet is too 6588 * difficult (since it may be a module above this one). 6589 */ 6590 if (mp->b_datap->db_ref > 1) { 6591 mblk_t *mp1; 6592 6593 mp1 = copymsg(mp); 6594 freemsg(mp); 6595 if (mp1 == NULL) { 6596 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6597 return; 6598 } 6599 mp = mp1; 6600 } 6601 first_mp = mp; 6602 if (mctl_present) { 6603 hada_mp = first_mp; 6604 mp = first_mp->b_cont; 6605 } 6606 6607 ip6h = (ip6_t *)mp->b_rptr; 6608 6609 /* check for alignment and full IPv6 header */ 6610 if (!OK_32PTR((uchar_t *)ip6h) || 6611 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6612 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6613 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6614 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6615 freemsg(first_mp); 6616 return; 6617 } 6618 ip6h = (ip6_t *)mp->b_rptr; 6619 } 6620 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6621 IPV6_DEFAULT_VERS_AND_FLOW) { 6622 /* 6623 * It may be a bit too expensive to do this mapped address 6624 * check here, but in the interest of robustness, it seems 6625 * like the correct place. 6626 * TODO: Avoid this check for e.g. 
connected TCP sockets 6627 */ 6628 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6629 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6630 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6631 freemsg(first_mp); 6632 return; 6633 } 6634 6635 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6636 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6637 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6638 freemsg(first_mp); 6639 return; 6640 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6641 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6642 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6643 freemsg(first_mp); 6644 return; 6645 } 6646 6647 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6648 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6649 } else { 6650 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6651 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6652 freemsg(first_mp); 6653 } 6654 } 6655 6656 /* 6657 * Walk through the IPv6 packet in mp and see if there's an AH header 6658 * in it. See if the AH header needs to get done before other headers in 6659 * the packet. (Worker function for ipsec_early_ah_v6().) 6660 */ 6661 #define IPSEC_HDR_DONT_PROCESS 0 6662 #define IPSEC_HDR_PROCESS 1 6663 #define IPSEC_MEMORY_ERROR 2 6664 static int 6665 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6666 { 6667 uint_t length; 6668 uint_t ehdrlen; 6669 uint8_t *whereptr; 6670 uint8_t *endptr; 6671 uint8_t *nexthdrp; 6672 ip6_dest_t *desthdr; 6673 ip6_rthdr_t *rthdr; 6674 ip6_t *ip6h; 6675 6676 /* 6677 * For now just pullup everything. In general, the less pullups, 6678 * the better, but there's so much squirrelling through anyway, 6679 * it's just easier this way. 
6680 */ 6681 if (!pullupmsg(mp, -1)) { 6682 return (IPSEC_MEMORY_ERROR); 6683 } 6684 6685 ip6h = (ip6_t *)mp->b_rptr; 6686 length = IPV6_HDR_LEN; 6687 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6688 endptr = mp->b_wptr; 6689 6690 /* 6691 * We can't just use the argument nexthdr in the place 6692 * of nexthdrp becaue we don't dereference nexthdrp 6693 * till we confirm whether it is a valid address. 6694 */ 6695 nexthdrp = &ip6h->ip6_nxt; 6696 while (whereptr < endptr) { 6697 /* Is there enough left for len + nexthdr? */ 6698 if (whereptr + MIN_EHDR_LEN > endptr) 6699 return (IPSEC_MEMORY_ERROR); 6700 6701 switch (*nexthdrp) { 6702 case IPPROTO_HOPOPTS: 6703 case IPPROTO_DSTOPTS: 6704 /* Assumes the headers are identical for hbh and dst */ 6705 desthdr = (ip6_dest_t *)whereptr; 6706 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6707 if ((uchar_t *)desthdr + ehdrlen > endptr) 6708 return (IPSEC_MEMORY_ERROR); 6709 /* 6710 * Return DONT_PROCESS because of potential Mobile IPv6 6711 * cruft for destination options. 6712 */ 6713 if (*nexthdrp == IPPROTO_DSTOPTS) 6714 return (IPSEC_HDR_DONT_PROCESS); 6715 nexthdrp = &desthdr->ip6d_nxt; 6716 break; 6717 case IPPROTO_ROUTING: 6718 rthdr = (ip6_rthdr_t *)whereptr; 6719 6720 /* 6721 * If there's more hops left on the routing header, 6722 * return now with DON'T PROCESS. 
6723 */ 6724 if (rthdr->ip6r_segleft > 0) 6725 return (IPSEC_HDR_DONT_PROCESS); 6726 6727 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6728 if ((uchar_t *)rthdr + ehdrlen > endptr) 6729 return (IPSEC_MEMORY_ERROR); 6730 nexthdrp = &rthdr->ip6r_nxt; 6731 break; 6732 case IPPROTO_FRAGMENT: 6733 /* Wait for reassembly */ 6734 return (IPSEC_HDR_DONT_PROCESS); 6735 case IPPROTO_AH: 6736 *nexthdr = IPPROTO_AH; 6737 return (IPSEC_HDR_PROCESS); 6738 case IPPROTO_NONE: 6739 /* No next header means we're finished */ 6740 default: 6741 return (IPSEC_HDR_DONT_PROCESS); 6742 } 6743 length += ehdrlen; 6744 whereptr += ehdrlen; 6745 } 6746 panic("ipsec_needs_processing_v6"); 6747 /*NOTREACHED*/ 6748 } 6749 6750 /* 6751 * Path for AH if options are present. If this is the first time we are 6752 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6753 * Otherwise, just fanout. Return value answers the boolean question: 6754 * "Did I consume the mblk you sent me?" 6755 * 6756 * Sometimes AH needs to be done before other IPv6 headers for security 6757 * reasons. This function (and its ipsec_needs_processing_v6() above) 6758 * indicates if that is so, and fans out to the appropriate IPsec protocol 6759 * for the datagram passed in. 6760 */ 6761 static boolean_t 6762 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6763 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6764 { 6765 mblk_t *mp; 6766 uint8_t nexthdr; 6767 ipsec_in_t *ii = NULL; 6768 ah_t *ah; 6769 ipsec_status_t ipsec_rc; 6770 6771 ASSERT((hada_mp == NULL) || (!mctl_present)); 6772 6773 switch (ipsec_needs_processing_v6( 6774 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6775 case IPSEC_MEMORY_ERROR: 6776 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6777 freemsg(hada_mp); 6778 freemsg(first_mp); 6779 return (B_TRUE); 6780 case IPSEC_HDR_DONT_PROCESS: 6781 return (B_FALSE); 6782 } 6783 6784 /* Default means send it to AH! 
*/ 6785 ASSERT(nexthdr == IPPROTO_AH); 6786 if (!mctl_present) { 6787 mp = first_mp; 6788 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 6789 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6790 "allocation failure.\n")); 6791 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6792 freemsg(hada_mp); 6793 freemsg(mp); 6794 return (B_TRUE); 6795 } 6796 /* 6797 * Store the ill_index so that when we come back 6798 * from IPSEC we ride on the same queue. 6799 */ 6800 ii = (ipsec_in_t *)first_mp->b_rptr; 6801 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6802 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 6803 first_mp->b_cont = mp; 6804 } 6805 /* 6806 * Cache hardware acceleration info. 6807 */ 6808 if (hada_mp != NULL) { 6809 ASSERT(ii != NULL); 6810 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6811 "caching data attr.\n")); 6812 ii->ipsec_in_accelerated = B_TRUE; 6813 ii->ipsec_in_da = hada_mp; 6814 } 6815 6816 if (!ipsec_loaded()) { 6817 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 6818 return (B_TRUE); 6819 } 6820 6821 ah = ipsec_inbound_ah_sa(first_mp); 6822 if (ah == NULL) 6823 return (B_TRUE); 6824 ASSERT(ii->ipsec_in_ah_sa != NULL); 6825 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6826 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6827 6828 switch (ipsec_rc) { 6829 case IPSEC_STATUS_SUCCESS: 6830 /* we're done with IPsec processing, send it up */ 6831 ip_fanout_proto_again(first_mp, ill, ill, ire); 6832 break; 6833 case IPSEC_STATUS_FAILED: 6834 BUMP_MIB(&ip6_mib, ipv6InDiscards); 6835 break; 6836 case IPSEC_STATUS_PENDING: 6837 /* no action needed */ 6838 break; 6839 } 6840 return (B_TRUE); 6841 } 6842 6843 /* 6844 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6845 * ip_rput_v6 has already verified alignment, the min length, the version, 6846 * and db_ref = 1. 6847 * 6848 * The ill passed in (the arg named inill) is the ill that the packet 6849 * actually arrived on. 
We need to remember this when saving the 6850 * input interface index into potential IPV6_PKTINFO data in 6851 * ip_add_info_v6(). 6852 */ 6853 void 6854 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6855 uint_t flags, mblk_t *hada_mp) 6856 { 6857 ire_t *ire = NULL; 6858 queue_t *rq; 6859 ill_t *ill = inill; 6860 ipif_t *ipif; 6861 uint8_t *whereptr; 6862 uint8_t nexthdr; 6863 uint16_t remlen; 6864 uint_t prev_nexthdr_offset; 6865 uint_t used; 6866 size_t pkt_len; 6867 uint16_t ip6_len; 6868 uint_t hdr_len; 6869 boolean_t mctl_present; 6870 mblk_t *first_mp; 6871 mblk_t *first_mp1; 6872 boolean_t no_forward; 6873 ip6_hbh_t *hbhhdr; 6874 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6875 conn_t *connp; 6876 ilm_t *ilm; 6877 uint32_t ports; 6878 uint_t ipif_id = 0; 6879 zoneid_t zoneid = GLOBAL_ZONEID; 6880 uint16_t hck_flags, reass_hck_flags; 6881 uint32_t reass_sum; 6882 boolean_t cksum_err; 6883 mblk_t *mp1; 6884 6885 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6886 6887 if (hada_mp != NULL) { 6888 /* 6889 * It's an IPsec accelerated packet. 6890 * Keep a pointer to the data attributes around until 6891 * we allocate the ipsecinfo structure. 6892 */ 6893 IPSECHW_DEBUG(IPSECHW_PKT, 6894 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6895 hada_mp->b_cont = NULL; 6896 /* 6897 * Since it is accelerated, it came directly from 6898 * the ill. 6899 */ 6900 ASSERT(mctl_present == B_FALSE); 6901 ASSERT(mp->b_datap->db_type != M_CTL); 6902 } 6903 6904 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6905 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6906 6907 if (mp->b_cont == NULL) 6908 pkt_len = mp->b_wptr - mp->b_rptr; 6909 else 6910 pkt_len = msgdsize(mp); 6911 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6912 6913 /* 6914 * Check for bogus (too short packet) and packet which 6915 * was padded by the link layer. 
6916 */ 6917 if (ip6_len != pkt_len) { 6918 ssize_t diff; 6919 6920 if (ip6_len > pkt_len) { 6921 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 6922 ip6_len, pkt_len)); 6923 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 6924 freemsg(hada_mp); 6925 freemsg(first_mp); 6926 return; 6927 } 6928 diff = (ssize_t)(pkt_len - ip6_len); 6929 6930 if (!adjmsg(mp, -diff)) { 6931 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6932 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6933 freemsg(hada_mp); 6934 freemsg(first_mp); 6935 return; 6936 } 6937 pkt_len -= diff; 6938 } 6939 6940 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6941 hck_flags = DB_CKSUMFLAGS(mp); 6942 else 6943 hck_flags = 0; 6944 6945 /* Clear checksum flags in case we need to forward */ 6946 DB_CKSUMFLAGS(mp) = 0; 6947 reass_sum = reass_hck_flags = 0; 6948 6949 nexthdr = ip6h->ip6_nxt; 6950 6951 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6952 (uchar_t *)ip6h); 6953 whereptr = (uint8_t *)&ip6h[1]; 6954 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6955 6956 /* Process hop by hop header options */ 6957 if (nexthdr == IPPROTO_HOPOPTS) { 6958 uint_t ehdrlen; 6959 uint8_t *optptr; 6960 6961 if (remlen < MIN_EHDR_LEN) 6962 goto pkt_too_short; 6963 if (mp->b_cont != NULL && 6964 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6965 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6966 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6967 freemsg(hada_mp); 6968 freemsg(first_mp); 6969 return; 6970 } 6971 ip6h = (ip6_t *)mp->b_rptr; 6972 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6973 } 6974 hbhhdr = (ip6_hbh_t *)whereptr; 6975 nexthdr = hbhhdr->ip6h_nxt; 6976 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6977 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6978 6979 if (remlen < ehdrlen) 6980 goto pkt_too_short; 6981 if (mp->b_cont != NULL && 6982 whereptr + ehdrlen > mp->b_wptr) { 6983 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6984 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 6985 freemsg(hada_mp); 6986 freemsg(first_mp); 6987 return; 6988 } 6989 ip6h = (ip6_t *)mp->b_rptr; 6990 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6991 hbhhdr = (ip6_hbh_t *)whereptr; 6992 } 6993 6994 optptr = whereptr + 2; 6995 whereptr += ehdrlen; 6996 remlen -= ehdrlen; 6997 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 6998 ehdrlen - 2, IPPROTO_HOPOPTS)) { 6999 case -1: 7000 /* 7001 * Packet has been consumed and any 7002 * needed ICMP messages sent. 7003 */ 7004 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7005 freemsg(hada_mp); 7006 return; 7007 case 0: 7008 /* no action needed */ 7009 break; 7010 case 1: 7011 /* Known router alert */ 7012 goto ipv6forus; 7013 } 7014 } 7015 7016 /* 7017 * On incoming v6 multicast packets we will bypass the ire table, 7018 * and assume that the read queue corresponds to the targetted 7019 * interface. 7020 * 7021 * The effect of this is the same as the IPv4 original code, but is 7022 * much cleaner I think. See ip_rput for how that was done. 7023 */ 7024 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7025 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7026 /* 7027 * XXX TODO Give to mrouted to for multicast forwarding. 
7028 */ 7029 ILM_WALKER_HOLD(ill); 7030 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7031 ILM_WALKER_RELE(ill); 7032 if (ilm == NULL) { 7033 if (ip_debug > 3) { 7034 /* ip2dbg */ 7035 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7036 " which is not for us: %s\n", AF_INET6, 7037 &ip6h->ip6_dst); 7038 } 7039 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7040 freemsg(hada_mp); 7041 freemsg(first_mp); 7042 return; 7043 } 7044 if (ip_debug > 3) { 7045 /* ip2dbg */ 7046 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7047 AF_INET6, &ip6h->ip6_dst); 7048 } 7049 rq = ill->ill_rq; 7050 zoneid = GLOBAL_ZONEID; 7051 goto ipv6forus; 7052 } 7053 7054 ipif = ill->ill_ipif; 7055 7056 /* 7057 * If a packet was received on an interface that is a 6to4 tunnel, 7058 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7059 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7060 * the 6to4 prefix of the address configured on the receiving interface. 7061 * Otherwise, the packet was delivered to this interface in error and 7062 * the packet must be dropped. 7063 */ 7064 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7065 7066 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7067 &ip6h->ip6_dst)) { 7068 if (ip_debug > 2) { 7069 /* ip1dbg */ 7070 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7071 "addressed packet which is not for us: " 7072 "%s\n", AF_INET6, &ip6h->ip6_dst); 7073 } 7074 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7075 freemsg(first_mp); 7076 return; 7077 } 7078 } 7079 7080 /* 7081 * Find an ire that matches destination. For link-local addresses 7082 * we have to match the ill. 7083 * TBD for site local addresses. 
7084 */ 7085 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7086 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7087 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, 7088 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7089 } else { 7090 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES); 7091 } 7092 if (ire == NULL) { 7093 /* 7094 * No matching IRE found. Mark this packet as having 7095 * originated externally. 7096 */ 7097 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7098 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7099 if (!(ill->ill_flags & ILLF_ROUTER)) 7100 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7101 freemsg(hada_mp); 7102 freemsg(first_mp); 7103 return; 7104 } 7105 if (ip6h->ip6_hops <= 1) { 7106 if (hada_mp != NULL) 7107 goto hada_drop; 7108 icmp_time_exceeded_v6(WR(q), first_mp, 7109 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7110 return; 7111 } 7112 /* 7113 * Per RFC 3513 section 2.5.2, we must not forward packets with 7114 * an unspecified source address. 7115 */ 7116 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7117 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7118 freemsg(hada_mp); 7119 freemsg(first_mp); 7120 return; 7121 } 7122 mp->b_prev = (mblk_t *)(uintptr_t) 7123 ill->ill_phyint->phyint_ifindex; 7124 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7125 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7126 ALL_ZONES); 7127 return; 7128 } 7129 ipif_id = ire->ire_ipif->ipif_seqid; 7130 /* we have a matching IRE */ 7131 if (ire->ire_stq != NULL) { 7132 ill_group_t *ill_group; 7133 ill_group_t *ire_group; 7134 7135 /* 7136 * To be quicker, we may wish not to chase pointers 7137 * (ire->ire_ipif->ipif_ill...) and instead store the 7138 * forwarding policy in the ire. An unfortunate side- 7139 * effect of this would be requiring an ire flush whenever 7140 * the ILLF_ROUTER flag changes. For now, chase pointers 7141 * once and store in the boolean no_forward. 
7142 * 7143 * This appears twice to keep it out of the non-forwarding, 7144 * yes-it's-for-us-on-the-right-interface case. 7145 */ 7146 no_forward = ((ill->ill_flags & 7147 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7148 7149 7150 ASSERT(first_mp == mp); 7151 /* 7152 * This ire has a send-to queue - forward the packet. 7153 */ 7154 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7155 freemsg(hada_mp); 7156 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7157 if (no_forward) 7158 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7159 freemsg(mp); 7160 ire_refrele(ire); 7161 return; 7162 } 7163 if (ip6h->ip6_hops <= 1) { 7164 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7165 icmp_time_exceeded_v6(WR(q), mp, 7166 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7167 ire_refrele(ire); 7168 return; 7169 } 7170 /* 7171 * Per RFC 3513 section 2.5.2, we must not forward packets with 7172 * an unspecified source address. 7173 */ 7174 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7175 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7176 freemsg(hada_mp); 7177 freemsg(mp); 7178 ire_refrele(ire); 7179 return; 7180 } 7181 if (pkt_len > ire->ire_max_frag) { 7182 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7183 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7184 ll_multicast, B_TRUE); 7185 ire_refrele(ire); 7186 return; 7187 } 7188 7189 /* 7190 * Check to see if we're forwarding the packet to a 7191 * different link from which it came. If so, check the 7192 * source and destination addresses since routers must not 7193 * forward any packets with link-local source or 7194 * destination addresses to other links. Otherwise (if 7195 * we're forwarding onto the same link), conditionally send 7196 * a redirect message. 
7197 */ 7198 ill_group = ill->ill_group; 7199 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7200 if (ire->ire_rfq != q && (ill_group == NULL || 7201 ill_group != ire_group)) { 7202 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7203 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7204 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7205 freemsg(mp); 7206 ire_refrele(ire); 7207 return; 7208 } 7209 /* TBD add site-local check at site boundary? */ 7210 } else if (ipv6_send_redirects) { 7211 in6_addr_t *v6targ; 7212 in6_addr_t gw_addr_v6; 7213 ire_t *src_ire_v6 = NULL; 7214 7215 /* 7216 * Don't send a redirect when forwarding a source 7217 * routed packet. 7218 */ 7219 if (ip_source_routed_v6(ip6h, mp)) 7220 goto forward; 7221 7222 mutex_enter(&ire->ire_lock); 7223 gw_addr_v6 = ire->ire_gateway_addr_v6; 7224 mutex_exit(&ire->ire_lock); 7225 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7226 v6targ = &gw_addr_v6; 7227 /* 7228 * We won't send redirects to a router 7229 * that doesn't have a link local 7230 * address, but will forward. 7231 */ 7232 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7233 BUMP_MIB(ill->ill_ip6_mib, 7234 ipv6InAddrErrors); 7235 goto forward; 7236 } 7237 } else { 7238 v6targ = &ip6h->ip6_dst; 7239 } 7240 7241 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7242 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7243 ALL_ZONES, 0, MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7244 7245 if (src_ire_v6 != NULL) { 7246 /* 7247 * The source is directly connected. 
7248 */ 7249 mp1 = copymsg(mp); 7250 if (mp1 != NULL) { 7251 icmp_send_redirect_v6(WR(q), 7252 mp1, v6targ, &ip6h->ip6_dst, 7253 ill, B_FALSE); 7254 } 7255 ire_refrele(src_ire_v6); 7256 } 7257 } 7258 7259 forward: 7260 /* Hoplimit verified above */ 7261 ip6h->ip6_hops--; 7262 UPDATE_IB_PKT_COUNT(ire); 7263 ire->ire_last_used_time = lbolt; 7264 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7265 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7266 IRE_REFRELE(ire); 7267 return; 7268 } 7269 rq = ire->ire_rfq; 7270 7271 /* 7272 * Need to put on correct queue for reassembly to find it. 7273 * No need to use put() since reassembly has its own locks. 7274 * Note: multicast packets and packets destined to addresses 7275 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7276 * the arriving ill. 7277 */ 7278 if (rq != q) { 7279 boolean_t check_multi = B_TRUE; 7280 ill_group_t *ill_group = NULL; 7281 ill_group_t *ire_group = NULL; 7282 ill_t *ire_ill = NULL; 7283 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7284 7285 /* 7286 * To be quicker, we may wish not to chase pointers 7287 * (ire->ire_ipif->ipif_ill...) and instead store the 7288 * forwarding policy in the ire. An unfortunate side- 7289 * effect of this would be requiring an ire flush whenever 7290 * the ILLF_ROUTER flag changes. For now, chase pointers 7291 * once and store in the boolean no_forward. 7292 */ 7293 no_forward = ((ill->ill_flags & 7294 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7295 7296 ill_group = ill->ill_group; 7297 if (rq != NULL) { 7298 ire_ill = (ill_t *)(rq->q_ptr); 7299 ire_group = ire_ill->ill_group; 7300 } 7301 7302 /* 7303 * If it's part of the same IPMP group, or if it's a legal 7304 * address on the 'usesrc' interface, then bypass strict 7305 * checks. 
7306 */ 7307 if (ill_group != NULL && ill_group == ire_group) { 7308 check_multi = B_FALSE; 7309 } else if (ill_ifindex != 0 && ire_ill != NULL && 7310 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7311 check_multi = B_FALSE; 7312 } 7313 7314 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7315 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7316 /* 7317 * This packet came in on an interface other than the 7318 * one associated with the destination address 7319 * and we are strict about matches. 7320 * 7321 * As long as the ills belong to the same group, 7322 * we don't consider them to arriving on the wrong 7323 * interface. Thus, when the switch is doing inbound 7324 * load spreading, we won't drop packets when we 7325 * are doing strict multihoming checks. 7326 */ 7327 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7328 freemsg(hada_mp); 7329 freemsg(first_mp); 7330 ire_refrele(ire); 7331 return; 7332 } 7333 7334 if (rq != NULL) 7335 q = rq; 7336 7337 ill = (ill_t *)q->q_ptr; 7338 ASSERT(ill); 7339 } 7340 7341 zoneid = ire->ire_zoneid; 7342 UPDATE_IB_PKT_COUNT(ire); 7343 ire->ire_last_used_time = lbolt; 7344 /* Don't use the ire after this point. */ 7345 ire_refrele(ire); 7346 ipv6forus: 7347 /* 7348 * Looks like this packet is for us one way or another. 7349 * This is where we'll process destination headers etc. 
7350 */ 7351 for (; ; ) { 7352 switch (nexthdr) { 7353 case IPPROTO_TCP: { 7354 uint16_t *up; 7355 uint32_t sum; 7356 int offset; 7357 7358 hdr_len = pkt_len - remlen; 7359 7360 if (hada_mp != NULL) { 7361 ip0dbg(("tcp hada drop\n")); 7362 goto hada_drop; 7363 } 7364 7365 7366 /* TCP needs all of the TCP header */ 7367 if (remlen < TCP_MIN_HEADER_LENGTH) 7368 goto pkt_too_short; 7369 if (mp->b_cont != NULL && 7370 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7371 if (!pullupmsg(mp, 7372 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7373 BUMP_MIB(ill->ill_ip6_mib, 7374 ipv6InDiscards); 7375 freemsg(first_mp); 7376 return; 7377 } 7378 hck_flags = 0; 7379 ip6h = (ip6_t *)mp->b_rptr; 7380 whereptr = (uint8_t *)ip6h + hdr_len; 7381 } 7382 /* 7383 * Extract the offset field from the TCP header. 7384 */ 7385 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7386 if (offset != 5) { 7387 if (offset < 5) { 7388 ip1dbg(("ip_rput_data_v6: short " 7389 "TCP data offset")); 7390 BUMP_MIB(ill->ill_ip6_mib, 7391 ipv6InDiscards); 7392 freemsg(first_mp); 7393 return; 7394 } 7395 /* 7396 * There must be TCP options. 7397 * Make sure we can grab them. 7398 */ 7399 offset <<= 2; 7400 if (remlen < offset) 7401 goto pkt_too_short; 7402 if (mp->b_cont != NULL && 7403 whereptr + offset > mp->b_wptr) { 7404 if (!pullupmsg(mp, 7405 hdr_len + offset)) { 7406 BUMP_MIB(ill->ill_ip6_mib, 7407 ipv6InDiscards); 7408 freemsg(first_mp); 7409 return; 7410 } 7411 hck_flags = 0; 7412 ip6h = (ip6_t *)mp->b_rptr; 7413 whereptr = (uint8_t *)ip6h + hdr_len; 7414 } 7415 } 7416 7417 up = (uint16_t *)&ip6h->ip6_src; 7418 /* 7419 * TCP checksum calculation. 
First sum up the 7420 * pseudo-header fields: 7421 * - Source IPv6 address 7422 * - Destination IPv6 address 7423 * - TCP payload length 7424 * - TCP protocol ID 7425 */ 7426 sum = htons(IPPROTO_TCP + remlen) + 7427 up[0] + up[1] + up[2] + up[3] + 7428 up[4] + up[5] + up[6] + up[7] + 7429 up[8] + up[9] + up[10] + up[11] + 7430 up[12] + up[13] + up[14] + up[15]; 7431 7432 /* Fold initial sum */ 7433 sum = (sum & 0xffff) + (sum >> 16); 7434 7435 mp1 = mp->b_cont; 7436 7437 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7438 IP6_STAT(ip6_in_sw_cksum); 7439 7440 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7441 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7442 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7443 mp, mp1, cksum_err); 7444 7445 if (cksum_err) { 7446 BUMP_MIB(&ip_mib, tcpInErrs); 7447 7448 if (hck_flags & HCK_FULLCKSUM) 7449 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7450 else if (hck_flags & HCK_PARTIALCKSUM) 7451 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7452 else 7453 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7454 7455 freemsg(first_mp); 7456 return; 7457 } 7458 tcp_fanout: 7459 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7460 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7461 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7462 return; 7463 } 7464 case IPPROTO_SCTP: 7465 { 7466 sctp_hdr_t *sctph; 7467 uint32_t calcsum, pktsum; 7468 uint_t hdr_len = pkt_len - remlen; 7469 7470 /* SCTP needs all of the SCTP header */ 7471 if (remlen < sizeof (*sctph)) { 7472 goto pkt_too_short; 7473 } 7474 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7475 ASSERT(mp->b_cont != NULL); 7476 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7477 BUMP_MIB(ill->ill_ip6_mib, 7478 ipv6InDiscards); 7479 freemsg(mp); 7480 return; 7481 } 7482 ip6h = (ip6_t *)mp->b_rptr; 7483 whereptr = (uint8_t *)ip6h + hdr_len; 7484 } 7485 7486 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7487 /* checksum */ 7488 pktsum = sctph->sh_chksum; 7489 sctph->sh_chksum = 0; 7490 calcsum = sctp_cksum(mp, 
hdr_len); 7491 if (calcsum != pktsum) { 7492 BUMP_MIB(&sctp_mib, sctpChecksumError); 7493 freemsg(mp); 7494 return; 7495 } 7496 sctph->sh_chksum = pktsum; 7497 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7498 if ((connp = sctp_find_conn(&ip6h->ip6_src, 7499 &ip6h->ip6_dst, ports, ipif_id, zoneid)) == NULL) { 7500 ip_fanout_sctp_raw(first_mp, ill, 7501 (ipha_t *)ip6h, B_FALSE, ports, 7502 mctl_present, 7503 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7504 B_TRUE, ipif_id, zoneid); 7505 return; 7506 } 7507 BUMP_MIB(&ip_mib, ipInDelivers); 7508 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7509 B_FALSE, mctl_present); 7510 return; 7511 } 7512 case IPPROTO_UDP: { 7513 uint16_t *up; 7514 uint32_t sum; 7515 7516 hdr_len = pkt_len - remlen; 7517 7518 if (hada_mp != NULL) { 7519 ip0dbg(("udp hada drop\n")); 7520 goto hada_drop; 7521 } 7522 7523 /* Verify that at least the ports are present */ 7524 if (remlen < UDPH_SIZE) 7525 goto pkt_too_short; 7526 if (mp->b_cont != NULL && 7527 whereptr + UDPH_SIZE > mp->b_wptr) { 7528 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7529 BUMP_MIB(ill->ill_ip6_mib, 7530 ipv6InDiscards); 7531 freemsg(first_mp); 7532 return; 7533 } 7534 hck_flags = 0; 7535 ip6h = (ip6_t *)mp->b_rptr; 7536 whereptr = (uint8_t *)ip6h + hdr_len; 7537 } 7538 7539 /* 7540 * Before going through the regular checksum 7541 * calculation, make sure the received checksum 7542 * is non-zero. RFC 2460 says, a 0x0000 checksum 7543 * in a UDP packet (within IPv6 packet) is invalid 7544 * and should be replaced by 0xffff. This makes 7545 * sense as regular checksum calculation will 7546 * pass for both the cases i.e. 0x0000 and 0xffff. 7547 * Removing one of the case makes error detection 7548 * stronger. 
7549 */ 7550 7551 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7552 /* 0x0000 checksum is invalid */ 7553 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7554 "checksum value 0x0000\n")); 7555 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7556 freemsg(first_mp); 7557 return; 7558 } 7559 7560 up = (uint16_t *)&ip6h->ip6_src; 7561 7562 /* 7563 * UDP checksum calculation. First sum up the 7564 * pseudo-header fields: 7565 * - Source IPv6 address 7566 * - Destination IPv6 address 7567 * - UDP payload length 7568 * - UDP protocol ID 7569 */ 7570 7571 sum = htons(IPPROTO_UDP + remlen) + 7572 up[0] + up[1] + up[2] + up[3] + 7573 up[4] + up[5] + up[6] + up[7] + 7574 up[8] + up[9] + up[10] + up[11] + 7575 up[12] + up[13] + up[14] + up[15]; 7576 7577 /* Fold initial sum */ 7578 sum = (sum & 0xffff) + (sum >> 16); 7579 7580 if (reass_hck_flags != 0) { 7581 hck_flags = reass_hck_flags; 7582 7583 IP_CKSUM_RECV_REASS(hck_flags, 7584 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7585 sum, reass_sum, cksum_err); 7586 } else { 7587 mp1 = mp->b_cont; 7588 7589 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7590 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7591 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7592 mp, mp1, cksum_err); 7593 } 7594 7595 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7596 IP6_STAT(ip6_in_sw_cksum); 7597 7598 if (cksum_err) { 7599 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7600 7601 if (hck_flags & HCK_FULLCKSUM) 7602 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7603 else if (hck_flags & HCK_PARTIALCKSUM) 7604 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7605 else 7606 IP6_STAT(ip6_udp_in_sw_cksum_err); 7607 7608 freemsg(first_mp); 7609 return; 7610 } 7611 goto udp_fanout; 7612 } 7613 case IPPROTO_ICMPV6: { 7614 uint16_t *up; 7615 uint32_t sum; 7616 uint_t hdr_len = pkt_len - remlen; 7617 7618 if (hada_mp != NULL) { 7619 ip0dbg(("icmp hada drop\n")); 7620 goto hada_drop; 7621 } 7622 7623 up = (uint16_t *)&ip6h->ip6_src; 7624 sum = htons(IPPROTO_ICMPV6 + remlen) + 7625 
up[0] + up[1] + up[2] + up[3] + 7626 up[4] + up[5] + up[6] + up[7] + 7627 up[8] + up[9] + up[10] + up[11] + 7628 up[12] + up[13] + up[14] + up[15]; 7629 sum = (sum & 0xffff) + (sum >> 16); 7630 sum = IP_CSUM(mp, hdr_len, sum); 7631 if (sum != 0) { 7632 /* IPv6 ICMP checksum failed */ 7633 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7634 "failed %x\n", 7635 sum)); 7636 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7637 BUMP_MIB(ill->ill_icmp6_mib, 7638 ipv6IfIcmpInErrors); 7639 freemsg(first_mp); 7640 return; 7641 } 7642 7643 icmp_fanout: 7644 /* Check variable for testing applications */ 7645 if (ipv6_drop_inbound_icmpv6) { 7646 freemsg(first_mp); 7647 return; 7648 } 7649 /* 7650 * Assume that there is always at least one conn for 7651 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7652 * where there is no conn. 7653 */ 7654 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7655 ASSERT(!(ill->ill_phyint->phyint_flags & 7656 PHYI_LOOPBACK)); 7657 /* 7658 * In the multicast case, applications may have 7659 * joined the group from different zones, so we 7660 * need to deliver the packet to each of them. 7661 * Loop through the multicast memberships 7662 * structures (ilm) on the receive ill and send 7663 * a copy of the packet up each matching one. 
7664 */ 7665 ILM_WALKER_HOLD(ill); 7666 for (ilm = ill->ill_ilm; ilm != NULL; 7667 ilm = ilm->ilm_next) { 7668 if (ilm->ilm_flags & ILM_DELETED) 7669 continue; 7670 if (!IN6_ARE_ADDR_EQUAL( 7671 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7672 continue; 7673 if (!ipif_lookup_zoneid(ill, 7674 ilm->ilm_zoneid, IPIF_UP, NULL)) 7675 continue; 7676 7677 first_mp1 = ip_copymsg(first_mp); 7678 if (first_mp1 == NULL) 7679 continue; 7680 icmp_inbound_v6(q, first_mp1, ill, 7681 hdr_len, mctl_present, 0, 7682 ilm->ilm_zoneid); 7683 } 7684 ILM_WALKER_RELE(ill); 7685 } else { 7686 first_mp1 = ip_copymsg(first_mp); 7687 if (first_mp1 != NULL) 7688 icmp_inbound_v6(q, first_mp1, ill, 7689 hdr_len, mctl_present, 0, zoneid); 7690 } 7691 } 7692 /* FALLTHRU */ 7693 default: { 7694 /* 7695 * Handle protocols with which IPv6 is less intimate. 7696 */ 7697 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7698 7699 if (hada_mp != NULL) { 7700 ip0dbg(("default hada drop\n")); 7701 goto hada_drop; 7702 } 7703 7704 /* 7705 * Enable sending ICMP for "Unknown" nexthdr 7706 * case. i.e. where we did not FALLTHRU from 7707 * IPPROTO_ICMPV6 processing case above. 7708 * If we did FALLTHRU, then the packet has already been 7709 * processed for IPPF, don't process it again in 7710 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7711 * flags 7712 */ 7713 if (nexthdr != IPPROTO_ICMPV6) 7714 proto_flags |= IP_FF_SEND_ICMP; 7715 else 7716 proto_flags |= IP6_NO_IPPOLICY; 7717 7718 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7719 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7720 mctl_present, zoneid); 7721 return; 7722 } 7723 7724 case IPPROTO_DSTOPTS: { 7725 uint_t ehdrlen; 7726 uint8_t *optptr; 7727 ip6_dest_t *desthdr; 7728 7729 /* Check if AH is present. */ 7730 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7731 ire, hada_mp, zoneid)) { 7732 ip0dbg(("dst early hada drop\n")); 7733 return; 7734 } 7735 7736 /* 7737 * Reinitialize pointers, as ipsec_early_ah_v6() does 7738 * complete pullups. 
We don't have to do more pullups 7739 * as a result. 7740 */ 7741 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7742 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7743 ip6h = (ip6_t *)mp->b_rptr; 7744 7745 if (remlen < MIN_EHDR_LEN) 7746 goto pkt_too_short; 7747 7748 desthdr = (ip6_dest_t *)whereptr; 7749 nexthdr = desthdr->ip6d_nxt; 7750 prev_nexthdr_offset = (uint_t)(whereptr - 7751 (uint8_t *)ip6h); 7752 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7753 if (remlen < ehdrlen) 7754 goto pkt_too_short; 7755 optptr = whereptr + 2; 7756 /* 7757 * Note: XXX This code does not seem to make 7758 * distinction between Destination Options Header 7759 * being before/after Routing Header which can 7760 * happen if we are at the end of source route. 7761 * This may become significant in future. 7762 * (No real significant Destination Options are 7763 * defined/implemented yet ). 7764 */ 7765 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7766 ehdrlen - 2, IPPROTO_DSTOPTS)) { 7767 case -1: 7768 /* 7769 * Packet has been consumed and any needed 7770 * ICMP errors sent. 
7771 */ 7772 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7773 freemsg(hada_mp); 7774 return; 7775 case 0: 7776 /* No action needed continue */ 7777 break; 7778 case 1: 7779 /* 7780 * Unnexpected return value 7781 * (Router alert is a Hop-by-Hop option) 7782 */ 7783 #ifdef DEBUG 7784 panic("ip_rput_data_v6: router " 7785 "alert hbh opt indication in dest opt"); 7786 /*NOTREACHED*/ 7787 #else 7788 freemsg(hada_mp); 7789 freemsg(first_mp); 7790 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7791 return; 7792 #endif 7793 } 7794 used = ehdrlen; 7795 break; 7796 } 7797 case IPPROTO_FRAGMENT: { 7798 ip6_frag_t *fraghdr; 7799 size_t no_frag_hdr_len; 7800 7801 if (hada_mp != NULL) { 7802 ip0dbg(("frag hada drop\n")); 7803 goto hada_drop; 7804 } 7805 7806 ASSERT(first_mp == mp); 7807 if (remlen < sizeof (ip6_frag_t)) 7808 goto pkt_too_short; 7809 7810 if (mp->b_cont != NULL && 7811 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7812 if (!pullupmsg(mp, 7813 pkt_len - remlen + sizeof (ip6_frag_t))) { 7814 BUMP_MIB(ill->ill_ip6_mib, 7815 ipv6InDiscards); 7816 freemsg(mp); 7817 return; 7818 } 7819 hck_flags = 0; 7820 ip6h = (ip6_t *)mp->b_rptr; 7821 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7822 } 7823 7824 fraghdr = (ip6_frag_t *)whereptr; 7825 used = (uint_t)sizeof (ip6_frag_t); 7826 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 7827 7828 /* 7829 * Invoke the CGTP (multirouting) filtering module to 7830 * process the incoming packet. Packets identified as 7831 * duplicates must be discarded. Filtering is active 7832 * only if the the ip_cgtp_filter ndd variable is 7833 * non-zero. 
7834 */ 7835 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 7836 int cgtp_flt_pkt = 7837 ip_cgtp_filter_ops->cfo_filter_v6( 7838 inill->ill_rq, ip6h, fraghdr); 7839 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7840 freemsg(mp); 7841 return; 7842 } 7843 } 7844 7845 /* Restore the flags */ 7846 DB_CKSUMFLAGS(mp) = hck_flags; 7847 7848 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 7849 remlen - used, &prev_nexthdr_offset, 7850 &reass_sum, &reass_hck_flags); 7851 if (mp == NULL) { 7852 /* Reassembly is still pending */ 7853 return; 7854 } 7855 /* The first mblk are the headers before the frag hdr */ 7856 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 7857 7858 first_mp = mp; /* mp has most likely changed! */ 7859 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7860 ip6h = (ip6_t *)mp->b_rptr; 7861 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7862 whereptr = mp->b_rptr + no_frag_hdr_len; 7863 remlen = ntohs(ip6h->ip6_plen) + 7864 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7865 pkt_len = msgdsize(mp); 7866 used = 0; 7867 break; 7868 } 7869 case IPPROTO_HOPOPTS: 7870 if (hada_mp != NULL) { 7871 ip0dbg(("hop hada drop\n")); 7872 goto hada_drop; 7873 } 7874 /* 7875 * Illegal header sequence. 7876 * (Hop-by-hop headers are processed above 7877 * and required to immediately follow IPv6 header) 7878 */ 7879 icmp_param_problem_v6(WR(q), first_mp, 7880 ICMP6_PARAMPROB_NEXTHEADER, 7881 prev_nexthdr_offset, 7882 B_FALSE, B_FALSE); 7883 return; 7884 7885 case IPPROTO_ROUTING: { 7886 uint_t ehdrlen; 7887 ip6_rthdr_t *rthdr; 7888 7889 /* Check if AH is present. */ 7890 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7891 ire, hada_mp, zoneid)) { 7892 ip0dbg(("routing hada drop\n")); 7893 return; 7894 } 7895 7896 /* 7897 * Reinitialize pointers, as ipsec_early_ah_v6() does 7898 * complete pullups. We don't have to do more pullups 7899 * as a result. 
7900 */ 7901 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7902 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7903 ip6h = (ip6_t *)mp->b_rptr; 7904 7905 if (remlen < MIN_EHDR_LEN) 7906 goto pkt_too_short; 7907 rthdr = (ip6_rthdr_t *)whereptr; 7908 nexthdr = rthdr->ip6r_nxt; 7909 prev_nexthdr_offset = (uint_t)(whereptr - 7910 (uint8_t *)ip6h); 7911 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7912 if (remlen < ehdrlen) 7913 goto pkt_too_short; 7914 if (rthdr->ip6r_segleft != 0) { 7915 /* Not end of source route */ 7916 if (ll_multicast) { 7917 BUMP_MIB(ill->ill_ip6_mib, 7918 ipv6ForwProhibits); 7919 freemsg(hada_mp); 7920 freemsg(mp); 7921 return; 7922 } 7923 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7924 flags, hada_mp); 7925 return; 7926 } 7927 used = ehdrlen; 7928 break; 7929 } 7930 case IPPROTO_AH: 7931 case IPPROTO_ESP: { 7932 /* 7933 * Fast path for AH/ESP. If this is the first time 7934 * we are sending a datagram to AH/ESP, allocate 7935 * a IPSEC_IN message and prepend it. Otherwise, 7936 * just fanout. 7937 */ 7938 7939 ipsec_in_t *ii; 7940 int ipsec_rc; 7941 7942 if (!mctl_present) { 7943 ASSERT(first_mp == mp); 7944 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 7945 NULL) { 7946 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 7947 "allocation failure.\n")); 7948 BUMP_MIB(ill->ill_ip6_mib, 7949 ipv6InDiscards); 7950 freemsg(mp); 7951 return; 7952 } 7953 /* 7954 * Store the ill_index so that when we come back 7955 * from IPSEC we ride on the same queue. 7956 */ 7957 ii = (ipsec_in_t *)first_mp->b_rptr; 7958 ii->ipsec_in_ill_index = 7959 ill->ill_phyint->phyint_ifindex; 7960 ii->ipsec_in_rill_index = 7961 ii->ipsec_in_ill_index; 7962 first_mp->b_cont = mp; 7963 /* 7964 * Cache hardware acceleration info. 
7965 */ 7966 if (hada_mp != NULL) { 7967 IPSECHW_DEBUG(IPSECHW_PKT, 7968 ("ip_rput_data_v6: " 7969 "caching data attr.\n")); 7970 ii->ipsec_in_accelerated = B_TRUE; 7971 ii->ipsec_in_da = hada_mp; 7972 hada_mp = NULL; 7973 } 7974 } else { 7975 ii = (ipsec_in_t *)first_mp->b_rptr; 7976 } 7977 7978 if (!ipsec_loaded()) { 7979 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 7980 ire->ire_zoneid); 7981 return; 7982 } 7983 7984 /* select inbound SA and have IPsec process the pkt */ 7985 if (nexthdr == IPPROTO_ESP) { 7986 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 7987 if (esph == NULL) 7988 return; 7989 ASSERT(ii->ipsec_in_esp_sa != NULL); 7990 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 7991 NULL); 7992 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 7993 first_mp, esph); 7994 } else { 7995 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 7996 if (ah == NULL) 7997 return; 7998 ASSERT(ii->ipsec_in_ah_sa != NULL); 7999 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8000 NULL); 8001 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8002 first_mp, ah); 8003 } 8004 8005 switch (ipsec_rc) { 8006 case IPSEC_STATUS_SUCCESS: 8007 break; 8008 case IPSEC_STATUS_FAILED: 8009 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8010 /* FALLTHRU */ 8011 case IPSEC_STATUS_PENDING: 8012 return; 8013 } 8014 /* we're done with IPsec processing, send it up */ 8015 ip_fanout_proto_again(first_mp, ill, inill, ire); 8016 return; 8017 } 8018 case IPPROTO_NONE: 8019 /* All processing is done. Count as "delivered". 
*/ 8020 freemsg(hada_mp); 8021 freemsg(first_mp); 8022 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8023 return; 8024 } 8025 whereptr += used; 8026 ASSERT(remlen >= used); 8027 remlen -= used; 8028 } 8029 /* NOTREACHED */ 8030 8031 pkt_too_short: 8032 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8033 ip6_len, pkt_len, remlen)); 8034 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8035 freemsg(hada_mp); 8036 freemsg(first_mp); 8037 return; 8038 udp_fanout: 8039 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8040 connp = NULL; 8041 } else { 8042 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8043 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8044 CONN_DEC_REF(connp); 8045 connp = NULL; 8046 } 8047 } 8048 8049 if (connp == NULL) { 8050 uint32_t ports; 8051 8052 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8053 UDP_PORTS_OFFSET); 8054 IP6_STAT(ip6_udp_slow_path); 8055 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8056 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8057 zoneid); 8058 return; 8059 } 8060 8061 if (CONN_UDP_FLOWCTLD(connp)) { 8062 freemsg(first_mp); 8063 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8064 CONN_DEC_REF(connp); 8065 return; 8066 } 8067 8068 /* Initiate IPPF processing */ 8069 if (IP6_IN_IPP(flags)) { 8070 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8071 if (mp == NULL) { 8072 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8073 CONN_DEC_REF(connp); 8074 return; 8075 } 8076 } 8077 8078 if (connp->conn_ipv6_recvpktinfo || 8079 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8080 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8081 if (mp == NULL) { 8082 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8083 CONN_DEC_REF(connp); 8084 return; 8085 } 8086 } 8087 8088 IP6_STAT(ip6_udp_fast_path); 8089 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8090 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8091 8092 /* Send it upstream */ 8093 CONN_UDP_RECV(connp, mp); 8094 8095 CONN_DEC_REF(connp); 
8096 freemsg(hada_mp); 8097 return; 8098 8099 hada_drop: 8100 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8101 /* IPsec kstats: bump counter here */ 8102 freemsg(hada_mp); 8103 freemsg(first_mp); 8104 } 8105 8106 /* 8107 * Reassemble fragment. 8108 * When it returns a completed message the first mblk will only contain 8109 * the headers prior to the fragment header. 8110 * 8111 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8112 * of the preceding header. This is needed to patch the previous header's 8113 * nexthdr field when reassembly completes. 8114 */ 8115 static mblk_t * 8116 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8117 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8118 uint32_t *cksum_val, uint16_t *cksum_flags) 8119 { 8120 ill_t *ill = (ill_t *)q->q_ptr; 8121 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8122 uint16_t offset; 8123 boolean_t more_frags; 8124 uint8_t nexthdr = fraghdr->ip6f_nxt; 8125 in6_addr_t *v6dst_ptr; 8126 in6_addr_t *v6src_ptr; 8127 uint_t end; 8128 uint_t hdr_length; 8129 size_t count; 8130 ipf_t *ipf; 8131 ipf_t **ipfp; 8132 ipfb_t *ipfb; 8133 mblk_t *mp1; 8134 uint8_t ecn_info = 0; 8135 size_t msg_len; 8136 mblk_t *tail_mp; 8137 mblk_t *t_mp; 8138 boolean_t pruned = B_FALSE; 8139 uint32_t sum_val; 8140 uint16_t sum_flags; 8141 8142 8143 if (cksum_val != NULL) 8144 *cksum_val = 0; 8145 if (cksum_flags != NULL) 8146 *cksum_flags = 0; 8147 8148 /* 8149 * We utilize hardware computed checksum info only for UDP since 8150 * IP fragmentation is a normal occurence for the protocol. In 8151 * addition, checksum offload support for IP fragments carrying 8152 * UDP payload is commonly implemented across network adapters. 
8153 */ 8154 ASSERT(ill != NULL); 8155 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8156 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8157 mblk_t *mp1 = mp->b_cont; 8158 int32_t len; 8159 8160 /* Record checksum information from the packet */ 8161 sum_val = (uint32_t)DB_CKSUM16(mp); 8162 sum_flags = DB_CKSUMFLAGS(mp); 8163 8164 /* fragmented payload offset from beginning of mblk */ 8165 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8166 8167 if ((sum_flags & HCK_PARTIALCKSUM) && 8168 (mp1 == NULL || mp1->b_cont == NULL) && 8169 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8170 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8171 uint32_t adj; 8172 /* 8173 * Partial checksum has been calculated by hardware 8174 * and attached to the packet; in addition, any 8175 * prepended extraneous data is even byte aligned. 8176 * If any such data exists, we adjust the checksum; 8177 * this would also handle any postpended data. 8178 */ 8179 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8180 mp, mp1, len, adj); 8181 8182 /* One's complement subtract extraneous checksum */ 8183 if (adj >= sum_val) 8184 sum_val = ~(adj - sum_val) & 0xFFFF; 8185 else 8186 sum_val -= adj; 8187 } 8188 } else { 8189 sum_val = 0; 8190 sum_flags = 0; 8191 } 8192 8193 /* Clear hardware checksumming flag */ 8194 DB_CKSUMFLAGS(mp) = 0; 8195 8196 /* 8197 * Note: Fragment offset in header is in 8-octet units. 8198 * Clearing least significant 3 bits not only extracts 8199 * it but also gets it in units of octets. 8200 */ 8201 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8202 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8203 8204 /* 8205 * Is the more frags flag on and the payload length not a multiple 8206 * of eight? 
8207 */ 8208 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8209 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8210 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8211 (uint32_t)((char *)&ip6h->ip6_plen - 8212 (char *)ip6h), B_FALSE, B_FALSE); 8213 return (NULL); 8214 } 8215 8216 v6src_ptr = &ip6h->ip6_src; 8217 v6dst_ptr = &ip6h->ip6_dst; 8218 end = remlen; 8219 8220 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8221 end += offset; 8222 8223 /* 8224 * Would fragment cause reassembled packet to have a payload length 8225 * greater than IP_MAXPACKET - the max payload size? 8226 */ 8227 if (end > IP_MAXPACKET) { 8228 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8229 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8230 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8231 (char *)ip6h), B_FALSE, B_FALSE); 8232 return (NULL); 8233 } 8234 8235 /* 8236 * This packet just has one fragment. Reassembly not 8237 * needed. 8238 */ 8239 if (!more_frags && offset == 0) { 8240 goto reass_done; 8241 } 8242 8243 /* 8244 * Drop the fragmented as early as possible, if 8245 * we don't have resource(s) to re-assemble. 8246 */ 8247 if (ip_reass_queue_bytes == 0) { 8248 freemsg(mp); 8249 return (NULL); 8250 } 8251 8252 /* Record the ECN field info. */ 8253 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8254 /* 8255 * If this is not the first fragment, dump the unfragmentable 8256 * portion of the packet. 8257 */ 8258 if (offset) 8259 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8260 8261 /* 8262 * Fragmentation reassembly. Each ILL has a hash table for 8263 * queueing packets undergoing reassembly for all IPIFs 8264 * associated with the ILL. The hash is based on the packet 8265 * IP ident field. The ILL frag hash table was allocated 8266 * as a timer block at the time the ILL was created. Whenever 8267 * there is anything on the reassembly queue, the timer will 8268 * be running. 
8269 */ 8270 msg_len = MBLKSIZE(mp); 8271 tail_mp = mp; 8272 while (tail_mp->b_cont != NULL) { 8273 tail_mp = tail_mp->b_cont; 8274 msg_len += MBLKSIZE(tail_mp); 8275 } 8276 /* 8277 * If the reassembly list for this ILL will get too big 8278 * prune it. 8279 */ 8280 8281 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8282 ip_reass_queue_bytes) { 8283 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8284 : (ip_reass_queue_bytes - msg_len)); 8285 pruned = B_TRUE; 8286 } 8287 8288 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8289 mutex_enter(&ipfb->ipfb_lock); 8290 8291 ipfp = &ipfb->ipfb_ipf; 8292 /* Try to find an existing fragment queue for this packet. */ 8293 for (;;) { 8294 ipf = ipfp[0]; 8295 if (ipf) { 8296 /* 8297 * It has to match on ident, source address, and 8298 * dest address. 8299 */ 8300 if (ipf->ipf_ident == ident && 8301 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8302 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8303 8304 /* 8305 * If we have received too many 8306 * duplicate fragments for this packet 8307 * free it. 8308 */ 8309 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8310 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8311 freemsg(mp); 8312 mutex_exit(&ipfb->ipfb_lock); 8313 return (NULL); 8314 } 8315 8316 break; 8317 } 8318 ipfp = &ipf->ipf_hash_next; 8319 continue; 8320 } 8321 8322 8323 /* 8324 * If we pruned the list, do we want to store this new 8325 * fragment?. We apply an optimization here based on the 8326 * fact that most fragments will be received in order. 8327 * So if the offset of this incoming fragment is zero, 8328 * it is the first fragment of a new packet. We will 8329 * keep it. Otherwise drop the fragment, as we have 8330 * probably pruned the packet already (since the 8331 * packet cannot be found). 8332 */ 8333 8334 if (pruned && offset != 0) { 8335 mutex_exit(&ipfb->ipfb_lock); 8336 freemsg(mp); 8337 return (NULL); 8338 } 8339 8340 /* New guy. Allocate a frag message. 
*/ 8341 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8342 if (!mp1) { 8343 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8344 freemsg(mp); 8345 partial_reass_done: 8346 mutex_exit(&ipfb->ipfb_lock); 8347 return (NULL); 8348 } 8349 8350 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8351 /* 8352 * Too many fragmented packets in this hash bucket. 8353 * Free the oldest. 8354 */ 8355 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8356 } 8357 8358 mp1->b_cont = mp; 8359 8360 /* Initialize the fragment header. */ 8361 ipf = (ipf_t *)mp1->b_rptr; 8362 ipf->ipf_mp = mp1; 8363 ipf->ipf_ptphn = ipfp; 8364 ipfp[0] = ipf; 8365 ipf->ipf_hash_next = NULL; 8366 ipf->ipf_ident = ident; 8367 ipf->ipf_v6src = *v6src_ptr; 8368 ipf->ipf_v6dst = *v6dst_ptr; 8369 /* Record reassembly start time. */ 8370 ipf->ipf_timestamp = gethrestime_sec(); 8371 /* Record ipf generation and account for frag header */ 8372 ipf->ipf_gen = ill->ill_ipf_gen++; 8373 ipf->ipf_count = MBLKSIZE(mp1); 8374 ipf->ipf_protocol = nexthdr; 8375 ipf->ipf_nf_hdr_len = 0; 8376 ipf->ipf_prev_nexthdr_offset = 0; 8377 ipf->ipf_last_frag_seen = B_FALSE; 8378 ipf->ipf_ecn = ecn_info; 8379 ipf->ipf_num_dups = 0; 8380 ipfb->ipfb_frag_pkts++; 8381 ipf->ipf_checksum = 0; 8382 ipf->ipf_checksum_flags = 0; 8383 8384 /* Store checksum value in fragment header */ 8385 if (sum_flags != 0) { 8386 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8387 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8388 ipf->ipf_checksum = sum_val; 8389 ipf->ipf_checksum_flags = sum_flags; 8390 } 8391 8392 /* 8393 * We handle reassembly two ways. In the easy case, 8394 * where all the fragments show up in order, we do 8395 * minimal bookkeeping, and just clip new pieces on 8396 * the end. If we ever see a hole, then we go off 8397 * to ip_reassemble which has to mark the pieces and 8398 * keep track of the number of holes, etc. Obviously, 8399 * the point of having both mechanisms is so we can 8400 * handle the easy case as efficiently as possible. 
8401 */ 8402 if (offset == 0) { 8403 /* Easy case, in-order reassembly so far. */ 8404 /* Update the byte count */ 8405 ipf->ipf_count += msg_len; 8406 ipf->ipf_tail_mp = tail_mp; 8407 /* 8408 * Keep track of next expected offset in 8409 * ipf_end. 8410 */ 8411 ipf->ipf_end = end; 8412 ipf->ipf_nf_hdr_len = hdr_length; 8413 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8414 } else { 8415 /* Hard case, hole at the beginning. */ 8416 ipf->ipf_tail_mp = NULL; 8417 /* 8418 * ipf_end == 0 means that we have given up 8419 * on easy reassembly. 8420 */ 8421 ipf->ipf_end = 0; 8422 8423 /* Forget checksum offload from now on */ 8424 ipf->ipf_checksum_flags = 0; 8425 8426 /* 8427 * ipf_hole_cnt is set by ip_reassemble. 8428 * ipf_count is updated by ip_reassemble. 8429 * No need to check for return value here 8430 * as we don't expect reassembly to complete or 8431 * fail for the first fragment itself. 8432 */ 8433 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8434 msg_len); 8435 } 8436 /* Update per ipfb and ill byte counts */ 8437 ipfb->ipfb_count += ipf->ipf_count; 8438 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8439 ill->ill_frag_count += ipf->ipf_count; 8440 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8441 /* If the frag timer wasn't already going, start it. */ 8442 mutex_enter(&ill->ill_lock); 8443 ill_frag_timer_start(ill); 8444 mutex_exit(&ill->ill_lock); 8445 goto partial_reass_done; 8446 } 8447 8448 /* 8449 * If the packet's flag has changed (it could be coming up 8450 * from an interface different than the previous, therefore 8451 * possibly different checksum capability), then forget about 8452 * any stored checksum states. Otherwise add the value to 8453 * the existing one stored in the fragment header. 
8454 */ 8455 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8456 sum_val += ipf->ipf_checksum; 8457 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8458 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8459 ipf->ipf_checksum = sum_val; 8460 } else if (ipf->ipf_checksum_flags != 0) { 8461 /* Forget checksum offload from now on */ 8462 ipf->ipf_checksum_flags = 0; 8463 } 8464 8465 /* 8466 * We have a new piece of a datagram which is already being 8467 * reassembled. Update the ECN info if all IP fragments 8468 * are ECN capable. If there is one which is not, clear 8469 * all the info. If there is at least one which has CE 8470 * code point, IP needs to report that up to transport. 8471 */ 8472 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8473 if (ecn_info == IPH_ECN_CE) 8474 ipf->ipf_ecn = IPH_ECN_CE; 8475 } else { 8476 ipf->ipf_ecn = IPH_ECN_NECT; 8477 } 8478 8479 if (offset && ipf->ipf_end == offset) { 8480 /* The new fragment fits at the end */ 8481 ipf->ipf_tail_mp->b_cont = mp; 8482 /* Update the byte count */ 8483 ipf->ipf_count += msg_len; 8484 /* Update per ipfb and ill byte counts */ 8485 ipfb->ipfb_count += msg_len; 8486 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8487 ill->ill_frag_count += msg_len; 8488 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8489 if (more_frags) { 8490 /* More to come. */ 8491 ipf->ipf_end = end; 8492 ipf->ipf_tail_mp = tail_mp; 8493 goto partial_reass_done; 8494 } 8495 } else { 8496 /* 8497 * Go do the hard cases. 8498 * Call ip_reassemble(). 
8499 */ 8500 int ret; 8501 8502 if (offset == 0) { 8503 if (ipf->ipf_prev_nexthdr_offset == 0) { 8504 ipf->ipf_nf_hdr_len = hdr_length; 8505 ipf->ipf_prev_nexthdr_offset = 8506 *prev_nexthdr_offset; 8507 } 8508 } 8509 /* Save current byte count */ 8510 count = ipf->ipf_count; 8511 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8512 8513 /* Count of bytes added and subtracted (freeb()ed) */ 8514 count = ipf->ipf_count - count; 8515 if (count) { 8516 /* Update per ipfb and ill byte counts */ 8517 ipfb->ipfb_count += count; 8518 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8519 ill->ill_frag_count += count; 8520 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8521 } 8522 if (ret == IP_REASS_PARTIAL) { 8523 goto partial_reass_done; 8524 } else if (ret == IP_REASS_FAILED) { 8525 /* Reassembly failed. Free up all resources */ 8526 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8527 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8528 IP_REASS_SET_START(t_mp, 0); 8529 IP_REASS_SET_END(t_mp, 0); 8530 } 8531 freemsg(mp); 8532 goto partial_reass_done; 8533 } 8534 8535 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8536 } 8537 /* 8538 * We have completed reassembly. Unhook the frag header from 8539 * the reassembly list. 8540 * 8541 * Grab the unfragmentable header length next header value out 8542 * of the first fragment 8543 */ 8544 ASSERT(ipf->ipf_nf_hdr_len != 0); 8545 hdr_length = ipf->ipf_nf_hdr_len; 8546 8547 /* 8548 * Before we free the frag header, record the ECN info 8549 * to report back to the transport. 
8550 */ 8551 ecn_info = ipf->ipf_ecn; 8552 8553 /* 8554 * Store the nextheader field in the header preceding the fragment 8555 * header 8556 */ 8557 nexthdr = ipf->ipf_protocol; 8558 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8559 ipfp = ipf->ipf_ptphn; 8560 8561 /* We need to supply these to caller */ 8562 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8563 sum_val = ipf->ipf_checksum; 8564 else 8565 sum_val = 0; 8566 8567 mp1 = ipf->ipf_mp; 8568 count = ipf->ipf_count; 8569 ipf = ipf->ipf_hash_next; 8570 if (ipf) 8571 ipf->ipf_ptphn = ipfp; 8572 ipfp[0] = ipf; 8573 ill->ill_frag_count -= count; 8574 ASSERT(ipfb->ipfb_count >= count); 8575 ipfb->ipfb_count -= count; 8576 ipfb->ipfb_frag_pkts--; 8577 mutex_exit(&ipfb->ipfb_lock); 8578 /* Ditch the frag header. */ 8579 mp = mp1->b_cont; 8580 freeb(mp1); 8581 8582 /* 8583 * Make sure the packet is good by doing some sanity 8584 * check. If bad we can silentely drop the packet. 8585 */ 8586 reass_done: 8587 if (hdr_length < sizeof (ip6_frag_t)) { 8588 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8589 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8590 freemsg(mp); 8591 return (NULL); 8592 } 8593 8594 /* 8595 * Remove the fragment header from the initial header by 8596 * splitting the mblk into the non-fragmentable header and 8597 * everthing after the fragment extension header. This has the 8598 * side effect of putting all the headers that need destination 8599 * processing into the b_cont block-- on return this fact is 8600 * used in order to avoid having to look at the extensions 8601 * already processed. 8602 * 8603 * Note that this code assumes that the unfragmentable portion 8604 * of the header is in the first mblk and increments 8605 * the read pointer past it. If this assumption is broken 8606 * this code fails badly. 
8607 */ 8608 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8609 mblk_t *nmp; 8610 8611 if (!(nmp = dupb(mp))) { 8612 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8613 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8614 freemsg(mp); 8615 return (NULL); 8616 } 8617 nmp->b_cont = mp->b_cont; 8618 mp->b_cont = nmp; 8619 nmp->b_rptr += hdr_length; 8620 } 8621 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8622 8623 ip6h = (ip6_t *)mp->b_rptr; 8624 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8625 8626 /* Restore original IP length in header. */ 8627 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8628 /* Record the ECN info. */ 8629 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8630 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8631 8632 /* Reassembly is successful; return checksum information if needed */ 8633 if (cksum_val != NULL) 8634 *cksum_val = sum_val; 8635 if (cksum_flags != NULL) 8636 *cksum_flags = sum_flags; 8637 8638 return (mp); 8639 } 8640 8641 /* 8642 * Walk through the options to see if there is a routing header. 8643 * If present get the destination which is the last address of 8644 * the option. 
8645 */ 8646 in6_addr_t 8647 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8648 { 8649 uint8_t nexthdr; 8650 uint8_t *whereptr; 8651 ip6_hbh_t *hbhhdr; 8652 ip6_dest_t *dsthdr; 8653 ip6_rthdr0_t *rthdr; 8654 ip6_frag_t *fraghdr; 8655 int ehdrlen; 8656 int left; 8657 in6_addr_t *ap, rv; 8658 8659 if (is_fragment != NULL) 8660 *is_fragment = B_FALSE; 8661 8662 rv = ip6h->ip6_dst; 8663 8664 nexthdr = ip6h->ip6_nxt; 8665 whereptr = (uint8_t *)&ip6h[1]; 8666 for (;;) { 8667 8668 ASSERT(nexthdr != IPPROTO_RAW); 8669 switch (nexthdr) { 8670 case IPPROTO_HOPOPTS: 8671 hbhhdr = (ip6_hbh_t *)whereptr; 8672 nexthdr = hbhhdr->ip6h_nxt; 8673 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8674 break; 8675 case IPPROTO_DSTOPTS: 8676 dsthdr = (ip6_dest_t *)whereptr; 8677 nexthdr = dsthdr->ip6d_nxt; 8678 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8679 break; 8680 case IPPROTO_ROUTING: 8681 rthdr = (ip6_rthdr0_t *)whereptr; 8682 nexthdr = rthdr->ip6r0_nxt; 8683 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8684 8685 left = rthdr->ip6r0_segleft; 8686 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8687 rv = *(ap + left - 1); 8688 /* 8689 * If the caller doesn't care whether the packet 8690 * is a fragment or not, we can stop here since 8691 * we have our destination. 8692 */ 8693 if (is_fragment == NULL) 8694 goto done; 8695 break; 8696 case IPPROTO_FRAGMENT: 8697 fraghdr = (ip6_frag_t *)whereptr; 8698 nexthdr = fraghdr->ip6f_nxt; 8699 ehdrlen = sizeof (ip6_frag_t); 8700 if (is_fragment != NULL) 8701 *is_fragment = B_TRUE; 8702 goto done; 8703 default : 8704 goto done; 8705 } 8706 whereptr += ehdrlen; 8707 } 8708 8709 done: 8710 return (rv); 8711 } 8712 8713 /* 8714 * ip_source_routed_v6: 8715 * This function is called by redirect code in ip_rput_data_v6 to 8716 * know whether this packet is source routed through this node i.e 8717 * whether this node (router) is part of the journey. 
This 8718 * function is called under two cases : 8719 * 8720 * case 1 : Routing header was processed by this node and 8721 * ip_process_rthdr replaced ip6_dst with the next hop 8722 * and we are forwarding the packet to the next hop. 8723 * 8724 * case 2 : Routing header was not processed by this node and we 8725 * are just forwarding the packet. 8726 * 8727 * For case (1) we don't want to send redirects. For case(2) we 8728 * want to send redirects. 8729 */ 8730 static boolean_t 8731 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8732 { 8733 uint8_t nexthdr; 8734 in6_addr_t *addrptr; 8735 ip6_rthdr0_t *rthdr; 8736 uint8_t numaddr; 8737 ip6_hbh_t *hbhhdr; 8738 uint_t ehdrlen; 8739 uint8_t *byteptr; 8740 8741 ip2dbg(("ip_source_routed_v6\n")); 8742 nexthdr = ip6h->ip6_nxt; 8743 ehdrlen = IPV6_HDR_LEN; 8744 8745 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8746 while (nexthdr == IPPROTO_HOPOPTS || 8747 nexthdr == IPPROTO_DSTOPTS) { 8748 byteptr = (uint8_t *)ip6h + ehdrlen; 8749 /* 8750 * Check if we have already processed 8751 * packets or we are just a forwarding 8752 * router which only pulled up msgs up 8753 * to IPV6HDR and one HBH ext header 8754 */ 8755 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8756 ip2dbg(("ip_source_routed_v6: Extension" 8757 " headers not processed\n")); 8758 return (B_FALSE); 8759 } 8760 hbhhdr = (ip6_hbh_t *)byteptr; 8761 nexthdr = hbhhdr->ip6h_nxt; 8762 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8763 } 8764 switch (nexthdr) { 8765 case IPPROTO_ROUTING: 8766 byteptr = (uint8_t *)ip6h + ehdrlen; 8767 /* 8768 * If for some reason, we haven't pulled up 8769 * the routing hdr data mblk, then we must 8770 * not have processed it at all. So for sure 8771 * we are not part of the source routed journey. 
8772 */ 8773 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8774 ip2dbg(("ip_source_routed_v6: Routing" 8775 " header not processed\n")); 8776 return (B_FALSE); 8777 } 8778 rthdr = (ip6_rthdr0_t *)byteptr; 8779 /* 8780 * Either we are an intermediate router or the 8781 * last hop before destination and we have 8782 * already processed the routing header. 8783 * If segment_left is greater than or equal to zero, 8784 * then we must be the (numaddr - segleft) entry 8785 * of the routing header. Although ip6r0_segleft 8786 * is a unit8_t variable, we still check for zero 8787 * or greater value, if in case the data type 8788 * is changed someday in future. 8789 */ 8790 if (rthdr->ip6r0_segleft > 0 || 8791 rthdr->ip6r0_segleft == 0) { 8792 ire_t *ire = NULL; 8793 8794 numaddr = rthdr->ip6r0_len / 2; 8795 addrptr = (in6_addr_t *)((char *)rthdr + 8796 sizeof (*rthdr)); 8797 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8798 if (addrptr != NULL) { 8799 ire = ire_ctable_lookup_v6(addrptr, NULL, 8800 IRE_LOCAL, NULL, ALL_ZONES, MATCH_IRE_TYPE); 8801 if (ire != NULL) { 8802 ire_refrele(ire); 8803 return (B_TRUE); 8804 } 8805 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8806 } 8807 } 8808 /* FALLTHRU */ 8809 default: 8810 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8811 return (B_FALSE); 8812 } 8813 } 8814 8815 /* 8816 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8817 * Assumes that the following set of headers appear in the first 8818 * mblk: 8819 * ip6i_t (if present) CAN also appear as a separate mblk. 8820 * ip6_t 8821 * Any extension headers 8822 * TCP/UDP/SCTP header (if present) 8823 * The routine can handle an ICMPv6 header that is not in the first mblk. 8824 * 8825 * The order to determine the outgoing interface is as follows: 8826 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 8827 * 2. If conn_nofailover_ill is set then use that ill. 8828 * 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8829 * 4. 
If q is an ill queue and (link local or multicast destination) then 8830 * use that ill. 8831 * 5. If IPV6_BOUND_IF has been set use that ill. 8832 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8833 * look for the best IRE match for the unspecified group to determine 8834 * the ill. 8835 * 7. For unicast: Just do an IRE lookup for the best match. 8836 */ 8837 void 8838 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8839 { 8840 conn_t *connp = NULL; 8841 queue_t *q = (queue_t *)arg2; 8842 ire_t *ire = NULL; 8843 ire_t *sctp_ire = NULL; 8844 ip6_t *ip6h; 8845 in6_addr_t *v6dstp; 8846 ill_t *ill = NULL; 8847 ipif_t *ipif; 8848 ip6i_t *ip6i; 8849 int cksum_request; /* -1 => normal. */ 8850 /* 1 => Skip TCP/UDP/SCTP checksum */ 8851 /* Otherwise contains insert offset for checksum */ 8852 int unspec_src; 8853 boolean_t do_outrequests; /* Increment OutRequests? */ 8854 mib2_ipv6IfStatsEntry_t *mibptr; 8855 int match_flags = MATCH_IRE_ILL_GROUP; 8856 boolean_t attach_if = B_FALSE; 8857 mblk_t *first_mp; 8858 boolean_t mctl_present; 8859 ipsec_out_t *io; 8860 boolean_t drop_if_delayed = B_FALSE; 8861 boolean_t multirt_need_resolve = B_FALSE; 8862 mblk_t *copy_mp = NULL; 8863 int err; 8864 int ip6i_flags = 0; 8865 zoneid_t zoneid; 8866 ill_t *saved_ill = NULL; 8867 boolean_t conn_lock_held; 8868 boolean_t need_decref = B_FALSE; 8869 8870 /* 8871 * Highest bit in version field is Reachability Confirmation bit 8872 * used by NUD in ip_xmit_v6(). 8873 */ 8874 #ifdef _BIG_ENDIAN 8875 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8876 #else 8877 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 8878 #endif 8879 8880 /* 8881 * M_CTL comes from 5 places 8882 * 8883 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 8884 * both V4 and V6 datagrams. 8885 * 8886 * 2) AH/ESP sends down M_CTL after doing their job with both 8887 * V4 and V6 datagrams. 
8888 * 8889 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 8890 * attached. 8891 * 8892 * 4) Notifications from an external resolver (for XRESOLV ifs) 8893 * 8894 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 8895 * IPsec hardware acceleration support. 8896 * 8897 * We need to handle (1)'s IPv6 case and (3) here. For the 8898 * IPv4 case in (1), and (2), IPSEC processing has already 8899 * started. The code in ip_wput() already knows how to handle 8900 * continuing IPSEC processing (for IPv4 and IPv6). All other 8901 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 8902 * for handling. 8903 */ 8904 first_mp = mp; 8905 mctl_present = B_FALSE; 8906 io = NULL; 8907 8908 /* Multidata transmit? */ 8909 if (DB_TYPE(mp) == M_MULTIDATA) { 8910 /* 8911 * We should never get here, since all Multidata messages 8912 * originating from tcp should have been directed over to 8913 * tcp_multisend() in the first place. 8914 */ 8915 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 8916 freemsg(mp); 8917 return; 8918 } else if (DB_TYPE(mp) == M_CTL) { 8919 uint32_t mctltype = 0; 8920 uint32_t mlen = MBLKL(first_mp); 8921 8922 mp = mp->b_cont; 8923 mctl_present = B_TRUE; 8924 io = (ipsec_out_t *)first_mp->b_rptr; 8925 8926 /* 8927 * Validate this M_CTL message. The only three types of 8928 * M_CTL messages we expect to see in this code path are 8929 * ipsec_out_t or ipsec_in_t structures (allocated as 8930 * ipsec_info_t unions), or ipsec_ctl_t structures. 8931 * The ipsec_out_type and ipsec_in_type overlap in the two 8932 * data structures, and they are either set to IPSEC_OUT 8933 * or IPSEC_IN depending on which data structure it is. 8934 * ipsec_ctl_t is an IPSEC_CTL. 8935 * 8936 * All other M_CTL messages are sent to ip_wput_nondata() 8937 * for handling. 
8938 */ 8939 if (mlen >= sizeof (io->ipsec_out_type)) 8940 mctltype = io->ipsec_out_type; 8941 8942 if ((mlen == sizeof (ipsec_ctl_t)) && 8943 (mctltype == IPSEC_CTL)) { 8944 ip_output(Q_TO_CONN(q), first_mp, q, caller); 8945 return; 8946 } 8947 8948 if ((mlen < sizeof (ipsec_info_t)) || 8949 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 8950 mp == NULL) { 8951 ip_wput_nondata(NULL, q, first_mp, NULL); 8952 return; 8953 } 8954 /* NDP callbacks have q_next non-NULL. That's case #3. */ 8955 if (q->q_next == NULL) { 8956 ip6h = (ip6_t *)mp->b_rptr; 8957 /* 8958 * For a freshly-generated TCP dgram that needs IPV6 8959 * processing, don't call ip_wput immediately. We can 8960 * tell this by the ipsec_out_proc_begin. In-progress 8961 * IPSEC_OUT messages have proc_begin set to TRUE, 8962 * and we want to send all IPSEC_IN messages to 8963 * ip_wput() for IPsec processing or finishing. 8964 */ 8965 if (mctltype == IPSEC_IN || 8966 IPVER(ip6h) != IPV6_VERSION || 8967 io->ipsec_out_proc_begin) { 8968 mibptr = &ip6_mib; 8969 goto notv6; 8970 } 8971 } 8972 } else if (DB_TYPE(mp) != M_DATA) { 8973 ip_wput_nondata(NULL, q, mp, NULL); 8974 return; 8975 } 8976 8977 ip6h = (ip6_t *)mp->b_rptr; 8978 8979 if (IPVER(ip6h) != IPV6_VERSION) { 8980 mibptr = &ip6_mib; 8981 goto notv6; 8982 } 8983 8984 if (q->q_next != NULL) { 8985 ill = (ill_t *)q->q_ptr; 8986 /* 8987 * We don't know if this ill will be used for IPv6 8988 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 8989 * ipif_set_values() sets the ill_isv6 flag to true if 8990 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 8991 * just drop the packet. 
8992 */ 8993 if (!ill->ill_isv6) { 8994 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 8995 "ILLF_IPV6 was set\n")); 8996 freemsg(first_mp); 8997 return; 8998 } 8999 /* For uniformity do a refhold */ 9000 mutex_enter(&ill->ill_lock); 9001 if (!ILL_CAN_LOOKUP(ill)) { 9002 mutex_exit(&ill->ill_lock); 9003 freemsg(first_mp); 9004 return; 9005 } 9006 ill_refhold_locked(ill); 9007 mutex_exit(&ill->ill_lock); 9008 mibptr = ill->ill_ip6_mib; 9009 /* 9010 * ill_ip6_mib is allocated by ipif_set_values() when 9011 * ill_isv6 is set. Thus if ill_isv6 is true, 9012 * ill_ip6_mib had better not be NULL. 9013 */ 9014 ASSERT(mibptr != NULL); 9015 unspec_src = 0; 9016 BUMP_MIB(mibptr, ipv6OutRequests); 9017 do_outrequests = B_FALSE; 9018 } else { 9019 connp = (conn_t *)arg; 9020 ASSERT(connp != NULL); 9021 9022 /* is queue flow controlled? */ 9023 if ((q->q_first || connp->conn_draining) && 9024 (caller == IP_WPUT)) { 9025 /* 9026 * 1) TCP sends down M_CTL for detached connections. 9027 * 2) AH/ESP sends down M_CTL. 9028 * 9029 * We don't flow control either of the above. Only 9030 * UDP and others are flow controlled for which we 9031 * can't have a M_CTL. 9032 */ 9033 ASSERT(first_mp == mp); 9034 (void) putq(q, mp); 9035 return; 9036 } 9037 mibptr = &ip6_mib; 9038 unspec_src = connp->conn_unspec_src; 9039 do_outrequests = B_TRUE; 9040 if (mp->b_flag & MSGHASREF) { 9041 mp->b_flag &= ~MSGHASREF; 9042 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9043 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9044 need_decref = B_TRUE; 9045 } 9046 9047 /* 9048 * If there is a policy, try to attach an ipsec_out in 9049 * the front. At the end, first_mp either points to a 9050 * M_DATA message or IPSEC_OUT message linked to a 9051 * M_DATA message. We have to do it now as we might 9052 * lose the "conn" if we go through ip_newroute. 
9053 */ 9054 if (!mctl_present && 9055 (connp->conn_out_enforce_policy || 9056 connp->conn_latch != NULL)) { 9057 ASSERT(first_mp == mp); 9058 /* XXX Any better way to get the protocol fast ? */ 9059 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9060 connp->conn_ulp)) == NULL)) { 9061 if (need_decref) 9062 CONN_DEC_REF(connp); 9063 return; 9064 } else { 9065 ASSERT(mp->b_datap->db_type == M_CTL); 9066 first_mp = mp; 9067 mp = mp->b_cont; 9068 mctl_present = B_TRUE; 9069 io = (ipsec_out_t *)first_mp->b_rptr; 9070 } 9071 } 9072 } 9073 9074 /* check for alignment and full IPv6 header */ 9075 if (!OK_32PTR((uchar_t *)ip6h) || 9076 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9077 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9078 if (do_outrequests) 9079 BUMP_MIB(mibptr, ipv6OutRequests); 9080 BUMP_MIB(mibptr, ipv6OutDiscards); 9081 freemsg(first_mp); 9082 if (ill != NULL) 9083 ill_refrele(ill); 9084 if (need_decref) 9085 CONN_DEC_REF(connp); 9086 return; 9087 } 9088 v6dstp = &ip6h->ip6_dst; 9089 cksum_request = -1; 9090 ip6i = NULL; 9091 9092 /* 9093 * Once neighbor discovery has completed, ndp_process() will provide 9094 * locally generated packets for which processing can be reattempted. 9095 * In these cases, connp is NULL and the original zone is part of a 9096 * prepended ipsec_out_t. 9097 */ 9098 if (io != NULL) { 9099 zoneid = io->ipsec_out_zoneid; 9100 ASSERT(zoneid != ALL_ZONES); 9101 } else { 9102 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9103 } 9104 9105 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9106 /* 9107 * This is an ip6i_t header followed by an ip6_hdr. 9108 * Check which fields are set. 9109 * 9110 * When the packet comes from a transport we should have 9111 * all needed headers in the first mblk. However, when 9112 * going through ip_newroute*_v6 the ip6i might be in 9113 * a separate mblk when we return here. 
In that case 9114 * we pullup everything to ensure that extension and transport 9115 * headers "stay" in the first mblk. 9116 */ 9117 ip6i = (ip6i_t *)ip6h; 9118 ip6i_flags = ip6i->ip6i_flags; 9119 9120 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9121 ((mp->b_wptr - (uchar_t *)ip6i) >= 9122 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9123 9124 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9125 if (!pullupmsg(mp, -1)) { 9126 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9127 if (do_outrequests) 9128 BUMP_MIB(mibptr, ipv6OutRequests); 9129 BUMP_MIB(mibptr, ipv6OutDiscards); 9130 freemsg(first_mp); 9131 if (ill != NULL) 9132 ill_refrele(ill); 9133 if (need_decref) 9134 CONN_DEC_REF(connp); 9135 return; 9136 } 9137 ip6h = (ip6_t *)mp->b_rptr; 9138 v6dstp = &ip6h->ip6_dst; 9139 ip6i = (ip6i_t *)ip6h; 9140 } 9141 ip6h = (ip6_t *)&ip6i[1]; 9142 9143 /* 9144 * Advance rptr past the ip6i_t to get ready for 9145 * transmitting the packet. However, if the packet gets 9146 * passed to ip_newroute*_v6 then rptr is moved back so 9147 * that the ip6i_t header can be inspected when the 9148 * packet comes back here after passing through 9149 * ire_add_then_send. 9150 */ 9151 mp->b_rptr = (uchar_t *)ip6h; 9152 9153 /* 9154 * IP6I_ATTACH_IF is set in this function when we had a 9155 * conn and it was either bound to the IPFF_NOFAILOVER address 9156 * or IPV6_BOUND_PIF was set. These options override other 9157 * options that set the ifindex. We come here with 9158 * IP6I_ATTACH_IF set when we can't find the ire and 9159 * ip_newroute_v6 is feeding the packet for second time. 
9160 */ 9161 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9162 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9163 ASSERT(ip6i->ip6i_ifindex != 0); 9164 if (ill != NULL) 9165 ill_refrele(ill); 9166 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9167 NULL, NULL, NULL, NULL); 9168 if (ill == NULL) { 9169 if (do_outrequests) 9170 BUMP_MIB(mibptr, ipv6OutRequests); 9171 BUMP_MIB(mibptr, ipv6OutDiscards); 9172 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9173 ip6i->ip6i_ifindex)); 9174 if (need_decref) 9175 CONN_DEC_REF(connp); 9176 freemsg(first_mp); 9177 return; 9178 } 9179 mibptr = ill->ill_ip6_mib; 9180 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9181 /* 9182 * Preserve the index so that when we return 9183 * from IPSEC processing, we know where to 9184 * send the packet. 9185 */ 9186 if (mctl_present) { 9187 ASSERT(io != NULL); 9188 io->ipsec_out_ill_index = 9189 ip6i->ip6i_ifindex; 9190 } 9191 } 9192 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9193 /* 9194 * This is a multipathing probe packet that has 9195 * been delayed in ND resolution. 
Drop the 9196 * packet for the reasons mentioned in 9197 * nce_queue_mp() 9198 */ 9199 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9200 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9201 freemsg(first_mp); 9202 ill_refrele(ill); 9203 if (need_decref) 9204 CONN_DEC_REF(connp); 9205 return; 9206 } 9207 } 9208 } 9209 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9210 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9211 9212 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9213 if (secpolicy_net_rawaccess(cr) != 0) { 9214 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9215 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9216 NULL, zoneid, 9217 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9218 if (ire == NULL) { 9219 if (do_outrequests) 9220 BUMP_MIB(mibptr, 9221 ipv6OutRequests); 9222 BUMP_MIB(mibptr, ipv6OutDiscards); 9223 ip1dbg(("ip_wput_v6: bad source " 9224 "addr\n")); 9225 freemsg(first_mp); 9226 if (ill != NULL) 9227 ill_refrele(ill); 9228 if (need_decref) 9229 CONN_DEC_REF(connp); 9230 return; 9231 } 9232 ire_refrele(ire); 9233 } 9234 /* No need to verify again when using ip_newroute */ 9235 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9236 } 9237 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9238 /* 9239 * Make sure they match since ip_newroute*_v6 etc might 9240 * (unknown to them) inspect ip6i_nexthop when 9241 * they think they access ip6_dst. 9242 */ 9243 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9244 } 9245 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9246 cksum_request = 1; 9247 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9248 cksum_request = ip6i->ip6i_checksum_off; 9249 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9250 unspec_src = 1; 9251 9252 if (do_outrequests && ill != NULL) { 9253 BUMP_MIB(mibptr, ipv6OutRequests); 9254 do_outrequests = B_FALSE; 9255 } 9256 /* 9257 * Store ip6i_t info that we need after we come back 9258 * from IPSEC processing. 
9259 */ 9260 if (mctl_present) { 9261 ASSERT(io != NULL); 9262 io->ipsec_out_unspec_src = unspec_src; 9263 } 9264 } 9265 if (connp != NULL && connp->conn_dontroute) 9266 ip6h->ip6_hops = 1; 9267 9268 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9269 goto ipv6multicast; 9270 9271 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9272 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9273 ill_t *conn_outgoing_pill; 9274 9275 conn_outgoing_pill = conn_get_held_ill(connp, 9276 &connp->conn_outgoing_pill, &err); 9277 if (err == ILL_LOOKUP_FAILED) { 9278 if (ill != NULL) 9279 ill_refrele(ill); 9280 if (need_decref) 9281 CONN_DEC_REF(connp); 9282 freemsg(first_mp); 9283 return; 9284 } 9285 if (conn_outgoing_pill != NULL) { 9286 if (ill != NULL) 9287 ill_refrele(ill); 9288 ill = conn_outgoing_pill; 9289 attach_if = B_TRUE; 9290 match_flags = MATCH_IRE_ILL; 9291 mibptr = ill->ill_ip6_mib; 9292 9293 /* 9294 * Check if we need an ire that will not be 9295 * looked up by anybody else i.e. HIDDEN. 9296 */ 9297 if (ill_is_probeonly(ill)) 9298 match_flags |= MATCH_IRE_MARK_HIDDEN; 9299 goto send_from_ill; 9300 } 9301 } 9302 9303 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9304 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9305 ill_t *conn_nofailover_ill; 9306 9307 conn_nofailover_ill = conn_get_held_ill(connp, 9308 &connp->conn_nofailover_ill, &err); 9309 if (err == ILL_LOOKUP_FAILED) { 9310 if (ill != NULL) 9311 ill_refrele(ill); 9312 if (need_decref) 9313 CONN_DEC_REF(connp); 9314 freemsg(first_mp); 9315 return; 9316 } 9317 if (conn_nofailover_ill != NULL) { 9318 if (ill != NULL) 9319 ill_refrele(ill); 9320 ill = conn_nofailover_ill; 9321 attach_if = B_TRUE; 9322 /* 9323 * Assumes that ipc_nofailover_ill is used only for 9324 * multipathing probe packets. These packets are better 9325 * dropped, if they are delayed in ND resolution, for 9326 * the reasons described in nce_queue_mp(). 
9327 * IP6I_DROP_IFDELAYED will be set later on in this 9328 * function for this packet. 9329 */ 9330 drop_if_delayed = B_TRUE; 9331 match_flags = MATCH_IRE_ILL; 9332 mibptr = ill->ill_ip6_mib; 9333 9334 /* 9335 * Check if we need an ire that will not be 9336 * looked up by anybody else i.e. HIDDEN. 9337 */ 9338 if (ill_is_probeonly(ill)) 9339 match_flags |= MATCH_IRE_MARK_HIDDEN; 9340 goto send_from_ill; 9341 } 9342 } 9343 9344 /* 9345 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9346 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9347 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9348 */ 9349 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9350 ASSERT(ip6i->ip6i_ifindex != 0); 9351 attach_if = B_TRUE; 9352 ASSERT(ill != NULL); 9353 match_flags = MATCH_IRE_ILL; 9354 9355 /* 9356 * Check if we need an ire that will not be 9357 * looked up by anybody else i.e. HIDDEN. 9358 */ 9359 if (ill_is_probeonly(ill)) 9360 match_flags |= MATCH_IRE_MARK_HIDDEN; 9361 goto send_from_ill; 9362 } 9363 9364 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9365 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9366 ASSERT(ill != NULL); 9367 goto send_from_ill; 9368 } 9369 9370 /* 9371 * 4. If q is an ill queue and (link local or multicast destination) 9372 * then use that ill. 9373 */ 9374 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9375 goto send_from_ill; 9376 } 9377 9378 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9379 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9380 ill_t *conn_outgoing_ill; 9381 9382 conn_outgoing_ill = conn_get_held_ill(connp, 9383 &connp->conn_outgoing_ill, &err); 9384 if (err == ILL_LOOKUP_FAILED) { 9385 if (ill != NULL) 9386 ill_refrele(ill); 9387 if (need_decref) 9388 CONN_DEC_REF(connp); 9389 freemsg(first_mp); 9390 return; 9391 } 9392 if (ill != NULL) 9393 ill_refrele(ill); 9394 ill = conn_outgoing_ill; 9395 mibptr = ill->ill_ip6_mib; 9396 goto send_from_ill; 9397 } 9398 9399 /* 9400 * 6. For unicast: Just do an IRE lookup for the best match. 9401 * If we get here for a link-local address it is rather random 9402 * what interface we pick on a multihomed host. 9403 * *If* there is an IRE_CACHE (and the link-local address 9404 * isn't duplicated on multi links) this will find the IRE_CACHE. 9405 * Otherwise it will use one of the matching IRE_INTERFACE routes 9406 * for the link-local prefix. Hence, applications 9407 * *should* be encouraged to specify an outgoing interface when sending 9408 * to a link local address. 9409 */ 9410 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9411 !connp->conn_fully_bound)) { 9412 /* 9413 * We cache IRE_CACHEs to avoid lookups. We don't do 9414 * this for the tcp global queue and listen end point 9415 * as it does not really have a real destination to 9416 * talk to. 9417 */ 9418 ire = ire_cache_lookup_v6(v6dstp, zoneid); 9419 } else { 9420 /* 9421 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9422 * grab a lock here to check for CONDEMNED as it is okay 9423 * to send a packet or two with the IRE_CACHE that is going 9424 * away. 9425 */ 9426 mutex_enter(&connp->conn_lock); 9427 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9428 if (ire != NULL && 9429 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9430 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9431 9432 IRE_REFHOLD(ire); 9433 mutex_exit(&connp->conn_lock); 9434 9435 } else { 9436 boolean_t cached = B_FALSE; 9437 9438 connp->conn_ire_cache = NULL; 9439 mutex_exit(&connp->conn_lock); 9440 /* Release the old ire */ 9441 if (ire != NULL && sctp_ire == NULL) 9442 IRE_REFRELE_NOTR(ire); 9443 9444 ire = (ire_t *)ire_cache_lookup_v6(v6dstp, zoneid); 9445 if (ire != NULL) { 9446 IRE_REFHOLD_NOTR(ire); 9447 9448 mutex_enter(&connp->conn_lock); 9449 if (!(connp->conn_state_flags & CONN_CLOSING) && 9450 (connp->conn_ire_cache == NULL)) { 9451 rw_enter(&ire->ire_bucket->irb_lock, 9452 RW_READER); 9453 if (!(ire->ire_marks & 9454 IRE_MARK_CONDEMNED)) { 9455 connp->conn_ire_cache = ire; 9456 cached = B_TRUE; 9457 } 9458 rw_exit(&ire->ire_bucket->irb_lock); 9459 } 9460 mutex_exit(&connp->conn_lock); 9461 9462 /* 9463 * We can continue to use the ire but since it 9464 * was not cached, we should drop the extra 9465 * reference. 9466 */ 9467 if (!cached) 9468 IRE_REFRELE_NOTR(ire); 9469 } 9470 } 9471 } 9472 9473 if (ire != NULL) { 9474 if (do_outrequests) { 9475 /* Handle IRE_LOCAL's that might appear here */ 9476 if (ire->ire_type == IRE_CACHE) { 9477 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9478 ill_ip6_mib; 9479 } else { 9480 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9481 } 9482 BUMP_MIB(mibptr, ipv6OutRequests); 9483 } 9484 ASSERT(!attach_if); 9485 9486 /* 9487 * Check if the ire has the RTF_MULTIRT flag, inherited 9488 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9489 */ 9490 if (ire->ire_flags & RTF_MULTIRT) { 9491 /* 9492 * Force hop limit of multirouted packets if required. 9493 * The hop limit of such packets is bounded by the 9494 * ip_multirt_ttl ndd variable. 9495 * NDP packets must have a hop limit of 255; don't 9496 * change the hop limit in that case. 
9497 */ 9498 if ((ip_multirt_ttl > 0) && 9499 (ip6h->ip6_hops > ip_multirt_ttl) && 9500 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9501 if (ip_debug > 3) { 9502 ip2dbg(("ip_wput_v6: forcing multirt " 9503 "hop limit to %d (was %d) ", 9504 ip_multirt_ttl, ip6h->ip6_hops)); 9505 pr_addr_dbg("v6dst %s\n", AF_INET6, 9506 &ire->ire_addr_v6); 9507 } 9508 ip6h->ip6_hops = ip_multirt_ttl; 9509 } 9510 9511 /* 9512 * We look at this point if there are pending 9513 * unresolved routes. ire_multirt_need_resolve_v6() 9514 * checks in O(n) that all IRE_OFFSUBNET ire 9515 * entries for the packet's destination and 9516 * flagged RTF_MULTIRT are currently resolved. 9517 * If some remain unresolved, we do a copy 9518 * of the current message. It will be used 9519 * to initiate additional route resolutions. 9520 */ 9521 multirt_need_resolve = 9522 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9523 ip2dbg(("ip_wput_v6: ire %p, " 9524 "multirt_need_resolve %d, first_mp %p\n", 9525 (void *)ire, multirt_need_resolve, 9526 (void *)first_mp)); 9527 if (multirt_need_resolve) { 9528 copy_mp = copymsg(first_mp); 9529 if (copy_mp != NULL) { 9530 MULTIRT_DEBUG_TAG(copy_mp); 9531 } 9532 } 9533 } 9534 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9535 connp, caller, 0, ip6i_flags); 9536 if (need_decref) { 9537 CONN_DEC_REF(connp); 9538 connp = NULL; 9539 } 9540 IRE_REFRELE(ire); 9541 9542 /* 9543 * Try to resolve another multiroute if 9544 * ire_multirt_need_resolve_v6() deemed it necessary. 9545 * copy_mp will be consumed (sent or freed) by 9546 * ip_newroute_v6(). 9547 */ 9548 if (copy_mp != NULL) { 9549 if (mctl_present) { 9550 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9551 } else { 9552 ip6h = (ip6_t *)copy_mp->b_rptr; 9553 } 9554 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9555 &ip6h->ip6_src, NULL, zoneid); 9556 } 9557 if (ill != NULL) 9558 ill_refrele(ill); 9559 return; 9560 } 9561 9562 /* 9563 * No full IRE for this destination. 
Send it to 9564 * ip_newroute_v6 to see if anything else matches. 9565 * Mark this packet as having originated on this 9566 * machine. 9567 * Update rptr if there was an ip6i_t header. 9568 */ 9569 mp->b_prev = NULL; 9570 mp->b_next = NULL; 9571 if (ip6i != NULL) 9572 mp->b_rptr -= sizeof (ip6i_t); 9573 9574 if (unspec_src) { 9575 if (ip6i == NULL) { 9576 /* 9577 * Add ip6i_t header to carry unspec_src 9578 * until the packet comes back in ip_wput_v6. 9579 */ 9580 mp = ip_add_info_v6(mp, NULL, v6dstp); 9581 if (mp == NULL) { 9582 if (do_outrequests) 9583 BUMP_MIB(mibptr, ipv6OutRequests); 9584 BUMP_MIB(mibptr, ipv6OutDiscards); 9585 if (mctl_present) 9586 freeb(first_mp); 9587 if (ill != NULL) 9588 ill_refrele(ill); 9589 if (need_decref) 9590 CONN_DEC_REF(connp); 9591 return; 9592 } 9593 ip6i = (ip6i_t *)mp->b_rptr; 9594 9595 if (mctl_present) { 9596 ASSERT(first_mp != mp); 9597 first_mp->b_cont = mp; 9598 } else { 9599 first_mp = mp; 9600 } 9601 9602 if ((mp->b_wptr - (uchar_t *)ip6i) == 9603 sizeof (ip6i_t)) { 9604 /* 9605 * ndp_resolver called from ip_newroute_v6 9606 * expects pulled up message. 
9607 */ 9608 if (!pullupmsg(mp, -1)) { 9609 ip1dbg(("ip_wput_v6: pullupmsg" 9610 " failed\n")); 9611 if (do_outrequests) { 9612 BUMP_MIB(mibptr, 9613 ipv6OutRequests); 9614 } 9615 BUMP_MIB(mibptr, ipv6OutDiscards); 9616 freemsg(first_mp); 9617 if (ill != NULL) 9618 ill_refrele(ill); 9619 if (need_decref) 9620 CONN_DEC_REF(connp); 9621 return; 9622 } 9623 ip6i = (ip6i_t *)mp->b_rptr; 9624 } 9625 ip6h = (ip6_t *)&ip6i[1]; 9626 v6dstp = &ip6h->ip6_dst; 9627 } 9628 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9629 if (mctl_present) { 9630 ASSERT(io != NULL); 9631 io->ipsec_out_unspec_src = unspec_src; 9632 } 9633 } 9634 if (do_outrequests) 9635 BUMP_MIB(mibptr, ipv6OutRequests); 9636 if (need_decref) 9637 CONN_DEC_REF(connp); 9638 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9639 if (ill != NULL) 9640 ill_refrele(ill); 9641 return; 9642 9643 9644 /* 9645 * Handle multicast packets with or without an conn. 9646 * Assumes that the transports set ip6_hops taking 9647 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9648 * into account. 9649 */ 9650 ipv6multicast: 9651 ip2dbg(("ip_wput_v6: multicast\n")); 9652 9653 /* 9654 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9655 * 2. If conn_nofailover_ill is set then use that ill. 9656 * 9657 * Hold the conn_lock till we refhold the ill of interest that is 9658 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9659 * while holding any locks, postpone the refrele until after the 9660 * conn_lock is dropped. 
9661 */ 9662 if (connp != NULL) { 9663 mutex_enter(&connp->conn_lock); 9664 conn_lock_held = B_TRUE; 9665 } else { 9666 conn_lock_held = B_FALSE; 9667 } 9668 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9669 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9670 if (err == ILL_LOOKUP_FAILED) { 9671 ip1dbg(("ip_output_v6: multicast" 9672 " conn_outgoing_pill no ipif\n")); 9673 multicast_discard: 9674 ASSERT(saved_ill == NULL); 9675 if (conn_lock_held) 9676 mutex_exit(&connp->conn_lock); 9677 if (ill != NULL) 9678 ill_refrele(ill); 9679 freemsg(first_mp); 9680 if (do_outrequests) 9681 BUMP_MIB(mibptr, ipv6OutDiscards); 9682 if (need_decref) 9683 CONN_DEC_REF(connp); 9684 return; 9685 } 9686 saved_ill = ill; 9687 ill = connp->conn_outgoing_pill; 9688 attach_if = B_TRUE; 9689 match_flags = MATCH_IRE_ILL; 9690 mibptr = ill->ill_ip6_mib; 9691 9692 /* 9693 * Check if we need an ire that will not be 9694 * looked up by anybody else i.e. HIDDEN. 9695 */ 9696 if (ill_is_probeonly(ill)) 9697 match_flags |= MATCH_IRE_MARK_HIDDEN; 9698 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9699 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9700 if (err == ILL_LOOKUP_FAILED) { 9701 ip1dbg(("ip_output_v6: multicast" 9702 " conn_nofailover_ill no ipif\n")); 9703 goto multicast_discard; 9704 } 9705 saved_ill = ill; 9706 ill = connp->conn_nofailover_ill; 9707 attach_if = B_TRUE; 9708 match_flags = MATCH_IRE_ILL; 9709 9710 /* 9711 * Check if we need an ire that will not be 9712 * looked up by anybody else i.e. HIDDEN. 9713 */ 9714 if (ill_is_probeonly(ill)) 9715 match_flags |= MATCH_IRE_MARK_HIDDEN; 9716 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9717 /* 9718 * Redo 1. If we did not find an IRE_CACHE the first time, 9719 * we should have an ip6i_t with IP6I_ATTACH_IF if 9720 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9721 * used on this endpoint. 
9722 */ 9723 ASSERT(ip6i->ip6i_ifindex != 0); 9724 attach_if = B_TRUE; 9725 ASSERT(ill != NULL); 9726 match_flags = MATCH_IRE_ILL; 9727 9728 /* 9729 * Check if we need an ire that will not be 9730 * looked up by anybody else i.e. HIDDEN. 9731 */ 9732 if (ill_is_probeonly(ill)) 9733 match_flags |= MATCH_IRE_MARK_HIDDEN; 9734 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9735 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9736 9737 ASSERT(ill != NULL); 9738 } else if (ill != NULL) { 9739 /* 9740 * 4. If q is an ill queue and (link local or multicast 9741 * destination) then use that ill. 9742 * We don't need the ipif initialization here. 9743 * This useless assert below is just to prevent lint from 9744 * reporting a null body if statement. 9745 */ 9746 ASSERT(ill != NULL); 9747 } else if (connp != NULL) { 9748 /* 9749 * 5. If IPV6_BOUND_IF has been set use that ill. 9750 * 9751 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 9752 * Otherwise look for the best IRE match for the unspecified 9753 * group to determine the ill. 9754 * 9755 * conn_multicast_ill is used for only IPv6 packets. 9756 * conn_multicast_ipif is used for only IPv4 packets. 9757 * Thus a PF_INET6 socket send both IPv4 and IPv6 9758 * multicast packets using different IP*_MULTICAST_IF 9759 * interfaces. 
9760 */ 9761 if (connp->conn_outgoing_ill != NULL) { 9762 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9763 if (err == ILL_LOOKUP_FAILED) { 9764 ip1dbg(("ip_output_v6: multicast" 9765 " conn_outgoing_ill no ipif\n")); 9766 goto multicast_discard; 9767 } 9768 ill = connp->conn_outgoing_ill; 9769 } else if (connp->conn_multicast_ill != NULL) { 9770 err = ill_check_and_refhold(connp->conn_multicast_ill); 9771 if (err == ILL_LOOKUP_FAILED) { 9772 ip1dbg(("ip_output_v6: multicast" 9773 " conn_multicast_ill no ipif\n")); 9774 goto multicast_discard; 9775 } 9776 ill = connp->conn_multicast_ill; 9777 } else { 9778 mutex_exit(&connp->conn_lock); 9779 conn_lock_held = B_FALSE; 9780 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 9781 if (ipif == NULL) { 9782 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9783 goto multicast_discard; 9784 } 9785 /* 9786 * We have a ref to this ipif, so we can safely 9787 * access ipif_ill. 9788 */ 9789 ill = ipif->ipif_ill; 9790 mutex_enter(&ill->ill_lock); 9791 if (!ILL_CAN_LOOKUP(ill)) { 9792 mutex_exit(&ill->ill_lock); 9793 ipif_refrele(ipif); 9794 ill = NULL; 9795 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9796 goto multicast_discard; 9797 } 9798 ill_refhold_locked(ill); 9799 mutex_exit(&ill->ill_lock); 9800 ipif_refrele(ipif); 9801 /* 9802 * Save binding until IPV6_MULTICAST_IF 9803 * changes it 9804 */ 9805 mutex_enter(&connp->conn_lock); 9806 connp->conn_multicast_ill = ill; 9807 connp->conn_orig_multicast_ifindex = 9808 ill->ill_phyint->phyint_ifindex; 9809 mutex_exit(&connp->conn_lock); 9810 } 9811 } 9812 if (conn_lock_held) 9813 mutex_exit(&connp->conn_lock); 9814 9815 if (saved_ill != NULL) 9816 ill_refrele(saved_ill); 9817 9818 ASSERT(ill != NULL); 9819 /* 9820 * For multicast loopback interfaces replace the multicast address 9821 * with a unicast address for the ire lookup. 
9822 */ 9823 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 9824 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9825 9826 mibptr = ill->ill_ip6_mib; 9827 if (do_outrequests) { 9828 BUMP_MIB(mibptr, ipv6OutRequests); 9829 do_outrequests = B_FALSE; 9830 } 9831 BUMP_MIB(mibptr, ipv6OutMcastPkts); 9832 9833 /* 9834 * As we may lose the conn by the time we reach ip_wput_ire_v6 9835 * we copy conn_multicast_loop and conn_dontroute on to an 9836 * ipsec_out. In case if this datagram goes out secure, 9837 * we need the ill_index also. Copy that also into the 9838 * ipsec_out. 9839 */ 9840 if (mctl_present) { 9841 io = (ipsec_out_t *)first_mp->b_rptr; 9842 ASSERT(first_mp->b_datap->db_type == M_CTL); 9843 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9844 } else { 9845 ASSERT(mp == first_mp); 9846 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 9847 BUMP_MIB(mibptr, ipv6OutDiscards); 9848 freemsg(mp); 9849 if (ill != NULL) 9850 ill_refrele(ill); 9851 if (need_decref) 9852 CONN_DEC_REF(connp); 9853 return; 9854 } 9855 io = (ipsec_out_t *)first_mp->b_rptr; 9856 /* This is not a secure packet */ 9857 io->ipsec_out_secure = B_FALSE; 9858 io->ipsec_out_use_global_policy = B_TRUE; 9859 io->ipsec_out_zoneid = 9860 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 9861 first_mp->b_cont = mp; 9862 mctl_present = B_TRUE; 9863 } 9864 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9865 io->ipsec_out_unspec_src = unspec_src; 9866 if (connp != NULL) 9867 io->ipsec_out_dontroute = connp->conn_dontroute; 9868 9869 send_from_ill: 9870 ASSERT(ill != NULL); 9871 ASSERT(mibptr == ill->ill_ip6_mib); 9872 if (do_outrequests) { 9873 BUMP_MIB(mibptr, ipv6OutRequests); 9874 do_outrequests = B_FALSE; 9875 } 9876 9877 if (io != NULL) 9878 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9879 9880 /* 9881 * When a specific ill is specified (using IPV6_PKTINFO, 9882 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9883 * on routing entries (ftable and ctable) that have a matching 9884 * ire->ire_ipif->ipif_ill. Thus this can only be used 9885 * for destinations that are on-link for the specific ill 9886 * and that can appear on multiple links. Thus it is useful 9887 * for multicast destinations, link-local destinations, and 9888 * at some point perhaps for site-local destinations (if the 9889 * node sits at a site boundary). 9890 * We create the cache entries in the regular ctable since 9891 * it can not "confuse" things for other destinations. 9892 * table. 9893 * 9894 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9895 * It is used only when ire_cache_lookup is used above. 9896 */ 9897 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9898 zoneid, match_flags); 9899 if (ire != NULL) { 9900 /* 9901 * Check if the ire has the RTF_MULTIRT flag, inherited 9902 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9903 */ 9904 if (ire->ire_flags & RTF_MULTIRT) { 9905 /* 9906 * Force hop limit of multirouted packets if required. 9907 * The hop limit of such packets is bounded by the 9908 * ip_multirt_ttl ndd variable. 9909 * NDP packets must have a hop limit of 255; don't 9910 * change the hop limit in that case. 
9911 */ 9912 if ((ip_multirt_ttl > 0) && 9913 (ip6h->ip6_hops > ip_multirt_ttl) && 9914 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9915 if (ip_debug > 3) { 9916 ip2dbg(("ip_wput_v6: forcing multirt " 9917 "hop limit to %d (was %d) ", 9918 ip_multirt_ttl, ip6h->ip6_hops)); 9919 pr_addr_dbg("v6dst %s\n", AF_INET6, 9920 &ire->ire_addr_v6); 9921 } 9922 ip6h->ip6_hops = ip_multirt_ttl; 9923 } 9924 9925 /* 9926 * We look at this point if there are pending 9927 * unresolved routes. ire_multirt_need_resolve_v6() 9928 * checks in O(n) that all IRE_OFFSUBNET ire 9929 * entries for the packet's destination and 9930 * flagged RTF_MULTIRT are currently resolved. 9931 * If some remain unresolved, we make a copy 9932 * of the current message. It will be used 9933 * to initiate additional route resolutions. 9934 */ 9935 multirt_need_resolve = 9936 ire_multirt_need_resolve_v6(&ire->ire_addr_v6); 9937 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9938 "multirt_need_resolve %d, first_mp %p\n", 9939 (void *)ire, multirt_need_resolve, 9940 (void *)first_mp)); 9941 if (multirt_need_resolve) { 9942 copy_mp = copymsg(first_mp); 9943 if (copy_mp != NULL) { 9944 MULTIRT_DEBUG_TAG(copy_mp); 9945 } 9946 } 9947 } 9948 9949 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9950 ill->ill_name, (void *)ire, 9951 ill->ill_phyint->phyint_ifindex)); 9952 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9953 connp, caller, 9954 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 9955 ip6i_flags); 9956 ire_refrele(ire); 9957 if (need_decref) { 9958 CONN_DEC_REF(connp); 9959 connp = NULL; 9960 } 9961 9962 /* 9963 * Try to resolve another multiroute if 9964 * ire_multirt_need_resolve_v6() deemed it necessary. 9965 * copy_mp will be consumed (sent or freed) by 9966 * ip_newroute_[ipif_]v6(). 
9967 */ 9968 if (copy_mp != NULL) { 9969 if (mctl_present) { 9970 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9971 } else { 9972 ip6h = (ip6_t *)copy_mp->b_rptr; 9973 } 9974 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 9975 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 9976 zoneid); 9977 if (ipif == NULL) { 9978 ip1dbg(("ip_wput_v6: No ipif for " 9979 "multicast\n")); 9980 MULTIRT_DEBUG_UNTAG(copy_mp); 9981 freemsg(copy_mp); 9982 return; 9983 } 9984 ip_newroute_ipif_v6(q, copy_mp, ipif, 9985 ip6h->ip6_dst, unspec_src, zoneid); 9986 ipif_refrele(ipif); 9987 } else { 9988 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9989 &ip6h->ip6_src, ill, zoneid); 9990 } 9991 } 9992 if (ill != NULL) 9993 ill_refrele(ill); 9994 return; 9995 } 9996 if (need_decref) { 9997 CONN_DEC_REF(connp); 9998 connp = NULL; 9999 } 10000 10001 /* Update rptr if there was an ip6i_t header. */ 10002 if (ip6i != NULL) 10003 mp->b_rptr -= sizeof (ip6i_t); 10004 if (unspec_src || attach_if) { 10005 if (ip6i == NULL) { 10006 /* 10007 * Add ip6i_t header to carry unspec_src 10008 * or attach_if until the packet comes back in 10009 * ip_wput_v6. 10010 */ 10011 if (mctl_present) { 10012 first_mp->b_cont = 10013 ip_add_info_v6(mp, NULL, v6dstp); 10014 mp = first_mp->b_cont; 10015 if (mp == NULL) 10016 freeb(first_mp); 10017 } else { 10018 first_mp = mp = ip_add_info_v6(mp, NULL, 10019 v6dstp); 10020 } 10021 if (mp == NULL) { 10022 BUMP_MIB(mibptr, ipv6OutDiscards); 10023 if (ill != NULL) 10024 ill_refrele(ill); 10025 return; 10026 } 10027 ip6i = (ip6i_t *)mp->b_rptr; 10028 if ((mp->b_wptr - (uchar_t *)ip6i) == 10029 sizeof (ip6i_t)) { 10030 /* 10031 * ndp_resolver called from ip_newroute_v6 10032 * expects a pulled up message. 
10033 */ 10034 if (!pullupmsg(mp, -1)) { 10035 ip1dbg(("ip_wput_v6: pullupmsg" 10036 " failed\n")); 10037 BUMP_MIB(mibptr, ipv6OutDiscards); 10038 freemsg(first_mp); 10039 return; 10040 } 10041 ip6i = (ip6i_t *)mp->b_rptr; 10042 } 10043 ip6h = (ip6_t *)&ip6i[1]; 10044 v6dstp = &ip6h->ip6_dst; 10045 } 10046 if (unspec_src) 10047 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10048 if (attach_if) { 10049 /* 10050 * Bind to nofailover/BOUND_PIF overrides ifindex. 10051 */ 10052 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10053 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10054 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10055 if (drop_if_delayed) { 10056 /* This is a multipathing probe packet */ 10057 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10058 } 10059 } 10060 if (mctl_present) { 10061 ASSERT(io != NULL); 10062 io->ipsec_out_unspec_src = unspec_src; 10063 } 10064 } 10065 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10066 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10067 unspec_src, zoneid); 10068 } else { 10069 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10070 zoneid); 10071 } 10072 if (ill != NULL) 10073 ill_refrele(ill); 10074 return; 10075 10076 notv6: 10077 /* 10078 * XXX implement a IPv4 and IPv6 packet counter per conn and 10079 * switch when ratio exceeds e.g. 
10:1 10080 */ 10081 if (q->q_next == NULL) { 10082 connp = Q_TO_CONN(q); 10083 10084 if (IPCL_IS_TCP(connp)) { 10085 /* change conn_send for the tcp_v4_connections */ 10086 connp->conn_send = ip_output; 10087 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10088 /* The 'q' is the default SCTP queue */ 10089 connp = (conn_t *)arg; 10090 } else { 10091 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10092 } 10093 } 10094 BUMP_MIB(mibptr, ipv6OutIPv4); 10095 (void) ip_output(connp, first_mp, q, caller); 10096 if (ill != NULL) 10097 ill_refrele(ill); 10098 } 10099 10100 static void 10101 ip_wput_v6(queue_t *q, mblk_t *mp) 10102 { 10103 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10104 } 10105 10106 static void 10107 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10108 { 10109 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10110 io->ipsec_out_attach_if = B_TRUE; 10111 io->ipsec_out_ill_index = attach_index; 10112 } 10113 10114 /* 10115 * NULL send-to queue - packet is to be delivered locally. 10116 */ 10117 void 10118 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10119 ire_t *ire, int fanout_flags) 10120 { 10121 uint32_t ports; 10122 mblk_t *mp = first_mp, *first_mp1; 10123 boolean_t mctl_present; 10124 uint8_t nexthdr; 10125 uint16_t hdr_length; 10126 ipsec_out_t *io; 10127 mib2_ipv6IfStatsEntry_t *mibptr; 10128 ilm_t *ilm; 10129 uint_t nexthdr_offset; 10130 10131 nexthdr = ip6h->ip6_nxt; 10132 mibptr = ill->ill_ip6_mib; 10133 10134 /* Fastpath */ 10135 switch (nexthdr) { 10136 case IPPROTO_TCP: 10137 case IPPROTO_UDP: 10138 case IPPROTO_ICMPV6: 10139 case IPPROTO_SCTP: 10140 hdr_length = IPV6_HDR_LEN; 10141 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10142 (uchar_t *)ip6h); 10143 break; 10144 default: { 10145 uint8_t *nexthdrp; 10146 10147 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10148 &hdr_length, &nexthdrp)) { 10149 /* Malformed packet */ 10150 BUMP_MIB(mibptr, ipv6OutDiscards); 10151 freemsg(first_mp); 10152 return; 10153 } 10154 nexthdr 
= *nexthdrp; 10155 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10156 break; 10157 } 10158 } 10159 10160 if (DB_TYPE(mp) == M_CTL) { 10161 io = (ipsec_out_t *)mp->b_rptr; 10162 if (!io->ipsec_out_secure) { 10163 mp = mp->b_cont; 10164 freeb(first_mp); 10165 first_mp = mp; 10166 mctl_present = B_FALSE; 10167 } else { 10168 mctl_present = B_TRUE; 10169 mp = first_mp->b_cont; 10170 ipsec_out_to_in(first_mp); 10171 } 10172 } else { 10173 mctl_present = B_FALSE; 10174 } 10175 10176 UPDATE_OB_PKT_COUNT(ire); 10177 ire->ire_last_used_time = lbolt; 10178 10179 /* 10180 * Remove reacability confirmation bit from version field 10181 * before looping back the packet. 10182 */ 10183 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10184 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10185 } 10186 10187 switch (nexthdr) { 10188 case IPPROTO_TCP: 10189 if (DB_TYPE(mp) == M_DATA) { 10190 /* 10191 * M_DATA mblk, so init mblk (chain) for 10192 * no struio(). 10193 */ 10194 mblk_t *mp1 = mp; 10195 10196 do { 10197 mp1->b_datap->db_struioflag = 0; 10198 } while ((mp1 = mp1->b_cont) != NULL); 10199 } 10200 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10201 TCP_PORTS_OFFSET); 10202 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10203 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10204 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10205 hdr_length, mctl_present, ire->ire_zoneid); 10206 return; 10207 10208 case IPPROTO_UDP: 10209 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10210 UDP_PORTS_OFFSET); 10211 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10212 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10213 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10214 return; 10215 10216 case IPPROTO_SCTP: 10217 { 10218 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10219 10220 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10221 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10222 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10223 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10224 ire->ire_zoneid); 10225 
return; 10226 } 10227 case IPPROTO_ICMPV6: { 10228 icmp6_t *icmp6; 10229 10230 /* check for full IPv6+ICMPv6 header */ 10231 if ((mp->b_wptr - mp->b_rptr) < 10232 (hdr_length + ICMP6_MINLEN)) { 10233 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10234 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10235 " failed\n")); 10236 BUMP_MIB(mibptr, ipv6OutDiscards); 10237 freemsg(first_mp); 10238 return; 10239 } 10240 ip6h = (ip6_t *)mp->b_rptr; 10241 } 10242 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10243 10244 /* Update output mib stats */ 10245 icmp_update_out_mib_v6(ill, icmp6); 10246 10247 /* Check variable for testing applications */ 10248 if (ipv6_drop_inbound_icmpv6) { 10249 freemsg(first_mp); 10250 return; 10251 } 10252 /* 10253 * Assume that there is always at least one conn for 10254 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10255 * where there is no conn. 10256 */ 10257 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10258 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10259 /* 10260 * In the multicast case, applications may have 10261 * joined the group from different zones, so we 10262 * need to deliver the packet to each of them. 10263 * Loop through the multicast memberships 10264 * structures (ilm) on the receive ill and send 10265 * a copy of the packet up each matching one. 10266 * However, we don't do this for multicasts sent 10267 * on the loopback interface (PHYI_LOOPBACK flag 10268 * set) as they must stay in the sender's zone. 
10269 */ 10270 ILM_WALKER_HOLD(ill); 10271 for (ilm = ill->ill_ilm; ilm != NULL; 10272 ilm = ilm->ilm_next) { 10273 if (ilm->ilm_flags & ILM_DELETED) 10274 continue; 10275 if (!IN6_ARE_ADDR_EQUAL( 10276 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10277 continue; 10278 if ((fanout_flags & 10279 IP_FF_NO_MCAST_LOOP) && 10280 ilm->ilm_zoneid == ire->ire_zoneid) 10281 continue; 10282 if (!ipif_lookup_zoneid(ill, 10283 ilm->ilm_zoneid, IPIF_UP, NULL)) 10284 continue; 10285 10286 first_mp1 = ip_copymsg(first_mp); 10287 if (first_mp1 == NULL) 10288 continue; 10289 icmp_inbound_v6(q, first_mp1, ill, 10290 hdr_length, mctl_present, 10291 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10292 } 10293 ILM_WALKER_RELE(ill); 10294 } else { 10295 first_mp1 = ip_copymsg(first_mp); 10296 if (first_mp1 != NULL) 10297 icmp_inbound_v6(q, first_mp1, ill, 10298 hdr_length, mctl_present, 10299 IP6_NO_IPPOLICY, ire->ire_zoneid); 10300 } 10301 } 10302 /* FALLTHRU */ 10303 default: { 10304 /* 10305 * Handle protocols with which IPv6 is less intimate. 10306 */ 10307 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10308 10309 /* 10310 * Enable sending ICMP for "Unknown" nexthdr 10311 * case. i.e. where we did not FALLTHRU from 10312 * IPPROTO_ICMPV6 processing case above. 10313 */ 10314 if (nexthdr != IPPROTO_ICMPV6) 10315 fanout_flags |= IP_FF_SEND_ICMP; 10316 /* 10317 * Note: There can be more than one stream bound 10318 * to a particular protocol. When this is the case, 10319 * each one gets a copy of any incoming packets. 10320 */ 10321 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10322 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10323 mctl_present, ire->ire_zoneid); 10324 return; 10325 } 10326 } 10327 } 10328 10329 /* 10330 * Send packet using IRE. 10331 * Checksumming is controlled by cksum_request: 10332 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10333 * 1 => Skip TCP/UDP/SCTP checksum 10334 * Otherwise => checksum_request contains insert offset for checksum 10335 * 10336 * Assumes that the following set of headers appear in the first 10337 * mblk: 10338 * ip6_t 10339 * Any extension headers 10340 * TCP/UDP/SCTP header (if present) 10341 * The routine can handle an ICMPv6 header that is not in the first mblk. 10342 * 10343 * NOTE : This function does not ire_refrele the ire passed in as the 10344 * argument unlike ip_wput_ire where the REFRELE is done. 10345 * Refer to ip_wput_ire for more on this. 10346 */ 10347 static void 10348 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10349 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10350 { 10351 ip6_t *ip6h; 10352 uint8_t nexthdr; 10353 uint16_t hdr_length; 10354 uint_t reachable = 0x0; 10355 ill_t *ill; 10356 mib2_ipv6IfStatsEntry_t *mibptr; 10357 mblk_t *first_mp; 10358 boolean_t mctl_present; 10359 ipsec_out_t *io; 10360 boolean_t conn_dontroute; /* conn value for multicast */ 10361 boolean_t conn_multicast_loop; /* conn value for multicast */ 10362 boolean_t multicast_forward; /* Should we forward ? */ 10363 int max_frag; 10364 zoneid_t zoneid; 10365 10366 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10367 ill = ire_to_ill(ire); 10368 first_mp = mp; 10369 multicast_forward = B_FALSE; 10370 10371 if (mp->b_datap->db_type != M_CTL) { 10372 ip6h = (ip6_t *)first_mp->b_rptr; 10373 } else { 10374 io = (ipsec_out_t *)first_mp->b_rptr; 10375 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10376 /* 10377 * Grab the zone id now because the M_CTL can be discarded by 10378 * ip_wput_ire_parse_ipsec_out() below. 10379 */ 10380 zoneid = io->ipsec_out_zoneid; 10381 ASSERT(zoneid != ALL_ZONES); 10382 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10383 /* 10384 * For the multicast case, ipsec_out carries conn_dontroute and 10385 * conn_multicast_loop as conn may not be available here. 
We 10386 * need this for multicast loopback and forwarding which is done 10387 * later in the code. 10388 */ 10389 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10390 conn_dontroute = io->ipsec_out_dontroute; 10391 conn_multicast_loop = io->ipsec_out_multicast_loop; 10392 /* 10393 * If conn_dontroute is not set or conn_multicast_loop 10394 * is set, we need to do forwarding/loopback. For 10395 * datagrams from ip_wput_multicast, conn_dontroute is 10396 * set to B_TRUE and conn_multicast_loop is set to 10397 * B_FALSE so that we neither do forwarding nor 10398 * loopback. 10399 */ 10400 if (!conn_dontroute || conn_multicast_loop) 10401 multicast_forward = B_TRUE; 10402 } 10403 } 10404 10405 /* 10406 * If the sender didn't supply the hop limit and there is a default 10407 * unicast hop limit associated with the output interface, we use 10408 * that if the packet is unicast. Interface specific unicast hop 10409 * limits as set via the SIOCSLIFLNKINFO ioctl. 10410 */ 10411 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10412 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10413 ip6h->ip6_hops = ill->ill_max_hops; 10414 } 10415 10416 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid) { 10417 /* 10418 * When a zone sends a packet to another zone, we try to deliver 10419 * the packet under the same conditions as if the destination 10420 * was a real node on the network. To do so, we look for a 10421 * matching route in the forwarding table. 10422 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10423 * ip_newroute_v6() does. 
10424 */ 10425 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10426 NULL, NULL, zoneid, 0, (MATCH_IRE_RECURSIVE | 10427 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10428 if (src_ire != NULL && 10429 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10430 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10431 !unspec_src) { 10432 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10433 } 10434 ire_refrele(src_ire); 10435 } else { 10436 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10437 if (src_ire != NULL) { 10438 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10439 ire_refrele(src_ire); 10440 freemsg(first_mp); 10441 return; 10442 } 10443 ire_refrele(src_ire); 10444 } 10445 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10446 /* Failed */ 10447 freemsg(first_mp); 10448 return; 10449 } 10450 icmp_unreachable_v6(q, first_mp, 10451 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10452 return; 10453 } 10454 } 10455 10456 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10457 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10458 connp, unspec_src); 10459 if (mp == NULL) { 10460 return; 10461 } 10462 } 10463 10464 first_mp = mp; 10465 if (mp->b_datap->db_type == M_CTL) { 10466 io = (ipsec_out_t *)mp->b_rptr; 10467 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10468 mp = mp->b_cont; 10469 mctl_present = B_TRUE; 10470 } else { 10471 mctl_present = B_FALSE; 10472 } 10473 10474 ip6h = (ip6_t *)mp->b_rptr; 10475 nexthdr = ip6h->ip6_nxt; 10476 mibptr = ill->ill_ip6_mib; 10477 10478 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10479 /* 10480 * The ire_src_addr_v6 always contains a useable source address 10481 * for the destination (based on source address selection rules 10482 * with respect to address scope as well as deprecated vs. 10483 * preferred addresses). 
10484 */ 10485 ip6h->ip6_src = ire->ire_src_addr_v6; 10486 } 10487 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10488 if ((connp != NULL && connp->conn_multicast_loop) || 10489 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10490 ilm_t *ilm; 10491 10492 ILM_WALKER_HOLD(ill); 10493 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10494 ILM_WALKER_RELE(ill); 10495 if (ilm != NULL) { 10496 mblk_t *nmp; 10497 int fanout_flags = 0; 10498 10499 if (connp != NULL && 10500 !connp->conn_multicast_loop) { 10501 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10502 } 10503 ip1dbg(("ip_wput_ire_v6: " 10504 "Loopback multicast\n")); 10505 nmp = ip_copymsg(first_mp); 10506 if (nmp != NULL) { 10507 ip6_t *nip6h; 10508 10509 if (mctl_present) { 10510 nip6h = (ip6_t *) 10511 nmp->b_cont->b_rptr; 10512 } else { 10513 nip6h = (ip6_t *)nmp->b_rptr; 10514 } 10515 /* 10516 * Deliver locally and to every local 10517 * zone, except the sending zone when 10518 * IPV6_MULTICAST_LOOP is disabled. 10519 */ 10520 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10521 ire, fanout_flags); 10522 } else { 10523 BUMP_MIB(mibptr, ipv6OutDiscards); 10524 ip1dbg(("ip_wput_ire_v6: " 10525 "copymsg failed\n")); 10526 } 10527 } 10528 } 10529 if (ip6h->ip6_hops == 0 || 10530 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10531 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10532 /* 10533 * Local multicast or just loopback on loopback 10534 * interface. 10535 */ 10536 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10537 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10538 freemsg(first_mp); 10539 return; 10540 } 10541 } 10542 10543 if (ire->ire_stq != NULL) { 10544 uint32_t sum; 10545 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10546 ill_phyint->phyint_ifindex; 10547 queue_t *dev_q = ire->ire_stq->q_next; 10548 10549 /* 10550 * non-NULL send-to queue - packet is to be sent 10551 * out an interface. 10552 */ 10553 10554 /* Driver is flow-controlling? 
*/ 10555 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10556 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10557 /* 10558 * Queue packet if we have an conn to give back 10559 * pressure. We can't queue packets intended for 10560 * hardware acceleration since we've tossed that 10561 * state already. If the packet is being fed back 10562 * from ire_send_v6, we don't know the position in 10563 * the queue to enqueue the packet and we discard 10564 * the packet. 10565 */ 10566 if (ip_output_queue && connp != NULL && 10567 !mctl_present && caller != IRE_SEND) { 10568 if (caller == IP_WSRV) { 10569 connp->conn_did_putbq = 1; 10570 (void) putbq(connp->conn_wq, mp); 10571 conn_drain_insert(connp); 10572 /* 10573 * caller == IP_WSRV implies we are 10574 * the service thread, and the 10575 * queue is already noenabled. 10576 * The check for canput and 10577 * the putbq is not atomic. 10578 * So we need to check again. 10579 */ 10580 if (canput(dev_q)) 10581 connp->conn_did_putbq = 0; 10582 } else { 10583 (void) putq(connp->conn_wq, mp); 10584 } 10585 return; 10586 } 10587 BUMP_MIB(mibptr, ipv6OutDiscards); 10588 freemsg(first_mp); 10589 return; 10590 } 10591 10592 /* 10593 * Look for reachability confirmations from the transport. 
10594 */ 10595 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10596 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10597 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10598 if (mctl_present) 10599 io->ipsec_out_reachable = B_TRUE; 10600 } 10601 /* Fastpath */ 10602 switch (nexthdr) { 10603 case IPPROTO_TCP: 10604 case IPPROTO_UDP: 10605 case IPPROTO_ICMPV6: 10606 case IPPROTO_SCTP: 10607 hdr_length = IPV6_HDR_LEN; 10608 break; 10609 default: { 10610 uint8_t *nexthdrp; 10611 10612 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10613 &hdr_length, &nexthdrp)) { 10614 /* Malformed packet */ 10615 BUMP_MIB(mibptr, ipv6OutDiscards); 10616 freemsg(first_mp); 10617 return; 10618 } 10619 nexthdr = *nexthdrp; 10620 break; 10621 } 10622 } 10623 10624 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10625 uint16_t *up; 10626 uint16_t *insp; 10627 10628 /* 10629 * The packet header is processed once for all, even 10630 * in the multirouting case. We disable hardware 10631 * checksum if the packet is multirouted, as it will be 10632 * replicated via several interfaces, and not all of 10633 * them may have this capability. 10634 */ 10635 if (cksum_request == 1 && 10636 !(ire->ire_flags & RTF_MULTIRT)) { 10637 /* Skip the transport checksum */ 10638 goto cksum_done; 10639 } 10640 /* 10641 * Do user-configured raw checksum. 
10642 * Compute checksum and insert at offset "cksum_request" 10643 */ 10644 10645 /* check for enough headers for checksum */ 10646 cksum_request += hdr_length; /* offset from rptr */ 10647 if ((mp->b_wptr - mp->b_rptr) < 10648 (cksum_request + sizeof (int16_t))) { 10649 if (!pullupmsg(mp, 10650 cksum_request + sizeof (int16_t))) { 10651 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10652 " failed\n")); 10653 BUMP_MIB(mibptr, ipv6OutDiscards); 10654 freemsg(first_mp); 10655 return; 10656 } 10657 ip6h = (ip6_t *)mp->b_rptr; 10658 } 10659 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10660 ASSERT(((uintptr_t)insp & 0x1) == 0); 10661 up = (uint16_t *)&ip6h->ip6_src; 10662 /* 10663 * icmp has placed length and routing 10664 * header adjustment in *insp. 10665 */ 10666 sum = htons(nexthdr) + 10667 up[0] + up[1] + up[2] + up[3] + 10668 up[4] + up[5] + up[6] + up[7] + 10669 up[8] + up[9] + up[10] + up[11] + 10670 up[12] + up[13] + up[14] + up[15]; 10671 sum = (sum & 0xffff) + (sum >> 16); 10672 *insp = IP_CSUM(mp, hdr_length, sum); 10673 if (*insp == 0) 10674 *insp = 0xFFFF; 10675 } else if (nexthdr == IPPROTO_TCP) { 10676 uint16_t *up; 10677 10678 /* 10679 * Check for full IPv6 header + enough TCP header 10680 * to get at the checksum field. 10681 */ 10682 if ((mp->b_wptr - mp->b_rptr) < 10683 (hdr_length + TCP_CHECKSUM_OFFSET + 10684 TCP_CHECKSUM_SIZE)) { 10685 if (!pullupmsg(mp, hdr_length + 10686 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10687 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10688 " failed\n")); 10689 BUMP_MIB(mibptr, ipv6OutDiscards); 10690 freemsg(first_mp); 10691 return; 10692 } 10693 ip6h = (ip6_t *)mp->b_rptr; 10694 } 10695 10696 up = (uint16_t *)&ip6h->ip6_src; 10697 /* 10698 * Note: The TCP module has stored the length value 10699 * into the tcp checksum field, so we don't 10700 * need to explicitly sum it in here. 
10701 */ 10702 sum = up[0] + up[1] + up[2] + up[3] + 10703 up[4] + up[5] + up[6] + up[7] + 10704 up[8] + up[9] + up[10] + up[11] + 10705 up[12] + up[13] + up[14] + up[15]; 10706 10707 /* Fold the initial sum */ 10708 sum = (sum & 0xffff) + (sum >> 16); 10709 10710 up = (uint16_t *)(((uchar_t *)ip6h) + 10711 hdr_length + TCP_CHECKSUM_OFFSET); 10712 10713 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10714 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10715 ire->ire_max_frag, mctl_present, sum); 10716 10717 /* Software checksum? */ 10718 if (DB_CKSUMFLAGS(mp) == 0) { 10719 IP6_STAT(ip6_out_sw_cksum); 10720 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 10721 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10722 hdr_length); 10723 } 10724 } else if (nexthdr == IPPROTO_UDP) { 10725 uint16_t *up; 10726 10727 /* 10728 * check for full IPv6 header + enough UDP header 10729 * to get at the UDP checksum field 10730 */ 10731 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10732 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10733 if (!pullupmsg(mp, hdr_length + 10734 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10735 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10736 " failed\n")); 10737 BUMP_MIB(mibptr, ipv6OutDiscards); 10738 freemsg(first_mp); 10739 return; 10740 } 10741 ip6h = (ip6_t *)mp->b_rptr; 10742 } 10743 up = (uint16_t *)&ip6h->ip6_src; 10744 /* 10745 * Note: The UDP module has stored the length value 10746 * into the udp checksum field, so we don't 10747 * need to explicitly sum it in here. 
10748 */ 10749 sum = up[0] + up[1] + up[2] + up[3] + 10750 up[4] + up[5] + up[6] + up[7] + 10751 up[8] + up[9] + up[10] + up[11] + 10752 up[12] + up[13] + up[14] + up[15]; 10753 10754 /* Fold the initial sum */ 10755 sum = (sum & 0xffff) + (sum >> 16); 10756 10757 up = (uint16_t *)(((uchar_t *)ip6h) + 10758 hdr_length + UDP_CHECKSUM_OFFSET); 10759 10760 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10761 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10762 ire->ire_max_frag, mctl_present, sum); 10763 10764 /* Software checksum? */ 10765 if (DB_CKSUMFLAGS(mp) == 0) { 10766 IP6_STAT(ip6_out_sw_cksum); 10767 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 10768 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10769 hdr_length); 10770 } 10771 } else if (nexthdr == IPPROTO_ICMPV6) { 10772 uint16_t *up; 10773 icmp6_t *icmp6; 10774 10775 /* check for full IPv6+ICMPv6 header */ 10776 if ((mp->b_wptr - mp->b_rptr) < 10777 (hdr_length + ICMP6_MINLEN)) { 10778 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10779 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10780 " failed\n")); 10781 BUMP_MIB(mibptr, ipv6OutDiscards); 10782 freemsg(first_mp); 10783 return; 10784 } 10785 ip6h = (ip6_t *)mp->b_rptr; 10786 } 10787 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10788 up = (uint16_t *)&ip6h->ip6_src; 10789 /* 10790 * icmp has placed length and routing 10791 * header adjustment in icmp6_cksum. 
10792 */ 10793 sum = htons(IPPROTO_ICMPV6) + 10794 up[0] + up[1] + up[2] + up[3] + 10795 up[4] + up[5] + up[6] + up[7] + 10796 up[8] + up[9] + up[10] + up[11] + 10797 up[12] + up[13] + up[14] + up[15]; 10798 sum = (sum & 0xffff) + (sum >> 16); 10799 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10800 if (icmp6->icmp6_cksum == 0) 10801 icmp6->icmp6_cksum = 0xFFFF; 10802 10803 /* Update output mib stats */ 10804 icmp_update_out_mib_v6(ill, icmp6); 10805 } else if (nexthdr == IPPROTO_SCTP) { 10806 sctp_hdr_t *sctph; 10807 10808 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10809 if (!pullupmsg(mp, hdr_length + 10810 sizeof (*sctph))) { 10811 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10812 " failed\n")); 10813 BUMP_MIB(ill->ill_ip6_mib, 10814 ipv6OutDiscards); 10815 freemsg(mp); 10816 return; 10817 } 10818 ip6h = (ip6_t *)mp->b_rptr; 10819 } 10820 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10821 sctph->sh_chksum = 0; 10822 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10823 } 10824 10825 cksum_done: 10826 /* 10827 * We force the insertion of a fragment header using the 10828 * IPH_FRAG_HDR flag in two cases: 10829 * - after reception of an ICMPv6 "packet too big" message 10830 * with a MTU < 1280 (cf. RFC 2460 section 5) 10831 * - for multirouted IPv6 packets, so that the receiver can 10832 * discard duplicates according to their fragment identifier 10833 * 10834 * Two flags modifed from the API can modify this behavior. 10835 * The first is IPV6_USE_MIN_MTU. With this API the user 10836 * can specify how to manage PMTUD for unicast and multicast. 10837 * 10838 * IPV6_DONTFRAG disallows fragmentation. 
10839 */ 10840 max_frag = ire->ire_max_frag; 10841 switch (IP6I_USE_MIN_MTU_API(flags)) { 10842 case IPV6_USE_MIN_MTU_DEFAULT: 10843 case IPV6_USE_MIN_MTU_UNICAST: 10844 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10845 max_frag = IPV6_MIN_MTU; 10846 } 10847 break; 10848 10849 case IPV6_USE_MIN_MTU_NEVER: 10850 max_frag = IPV6_MIN_MTU; 10851 break; 10852 } 10853 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10854 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10855 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10856 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10857 max_frag, B_FALSE, B_TRUE); 10858 return; 10859 } 10860 10861 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10862 (mp->b_cont ? msgdsize(mp) : 10863 mp->b_wptr - (uchar_t *)ip6h)) { 10864 ip0dbg(("Packet length mismatch: %d, %ld\n", 10865 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10866 msgdsize(mp))); 10867 freemsg(first_mp); 10868 return; 10869 } 10870 /* Do IPSEC processing first */ 10871 if (mctl_present) { 10872 if (attach_index != 0) 10873 ipsec_out_attach_if(io, attach_index); 10874 ipsec_out_process(q, first_mp, ire, ill_index); 10875 return; 10876 } 10877 ASSERT(mp->b_prev == NULL); 10878 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10879 ntohs(ip6h->ip6_plen) + 10880 IPV6_HDR_LEN, max_frag)); 10881 ASSERT(mp == first_mp); 10882 /* Initiate IPPF processing */ 10883 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 10884 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10885 if (mp == NULL) { 10886 return; 10887 } 10888 } 10889 ip_wput_frag_v6(mp, ire, reachable, connp, 10890 caller, max_frag); 10891 return; 10892 } 10893 /* Do IPSEC processing first */ 10894 if (mctl_present) { 10895 int extra_len = ipsec_out_extra_length(first_mp); 10896 10897 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 10898 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 10899 /* 10900 * IPsec headers will push the packet over the 10901 * MTU limit. 
Issue an ICMPv6 Packet Too Big 10902 * message for this packet if the upper-layer 10903 * that issued this packet will be able to 10904 * react to the icmp_pkt2big_v6() that we'll 10905 * generate. 10906 */ 10907 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10908 max_frag, B_FALSE, B_TRUE); 10909 return; 10910 } 10911 if (attach_index != 0) 10912 ipsec_out_attach_if(io, attach_index); 10913 ipsec_out_process(q, first_mp, ire, ill_index); 10914 return; 10915 } 10916 /* 10917 * XXX multicast: add ip_mforward_v6() here. 10918 * Check conn_dontroute 10919 */ 10920 #ifdef lint 10921 /* 10922 * XXX The only purpose of this statement is to avoid lint 10923 * errors. See the above "XXX multicast". When that gets 10924 * fixed, remove this whole #ifdef lint section. 10925 */ 10926 ip3dbg(("multicast forward is %s.\n", 10927 (multicast_forward ? "TRUE" : "FALSE"))); 10928 #endif 10929 10930 UPDATE_OB_PKT_COUNT(ire); 10931 ire->ire_last_used_time = lbolt; 10932 ASSERT(mp == first_mp); 10933 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 10934 } else { 10935 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 10936 } 10937 } 10938 10939 /* 10940 * Outbound IPv6 fragmentation routine using MDT. 10941 */ 10942 static void 10943 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 10944 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 10945 { 10946 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 10947 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 10948 mblk_t *hdr_mp, *md_mp = NULL; 10949 int i1; 10950 multidata_t *mmd; 10951 unsigned char *hdr_ptr, *pld_ptr; 10952 ip_pdescinfo_t pdi; 10953 uint32_t ident; 10954 size_t len; 10955 uint16_t offset; 10956 queue_t *stq = ire->ire_stq; 10957 ill_t *ill = (ill_t *)stq->q_ptr; 10958 10959 ASSERT(DB_TYPE(mp) == M_DATA); 10960 ASSERT(MBLKL(mp) > unfragmentable_len); 10961 10962 /* 10963 * Move read ptr past unfragmentable portion, we don't want this part 10964 * of the data in our fragments. 
10965 */ 10966 mp->b_rptr += unfragmentable_len; 10967 10968 /* Calculate how many packets we will send out */ 10969 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 10970 pkts = (i1 + max_chunk - 1) / max_chunk; 10971 ASSERT(pkts > 1); 10972 10973 /* Allocate a message block which will hold all the IP Headers. */ 10974 wroff = ip_wroff_extra; 10975 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 10976 10977 i1 = pkts * hdr_chunk_len; 10978 /* 10979 * Create the header buffer, Multidata and destination address 10980 * and SAP attribute that should be associated with it. 10981 */ 10982 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 10983 ((hdr_mp->b_wptr += i1), 10984 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 10985 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 10986 freemsg(mp); 10987 if (md_mp == NULL) { 10988 freemsg(hdr_mp); 10989 } else { 10990 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 10991 freemsg(md_mp); 10992 } 10993 IP6_STAT(ip6_frag_mdt_allocfail); 10994 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 10995 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 10996 return; 10997 } 10998 IP6_STAT(ip6_frag_mdt_allocd); 10999 11000 /* 11001 * Add a payload buffer to the Multidata; this operation must not 11002 * fail, or otherwise our logic in this routine is broken. There 11003 * is no memory allocation done by the routine, so any returned 11004 * failure simply tells us that we've done something wrong. 11005 * 11006 * A failure tells us that either we're adding the same payload 11007 * buffer more than once, or we're trying to add more buffers than 11008 * allowed. None of the above cases should happen, and we panic 11009 * because either there's horrible heap corruption, and/or 11010 * programming mistake. 
11011 */ 11012 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11013 goto pbuf_panic; 11014 } 11015 11016 hdr_ptr = hdr_mp->b_rptr; 11017 pld_ptr = mp->b_rptr; 11018 11019 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11020 11021 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11022 11023 /* 11024 * len is the total length of the fragmentable data in this 11025 * datagram. For each fragment sent, we will decrement len 11026 * by the amount of fragmentable data sent in that fragment 11027 * until len reaches zero. 11028 */ 11029 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11030 11031 offset = 0; 11032 prev_nexthdr_offset += wroff; 11033 11034 while (len != 0) { 11035 size_t mlen; 11036 ip6_t *fip6h; 11037 ip6_frag_t *fraghdr; 11038 int error; 11039 11040 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11041 mlen = MIN(len, max_chunk); 11042 len -= mlen; 11043 11044 fip6h = (ip6_t *)(hdr_ptr + wroff); 11045 ASSERT(OK_32PTR(fip6h)); 11046 bcopy(ip6h, fip6h, unfragmentable_len); 11047 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11048 11049 fip6h->ip6_plen = htons((uint16_t)(mlen + 11050 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11051 11052 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11053 unfragmentable_len); 11054 fraghdr->ip6f_nxt = nexthdr; 11055 fraghdr->ip6f_reserved = 0; 11056 fraghdr->ip6f_offlg = htons(offset) | 11057 ((len != 0) ? IP6F_MORE_FRAG : 0); 11058 fraghdr->ip6f_ident = ident; 11059 11060 /* 11061 * Record offset and size of header and data of the next packet 11062 * in the multidata message. 
11063 */ 11064 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11065 unfragmentable_len + sizeof (ip6_frag_t), 0); 11066 PDESC_PLD_INIT(&pdi); 11067 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11068 ASSERT(i1 > 0); 11069 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11070 if (i1 == mlen) { 11071 pld_ptr += mlen; 11072 } else { 11073 i1 = mlen - i1; 11074 mp = mp->b_cont; 11075 ASSERT(mp != NULL); 11076 ASSERT(MBLKL(mp) >= i1); 11077 /* 11078 * Attach the next payload message block to the 11079 * multidata message. 11080 */ 11081 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11082 goto pbuf_panic; 11083 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11084 pld_ptr = mp->b_rptr + i1; 11085 } 11086 11087 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11088 KM_NOSLEEP)) == NULL) { 11089 /* 11090 * Any failure other than ENOMEM indicates that we 11091 * have passed in invalid pdesc info or parameters 11092 * to mmd_addpdesc, which must not happen. 11093 * 11094 * EINVAL is a result of failure on boundary checks 11095 * against the pdesc info contents. It should not 11096 * happen, and we panic because either there's 11097 * horrible heap corruption, and/or programming 11098 * mistake. 11099 */ 11100 if (error != ENOMEM) { 11101 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11102 "pdesc logic error detected for " 11103 "mmd %p pinfo %p (%d)\n", 11104 (void *)mmd, (void *)&pdi, error); 11105 /* NOTREACHED */ 11106 } 11107 IP6_STAT(ip6_frag_mdt_addpdescfail); 11108 /* Free unattached payload message blocks as well */ 11109 md_mp->b_cont = mp->b_cont; 11110 goto free_mmd; 11111 } 11112 11113 /* Advance fragment offset. */ 11114 offset += mlen; 11115 11116 /* Advance to location for next header in the buffer. */ 11117 hdr_ptr += hdr_chunk_len; 11118 11119 /* Did we reach the next payload message block? */ 11120 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11121 mp = mp->b_cont; 11122 /* 11123 * Attach the next message block with payload 11124 * data to the multidata message. 
11125 */ 11126 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11127 goto pbuf_panic; 11128 pld_ptr = mp->b_rptr; 11129 } 11130 } 11131 11132 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11133 ASSERT(mp->b_wptr == pld_ptr); 11134 11135 /* Update IP statistics */ 11136 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11137 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11138 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11139 11140 ire->ire_ob_pkt_count += pkts; 11141 if (ire->ire_ipif != NULL) 11142 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11143 11144 ire->ire_last_used_time = lbolt; 11145 /* Send it down */ 11146 putnext(stq, md_mp); 11147 return; 11148 11149 pbuf_panic: 11150 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11151 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11152 pbuf_idx); 11153 /* NOTREACHED */ 11154 } 11155 11156 /* 11157 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11158 * We have not optimized this in terms of number of mblks 11159 * allocated. For instance, for each fragment sent we always allocate a 11160 * mblk to hold the IPv6 header and fragment header. 11161 * 11162 * Assumes that all the extension headers are contained in the first mblk. 11163 * 11164 * The fragment header is inserted after an hop-by-hop options header 11165 * and after [an optional destinations header followed by] a routing header. 11166 * 11167 * NOTE : This function does not ire_refrele the ire passed in as 11168 * the argument. 
11169 */ 11170 void 11171 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11172 int caller, int max_frag) 11173 { 11174 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11175 ip6_t *fip6h; 11176 mblk_t *hmp; 11177 mblk_t *hmp0; 11178 mblk_t *dmp; 11179 ip6_frag_t *fraghdr; 11180 size_t unfragmentable_len; 11181 size_t len; 11182 size_t mlen; 11183 size_t max_chunk; 11184 uint32_t ident; 11185 uint16_t off_flags; 11186 uint16_t offset = 0; 11187 ill_t *ill; 11188 uint8_t nexthdr; 11189 uint_t prev_nexthdr_offset; 11190 uint8_t *ptr; 11191 11192 ASSERT(ire->ire_type == IRE_CACHE); 11193 ill = (ill_t *)ire->ire_stq->q_ptr; 11194 11195 /* 11196 * Determine the length of the unfragmentable portion of this 11197 * datagram. This consists of the IPv6 header, a potential 11198 * hop-by-hop options header, a potential pre-routing-header 11199 * destination options header, and a potential routing header. 11200 */ 11201 nexthdr = ip6h->ip6_nxt; 11202 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11203 ptr = (uint8_t *)&ip6h[1]; 11204 11205 if (nexthdr == IPPROTO_HOPOPTS) { 11206 ip6_hbh_t *hbh_hdr; 11207 uint_t hdr_len; 11208 11209 hbh_hdr = (ip6_hbh_t *)ptr; 11210 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11211 nexthdr = hbh_hdr->ip6h_nxt; 11212 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11213 - (uint8_t *)ip6h; 11214 ptr += hdr_len; 11215 } 11216 if (nexthdr == IPPROTO_DSTOPTS) { 11217 ip6_dest_t *dest_hdr; 11218 uint_t hdr_len; 11219 11220 dest_hdr = (ip6_dest_t *)ptr; 11221 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11222 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11223 nexthdr = dest_hdr->ip6d_nxt; 11224 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11225 - (uint8_t *)ip6h; 11226 ptr += hdr_len; 11227 } 11228 } 11229 if (nexthdr == IPPROTO_ROUTING) { 11230 ip6_rthdr_t *rthdr; 11231 uint_t hdr_len; 11232 11233 rthdr = (ip6_rthdr_t *)ptr; 11234 nexthdr = rthdr->ip6r_nxt; 11235 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11236 - (uint8_t *)ip6h; 11237 hdr_len = 8 * (rthdr->ip6r_len + 1); 11238 ptr += hdr_len; 11239 } 11240 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11241 11242 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11243 sizeof (ip6_frag_t)) & ~7; 11244 11245 /* Check if we can use MDT to send out the frags. */ 11246 ASSERT(!IRE_IS_LOCAL(ire)); 11247 if (ip_multidata_outbound && reachable == 0 && 11248 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11249 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11250 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11251 nexthdr, prev_nexthdr_offset); 11252 return; 11253 } 11254 11255 /* 11256 * Allocate an mblk with enough room for the link-layer 11257 * header, the unfragmentable part of the datagram, and the 11258 * fragment header. This (or a copy) will be used as the 11259 * first mblk for each fragment we send. 11260 */ 11261 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11262 BPRI_HI); 11263 if (hmp == NULL) { 11264 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11265 freemsg(mp); 11266 return; 11267 } 11268 hmp->b_rptr += ip_wroff_extra; 11269 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11270 11271 fip6h = (ip6_t *)hmp->b_rptr; 11272 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11273 11274 bcopy(ip6h, fip6h, unfragmentable_len); 11275 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11276 11277 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11278 11279 fraghdr->ip6f_nxt = nexthdr; 11280 fraghdr->ip6f_reserved = 0; 11281 fraghdr->ip6f_offlg = 0; 11282 fraghdr->ip6f_ident = htonl(ident); 11283 11284 /* 11285 * len is the total length of the fragmentable data in this 11286 * datagram. For each fragment sent, we will decrement len 11287 * by the amount of fragmentable data sent in that fragment 11288 * until len reaches zero. 
11289 */ 11290 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11291 11292 /* 11293 * Move read ptr past unfragmentable portion, we don't want this part 11294 * of the data in our fragments. 11295 */ 11296 mp->b_rptr += unfragmentable_len; 11297 11298 while (len != 0) { 11299 mlen = MIN(len, max_chunk); 11300 len -= mlen; 11301 if (len != 0) { 11302 /* Not last */ 11303 hmp0 = copyb(hmp); 11304 if (hmp0 == NULL) { 11305 freeb(hmp); 11306 freemsg(mp); 11307 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11308 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11309 return; 11310 } 11311 off_flags = IP6F_MORE_FRAG; 11312 } else { 11313 /* Last fragment */ 11314 hmp0 = hmp; 11315 hmp = NULL; 11316 off_flags = 0; 11317 } 11318 fip6h = (ip6_t *)(hmp0->b_rptr); 11319 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11320 11321 fip6h->ip6_plen = htons((uint16_t)(mlen + 11322 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11323 /* 11324 * Note: Optimization alert. 11325 * In IPv6 (and IPv4) protocol header, Fragment Offset 11326 * ("offset") is 13 bits wide and in 8-octet units. 11327 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11328 * it occupies the most significant 13 bits. 11329 * (least significant 13 bits in IPv4). 11330 * We do not do any shifts here. Not shifting is same effect 11331 * as taking offset value in octet units, dividing by 8 and 11332 * then shifting 3 bits left to line it up in place in proper 11333 * place protocol header. 
11334 */ 11335 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11336 11337 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11338 /* mp has already been freed by ip_carve_mp() */ 11339 if (hmp != NULL) 11340 freeb(hmp); 11341 freeb(hmp0); 11342 ip1dbg(("ip_carve_mp: failed\n")); 11343 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11344 return; 11345 } 11346 hmp0->b_cont = dmp; 11347 /* Get the priority marking, if any */ 11348 hmp0->b_band = dmp->b_band; 11349 UPDATE_OB_PKT_COUNT(ire); 11350 ire->ire_last_used_time = lbolt; 11351 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11352 caller, NULL); 11353 reachable = 0; /* No need to redo state machine in loop */ 11354 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11355 offset += mlen; 11356 } 11357 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11358 } 11359 11360 /* 11361 * Determine if the ill and multicast aspects of that packets 11362 * "matches" the conn. 11363 */ 11364 boolean_t 11365 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11366 zoneid_t zoneid) 11367 { 11368 ill_t *in_ill; 11369 boolean_t wantpacket = B_TRUE; 11370 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11371 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11372 11373 /* 11374 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11375 * unicast and multicast reception to conn_incoming_ill. 11376 * conn_wantpacket_v6 is called both for unicast and 11377 * multicast. 11378 * 11379 * 1) The unicast copy of the packet can come anywhere in 11380 * the ill group if it is part of the group. Thus, we 11381 * need to check to see whether the ill group matches 11382 * if in_ill is part of a group. 11383 * 11384 * 2) ip_rput does not suppress duplicate multicast packets. 
11385 * If there are two interfaces in a ill group and we have 11386 * 2 applications (conns) joined a multicast group G on 11387 * both the interfaces, ilm_lookup_ill filter in ip_rput 11388 * will give us two packets because we join G on both the 11389 * interfaces rather than nominating just one interface 11390 * for receiving multicast like broadcast above. So, 11391 * we have to call ilg_lookup_ill to filter out duplicate 11392 * copies, if ill is part of a group, to supress duplicates. 11393 */ 11394 in_ill = connp->conn_incoming_ill; 11395 if (in_ill != NULL) { 11396 mutex_enter(&connp->conn_lock); 11397 in_ill = connp->conn_incoming_ill; 11398 mutex_enter(&ill->ill_lock); 11399 /* 11400 * No IPMP, and the packet did not arrive on conn_incoming_ill 11401 * OR, IPMP in use and the packet arrived on an IPMP group 11402 * different from the conn_incoming_ill's IPMP group. 11403 * Reject the packet. 11404 */ 11405 if ((in_ill->ill_group == NULL && in_ill != ill) || 11406 (in_ill->ill_group != NULL && 11407 in_ill->ill_group != ill->ill_group)) { 11408 wantpacket = B_FALSE; 11409 } 11410 mutex_exit(&ill->ill_lock); 11411 mutex_exit(&connp->conn_lock); 11412 if (!wantpacket) 11413 return (B_FALSE); 11414 } 11415 11416 if (connp->conn_multi_router) 11417 return (B_TRUE); 11418 11419 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11420 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11421 /* 11422 * Unicast case: we match the conn only if it's in the specified 11423 * zone. 11424 */ 11425 return (connp->conn_zoneid == zoneid); 11426 } 11427 11428 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11429 connp->conn_zoneid == zoneid) { 11430 /* 11431 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11432 * disabled, therefore we don't dispatch the multicast packet to 11433 * the sending zone. 
11434 */ 11435 return (B_FALSE); 11436 } 11437 11438 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11439 connp->conn_zoneid != zoneid) { 11440 /* 11441 * Multicast packet on the loopback interface: we only match 11442 * conns who joined the group in the specified zone. 11443 */ 11444 return (B_FALSE); 11445 } 11446 11447 mutex_enter(&connp->conn_lock); 11448 wantpacket = 11449 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11450 mutex_exit(&connp->conn_lock); 11451 11452 return (wantpacket); 11453 } 11454 11455 11456 /* 11457 * Transmit a packet and update any NUD state based on the flags 11458 * XXX need to "recover" any ip6i_t when doing putq! 11459 * 11460 * NOTE : This function does not ire_refrele the ire passed in as the 11461 * argument. 11462 */ 11463 void 11464 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11465 int caller, ipsec_out_t *io) 11466 { 11467 mblk_t *mp1; 11468 nce_t *nce = ire->ire_nce; 11469 ill_t *ill; 11470 uint64_t delta; 11471 ip6_t *ip6h; 11472 queue_t *stq = ire->ire_stq; 11473 ire_t *ire1 = NULL; 11474 ire_t *save_ire = ire; 11475 boolean_t multirt_send = B_FALSE; 11476 mblk_t *next_mp = NULL; 11477 11478 ip6h = (ip6_t *)mp->b_rptr; 11479 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11480 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11481 ASSERT(nce != NULL); 11482 ASSERT(mp->b_datap->db_type == M_DATA); 11483 ASSERT(stq != NULL); 11484 11485 ill = ire_to_ill(ire); 11486 if (!ill) { 11487 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11488 freemsg(mp); 11489 return; 11490 } 11491 11492 /* 11493 * If a packet is to be sent out an interface that is a 6to4 11494 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11495 * destination, must be checked to have a 6to4 prefix 11496 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11497 * address configured on the sending interface. 
Otherwise, 11498 * the packet was delivered to this interface in error and the 11499 * packet must be dropped. 11500 */ 11501 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11502 ipif_t *ipif = ill->ill_ipif; 11503 11504 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11505 &ip6h->ip6_dst)) { 11506 if (ip_debug > 2) { 11507 /* ip1dbg */ 11508 pr_addr_dbg("ip_xmit_v6: attempting to " 11509 "send 6to4 addressed IPv6 " 11510 "destination (%s) out the wrong " 11511 "interface.\n", AF_INET6, 11512 &ip6h->ip6_dst); 11513 } 11514 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11515 freemsg(mp); 11516 return; 11517 } 11518 } 11519 11520 /* Flow-control check has been done in ip_wput_ire_v6 */ 11521 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11522 caller == IP_WSRV || canput(stq->q_next)) { 11523 uint32_t ill_index; 11524 11525 /* 11526 * In most cases, the emission loop below is entered only 11527 * once. Only in the case where the ire holds the 11528 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11529 * flagged ires in the bucket, and send the packet 11530 * through all crossed RTF_MULTIRT routes. 11531 */ 11532 if (ire->ire_flags & RTF_MULTIRT) { 11533 /* 11534 * Multirouting case. The bucket where ire is stored 11535 * probably holds other RTF_MULTIRT flagged ires 11536 * to the destination. In this call to ip_xmit_v6, 11537 * we attempt to send the packet through all 11538 * those ires. Thus, we first ensure that ire is the 11539 * first RTF_MULTIRT ire in the bucket, 11540 * before walking the ire list. 11541 */ 11542 ire_t *first_ire; 11543 irb_t *irb = ire->ire_bucket; 11544 ASSERT(irb != NULL); 11545 multirt_send = B_TRUE; 11546 11547 /* Make sure we do not omit any multiroute ire. 
*/ 11548 IRB_REFHOLD(irb); 11549 for (first_ire = irb->irb_ire; 11550 first_ire != NULL; 11551 first_ire = first_ire->ire_next) { 11552 if ((first_ire->ire_flags & RTF_MULTIRT) && 11553 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11554 &ire->ire_addr_v6)) && 11555 !(first_ire->ire_marks & 11556 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11557 break; 11558 } 11559 11560 if ((first_ire != NULL) && (first_ire != ire)) { 11561 IRE_REFHOLD(first_ire); 11562 /* ire will be released by the caller */ 11563 ire = first_ire; 11564 nce = ire->ire_nce; 11565 stq = ire->ire_stq; 11566 ill = ire_to_ill(ire); 11567 } 11568 IRB_REFRELE(irb); 11569 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11570 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11571 ILL_MDT_USABLE(ill)) { 11572 /* 11573 * This tcp connection was marked as MDT-capable, but 11574 * it has been turned off due changes in the interface. 11575 * Now that the interface support is back, turn it on 11576 * by notifying tcp. We don't directly modify tcp_mdt, 11577 * since we leave all the details to the tcp code that 11578 * knows better. 11579 */ 11580 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11581 11582 if (mdimp == NULL) { 11583 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11584 "connp %p (ENOMEM)\n", (void *)connp)); 11585 } else { 11586 CONN_INC_REF(connp); 11587 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11588 connp, SQTAG_TCP_INPUT_MCTL); 11589 } 11590 } 11591 11592 do { 11593 boolean_t qos_done = B_FALSE; 11594 11595 if (multirt_send) { 11596 irb_t *irb; 11597 /* 11598 * We are in a multiple send case, need to get 11599 * the next ire and make a duplicate of the 11600 * packet. ire1 holds here the next ire to 11601 * process in the bucket. If multirouting is 11602 * expected, any non-RTF_MULTIRT ire that has 11603 * the right destination address is ignored. 
11604 */ 11605 irb = ire->ire_bucket; 11606 ASSERT(irb != NULL); 11607 11608 IRB_REFHOLD(irb); 11609 for (ire1 = ire->ire_next; 11610 ire1 != NULL; 11611 ire1 = ire1->ire_next) { 11612 if (!(ire1->ire_flags & RTF_MULTIRT)) 11613 continue; 11614 if (!IN6_ARE_ADDR_EQUAL( 11615 &ire1->ire_addr_v6, 11616 &ire->ire_addr_v6)) 11617 continue; 11618 if (ire1->ire_marks & 11619 (IRE_MARK_CONDEMNED| 11620 IRE_MARK_HIDDEN)) 11621 continue; 11622 11623 /* Got one */ 11624 if (ire1 != save_ire) { 11625 IRE_REFHOLD(ire1); 11626 } 11627 break; 11628 } 11629 IRB_REFRELE(irb); 11630 11631 if (ire1 != NULL) { 11632 next_mp = copyb(mp); 11633 if ((next_mp == NULL) || 11634 ((mp->b_cont != NULL) && 11635 ((next_mp->b_cont = 11636 dupmsg(mp->b_cont)) == 11637 NULL))) { 11638 freemsg(next_mp); 11639 next_mp = NULL; 11640 ire_refrele(ire1); 11641 ire1 = NULL; 11642 } 11643 } 11644 11645 /* Last multiroute ire; don't loop anymore. */ 11646 if (ire1 == NULL) { 11647 multirt_send = B_FALSE; 11648 } 11649 } 11650 11651 ill_index = 11652 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11653 11654 /* 11655 * Check for fastpath, we need to hold nce_lock to 11656 * prevent fastpath update from chaining nce_fp_mp. 11657 */ 11658 mutex_enter(&nce->nce_lock); 11659 if ((mp1 = nce->nce_fp_mp) != NULL) { 11660 uint32_t hlen; 11661 uchar_t *rptr; 11662 11663 /* Initiate IPPF processing */ 11664 if (IP6_OUT_IPP(flags)) { 11665 /* 11666 * We have to release the nce lock since 11667 * IPPF components use 11668 * ill_lookup_on_ifindex(), 11669 * which takes the ill_g_lock and the 11670 * ill_lock locks. 
11671 */ 11672 mutex_exit(&nce->nce_lock); 11673 ip_process(IPP_LOCAL_OUT, &mp, 11674 ill_index); 11675 if (mp == NULL) { 11676 BUMP_MIB( 11677 ill->ill_ip6_mib, 11678 ipv6OutDiscards); 11679 if (next_mp != NULL) 11680 freemsg(next_mp); 11681 if (ire != save_ire) { 11682 ire_refrele(ire); 11683 } 11684 return; 11685 } 11686 mutex_enter(&nce->nce_lock); 11687 if ((mp1 = nce->nce_fp_mp) == NULL) { 11688 /* 11689 * Probably disappeared during 11690 * IPQoS processing. 11691 */ 11692 qos_done = B_TRUE; 11693 goto prepend_unitdata; 11694 } 11695 } 11696 hlen = MBLKL(mp1); 11697 rptr = mp->b_rptr - hlen; 11698 /* 11699 * make sure there is room for the fastpath 11700 * datalink header 11701 */ 11702 if (rptr < mp->b_datap->db_base) { 11703 mp1 = copyb(mp1); 11704 if (mp1 == NULL) { 11705 mutex_exit(&nce->nce_lock); 11706 BUMP_MIB(ill->ill_ip6_mib, 11707 ipv6OutDiscards); 11708 freemsg(mp); 11709 if (next_mp != NULL) 11710 freemsg(next_mp); 11711 if (ire != save_ire) { 11712 ire_refrele(ire); 11713 } 11714 return; 11715 } 11716 mp1->b_cont = mp; 11717 11718 /* Get the priority marking, if any */ 11719 mp1->b_band = mp->b_band; 11720 mp = mp1; 11721 } else { 11722 mp->b_rptr = rptr; 11723 /* 11724 * fastpath - pre-pend datalink 11725 * header 11726 */ 11727 bcopy(mp1->b_rptr, rptr, hlen); 11728 } 11729 11730 mutex_exit(&nce->nce_lock); 11731 11732 } else { 11733 prepend_unitdata: 11734 mutex_exit(&nce->nce_lock); 11735 mp1 = nce->nce_res_mp; 11736 if (mp1 == NULL) { 11737 ip1dbg(("ip_xmit_v6: No resolution " 11738 "block ire = %p\n", (void *)ire)); 11739 freemsg(mp); 11740 if (next_mp != NULL) 11741 freemsg(next_mp); 11742 if (ire != save_ire) { 11743 ire_refrele(ire); 11744 } 11745 return; 11746 } 11747 /* 11748 * Prepend the DL_UNITDATA_REQ. 
11749 */ 11750 mp1 = copyb(mp1); 11751 if (mp1 == NULL) { 11752 BUMP_MIB(ill->ill_ip6_mib, 11753 ipv6OutDiscards); 11754 freemsg(mp); 11755 if (next_mp != NULL) 11756 freemsg(next_mp); 11757 if (ire != save_ire) { 11758 ire_refrele(ire); 11759 } 11760 return; 11761 } 11762 mp1->b_cont = mp; 11763 mp = mp1; 11764 /* 11765 * Initiate IPPF processing, if it is 11766 * already done, bypass. 11767 */ 11768 if (!qos_done && IP6_OUT_IPP(flags)) { 11769 ip_process(IPP_LOCAL_OUT, &mp, 11770 ill_index); 11771 if (mp == NULL) { 11772 BUMP_MIB(ill->ill_ip6_mib, 11773 ipv6OutDiscards); 11774 if (next_mp != NULL) 11775 freemsg(next_mp); 11776 if (ire != save_ire) { 11777 ire_refrele(ire); 11778 } 11779 return; 11780 } 11781 } 11782 } 11783 11784 /* 11785 * Update ire counters; for save_ire, this has been 11786 * done by the caller. 11787 */ 11788 if (ire != save_ire) { 11789 UPDATE_OB_PKT_COUNT(ire); 11790 ire->ire_last_used_time = lbolt; 11791 } 11792 11793 /* 11794 * Send it down. XXX Do we want to flow control AH/ESP 11795 * packets that carry TCP payloads? We don't flow 11796 * control TCP packets, but we should also not 11797 * flow-control TCP packets that have been protected. 11798 * We don't have an easy way to find out if an AH/ESP 11799 * packet was originally TCP or not currently. 11800 */ 11801 if (io == NULL) { 11802 putnext(stq, mp); 11803 } else { 11804 /* 11805 * Safety Pup says: make sure this is 11806 * going to the right interface! 11807 */ 11808 if (io->ipsec_out_capab_ill_index != 11809 ill_index) { 11810 /* IPsec kstats: bump lose counter */ 11811 freemsg(mp1); 11812 } else { 11813 ipsec_hw_putnext(stq, mp); 11814 } 11815 } 11816 11817 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11818 if (ire != save_ire) { 11819 ire_refrele(ire); 11820 } 11821 if (multirt_send) { 11822 ASSERT(ire1 != NULL); 11823 /* 11824 * Proceed with the next RTF_MULTIRT 11825 * ire, also set up the send-to queue 11826 * accordingly. 
11827 */ 11828 ire = ire1; 11829 ire1 = NULL; 11830 stq = ire->ire_stq; 11831 nce = ire->ire_nce; 11832 ill = ire_to_ill(ire); 11833 mp = next_mp; 11834 next_mp = NULL; 11835 continue; 11836 } 11837 ASSERT(next_mp == NULL); 11838 ASSERT(ire1 == NULL); 11839 return; 11840 } 11841 11842 ASSERT(nce->nce_state != ND_INCOMPLETE); 11843 11844 /* 11845 * Check for upper layer advice 11846 */ 11847 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11848 /* 11849 * It should be o.k. to check the state without 11850 * a lock here, at most we lose an advice. 11851 */ 11852 nce->nce_last = TICK_TO_MSEC(lbolt64); 11853 if (nce->nce_state != ND_REACHABLE) { 11854 11855 mutex_enter(&nce->nce_lock); 11856 nce->nce_state = ND_REACHABLE; 11857 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 11858 mutex_exit(&nce->nce_lock); 11859 (void) untimeout(nce->nce_timeout_id); 11860 if (ip_debug > 2) { 11861 /* ip1dbg */ 11862 pr_addr_dbg("ip_xmit_v6: state" 11863 " for %s changed to" 11864 " REACHABLE\n", AF_INET6, 11865 &ire->ire_addr_v6); 11866 } 11867 } 11868 if (ire != save_ire) { 11869 ire_refrele(ire); 11870 } 11871 if (multirt_send) { 11872 ASSERT(ire1 != NULL); 11873 /* 11874 * Proceed with the next RTF_MULTIRT 11875 * ire, also set up the send-to queue 11876 * accordingly. 11877 */ 11878 ire = ire1; 11879 ire1 = NULL; 11880 stq = ire->ire_stq; 11881 nce = ire->ire_nce; 11882 ill = ire_to_ill(ire); 11883 mp = next_mp; 11884 next_mp = NULL; 11885 continue; 11886 } 11887 ASSERT(next_mp == NULL); 11888 ASSERT(ire1 == NULL); 11889 return; 11890 } 11891 11892 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 11893 ip1dbg(("ip_xmit_v6: delta = %" PRId64 11894 " ill_reachable_time = %d \n", delta, 11895 ill->ill_reachable_time)); 11896 if (delta > (uint64_t)ill->ill_reachable_time) { 11897 nce = ire->ire_nce; 11898 mutex_enter(&nce->nce_lock); 11899 switch (nce->nce_state) { 11900 case ND_REACHABLE: 11901 case ND_STALE: 11902 /* 11903 * ND_REACHABLE is identical to 11904 * ND_STALE in this specific case. 
If 11905 * reachable time has expired for this 11906 * neighbor (delta is greater than 11907 * reachable time), conceptually, the 11908 * neighbor cache is no longer in 11909 * REACHABLE state, but already in 11910 * STALE state. So the correct 11911 * transition here is to ND_DELAY. 11912 */ 11913 nce->nce_state = ND_DELAY; 11914 mutex_exit(&nce->nce_lock); 11915 NDP_RESTART_TIMER(nce, 11916 delay_first_probe_time); 11917 if (ip_debug > 3) { 11918 /* ip2dbg */ 11919 pr_addr_dbg("ip_xmit_v6: state" 11920 " for %s changed to" 11921 " DELAY\n", AF_INET6, 11922 &ire->ire_addr_v6); 11923 } 11924 break; 11925 case ND_DELAY: 11926 case ND_PROBE: 11927 mutex_exit(&nce->nce_lock); 11928 /* Timers have already started */ 11929 break; 11930 case ND_UNREACHABLE: 11931 /* 11932 * ndp timer has detected that this nce 11933 * is unreachable and initiated deleting 11934 * this nce and all its associated IREs. 11935 * This is a race where we found the 11936 * ire before it was deleted and have 11937 * just sent out a packet using this 11938 * unreachable nce. 11939 */ 11940 mutex_exit(&nce->nce_lock); 11941 break; 11942 default: 11943 ASSERT(0); 11944 } 11945 } 11946 11947 if (multirt_send) { 11948 ASSERT(ire1 != NULL); 11949 /* 11950 * Proceed with the next RTF_MULTIRT ire, 11951 * Also set up the send-to queue accordingly. 11952 */ 11953 if (ire != save_ire) { 11954 ire_refrele(ire); 11955 } 11956 ire = ire1; 11957 ire1 = NULL; 11958 stq = ire->ire_stq; 11959 nce = ire->ire_nce; 11960 ill = ire_to_ill(ire); 11961 mp = next_mp; 11962 next_mp = NULL; 11963 } 11964 } while (multirt_send); 11965 /* 11966 * In the multirouting case, release the last ire used for 11967 * emission. save_ire will be released by the caller. 11968 */ 11969 if (ire != save_ire) { 11970 ire_refrele(ire); 11971 } 11972 } else { 11973 /* 11974 * Queue packet if we have an conn to give back pressure. 
11975 * We can't queue packets intended for hardware acceleration 11976 * since we've tossed that state already. If the packet is 11977 * being fed back from ire_send_v6, we don't know the 11978 * position in the queue to enqueue the packet and we discard 11979 * the packet. 11980 */ 11981 if (ip_output_queue && (connp != NULL) && (io == NULL) && 11982 (caller != IRE_SEND)) { 11983 if (caller == IP_WSRV) { 11984 connp->conn_did_putbq = 1; 11985 (void) putbq(connp->conn_wq, mp); 11986 conn_drain_insert(connp); 11987 /* 11988 * caller == IP_WSRV implies we are 11989 * the service thread, and the 11990 * queue is already noenabled. 11991 * The check for canput and 11992 * the putbq is not atomic. 11993 * So we need to check again. 11994 */ 11995 if (canput(stq->q_next)) 11996 connp->conn_did_putbq = 0; 11997 } else { 11998 (void) putq(connp->conn_wq, mp); 11999 } 12000 return; 12001 } 12002 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12003 freemsg(mp); 12004 return; 12005 } 12006 } 12007 12008 /* 12009 * pr_addr_dbg function provides the needed buffer space to call 12010 * inet_ntop() function's 3rd argument. This function should be 12011 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12012 * stack buffer space in it's own stack frame. This function uses 12013 * a buffer from it's own stack and prints the information. 12014 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12015 * 12016 * Note: This function can call inet_ntop() once. 12017 */ 12018 void 12019 pr_addr_dbg(char *fmt1, int af, const void *addr) 12020 { 12021 char buf[INET6_ADDRSTRLEN]; 12022 12023 if (fmt1 == NULL) { 12024 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12025 return; 12026 } 12027 12028 /* 12029 * This does not compare debug level and just prints 12030 * out. Thus it is the responsibility of the caller 12031 * to check the appropriate debug-level before calling 12032 * this function. 
12033 */ 12034 if (ip_debug > 0) { 12035 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12036 } 12037 12038 12039 } 12040 12041 12042 /* 12043 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12044 * if needed and extension headers) that will be needed based on the 12045 * ip6_pkt_t structure passed by the caller. 12046 * 12047 * The returned length does not include the length of the upper level 12048 * protocol (ULP) header. 12049 */ 12050 int 12051 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12052 { 12053 int len; 12054 12055 len = IPV6_HDR_LEN; 12056 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12057 len += sizeof (ip6i_t); 12058 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12059 ASSERT(ipp->ipp_hopoptslen != 0); 12060 len += ipp->ipp_hopoptslen; 12061 } 12062 if (ipp->ipp_fields & IPPF_RTHDR) { 12063 ASSERT(ipp->ipp_rthdrlen != 0); 12064 len += ipp->ipp_rthdrlen; 12065 } 12066 /* 12067 * En-route destination options 12068 * Only do them if there's a routing header as well 12069 */ 12070 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12071 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12072 ASSERT(ipp->ipp_rtdstoptslen != 0); 12073 len += ipp->ipp_rtdstoptslen; 12074 } 12075 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12076 ASSERT(ipp->ipp_dstoptslen != 0); 12077 len += ipp->ipp_dstoptslen; 12078 } 12079 return (len); 12080 } 12081 12082 /* 12083 * All-purpose routine to build a header chain of an IPv6 header 12084 * followed by any required extension headers and a proto header, 12085 * preceeded (where necessary) by an ip6i_t private header. 12086 * 12087 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12088 * will be filled in appropriately. 12089 * Thus the caller must fill in the rest of the IPv6 header, such as 12090 * traffic class/flowid, source address (if not set here), hoplimit (if not 12091 * set here) and destination address. 12092 * 12093 * The extension headers and ip6i_t header will all be fully filled in. 
12094 */ 12095 void 12096 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12097 ip6_pkt_t *ipp, uint8_t protocol) 12098 { 12099 uint8_t *nxthdr_ptr; 12100 uint8_t *cp; 12101 ip6i_t *ip6i; 12102 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12103 12104 /* 12105 * If sending private ip6i_t header down (checksum info, nexthop, 12106 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12107 * then fill it in. (The checksum info will be filled in by icmp). 12108 */ 12109 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12110 ip6i = (ip6i_t *)ip6h; 12111 ip6h = (ip6_t *)&ip6i[1]; 12112 12113 ip6i->ip6i_flags = 0; 12114 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12115 if (ipp->ipp_fields & IPPF_IFINDEX || 12116 ipp->ipp_fields & IPPF_SCOPE_ID) { 12117 ASSERT(ipp->ipp_ifindex != 0); 12118 ip6i->ip6i_flags |= IP6I_IFINDEX; 12119 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12120 } 12121 if (ipp->ipp_fields & IPPF_ADDR) { 12122 /* 12123 * Enable per-packet source address verification if 12124 * IPV6_PKTINFO specified the source address. 12125 * ip6_src is set in the transport's _wput function. 12126 */ 12127 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12128 &ipp->ipp_addr)); 12129 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12130 } 12131 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12132 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12133 /* 12134 * We need to set this flag so that IP doesn't 12135 * rewrite the IPv6 header's hoplimit with the 12136 * current default value. 
12137 */ 12138 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12139 } 12140 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12141 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12142 &ipp->ipp_nexthop)); 12143 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12144 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12145 } 12146 /* 12147 * tell IP this is an ip6i_t private header 12148 */ 12149 ip6i->ip6i_nxt = IPPROTO_RAW; 12150 } 12151 /* Initialize IPv6 header */ 12152 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12153 if (ipp->ipp_fields & IPPF_TCLASS) { 12154 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12155 (ipp->ipp_tclass << 20); 12156 } 12157 if (ipp->ipp_fields & IPPF_ADDR) 12158 ip6h->ip6_src = ipp->ipp_addr; 12159 12160 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12161 cp = (uint8_t *)&ip6h[1]; 12162 /* 12163 * Here's where we have to start stringing together 12164 * any extension headers in the right order: 12165 * Hop-by-hop, destination, routing, and final destination opts. 12166 */ 12167 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12168 /* Hop-by-hop options */ 12169 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12170 12171 *nxthdr_ptr = IPPROTO_HOPOPTS; 12172 nxthdr_ptr = &hbh->ip6h_nxt; 12173 12174 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12175 cp += ipp->ipp_hopoptslen; 12176 } 12177 /* 12178 * En-route destination options 12179 * Only do them if there's a routing header as well 12180 */ 12181 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12182 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12183 ip6_dest_t *dst = (ip6_dest_t *)cp; 12184 12185 *nxthdr_ptr = IPPROTO_DSTOPTS; 12186 nxthdr_ptr = &dst->ip6d_nxt; 12187 12188 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12189 cp += ipp->ipp_rtdstoptslen; 12190 } 12191 /* 12192 * Routing header next 12193 */ 12194 if (ipp->ipp_fields & IPPF_RTHDR) { 12195 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12196 12197 *nxthdr_ptr = IPPROTO_ROUTING; 12198 nxthdr_ptr = &rt->ip6r_nxt; 12199 12200 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12201 cp += ipp->ipp_rthdrlen; 12202 } 
12203 /* 12204 * Do ultimate destination options 12205 */ 12206 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12207 ip6_dest_t *dest = (ip6_dest_t *)cp; 12208 12209 *nxthdr_ptr = IPPROTO_DSTOPTS; 12210 nxthdr_ptr = &dest->ip6d_nxt; 12211 12212 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12213 cp += ipp->ipp_dstoptslen; 12214 } 12215 /* 12216 * Now set the last header pointer to the proto passed in 12217 */ 12218 *nxthdr_ptr = protocol; 12219 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12220 } 12221 12222 /* 12223 * Return a pointer to the routing header extension header 12224 * in the IPv6 header(s) chain passed in. 12225 * If none found, return NULL 12226 * Assumes that all extension headers are in same mblk as the v6 header 12227 */ 12228 ip6_rthdr_t * 12229 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12230 { 12231 ip6_dest_t *desthdr; 12232 ip6_frag_t *fraghdr; 12233 uint_t hdrlen; 12234 uint8_t nexthdr; 12235 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12236 12237 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12238 return ((ip6_rthdr_t *)ptr); 12239 12240 /* 12241 * The routing header will precede all extension headers 12242 * other than the hop-by-hop and destination options 12243 * extension headers, so if we see anything other than those, 12244 * we're done and didn't find it. 12245 * We could see a destination options header alone but no 12246 * routing header, in which case we'll return NULL as soon as 12247 * we see anything after that. 12248 * Hop-by-hop and destination option headers are identical, 12249 * so we can use either one we want as a template. 12250 */ 12251 nexthdr = ip6h->ip6_nxt; 12252 while (ptr < endptr) { 12253 /* Is there enough left for len + nexthdr? 
*/ 12254 if (ptr + MIN_EHDR_LEN > endptr) 12255 return (NULL); 12256 12257 switch (nexthdr) { 12258 case IPPROTO_HOPOPTS: 12259 case IPPROTO_DSTOPTS: 12260 /* Assumes the headers are identical for hbh and dst */ 12261 desthdr = (ip6_dest_t *)ptr; 12262 hdrlen = 8 * (desthdr->ip6d_len + 1); 12263 nexthdr = desthdr->ip6d_nxt; 12264 break; 12265 12266 case IPPROTO_ROUTING: 12267 return ((ip6_rthdr_t *)ptr); 12268 12269 case IPPROTO_FRAGMENT: 12270 fraghdr = (ip6_frag_t *)ptr; 12271 hdrlen = sizeof (ip6_frag_t); 12272 nexthdr = fraghdr->ip6f_nxt; 12273 break; 12274 12275 default: 12276 return (NULL); 12277 } 12278 ptr += hdrlen; 12279 } 12280 return (NULL); 12281 } 12282 12283 /* 12284 * Called for source-routed packets originating on this node. 12285 * Manipulates the original routing header by moving every entry up 12286 * one slot, placing the first entry in the v6 header's v6_dst field, 12287 * and placing the ultimate destination in the routing header's last 12288 * slot. 12289 * 12290 * Returns the checksum diference between the ultimate destination 12291 * (last hop in the routing header when the packet is sent) and 12292 * the first hop (ip6_dst when the packet is sent) 12293 */ 12294 uint32_t 12295 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12296 { 12297 uint_t numaddr; 12298 uint_t i; 12299 in6_addr_t *addrptr; 12300 in6_addr_t tmp; 12301 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12302 uint32_t cksm; 12303 uint32_t addrsum = 0; 12304 uint16_t *ptr; 12305 12306 /* 12307 * Perform any processing needed for source routing. 12308 * We know that all extension headers will be in the same mblk 12309 * as the IPv6 header. 12310 */ 12311 12312 /* 12313 * If no segments left in header, or the header length field is zero, 12314 * don't move hop addresses around; 12315 * Checksum difference is zero. 
12316 */ 12317 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12318 return (0); 12319 12320 ptr = (uint16_t *)&ip6h->ip6_dst; 12321 cksm = 0; 12322 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12323 cksm += ptr[i]; 12324 } 12325 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12326 12327 /* 12328 * Here's where the fun begins - we have to 12329 * move all addresses up one spot, take the 12330 * first hop and make it our first ip6_dst, 12331 * and place the ultimate destination in the 12332 * newly-opened last slot. 12333 */ 12334 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12335 numaddr = rthdr->ip6r0_len / 2; 12336 tmp = *addrptr; 12337 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12338 *addrptr = addrptr[1]; 12339 } 12340 *addrptr = ip6h->ip6_dst; 12341 ip6h->ip6_dst = tmp; 12342 12343 /* 12344 * From the checksummed ultimate destination subtract the checksummed 12345 * current ip6_dst (the first hop address). Return that number. 12346 * (In the v4 case, the second part of this is done in each routine 12347 * that calls ip_massage_options(). We do it all in this one place 12348 * for v6). 12349 */ 12350 ptr = (uint16_t *)&ip6h->ip6_dst; 12351 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12352 addrsum += ptr[i]; 12353 } 12354 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12355 if ((int)cksm < 0) 12356 cksm--; 12357 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12358 12359 return (cksm); 12360 } 12361 12362 /* 12363 * See if the upper-level protocol indicated by 'proto' will be able 12364 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12365 * ICMP6_PACKET_TOO_BIG (IPv6). 12366 */ 12367 static boolean_t 12368 ip_ulp_cando_pkt2big(int proto) 12369 { 12370 /* 12371 * For now, only TCP can handle this. 12372 * Tunnels may be able to also, but since tun isn't working over 12373 * IPv6 yet, don't worry about it for now. 
12374 */ 12375 return (proto == IPPROTO_TCP); 12376 } 12377 12378 12379 /* 12380 * Propagate a multicast group membership operation (join/leave) (*fn) on 12381 * all interfaces crossed by the related multirt routes. 12382 * The call is considered successful if the operation succeeds 12383 * on at least one interface. 12384 * The function is called if the destination address in the packet to send 12385 * is multirouted. 12386 */ 12387 int 12388 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12389 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12390 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12391 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12392 { 12393 ire_t *ire_gw; 12394 irb_t *irb; 12395 int index, error = 0; 12396 opt_restart_t *or; 12397 12398 irb = ire->ire_bucket; 12399 ASSERT(irb != NULL); 12400 12401 ASSERT(DB_TYPE(first_mp) == M_CTL); 12402 or = (opt_restart_t *)first_mp->b_rptr; 12403 12404 IRB_REFHOLD(irb); 12405 for (; ire != NULL; ire = ire->ire_next) { 12406 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12407 continue; 12408 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12409 continue; 12410 12411 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12412 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, 12413 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12414 /* No resolver exists for the gateway; skip this ire. */ 12415 if (ire_gw == NULL) 12416 continue; 12417 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12418 /* 12419 * A resolver exists: we can get the interface on which we have 12420 * to apply the operation. 
12421 */ 12422 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12423 first_mp); 12424 if (error == 0) 12425 or->or_private = CGTP_MCAST_SUCCESS; 12426 12427 if (ip_debug > 0) { 12428 ulong_t off; 12429 char *ksym; 12430 12431 ksym = kobj_getsymname((uintptr_t)fn, &off); 12432 ip2dbg(("ip_multirt_apply_membership_v6: " 12433 "called %s, multirt group 0x%08x via itf 0x%08x, " 12434 "error %d [success %u]\n", 12435 ksym ? ksym : "?", 12436 ntohl(V4_PART_OF_V6((*v6grp))), 12437 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12438 error, or->or_private)); 12439 } 12440 12441 ire_refrele(ire_gw); 12442 if (error == EINPROGRESS) { 12443 IRB_REFRELE(irb); 12444 return (error); 12445 } 12446 } 12447 IRB_REFRELE(irb); 12448 /* 12449 * Consider the call as successful if we succeeded on at least 12450 * one interface. Otherwise, return the last encountered error. 12451 */ 12452 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12453 } 12454 12455 void 12456 ip6_kstat_init(void) 12457 { 12458 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12459 "net", KSTAT_TYPE_NAMED, 12460 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12461 KSTAT_FLAG_VIRTUAL)) != NULL) { 12462 ip6_kstat->ks_data = &ip6_statistics; 12463 kstat_install(ip6_kstat); 12464 } 12465 } 12466 12467 /* 12468 * The following two functions set and get the value for the 12469 * IPV6_SRC_PREFERENCES socket option. 12470 */ 12471 int 12472 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12473 { 12474 /* 12475 * We only support preferences that are covered by 12476 * IPV6_PREFER_SRC_MASK. 12477 */ 12478 if (prefs & ~IPV6_PREFER_SRC_MASK) 12479 return (EINVAL); 12480 12481 /* 12482 * Look for conflicting preferences or default preferences. If 12483 * both bits of a related pair are clear, the application wants the 12484 * system's default value for that pair. Both bits in a pair can't 12485 * be set. 
12486 */ 12487 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12488 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12489 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12490 IPV6_PREFER_SRC_MIPMASK) { 12491 return (EINVAL); 12492 } 12493 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12494 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12495 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12496 IPV6_PREFER_SRC_TMPMASK) { 12497 return (EINVAL); 12498 } 12499 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12500 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12501 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12502 IPV6_PREFER_SRC_CGAMASK) { 12503 return (EINVAL); 12504 } 12505 12506 connp->conn_src_preferences = prefs; 12507 return (0); 12508 } 12509 12510 size_t 12511 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12512 { 12513 *val = connp->conn_src_preferences; 12514 return (sizeof (connp->conn_src_preferences)); 12515 } 12516 12517 int 12518 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12519 { 12520 ill_t *ill; 12521 ire_t *ire; 12522 int error; 12523 12524 /* 12525 * Verify the source address and ifindex. Privileged users can use 12526 * any source address. For ancillary data the source address is 12527 * checked in ip_wput_v6. 12528 */ 12529 if (pkti->ipi6_ifindex != 0) { 12530 ASSERT(connp != NULL); 12531 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12532 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12533 if (ill == NULL) { 12534 /* 12535 * We just want to know if the interface exists, we 12536 * don't really care about the ill pointer itself. 
12537 */ 12538 if (error != EINPROGRESS) 12539 return (error); 12540 error = 0; /* Ensure we don't use it below */ 12541 } else { 12542 ill_refrele(ill); 12543 } 12544 } 12545 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12546 secpolicy_net_rawaccess(cr) != 0) { 12547 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12548 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12549 connp->conn_zoneid, MATCH_IRE_TYPE); 12550 if (ire != NULL) 12551 ire_refrele(ire); 12552 else 12553 return (ENXIO); 12554 } 12555 return (0); 12556 } 12557 12558 /* 12559 * Get the size of the IP options (including the IP headers size) 12560 * without including the AH header's size. If till_ah is B_FALSE, 12561 * and if AH header is present, dest options beyond AH header will 12562 * also be included in the returned size. 12563 */ 12564 int 12565 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12566 { 12567 ip6_t *ip6h; 12568 uint8_t nexthdr; 12569 uint8_t *whereptr; 12570 ip6_hbh_t *hbhhdr; 12571 ip6_dest_t *dsthdr; 12572 ip6_rthdr_t *rthdr; 12573 int ehdrlen; 12574 int size; 12575 ah_t *ah; 12576 12577 ip6h = (ip6_t *)mp->b_rptr; 12578 size = IPV6_HDR_LEN; 12579 nexthdr = ip6h->ip6_nxt; 12580 whereptr = (uint8_t *)&ip6h[1]; 12581 for (;;) { 12582 /* Assume IP has already stripped it */ 12583 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12584 switch (nexthdr) { 12585 case IPPROTO_HOPOPTS: 12586 hbhhdr = (ip6_hbh_t *)whereptr; 12587 nexthdr = hbhhdr->ip6h_nxt; 12588 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12589 break; 12590 case IPPROTO_DSTOPTS: 12591 dsthdr = (ip6_dest_t *)whereptr; 12592 nexthdr = dsthdr->ip6d_nxt; 12593 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12594 break; 12595 case IPPROTO_ROUTING: 12596 rthdr = (ip6_rthdr_t *)whereptr; 12597 nexthdr = rthdr->ip6r_nxt; 12598 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12599 break; 12600 default : 12601 if (till_ah) { 12602 ASSERT(nexthdr == IPPROTO_AH); 12603 return (size); 12604 } 12605 /* 12606 * If we don't have a AH header 
to traverse, 12607 * return now. This happens normally for 12608 * outbound datagrams where we have not inserted 12609 * the AH header. 12610 */ 12611 if (nexthdr != IPPROTO_AH) { 12612 return (size); 12613 } 12614 12615 /* 12616 * We don't include the AH header's size 12617 * to be symmetrical with other cases where 12618 * we either don't have a AH header (outbound) 12619 * or peek into the AH header yet (inbound and 12620 * not pulled up yet). 12621 */ 12622 ah = (ah_t *)whereptr; 12623 nexthdr = ah->ah_nexthdr; 12624 ehdrlen = (ah->ah_length << 2) + 8; 12625 12626 if (nexthdr == IPPROTO_DSTOPTS) { 12627 if (whereptr + ehdrlen >= mp->b_wptr) { 12628 /* 12629 * The destination options header 12630 * is not part of the first mblk. 12631 */ 12632 whereptr = mp->b_cont->b_rptr; 12633 } else { 12634 whereptr += ehdrlen; 12635 } 12636 12637 dsthdr = (ip6_dest_t *)whereptr; 12638 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12639 size += ehdrlen; 12640 } 12641 return (size); 12642 } 12643 whereptr += ehdrlen; 12644 size += ehdrlen; 12645 } 12646 } 12647