1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/zone.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/iphada.h> 56 #include <sys/policy.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/udp_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 #include <rpc/pmap_prot.h> 104 105 extern squeue_func_t ip_input_proc; 106 107 /* 108 * IP statistics. 
109 */ 110 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 111 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 112 113 typedef struct ip6_stat { 114 kstat_named_t ip6_udp_fast_path; 115 kstat_named_t ip6_udp_slow_path; 116 kstat_named_t ip6_udp_fannorm; 117 kstat_named_t ip6_udp_fanmb; 118 kstat_named_t ip6_out_sw_cksum; 119 kstat_named_t ip6_in_sw_cksum; 120 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 121 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 122 kstat_named_t ip6_tcp_in_sw_cksum_err; 123 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 124 kstat_named_t ip6_udp_in_full_hw_cksum_err; 125 kstat_named_t ip6_udp_in_part_hw_cksum_err; 126 kstat_named_t ip6_udp_in_sw_cksum_err; 127 kstat_named_t ip6_udp_out_sw_cksum_bytes; 128 kstat_named_t ip6_frag_mdt_pkt_out; 129 kstat_named_t ip6_frag_mdt_discarded; 130 kstat_named_t ip6_frag_mdt_allocfail; 131 kstat_named_t ip6_frag_mdt_addpdescfail; 132 kstat_named_t ip6_frag_mdt_allocd; 133 } ip6_stat_t; 134 135 static ip6_stat_t ip6_statistics = { 136 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 137 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 138 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 139 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 140 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 141 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 142 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 143 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 144 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 145 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 146 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 147 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 148 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 149 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 150 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 151 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 152 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 153 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 154 { 
"ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 155 }; 156 157 static kstat_t *ip6_kstat; 158 159 /* 160 * Naming conventions: 161 * These rules should be judiciously applied 162 * if there is a need to identify something as IPv6 versus IPv4 163 * IPv6 funcions will end with _v6 in the ip module. 164 * IPv6 funcions will end with _ipv6 in the transport modules. 165 * IPv6 macros: 166 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 167 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 168 * And then there are ..V4_PART_OF_V6. 169 * The intent is that macros in the ip module end with _V6. 170 * IPv6 global variables will start with ipv6_ 171 * IPv6 structures will start with ipv6 172 * IPv6 defined constants should start with IPV6_ 173 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 174 */ 175 176 /* 177 * IPv6 mibs when the interface (ill) is not known. 178 * When the ill is known the per-interface mib in the ill is used. 179 */ 180 mib2_ipv6IfStatsEntry_t ip6_mib; 181 mib2_ipv6IfIcmpEntry_t icmp6_mib; 182 183 /* 184 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 185 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 186 * from IANA. This mechanism will remain in effect until an official 187 * number is obtained. 
188 */ 189 uchar_t ip6opt_ls; 190 191 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 192 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 193 194 const in6_addr_t ipv6_all_ones = 195 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 196 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 197 198 #ifdef _BIG_ENDIAN 199 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 200 #else /* _BIG_ENDIAN */ 201 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 202 #endif /* _BIG_ENDIAN */ 203 204 #ifdef _BIG_ENDIAN 205 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 206 #else /* _BIG_ENDIAN */ 207 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 208 #endif /* _BIG_ENDIAN */ 209 210 #ifdef _BIG_ENDIAN 211 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 212 #else /* _BIG_ENDIAN */ 213 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 214 #endif /* _BIG_ENDIAN */ 215 216 #ifdef _BIG_ENDIAN 217 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 218 #else /* _BIG_ENDIAN */ 219 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 220 #endif /* _BIG_ENDIAN */ 221 222 #ifdef _BIG_ENDIAN 223 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 224 #else /* _BIG_ENDIAN */ 225 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 226 #endif /* _BIG_ENDIAN */ 227 228 #ifdef _BIG_ENDIAN 229 const in6_addr_t ipv6_solicited_node_mcast = 230 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 231 #else /* _BIG_ENDIAN */ 232 const in6_addr_t ipv6_solicited_node_mcast = 233 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 234 #endif /* _BIG_ENDIAN */ 235 236 /* 237 * Used by icmp_send_redirect_v6 for picking random src. 
238 */ 239 uint_t icmp_redirect_v6_src_index; 240 241 /* Leave room for ip_newroute to tack on the src and target addresses */ 242 #define OK_RESOLVER_MP_V6(mp) \ 243 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 244 245 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 246 boolean_t, zoneid_t); 247 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 248 const in6_addr_t *, boolean_t); 249 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 250 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 251 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 252 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 253 boolean_t, boolean_t, boolean_t, boolean_t); 254 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 255 iulp_t *); 256 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 257 uint16_t, boolean_t, boolean_t, boolean_t); 258 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 259 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 260 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 261 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 262 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 263 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 264 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 265 uint8_t *, uint_t, uint8_t); 266 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 267 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 268 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 269 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 270 conn_t *, int, int, int); 271 static boolean_t ip_ulp_cando_pkt2big(int); 272 273 static void ip_rput_v6(queue_t *, mblk_t *); 274 static void ip_wput_v6(queue_t *, mblk_t *); 275 276 /* 277 * A template for an IPv6 AR_ENTRY_QUERY 278 */ 279 static areq_t ipv6_areq_template = { 280 
AR_ENTRY_QUERY, /* cmd */ 281 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 282 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 283 IP6_DL_SAP, /* protocol, from arps perspective */ 284 sizeof (areq_t), /* target addr offset */ 285 IPV6_ADDR_LEN, /* target addr_length */ 286 0, /* flags */ 287 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 288 IPV6_ADDR_LEN, /* sender addr length */ 289 6, /* xmit_count */ 290 1000, /* (re)xmit_interval in milliseconds */ 291 4 /* max # of requests to buffer */ 292 /* anything else filled in by the code */ 293 }; 294 295 struct qinit rinit_ipv6 = { 296 (pfi_t)ip_rput_v6, 297 NULL, 298 ip_open, 299 ip_close, 300 NULL, 301 &ip_mod_info 302 }; 303 304 struct qinit winit_ipv6 = { 305 (pfi_t)ip_wput_v6, 306 (pfi_t)ip_wsrv, 307 ip_open, 308 ip_close, 309 NULL, 310 &ip_mod_info 311 }; 312 313 /* 314 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 315 * The message has already been checksummed and if needed, 316 * a copy has been made to be sent any interested ICMP client (conn) 317 * Note that this is different than icmp_inbound() which does the fanout 318 * to conn's as well as local processing of the ICMP packets. 319 * 320 * All error messages are passed to the matching transport stream. 321 * 322 * Zones notes: 323 * The packet is only processed in the context of the specified zone: typically 324 * only this zone will reply to an echo request. This means that the caller must 325 * call icmp_inbound_v6() for each relevant zone. 
326 */ 327 static void 328 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 329 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 330 { 331 icmp6_t *icmp6; 332 ip6_t *ip6h; 333 boolean_t interested; 334 ip6i_t *ip6i; 335 in6_addr_t origsrc; 336 ire_t *ire; 337 mblk_t *first_mp; 338 ipsec_in_t *ii; 339 340 ASSERT(ill != NULL); 341 first_mp = mp; 342 if (mctl_present) { 343 mp = first_mp->b_cont; 344 ASSERT(mp != NULL); 345 346 ii = (ipsec_in_t *)first_mp->b_rptr; 347 ASSERT(ii->ipsec_in_type == IPSEC_IN); 348 } 349 350 ip6h = (ip6_t *)mp->b_rptr; 351 352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 353 354 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 355 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 356 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 358 freemsg(first_mp); 359 return; 360 } 361 ip6h = (ip6_t *)mp->b_rptr; 362 } 363 if (icmp_accept_clear_messages == 0) { 364 first_mp = ipsec_check_global_policy(first_mp, NULL, 365 NULL, ip6h, mctl_present); 366 if (first_mp == NULL) 367 return; 368 } 369 370 /* 371 * On a labeled system, we have to check whether the zone itself is 372 * permitted to receive raw traffic. 373 */ 374 if (is_system_labeled()) { 375 if (zoneid == ALL_ZONES) 376 zoneid = tsol_packet_to_zoneid(mp); 377 if (!tsol_can_accept_raw(mp, B_FALSE)) { 378 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 379 zoneid)); 380 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 381 freemsg(first_mp); 382 return; 383 } 384 } 385 386 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 387 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 388 icmp6->icmp6_code)); 389 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 390 391 /* Initiate IPPF processing here */ 392 if (IP6_IN_IPP(flags)) { 393 394 /* 395 * If the ifindex changes due to SIOCSLIFINDEX 396 * packet may return to IP on the wrong ill. 
397 */ 398 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 399 if (mp == NULL) { 400 if (mctl_present) { 401 freeb(first_mp); 402 } 403 return; 404 } 405 } 406 407 switch (icmp6->icmp6_type) { 408 case ICMP6_DST_UNREACH: 409 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 410 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 412 break; 413 414 case ICMP6_TIME_EXCEEDED: 415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 416 break; 417 418 case ICMP6_PARAM_PROB: 419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 420 break; 421 422 case ICMP6_PACKET_TOO_BIG: 423 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 424 zoneid); 425 return; 426 case ICMP6_ECHO_REQUEST: 427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 428 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 429 !ipv6_resp_echo_mcast) 430 break; 431 432 /* 433 * We must have exclusive use of the mblk to convert it to 434 * a response. 435 * If not, we copy it. 436 */ 437 if (mp->b_datap->db_ref > 1) { 438 mblk_t *mp1; 439 440 mp1 = copymsg(mp); 441 freemsg(mp); 442 if (mp1 == NULL) { 443 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 444 if (mctl_present) 445 freeb(first_mp); 446 return; 447 } 448 mp = mp1; 449 ip6h = (ip6_t *)mp->b_rptr; 450 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 451 if (mctl_present) 452 first_mp->b_cont = mp; 453 else 454 first_mp = mp; 455 } 456 457 /* 458 * Turn the echo into an echo reply. 459 * Remove any extension headers (do not reverse a source route) 460 * and clear the flow id (keep traffic class for now). 
461 */ 462 if (hdr_length != IPV6_HDR_LEN) { 463 int i; 464 465 for (i = 0; i < IPV6_HDR_LEN; i++) 466 mp->b_rptr[hdr_length - i - 1] = 467 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 468 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 469 ip6h = (ip6_t *)mp->b_rptr; 470 ip6h->ip6_nxt = IPPROTO_ICMPV6; 471 hdr_length = IPV6_HDR_LEN; 472 } 473 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 474 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 475 476 ip6h->ip6_plen = 477 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 478 origsrc = ip6h->ip6_src; 479 /* 480 * Reverse the source and destination addresses. 481 * If the return address is a multicast, zero out the source 482 * (ip_wput_v6 will set an address). 483 */ 484 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 485 ip6h->ip6_src = ipv6_all_zeros; 486 ip6h->ip6_dst = origsrc; 487 } else { 488 ip6h->ip6_src = ip6h->ip6_dst; 489 ip6h->ip6_dst = origsrc; 490 } 491 492 /* set the hop limit */ 493 ip6h->ip6_hops = ipv6_def_hops; 494 495 /* 496 * Prepare for checksum by putting icmp length in the icmp 497 * checksum field. The checksum is calculated in ip_wput_v6. 498 */ 499 icmp6->icmp6_cksum = ip6h->ip6_plen; 500 /* 501 * ICMP echo replies should go out on the same interface 502 * the request came on as probes used by in.mpathd for 503 * detecting NIC failures are ECHO packets. We turn-off load 504 * spreading by allocating a ip6i and setting ip6i_attach_if 505 * to B_TRUE which is handled both by ip_wput_v6 and 506 * ip_newroute_v6. If we don't turnoff load spreading, 507 * the packets might get dropped if there are no 508 * non-FAILED/INACTIVE interfaces for it to go out on and 509 * in.mpathd would wrongly detect a failure or mis-detect 510 * a NIC failure as a link failure. As load spreading can 511 * happen only if ill_group is not NULL, we do only for 512 * that case and this does not affect the normal case. 513 * 514 * We force this only on echo packets that came from on-link 515 * hosts. 
We restrict this to link-local addresses which 516 * is used by in.mpathd for probing. In the IPv6 case, 517 * default routes typically have an ire_ipif pointer and 518 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 519 * might work. As a default route out of this interface 520 * may not be present, enforcing this packet to go out in 521 * this case may not work. 522 */ 523 if (ill->ill_group != NULL && 524 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 525 /* 526 * If we are sending replies to ourselves, don't 527 * set ATTACH_IF as we may not be able to find 528 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 529 * causes ip_wput_v6 to look for an IRE_LOCAL on 530 * "ill" which it may not find and will try to 531 * create an IRE_CACHE for our local address. Once 532 * we do this, we will try to forward all packets 533 * meant to our LOCAL address. 534 */ 535 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 536 NULL); 537 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 538 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 539 if (mp == NULL) { 540 BUMP_MIB(ill->ill_icmp6_mib, 541 ipv6IfIcmpInErrors); 542 if (ire != NULL) 543 ire_refrele(ire); 544 if (mctl_present) 545 freeb(first_mp); 546 return; 547 } else if (mctl_present) { 548 first_mp->b_cont = mp; 549 } else { 550 first_mp = mp; 551 } 552 ip6i = (ip6i_t *)mp->b_rptr; 553 ip6i->ip6i_flags = IP6I_ATTACH_IF; 554 ip6i->ip6i_ifindex = 555 ill->ill_phyint->phyint_ifindex; 556 } 557 if (ire != NULL) 558 ire_refrele(ire); 559 } 560 561 if (!mctl_present) { 562 /* 563 * This packet should go out the same way as it 564 * came in i.e in clear. To make sure that global 565 * policy will not be applied to this in ip_wput, 566 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
567 */ 568 ASSERT(first_mp == mp); 569 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 570 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 571 freemsg(mp); 572 return; 573 } 574 ii = (ipsec_in_t *)first_mp->b_rptr; 575 576 /* This is not a secure packet */ 577 ii->ipsec_in_secure = B_FALSE; 578 first_mp->b_cont = mp; 579 } 580 ii->ipsec_in_zoneid = zoneid; 581 ASSERT(zoneid != ALL_ZONES); 582 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 583 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 584 return; 585 } 586 put(WR(q), first_mp); 587 return; 588 589 case ICMP6_ECHO_REPLY: 590 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 591 break; 592 593 case ND_ROUTER_SOLICIT: 594 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 595 break; 596 597 case ND_ROUTER_ADVERT: 598 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 599 break; 600 601 case ND_NEIGHBOR_SOLICIT: 602 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 603 if (mctl_present) 604 freeb(first_mp); 605 /* XXX may wish to pass first_mp up to ndp_input someday. */ 606 ndp_input(ill, mp); 607 return; 608 609 case ND_NEIGHBOR_ADVERT: 610 BUMP_MIB(ill->ill_icmp6_mib, 611 ipv6IfIcmpInNeighborAdvertisements); 612 if (mctl_present) 613 freeb(first_mp); 614 /* XXX may wish to pass first_mp up to ndp_input someday. */ 615 ndp_input(ill, mp); 616 return; 617 618 case ND_REDIRECT: { 619 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 620 621 if (ipv6_ignore_redirect) 622 break; 623 624 /* 625 * As there is no upper client to deliver, we don't 626 * need the first_mp any more. 627 */ 628 if (mctl_present) 629 freeb(first_mp); 630 if (!pullupmsg(mp, -1) || 631 !icmp_redirect_ok_v6(ill, mp)) { 632 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 633 break; 634 } 635 icmp_redirect_v6(q, mp, ill); 636 return; 637 } 638 639 /* 640 * The next three icmp messages will be handled by MLD. 641 * Pass all valid MLD packets up to any process(es) 642 * listening on a raw ICMP socket. 
MLD messages are 643 * freed by mld_input function. 644 */ 645 case MLD_LISTENER_QUERY: 646 case MLD_LISTENER_REPORT: 647 case MLD_LISTENER_REDUCTION: 648 if (mctl_present) 649 freeb(first_mp); 650 mld_input(q, mp, ill); 651 return; 652 default: 653 break; 654 } 655 if (interested) { 656 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 657 mctl_present, zoneid); 658 } else { 659 freemsg(first_mp); 660 } 661 } 662 663 /* 664 * Process received IPv6 ICMP Packet too big. 665 * After updating any IRE it does the fanout to any matching transport streams. 666 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 667 */ 668 /* ARGSUSED */ 669 static void 670 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 671 boolean_t mctl_present, zoneid_t zoneid) 672 { 673 ip6_t *ip6h; 674 ip6_t *inner_ip6h; 675 icmp6_t *icmp6; 676 uint16_t hdr_length; 677 uint32_t mtu; 678 ire_t *ire, *first_ire; 679 mblk_t *first_mp; 680 681 first_mp = mp; 682 if (mctl_present) 683 mp = first_mp->b_cont; 684 /* 685 * We must have exclusive use of the mblk to update the MTU 686 * in the packet. 687 * If not, we copy it. 688 * 689 * If there's an M_CTL present, we know that allocated first_mp 690 * earlier in this function, so we know first_mp has refcnt of one. 
691 */ 692 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 693 if (mp->b_datap->db_ref > 1) { 694 mblk_t *mp1; 695 696 mp1 = copymsg(mp); 697 freemsg(mp); 698 if (mp1 == NULL) { 699 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 700 if (mctl_present) 701 freeb(first_mp); 702 return; 703 } 704 mp = mp1; 705 if (mctl_present) 706 first_mp->b_cont = mp; 707 else 708 first_mp = mp; 709 } 710 ip6h = (ip6_t *)mp->b_rptr; 711 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 712 hdr_length = ip_hdr_length_v6(mp, ip6h); 713 else 714 hdr_length = IPV6_HDR_LEN; 715 716 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 717 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 718 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 719 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 720 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 721 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 722 freemsg(first_mp); 723 return; 724 } 725 ip6h = (ip6_t *)mp->b_rptr; 726 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 727 inner_ip6h = (ip6_t *)&icmp6[1]; 728 } 729 730 /* 731 * For link local destinations matching simply on IRE type is not 732 * sufficient. Same link local addresses for different ILL's is 733 * possible. 
734 */ 735 736 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 737 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 738 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 739 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 740 741 if (first_ire == NULL) { 742 if (ip_debug > 2) { 743 /* ip1dbg */ 744 pr_addr_dbg("icmp_inbound_too_big_v6:" 745 "no ire for dst %s\n", AF_INET6, 746 &inner_ip6h->ip6_dst); 747 } 748 freemsg(first_mp); 749 return; 750 } 751 752 mtu = ntohl(icmp6->icmp6_mtu); 753 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 754 for (ire = first_ire; ire != NULL && 755 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 756 ire = ire->ire_next) { 757 mutex_enter(&ire->ire_lock); 758 if (mtu < IPV6_MIN_MTU) { 759 ip1dbg(("Received mtu less than IPv6 " 760 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 761 mtu = IPV6_MIN_MTU; 762 /* 763 * If an mtu less than IPv6 min mtu is received, 764 * we must include a fragment header in 765 * subsequent packets. 766 */ 767 ire->ire_frag_flag |= IPH_FRAG_HDR; 768 } 769 ip1dbg(("Received mtu from router: %d\n", mtu)); 770 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 771 /* Record the new max frag size for the ULP. */ 772 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 773 /* 774 * If we need a fragment header in every packet 775 * (above case or multirouting), make sure the 776 * ULP takes it into account when computing the 777 * payload size. 
778 */ 779 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 783 } 784 mutex_exit(&ire->ire_lock); 785 } 786 rw_exit(&first_ire->ire_bucket->irb_lock); 787 ire_refrele(first_ire); 788 } else { 789 irb_t *irb = NULL; 790 /* 791 * for non-link local destinations we match only on the IRE type 792 */ 793 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 794 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 795 if (ire == NULL) { 796 if (ip_debug > 2) { 797 /* ip1dbg */ 798 pr_addr_dbg("icmp_inbound_too_big_v6:" 799 "no ire for dst %s\n", 800 AF_INET6, &inner_ip6h->ip6_dst); 801 } 802 freemsg(first_mp); 803 return; 804 } 805 irb = ire->ire_bucket; 806 ire_refrele(ire); 807 rw_enter(&irb->irb_lock, RW_READER); 808 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 809 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 810 &inner_ip6h->ip6_dst)) { 811 mtu = ntohl(icmp6->icmp6_mtu); 812 mutex_enter(&ire->ire_lock); 813 if (mtu < IPV6_MIN_MTU) { 814 ip1dbg(("Received mtu less than IPv6" 815 "min mtu %d: %d\n", 816 IPV6_MIN_MTU, mtu)); 817 mtu = IPV6_MIN_MTU; 818 /* 819 * If an mtu less than IPv6 min mtu is 820 * received, we must include a fragment 821 * header in subsequent packets. 822 */ 823 ire->ire_frag_flag |= IPH_FRAG_HDR; 824 } 825 826 ip1dbg(("Received mtu from router: %d\n", mtu)); 827 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 828 /* Record the new max frag size for the ULP. */ 829 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 830 /* 831 * If we need a fragment header in 832 * every packet (above case or 833 * multirouting), make sure the ULP 834 * takes it into account when computing 835 * the payload size. 
836 */ 837 icmp6->icmp6_mtu = 838 htonl(ire->ire_max_frag - 839 sizeof (ip6_frag_t)); 840 } else { 841 icmp6->icmp6_mtu = 842 htonl(ire->ire_max_frag); 843 } 844 mutex_exit(&ire->ire_lock); 845 } 846 } 847 rw_exit(&irb->irb_lock); 848 } 849 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 850 mctl_present, zoneid); 851 } 852 853 static void 854 pkt_too_big(conn_t *connp, void *arg) 855 { 856 mblk_t *mp; 857 858 if (!connp->conn_ipv6_recvpathmtu) 859 return; 860 861 /* create message and drop it on this connections read queue */ 862 if ((mp = dupb((mblk_t *)arg)) == NULL) { 863 return; 864 } 865 mp->b_datap->db_type = M_CTL; 866 867 putnext(connp->conn_rq, mp); 868 } 869 870 /* 871 * Fanout received ICMPv6 error packets to the transports. 872 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 873 */ 874 void 875 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 876 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 877 { 878 uint16_t *up; /* Pointer to ports in ULP header */ 879 uint32_t ports; /* reversed ports for fanout */ 880 ip6_t rip6h; /* With reversed addresses */ 881 uint16_t hdr_length; 882 uint8_t *nexthdrp; 883 uint8_t nexthdr; 884 mblk_t *first_mp; 885 ipsec_in_t *ii; 886 tcpha_t *tcpha; 887 conn_t *connp; 888 889 first_mp = mp; 890 if (mctl_present) { 891 mp = first_mp->b_cont; 892 ASSERT(mp != NULL); 893 894 ii = (ipsec_in_t *)first_mp->b_rptr; 895 ASSERT(ii->ipsec_in_type == IPSEC_IN); 896 } else { 897 ii = NULL; 898 } 899 900 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 901 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 902 903 /* 904 * Need to pullup everything in order to use 905 * ip_hdr_length_nexthdr_v6() 906 */ 907 if (mp->b_cont != NULL) { 908 if (!pullupmsg(mp, -1)) { 909 ip1dbg(("icmp_inbound_error_fanout_v6: " 910 "pullupmsg failed\n")); 911 goto drop_pkt; 912 } 913 ip6h = (ip6_t *)mp->b_rptr; 914 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 915 } 916 917 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 918 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 919 goto drop_pkt; 920 921 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 922 goto drop_pkt; 923 nexthdr = *nexthdrp; 924 925 /* Set message type, must be done after pullups */ 926 mp->b_datap->db_type = M_CTL; 927 928 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 929 /* 930 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 931 * sockets. 932 * 933 * Note I don't like walking every connection to deliver 934 * this information to a set of listeners. A separate 935 * list could be kept to keep the cost of this down. 936 */ 937 ipcl_walk(pkt_too_big, (void *)mp); 938 } 939 940 /* Try to pass the ICMP message to clients who need it */ 941 switch (nexthdr) { 942 case IPPROTO_UDP: { 943 /* 944 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 945 * UDP header to get the port information. 946 */ 947 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 948 mp->b_wptr) { 949 break; 950 } 951 /* 952 * Attempt to find a client stream based on port. 953 * Note that we do a reverse lookup since the header is 954 * in the form we sent it out. 955 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 956 * and we only set the src and dst addresses and nexthdr. 957 */ 958 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 959 rip6h.ip6_src = ip6h->ip6_dst; 960 rip6h.ip6_dst = ip6h->ip6_src; 961 rip6h.ip6_nxt = nexthdr; 962 ((uint16_t *)&ports)[0] = up[1]; 963 ((uint16_t *)&ports)[1] = up[0]; 964 965 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 966 IP6_NO_IPPOLICY, mctl_present, zoneid); 967 return; 968 } 969 case IPPROTO_TCP: { 970 /* 971 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 972 * the TCP header to get the port information. 
973 */ 974 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 975 mp->b_wptr) { 976 break; 977 } 978 979 /* 980 * Attempt to find a client stream based on port. 981 * Note that we do a reverse lookup since the header is 982 * in the form we sent it out. 983 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 984 * we only set the src and dst addresses and nexthdr. 985 */ 986 987 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 988 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 989 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 990 if (connp == NULL) { 991 goto drop_pkt; 992 } 993 994 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 995 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 996 return; 997 998 } 999 case IPPROTO_SCTP: 1000 /* 1001 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1002 * the SCTP header to get the port information. 1003 */ 1004 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1005 mp->b_wptr) { 1006 break; 1007 } 1008 1009 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1010 ((uint16_t *)&ports)[0] = up[1]; 1011 ((uint16_t *)&ports)[1] = up[0]; 1012 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1013 IP6_NO_IPPOLICY, 0, zoneid); 1014 return; 1015 case IPPROTO_ESP: 1016 case IPPROTO_AH: { 1017 int ipsec_rc; 1018 1019 /* 1020 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1021 * We will re-use the IPSEC_IN if it is already present as 1022 * AH/ESP will not affect any fields in the IPSEC_IN for 1023 * ICMP errors. If there is no IPSEC_IN, allocate a new 1024 * one and attach it in the front. 1025 */ 1026 if (ii != NULL) { 1027 /* 1028 * ip_fanout_proto_again converts the ICMP errors 1029 * that come back from AH/ESP to M_DATA so that 1030 * if it is non-AH/ESP and we do a pullupmsg in 1031 * this function, it would work. Convert it back 1032 * to M_CTL before we send up as this is a ICMP 1033 * error. This could have been generated locally or 1034 * by some router. 
Validate the inner IPSEC 1035 * headers. 1036 * 1037 * NOTE : ill_index is used by ip_fanout_proto_again 1038 * to locate the ill. 1039 */ 1040 ASSERT(ill != NULL); 1041 ii->ipsec_in_ill_index = 1042 ill->ill_phyint->phyint_ifindex; 1043 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1044 first_mp->b_cont->b_datap->db_type = M_CTL; 1045 } else { 1046 /* 1047 * IPSEC_IN is not present. We attach a ipsec_in 1048 * message and send up to IPSEC for validating 1049 * and removing the IPSEC headers. Clear 1050 * ipsec_in_secure so that when we return 1051 * from IPSEC, we don't mistakenly think that this 1052 * is a secure packet came from the network. 1053 * 1054 * NOTE : ill_index is used by ip_fanout_proto_again 1055 * to locate the ill. 1056 */ 1057 ASSERT(first_mp == mp); 1058 first_mp = ipsec_in_alloc(B_FALSE); 1059 if (first_mp == NULL) { 1060 freemsg(mp); 1061 BUMP_MIB(&ip_mib, ipInDiscards); 1062 return; 1063 } 1064 ii = (ipsec_in_t *)first_mp->b_rptr; 1065 1066 /* This is not a secure packet */ 1067 ii->ipsec_in_secure = B_FALSE; 1068 first_mp->b_cont = mp; 1069 mp->b_datap->db_type = M_CTL; 1070 ASSERT(ill != NULL); 1071 ii->ipsec_in_ill_index = 1072 ill->ill_phyint->phyint_ifindex; 1073 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1074 } 1075 1076 if (!ipsec_loaded()) { 1077 ip_proto_not_sup(q, first_mp, 0, zoneid); 1078 return; 1079 } 1080 1081 if (nexthdr == IPPROTO_ESP) 1082 ipsec_rc = ipsecesp_icmp_error(first_mp); 1083 else 1084 ipsec_rc = ipsecah_icmp_error(first_mp); 1085 if (ipsec_rc == IPSEC_STATUS_FAILED) 1086 return; 1087 1088 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1089 return; 1090 } 1091 case IPPROTO_ENCAP: 1092 case IPPROTO_IPV6: 1093 if ((uint8_t *)ip6h + hdr_length + 1094 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1095 sizeof (ip6_t)) > mp->b_wptr) 1096 goto drop_pkt; 1097 1098 if (nexthdr == IPPROTO_ENCAP || 1099 !IN6_ARE_ADDR_EQUAL( 1100 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1101 &ip6h->ip6_src) || 1102 !IN6_ARE_ADDR_EQUAL( 1103 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1104 &ip6h->ip6_dst)) { 1105 /* 1106 * For tunnels that have used IPsec protection, 1107 * we need to adjust the MTU to take into account 1108 * the IPsec overhead. 1109 */ 1110 if (ii != NULL) 1111 icmp6->icmp6_mtu = htons( 1112 ntohs(icmp6->icmp6_mtu) - 1113 ipsec_in_extra_length(first_mp)); 1114 } else { 1115 /* 1116 * Self-encapsulated case. As in the ipv4 case, 1117 * we need to strip the 2nd IP header. Since mp 1118 * is already pulled-up, we can simply bcopy 1119 * the 3rd header + data over the 2nd header. 1120 */ 1121 uint16_t unused_len; 1122 ip6_t *inner_ip6h = (ip6_t *) 1123 ((uchar_t *)ip6h + hdr_length); 1124 1125 /* 1126 * Make sure we don't do recursion more than once. 1127 */ 1128 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1129 &unused_len, &nexthdrp) || 1130 *nexthdrp == IPPROTO_IPV6) { 1131 goto drop_pkt; 1132 } 1133 1134 /* 1135 * We are about to modify the packet. Make a copy if 1136 * someone else has a reference to it. 1137 */ 1138 if (DB_REF(mp) > 1) { 1139 mblk_t *mp1; 1140 uint16_t icmp6_offset; 1141 1142 mp1 = copymsg(mp); 1143 if (mp1 == NULL) { 1144 goto drop_pkt; 1145 } 1146 icmp6_offset = (uint16_t) 1147 ((uchar_t *)icmp6 - mp->b_rptr); 1148 freemsg(mp); 1149 mp = mp1; 1150 1151 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1152 ip6h = (ip6_t *)&icmp6[1]; 1153 inner_ip6h = (ip6_t *) 1154 ((uchar_t *)ip6h + hdr_length); 1155 1156 if (mctl_present) 1157 first_mp->b_cont = mp; 1158 else 1159 first_mp = mp; 1160 } 1161 1162 /* 1163 * Need to set db_type back to M_DATA before 1164 * refeeding mp into this function. 
1165 */ 1166 DB_TYPE(mp) = M_DATA; 1167 1168 /* 1169 * Copy the 3rd header + remaining data on top 1170 * of the 2nd header. 1171 */ 1172 bcopy(inner_ip6h, ip6h, 1173 mp->b_wptr - (uchar_t *)inner_ip6h); 1174 1175 /* 1176 * Subtract length of the 2nd header. 1177 */ 1178 mp->b_wptr -= hdr_length; 1179 1180 /* 1181 * Now recurse, and see what I _really_ should be 1182 * doing here. 1183 */ 1184 icmp_inbound_error_fanout_v6(q, first_mp, 1185 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1186 zoneid); 1187 return; 1188 } 1189 /* FALLTHRU */ 1190 default: 1191 /* 1192 * The rip6h header is only used for the lookup and we 1193 * only set the src and dst addresses and nexthdr. 1194 */ 1195 rip6h.ip6_src = ip6h->ip6_dst; 1196 rip6h.ip6_dst = ip6h->ip6_src; 1197 rip6h.ip6_nxt = nexthdr; 1198 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1199 IP6_NO_IPPOLICY, mctl_present, zoneid); 1200 return; 1201 } 1202 /* NOTREACHED */ 1203 drop_pkt: 1204 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1205 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1206 freemsg(first_mp); 1207 } 1208 1209 /* 1210 * Validate the incoming redirect message, if valid redirect 1211 * processing is done later. This is separated from the actual 1212 * redirect processing to avoid becoming single threaded when not 1213 * necessary. (i.e invalid packet) 1214 * Assumes that any AH or ESP headers have already been removed. 1215 * The mp has already been pulled up. 
1216 */ 1217 boolean_t 1218 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1219 { 1220 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1221 nd_redirect_t *rd; 1222 ire_t *ire; 1223 uint16_t len; 1224 uint16_t hdr_length; 1225 1226 ASSERT(mp->b_cont == NULL); 1227 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1228 hdr_length = ip_hdr_length_v6(mp, ip6h); 1229 else 1230 hdr_length = IPV6_HDR_LEN; 1231 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1232 len = mp->b_wptr - mp->b_rptr - hdr_length; 1233 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1234 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1235 (rd->nd_rd_code != 0) || 1236 (len < sizeof (nd_redirect_t)) || 1237 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1238 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1239 return (B_FALSE); 1240 } 1241 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1242 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1243 return (B_FALSE); 1244 } 1245 1246 /* 1247 * Verify that the IP source address of the redirect is 1248 * the same as the current first-hop router for the specified 1249 * ICMP destination address. Just to be cautious, this test 1250 * will be done again before we add the redirect, in case 1251 * router goes away between now and then. 1252 */ 1253 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1254 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1255 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1256 if (ire == NULL) 1257 return (B_FALSE); 1258 ire_refrele(ire); 1259 if (len > sizeof (nd_redirect_t)) { 1260 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1261 len - sizeof (nd_redirect_t))) 1262 return (B_FALSE); 1263 } 1264 return (B_TRUE); 1265 } 1266 1267 /* 1268 * Process received IPv6 ICMP Redirect messages. 1269 * Assumes that the icmp packet has already been verfied to be 1270 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1271 */ 1272 /* ARGSUSED */ 1273 static void 1274 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1275 { 1276 ip6_t *ip6h; 1277 uint16_t hdr_length; 1278 nd_redirect_t *rd; 1279 ire_t *ire; 1280 ire_t *prev_ire; 1281 ire_t *redir_ire; 1282 in6_addr_t *src, *dst, *gateway; 1283 nd_opt_hdr_t *opt; 1284 nce_t *nce; 1285 int nce_flags = 0; 1286 int err = 0; 1287 boolean_t redirect_to_router = B_FALSE; 1288 int len; 1289 iulp_t ulp_info = { 0 }; 1290 ill_t *prev_ire_ill; 1291 ipif_t *ipif; 1292 1293 ip6h = (ip6_t *)mp->b_rptr; 1294 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1295 hdr_length = ip_hdr_length_v6(mp, ip6h); 1296 else 1297 hdr_length = IPV6_HDR_LEN; 1298 1299 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1300 src = &ip6h->ip6_src; 1301 dst = &rd->nd_rd_dst; 1302 gateway = &rd->nd_rd_target; 1303 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1304 redirect_to_router = B_TRUE; 1305 nce_flags |= NCE_F_ISROUTER; 1306 } 1307 /* 1308 * Make sure we had a route for the dest in question and that 1309 * route was pointing to the old gateway (the source of the 1310 * redirect packet.) 1311 */ 1312 ipif = ipif_get_next_ipif(NULL, ill); 1313 if (ipif == NULL) { 1314 freemsg(mp); 1315 return; 1316 } 1317 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1318 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1319 ipif_refrele(ipif); 1320 /* 1321 * Check that 1322 * the redirect was not from ourselves 1323 * old gateway is still directly reachable 1324 */ 1325 if (prev_ire == NULL || 1326 prev_ire->ire_type == IRE_LOCAL) { 1327 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1328 goto fail_redirect; 1329 } 1330 prev_ire_ill = ire_to_ill(prev_ire); 1331 ASSERT(prev_ire_ill != NULL); 1332 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1333 nce_flags |= NCE_F_NONUD; 1334 1335 /* 1336 * Should we use the old ULP info to create the new gateway? From 1337 * a user's perspective, we should inherit the info so that it 1338 * is a "smooth" transition. 
If we do not do that, then new 1339 * connections going thru the new gateway will have no route metrics, 1340 * which is counter-intuitive to user. From a network point of 1341 * view, this may or may not make sense even though the new gateway 1342 * is still directly connected to us so the route metrics should not 1343 * change much. 1344 * 1345 * But if the old ire_uinfo is not initialized, we do another 1346 * recursive lookup on the dest using the new gateway. There may 1347 * be a route to that. If so, use it to initialize the redirect 1348 * route. 1349 */ 1350 if (prev_ire->ire_uinfo.iulp_set) { 1351 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1352 } else if (redirect_to_router) { 1353 /* 1354 * Only do the following if the redirection is really to 1355 * a router. 1356 */ 1357 ire_t *tmp_ire; 1358 ire_t *sire; 1359 1360 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1361 ALL_ZONES, 0, NULL, 1362 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1363 if (sire != NULL) { 1364 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1365 ASSERT(tmp_ire != NULL); 1366 ire_refrele(tmp_ire); 1367 ire_refrele(sire); 1368 } else if (tmp_ire != NULL) { 1369 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1370 sizeof (iulp_t)); 1371 ire_refrele(tmp_ire); 1372 } 1373 } 1374 1375 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1376 opt = (nd_opt_hdr_t *)&rd[1]; 1377 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1378 if (opt != NULL) { 1379 err = ndp_lookup_then_add(ill, 1380 (uchar_t *)&opt[1], /* Link layer address */ 1381 gateway, 1382 &ipv6_all_ones, /* prefix mask */ 1383 &ipv6_all_zeros, /* Mapping mask */ 1384 0, 1385 nce_flags, 1386 ND_STALE, 1387 &nce, 1388 NULL, 1389 NULL); 1390 switch (err) { 1391 case 0: 1392 NCE_REFRELE(nce); 1393 break; 1394 case EEXIST: 1395 /* 1396 * Check to see if link layer address has changed and 1397 * process the nce_state accordingly. 
1398 */ 1399 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1400 NCE_REFRELE(nce); 1401 break; 1402 default: 1403 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1404 err)); 1405 goto fail_redirect; 1406 } 1407 } 1408 if (redirect_to_router) { 1409 /* icmp_redirect_ok_v6() must have already verified this */ 1410 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1411 1412 /* 1413 * Create a Route Association. This will allow us to remember 1414 * a router told us to use the particular gateway. 1415 */ 1416 ire = ire_create_v6( 1417 dst, 1418 &ipv6_all_ones, /* mask */ 1419 &prev_ire->ire_src_addr_v6, /* source addr */ 1420 gateway, /* gateway addr */ 1421 &prev_ire->ire_max_frag, /* max frag */ 1422 NULL, /* Fast Path header */ 1423 NULL, /* no rfq */ 1424 NULL, /* no stq */ 1425 IRE_HOST_REDIRECT, 1426 NULL, 1427 prev_ire->ire_ipif, 1428 NULL, 1429 0, 1430 0, 1431 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1432 &ulp_info, 1433 NULL, 1434 NULL); 1435 } else { 1436 /* 1437 * Just create an on link entry, may or may not be a router 1438 * If there is no link layer address option ire_add() won't 1439 * add this. 1440 */ 1441 ire = ire_create_v6( 1442 dst, /* gateway == dst */ 1443 &ipv6_all_ones, /* mask */ 1444 &prev_ire->ire_src_addr_v6, /* source addr */ 1445 &ipv6_all_zeros, /* gateway addr */ 1446 &prev_ire->ire_max_frag, /* max frag */ 1447 NULL, /* Fast Path header */ 1448 prev_ire->ire_rfq, /* ire rfq */ 1449 prev_ire->ire_stq, /* ire stq */ 1450 IRE_CACHE, 1451 NULL, 1452 prev_ire->ire_ipif, 1453 &ipv6_all_ones, 1454 0, 1455 0, 1456 0, 1457 &ulp_info, 1458 NULL, 1459 NULL); 1460 } 1461 if (ire == NULL) 1462 goto fail_redirect; 1463 1464 /* 1465 * XXX If there is no nce i.e there is no target link layer address 1466 * option with the redirect message, ire_add will fail. In that 1467 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1468 * to fix this. 
1469 */ 1470 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1471 1472 /* tell routing sockets that we received a redirect */ 1473 ip_rts_change_v6(RTM_REDIRECT, 1474 &rd->nd_rd_dst, 1475 &rd->nd_rd_target, 1476 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1477 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1478 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1479 1480 /* 1481 * Delete any existing IRE_HOST_REDIRECT for this destination. 1482 * This together with the added IRE has the effect of 1483 * modifying an existing redirect. 1484 */ 1485 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1486 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1487 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1488 1489 ire_refrele(ire); /* Held in ire_add_v6 */ 1490 1491 if (redir_ire != NULL) { 1492 ire_delete(redir_ire); 1493 ire_refrele(redir_ire); 1494 } 1495 } 1496 1497 if (prev_ire->ire_type == IRE_CACHE) 1498 ire_delete(prev_ire); 1499 ire_refrele(prev_ire); 1500 prev_ire = NULL; 1501 1502 fail_redirect: 1503 if (prev_ire != NULL) 1504 ire_refrele(prev_ire); 1505 freemsg(mp); 1506 } 1507 1508 static ill_t * 1509 ip_queue_to_ill_v6(queue_t *q) 1510 { 1511 ill_t *ill; 1512 1513 ASSERT(WR(q) == q); 1514 1515 if (q->q_next != NULL) { 1516 ill = (ill_t *)q->q_ptr; 1517 if (ILL_CAN_LOOKUP(ill)) 1518 ill_refhold(ill); 1519 else 1520 ill = NULL; 1521 } else { 1522 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1523 NULL, NULL, NULL, NULL, NULL); 1524 } 1525 if (ill == NULL) 1526 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1527 return (ill); 1528 } 1529 1530 /* 1531 * Assigns an appropriate source address to the packet. 1532 * If origdst is one of our IP addresses that use it as the source. 1533 * If the queue is an ill queue then select a source from that ill. 1534 * Otherwise pick a source based on a route lookup back to the origsrc. 1535 * 1536 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1537 */ 1538 static in6_addr_t * 1539 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1540 in6_addr_t *src) 1541 { 1542 ill_t *ill; 1543 ire_t *ire; 1544 ipif_t *ipif; 1545 zoneid_t zoneid; 1546 1547 ASSERT(!(wq->q_flag & QREADR)); 1548 if (wq->q_next != NULL) { 1549 ill = (ill_t *)wq->q_ptr; 1550 zoneid = GLOBAL_ZONEID; 1551 } else { 1552 ill = NULL; 1553 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1554 } 1555 1556 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1557 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1558 if (ire != NULL) { 1559 /* Destined to one of our addresses */ 1560 *src = *origdst; 1561 ire_refrele(ire); 1562 return (src); 1563 } 1564 if (ire != NULL) { 1565 ire_refrele(ire); 1566 ire = NULL; 1567 } 1568 if (ill == NULL) { 1569 /* What is the route back to the original source? */ 1570 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1571 NULL, NULL, zoneid, NULL, 1572 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1573 if (ire == NULL) { 1574 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1575 return (NULL); 1576 } 1577 /* 1578 * Does not matter whether we use ire_stq or ire_ipif here. 1579 * Just pick an ill for ICMP replies. 1580 */ 1581 ASSERT(ire->ire_ipif != NULL); 1582 ill = ire->ire_ipif->ipif_ill; 1583 ire_refrele(ire); 1584 } 1585 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1586 IPV6_PREFER_SRC_DEFAULT, zoneid); 1587 if (ipif != NULL) { 1588 *src = ipif->ipif_v6src_addr; 1589 ipif_refrele(ipif); 1590 return (src); 1591 } 1592 /* 1593 * Unusual case - can't find a usable source address to reach the 1594 * original source. Use what in the route to the source. 
1595 */ 1596 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1597 NULL, NULL, zoneid, NULL, 1598 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1599 if (ire == NULL) { 1600 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1601 return (NULL); 1602 } 1603 ASSERT(ire != NULL); 1604 *src = ire->ire_src_addr_v6; 1605 ire_refrele(ire); 1606 return (src); 1607 } 1608 1609 /* 1610 * Build and ship an IPv6 ICMP message using the packet data in mp, 1611 * and the ICMP header pointed to by "stuff". (May be called as 1612 * writer.) 1613 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1614 * verify that an icmp error packet can be sent. 1615 * 1616 * If q is an ill write side queue (which is the case when packets 1617 * arrive from ip_rput) then ip_wput code will ensure that packets to 1618 * link-local destinations are sent out that ill. 1619 * 1620 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1621 * source address (see above function). 1622 */ 1623 static void 1624 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1625 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1626 { 1627 ip6_t *ip6h; 1628 in6_addr_t v6dst; 1629 size_t len_needed; 1630 size_t msg_len; 1631 mblk_t *mp1; 1632 icmp6_t *icmp6; 1633 ill_t *ill; 1634 in6_addr_t v6src; 1635 mblk_t *ipsec_mp; 1636 ipsec_out_t *io; 1637 1638 ill = ip_queue_to_ill_v6(q); 1639 if (ill == NULL) { 1640 freemsg(mp); 1641 return; 1642 } 1643 1644 if (mctl_present) { 1645 /* 1646 * If it is : 1647 * 1648 * 1) a IPSEC_OUT, then this is caused by outbound 1649 * datagram originating on this host. IPSEC processing 1650 * may or may not have been done. Refer to comments above 1651 * icmp_inbound_error_fanout for details. 1652 * 1653 * 2) a IPSEC_IN if we are generating a icmp_message 1654 * for an incoming datagram destined for us i.e called 1655 * from ip_fanout_send_icmp. 
1656 */ 1657 ipsec_info_t *in; 1658 1659 ipsec_mp = mp; 1660 mp = ipsec_mp->b_cont; 1661 1662 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1663 ip6h = (ip6_t *)mp->b_rptr; 1664 1665 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1666 in->ipsec_info_type == IPSEC_IN); 1667 1668 if (in->ipsec_info_type == IPSEC_IN) { 1669 /* 1670 * Convert the IPSEC_IN to IPSEC_OUT. 1671 */ 1672 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1673 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1674 ill_refrele(ill); 1675 return; 1676 } 1677 } else { 1678 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1679 io = (ipsec_out_t *)in; 1680 /* 1681 * Clear out ipsec_out_proc_begin, so we do a fresh 1682 * ire lookup. 1683 */ 1684 io->ipsec_out_proc_begin = B_FALSE; 1685 } 1686 } else { 1687 /* 1688 * This is in clear. The icmp message we are building 1689 * here should go out in clear. 1690 */ 1691 ipsec_in_t *ii; 1692 ASSERT(mp->b_datap->db_type == M_DATA); 1693 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1694 freemsg(mp); 1695 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1696 ill_refrele(ill); 1697 return; 1698 } 1699 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1700 1701 /* This is not a secure packet */ 1702 ii->ipsec_in_secure = B_FALSE; 1703 ipsec_mp->b_cont = mp; 1704 ip6h = (ip6_t *)mp->b_rptr; 1705 /* 1706 * Convert the IPSEC_IN to IPSEC_OUT. 
1707 */ 1708 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1709 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1710 ill_refrele(ill); 1711 return; 1712 } 1713 } 1714 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1715 1716 if (v6src_ptr != NULL) { 1717 v6src = *v6src_ptr; 1718 } else { 1719 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1720 &v6src) == NULL) { 1721 freemsg(ipsec_mp); 1722 ill_refrele(ill); 1723 return; 1724 } 1725 } 1726 v6dst = ip6h->ip6_src; 1727 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1728 msg_len = msgdsize(mp); 1729 if (msg_len > len_needed) { 1730 if (!adjmsg(mp, len_needed - msg_len)) { 1731 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1732 freemsg(ipsec_mp); 1733 ill_refrele(ill); 1734 return; 1735 } 1736 msg_len = len_needed; 1737 } 1738 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1739 if (mp1 == NULL) { 1740 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1741 freemsg(ipsec_mp); 1742 ill_refrele(ill); 1743 return; 1744 } 1745 ill_refrele(ill); 1746 mp1->b_cont = mp; 1747 mp = mp1; 1748 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1749 io->ipsec_out_type == IPSEC_OUT); 1750 ipsec_mp->b_cont = mp; 1751 1752 /* 1753 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1754 * node generates be accepted in peace by all on-host destinations. 1755 * If we do NOT assume that all on-host destinations trust 1756 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1757 * (Look for ipsec_out_icmp_loopback). 
1758 */ 1759 io->ipsec_out_icmp_loopback = B_TRUE; 1760 1761 ip6h = (ip6_t *)mp->b_rptr; 1762 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1763 1764 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1765 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1766 ip6h->ip6_hops = ipv6_def_hops; 1767 ip6h->ip6_dst = v6dst; 1768 ip6h->ip6_src = v6src; 1769 msg_len += IPV6_HDR_LEN + len; 1770 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1771 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1772 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1773 } 1774 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1775 icmp6 = (icmp6_t *)&ip6h[1]; 1776 bcopy(stuff, (char *)icmp6, len); 1777 /* 1778 * Prepare for checksum by putting icmp length in the icmp 1779 * checksum field. The checksum is calculated in ip_wput_v6. 1780 */ 1781 icmp6->icmp6_cksum = ip6h->ip6_plen; 1782 if (icmp6->icmp6_type == ND_REDIRECT) { 1783 ip6h->ip6_hops = IPV6_MAX_HOPS; 1784 } 1785 /* Send to V6 writeside put routine */ 1786 put(q, ipsec_mp); 1787 } 1788 1789 /* 1790 * Update the output mib when ICMPv6 packets are sent. 
1791 */ 1792 static void 1793 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1794 { 1795 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1796 1797 switch (icmp6->icmp6_type) { 1798 case ICMP6_DST_UNREACH: 1799 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1800 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1801 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1802 break; 1803 1804 case ICMP6_TIME_EXCEEDED: 1805 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1806 break; 1807 1808 case ICMP6_PARAM_PROB: 1809 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1810 break; 1811 1812 case ICMP6_PACKET_TOO_BIG: 1813 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1814 break; 1815 1816 case ICMP6_ECHO_REQUEST: 1817 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1818 break; 1819 1820 case ICMP6_ECHO_REPLY: 1821 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1822 break; 1823 1824 case ND_ROUTER_SOLICIT: 1825 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1826 break; 1827 1828 case ND_ROUTER_ADVERT: 1829 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1830 break; 1831 1832 case ND_NEIGHBOR_SOLICIT: 1833 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1834 break; 1835 1836 case ND_NEIGHBOR_ADVERT: 1837 BUMP_MIB(ill->ill_icmp6_mib, 1838 ipv6IfIcmpOutNeighborAdvertisements); 1839 break; 1840 1841 case ND_REDIRECT: 1842 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1843 break; 1844 1845 case MLD_LISTENER_QUERY: 1846 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1847 break; 1848 1849 case MLD_LISTENER_REPORT: 1850 case MLD_V2_LISTENER_REPORT: 1851 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1852 break; 1853 1854 case MLD_LISTENER_REDUCTION: 1855 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1856 break; 1857 } 1858 } 1859 1860 /* 1861 * Check if it is ok to send an ICMPv6 error packet in 1862 * response to the IP packet in 
mp. 1863 * Free the message and return null if no 1864 * ICMP error packet should be sent. 1865 */ 1866 static mblk_t * 1867 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1868 boolean_t llbcast, boolean_t mcast_ok) 1869 { 1870 ip6_t *ip6h; 1871 1872 if (!mp) 1873 return (NULL); 1874 1875 ip6h = (ip6_t *)mp->b_rptr; 1876 1877 /* Check if source address uniquely identifies the host */ 1878 1879 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1880 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1881 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1882 freemsg(mp); 1883 return (NULL); 1884 } 1885 1886 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1887 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1888 icmp6_t *icmp6; 1889 1890 if (mp->b_wptr - mp->b_rptr < len_needed) { 1891 if (!pullupmsg(mp, len_needed)) { 1892 ill_t *ill; 1893 1894 ill = ip_queue_to_ill_v6(q); 1895 if (ill == NULL) { 1896 BUMP_MIB(&icmp6_mib, 1897 ipv6IfIcmpInErrors); 1898 } else { 1899 BUMP_MIB(ill->ill_icmp6_mib, 1900 ipv6IfIcmpInErrors); 1901 ill_refrele(ill); 1902 } 1903 freemsg(mp); 1904 return (NULL); 1905 } 1906 ip6h = (ip6_t *)mp->b_rptr; 1907 } 1908 icmp6 = (icmp6_t *)&ip6h[1]; 1909 /* Explicitly do not generate errors in response to redirects */ 1910 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1911 icmp6->icmp6_type == ND_REDIRECT) { 1912 freemsg(mp); 1913 return (NULL); 1914 } 1915 } 1916 /* 1917 * Check that the destination is not multicast and that the packet 1918 * was not sent on link layer broadcast or multicast. (Exception 1919 * is Packet too big message as per the draft - when mcast_ok is set.) 1920 */ 1921 if (!mcast_ok && 1922 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1923 freemsg(mp); 1924 return (NULL); 1925 } 1926 if (icmp_err_rate_limit()) { 1927 /* 1928 * Only send ICMP error packets every so often. 1929 * This should be done on a per port/source basis, 1930 * but for now this will suffice. 
1931 */ 1932 freemsg(mp); 1933 return (NULL); 1934 } 1935 return (mp); 1936 } 1937 1938 /* 1939 * Generate an ICMPv6 redirect message. 1940 * Include target link layer address option if it exits. 1941 * Always include redirect header. 1942 */ 1943 static void 1944 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1945 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1946 { 1947 nd_redirect_t *rd; 1948 nd_opt_rd_hdr_t *rdh; 1949 uchar_t *buf; 1950 nce_t *nce = NULL; 1951 nd_opt_hdr_t *opt; 1952 int len; 1953 int ll_opt_len = 0; 1954 int max_redir_hdr_data_len; 1955 int pkt_len; 1956 in6_addr_t *srcp; 1957 1958 /* 1959 * We are called from ip_rput where we could 1960 * not have attached an IPSEC_IN. 1961 */ 1962 ASSERT(mp->b_datap->db_type == M_DATA); 1963 1964 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1965 if (mp == NULL) 1966 return; 1967 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1968 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1969 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1970 ill->ill_phys_addr_length + 7)/8 * 8; 1971 } 1972 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1973 ASSERT(len % 4 == 0); 1974 buf = kmem_alloc(len, KM_NOSLEEP); 1975 if (buf == NULL) { 1976 if (nce != NULL) 1977 NCE_REFRELE(nce); 1978 freemsg(mp); 1979 return; 1980 } 1981 1982 rd = (nd_redirect_t *)buf; 1983 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1984 rd->nd_rd_code = 0; 1985 rd->nd_rd_reserved = 0; 1986 rd->nd_rd_target = *targetp; 1987 rd->nd_rd_dst = *dest; 1988 1989 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1990 if (nce != NULL && ll_opt_len != 0) { 1991 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1992 opt->nd_opt_len = ll_opt_len/8; 1993 bcopy((char *)nce->nce_res_mp->b_rptr + 1994 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1995 ill->ill_phys_addr_length); 1996 } 1997 if (nce != NULL) 1998 NCE_REFRELE(nce); 1999 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 2000 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 2001 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2002 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2003 pkt_len = msgdsize(mp); 2004 /* Make sure mp is 8 byte aligned */ 2005 if (pkt_len > max_redir_hdr_data_len) { 2006 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2007 sizeof (nd_opt_rd_hdr_t))/8; 2008 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2009 } else { 2010 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2011 (void) adjmsg(mp, -(pkt_len % 8)); 2012 } 2013 rdh->nd_opt_rh_reserved1 = 0; 2014 rdh->nd_opt_rh_reserved2 = 0; 2015 /* ipif_v6src_addr contains the link-local source address */ 2016 rw_enter(&ill_g_lock, RW_READER); 2017 if (ill->ill_group != NULL) { 2018 /* 2019 * The receiver of the redirect will verify whether it 2020 * had a route through us (srcp that we will use in 2021 * the redirect) or not. As we load spread even link-locals, 2022 * we don't know which source address the receiver of 2023 * redirect has in its route for communicating with us. 2024 * Thus we randomly choose a source here and finally we 2025 * should get to the right one and it will eventually 2026 * accept the redirect from us. We can't call 2027 * ip_lookup_scope_v6 because we don't have the right 2028 * link-local address here. Thus we randomly choose one. 2029 */ 2030 int cnt = ill->ill_group->illgrp_ill_count; 2031 2032 ill = ill->ill_group->illgrp_ill; 2033 cnt = ++icmp_redirect_v6_src_index % cnt; 2034 while (cnt--) 2035 ill = ill->ill_group_next; 2036 srcp = &ill->ill_ipif->ipif_v6src_addr; 2037 } else { 2038 srcp = &ill->ill_ipif->ipif_v6src_addr; 2039 } 2040 rw_exit(&ill_g_lock); 2041 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2042 kmem_free(buf, len); 2043 } 2044 2045 2046 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2047 void 2048 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2049 boolean_t llbcast, boolean_t mcast_ok) 2050 { 2051 icmp6_t icmp6; 2052 boolean_t mctl_present; 2053 mblk_t *first_mp; 2054 2055 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2056 2057 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2058 if (mp == NULL) { 2059 if (mctl_present) 2060 freeb(first_mp); 2061 return; 2062 } 2063 bzero(&icmp6, sizeof (icmp6_t)); 2064 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2065 icmp6.icmp6_code = code; 2066 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2067 } 2068 2069 /* 2070 * Generate an ICMP unreachable message. 2071 */ 2072 void 2073 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2074 boolean_t llbcast, boolean_t mcast_ok) 2075 { 2076 icmp6_t icmp6; 2077 boolean_t mctl_present; 2078 mblk_t *first_mp; 2079 2080 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2081 2082 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2083 if (mp == NULL) { 2084 if (mctl_present) 2085 freeb(first_mp); 2086 return; 2087 } 2088 bzero(&icmp6, sizeof (icmp6_t)); 2089 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2090 icmp6.icmp6_code = code; 2091 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2092 } 2093 2094 /* 2095 * Generate an ICMP pkt too big message. 
2096 */ 2097 static void 2098 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2099 boolean_t llbcast, boolean_t mcast_ok) 2100 { 2101 icmp6_t icmp6; 2102 mblk_t *first_mp; 2103 boolean_t mctl_present; 2104 2105 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2106 2107 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2108 if (mp == NULL) { 2109 if (mctl_present) 2110 freeb(first_mp); 2111 return; 2112 } 2113 bzero(&icmp6, sizeof (icmp6_t)); 2114 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2115 icmp6.icmp6_code = 0; 2116 icmp6.icmp6_mtu = htonl(mtu); 2117 2118 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2119 } 2120 2121 /* 2122 * Generate an ICMP parameter problem message. (May be called as writer.) 2123 * 'offset' is the offset from the beginning of the packet in error. 2124 */ 2125 static void 2126 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2127 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2128 { 2129 icmp6_t icmp6; 2130 boolean_t mctl_present; 2131 mblk_t *first_mp; 2132 2133 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2134 2135 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2136 if (mp == NULL) { 2137 if (mctl_present) 2138 freeb(first_mp); 2139 return; 2140 } 2141 bzero((char *)&icmp6, sizeof (icmp6_t)); 2142 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2143 icmp6.icmp6_code = code; 2144 icmp6.icmp6_pptr = htonl(offset); 2145 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2146 } 2147 2148 /* 2149 * This code will need to take into account the possibility of binding 2150 * to a link local address on a multi-homed host, in which case the 2151 * outgoing interface (from the conn) will need to be used when getting 2152 * an ire for the dst. 
Going through proper outgoing interface and 2153 * choosing the source address corresponding to the outgoing interface 2154 * is necessary when the destination address is a link-local address and 2155 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2156 * This can happen when active connection is setup; thus ipp pointer 2157 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2158 * pointer is passed as ipp pointer. 2159 */ 2160 mblk_t * 2161 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2162 { 2163 ssize_t len; 2164 int protocol; 2165 struct T_bind_req *tbr; 2166 sin6_t *sin6; 2167 ipa6_conn_t *ac6; 2168 in6_addr_t *v6srcp; 2169 in6_addr_t *v6dstp; 2170 uint16_t lport; 2171 uint16_t fport; 2172 uchar_t *ucp; 2173 mblk_t *mp1; 2174 boolean_t ire_requested; 2175 boolean_t ipsec_policy_set; 2176 int error = 0; 2177 boolean_t local_bind; 2178 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2179 ipa6_conn_x_t *acx6; 2180 boolean_t verify_dst; 2181 2182 ASSERT(connp->conn_af_isv6); 2183 len = mp->b_wptr - mp->b_rptr; 2184 if (len < (sizeof (*tbr) + 1)) { 2185 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2186 "ip_bind_v6: bogus msg, len %ld", len); 2187 goto bad_addr; 2188 } 2189 /* Back up and extract the protocol identifier. */ 2190 mp->b_wptr--; 2191 tbr = (struct T_bind_req *)mp->b_rptr; 2192 /* Reset the message type in preparation for shipping it back. */ 2193 mp->b_datap->db_type = M_PCPROTO; 2194 2195 protocol = *mp->b_wptr & 0xFF; 2196 connp->conn_ulp = (uint8_t)protocol; 2197 2198 /* 2199 * Check for a zero length address. This is from a protocol that 2200 * wants to register to receive all packets of its type. 2201 */ 2202 if (tbr->ADDR_length == 0) { 2203 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2204 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2205 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2206 /* 2207 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2208 * Do not allow others to bind to these. 2209 */ 2210 goto bad_addr; 2211 } 2212 2213 /* 2214 * 2215 * The udp module never sends down a zero-length address, 2216 * and allowing this on a labeled system will break MLP 2217 * functionality. 2218 */ 2219 if (is_system_labeled() && protocol == IPPROTO_UDP) 2220 goto bad_addr; 2221 2222 /* Allow ipsec plumbing */ 2223 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2224 protocol != IPPROTO_ESP) 2225 goto bad_addr; 2226 2227 connp->conn_srcv6 = ipv6_all_zeros; 2228 ipcl_proto_insert_v6(connp, protocol); 2229 2230 tbr->PRIM_type = T_BIND_ACK; 2231 return (mp); 2232 } 2233 2234 /* Extract the address pointer from the message. */ 2235 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2236 tbr->ADDR_length); 2237 if (ucp == NULL) { 2238 ip1dbg(("ip_bind_v6: no address\n")); 2239 goto bad_addr; 2240 } 2241 if (!OK_32PTR(ucp)) { 2242 ip1dbg(("ip_bind_v6: unaligned address\n")); 2243 goto bad_addr; 2244 } 2245 mp1 = mp->b_cont; /* trailing mp if any */ 2246 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2247 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2248 2249 switch (tbr->ADDR_length) { 2250 default: 2251 ip1dbg(("ip_bind_v6: bad address length %d\n", 2252 (int)tbr->ADDR_length)); 2253 goto bad_addr; 2254 2255 case IPV6_ADDR_LEN: 2256 /* Verification of local address only */ 2257 v6srcp = (in6_addr_t *)ucp; 2258 lport = 0; 2259 local_bind = B_TRUE; 2260 break; 2261 2262 case sizeof (sin6_t): 2263 sin6 = (sin6_t *)ucp; 2264 v6srcp = &sin6->sin6_addr; 2265 lport = sin6->sin6_port; 2266 local_bind = B_TRUE; 2267 break; 2268 2269 case sizeof (ipa6_conn_t): 2270 /* 2271 * Verify that both the source and destination addresses 2272 * are valid. 2273 * Note that we allow connect to broadcast and multicast 2274 * addresses when ire_requested is set. Thus the ULP 2275 * has to check for IRE_BROADCAST and multicast. 
2276 */ 2277 ac6 = (ipa6_conn_t *)ucp; 2278 v6srcp = &ac6->ac6_laddr; 2279 v6dstp = &ac6->ac6_faddr; 2280 fport = ac6->ac6_fport; 2281 /* For raw socket, the local port is not set. */ 2282 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2283 connp->conn_lport; 2284 local_bind = B_FALSE; 2285 /* Always verify destination reachability. */ 2286 verify_dst = B_TRUE; 2287 break; 2288 2289 case sizeof (ipa6_conn_x_t): 2290 /* 2291 * Verify that the source address is valid. 2292 * Note that we allow connect to broadcast and multicast 2293 * addresses when ire_requested is set. Thus the ULP 2294 * has to check for IRE_BROADCAST and multicast. 2295 */ 2296 acx6 = (ipa6_conn_x_t *)ucp; 2297 ac6 = &acx6->ac6x_conn; 2298 v6srcp = &ac6->ac6_laddr; 2299 v6dstp = &ac6->ac6_faddr; 2300 fport = ac6->ac6_fport; 2301 lport = ac6->ac6_lport; 2302 local_bind = B_FALSE; 2303 /* 2304 * Client that passed ipa6_conn_x_t to us specifies whether to 2305 * verify destination reachability. 2306 */ 2307 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2308 break; 2309 } 2310 if (local_bind) { 2311 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2312 /* Bind to IPv4 address */ 2313 ipaddr_t v4src; 2314 2315 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2316 2317 error = ip_bind_laddr(connp, mp, v4src, lport, 2318 ire_requested, ipsec_policy_set, 2319 tbr->ADDR_length != IPV6_ADDR_LEN); 2320 if (error != 0) 2321 goto bad_addr; 2322 connp->conn_pkt_isv6 = B_FALSE; 2323 } else { 2324 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2325 error = 0; 2326 goto bad_addr; 2327 } 2328 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2329 ire_requested, ipsec_policy_set, 2330 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2331 if (error != 0) 2332 goto bad_addr; 2333 connp->conn_pkt_isv6 = B_TRUE; 2334 } 2335 if (protocol == IPPROTO_TCP) 2336 connp->conn_recv = tcp_conn_request; 2337 } else { 2338 /* 2339 * Bind to local and remote address. 
Local might be 2340 * unspecified in which case it will be extracted from 2341 * ire_src_addr_v6 2342 */ 2343 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2344 /* Connect to IPv4 address */ 2345 ipaddr_t v4src; 2346 ipaddr_t v4dst; 2347 2348 /* Is the source unspecified or mapped? */ 2349 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2350 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2351 ip1dbg(("ip_bind_v6: " 2352 "dst is mapped, but not the src\n")); 2353 goto bad_addr; 2354 } 2355 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2356 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2357 2358 /* 2359 * XXX Fix needed. Need to pass ipsec_policy_set 2360 * instead of B_FALSE. 2361 */ 2362 2363 /* Always verify destination reachability. */ 2364 error = ip_bind_connected(connp, mp, &v4src, lport, 2365 v4dst, fport, ire_requested, ipsec_policy_set, 2366 B_TRUE, B_TRUE); 2367 if (error != 0) 2368 goto bad_addr; 2369 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2370 connp->conn_pkt_isv6 = B_FALSE; 2371 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2372 ip1dbg(("ip_bind_v6: " 2373 "src is mapped, but not the dst\n")); 2374 goto bad_addr; 2375 } else { 2376 error = ip_bind_connected_v6(connp, mp, v6srcp, 2377 lport, v6dstp, ipp, fport, ire_requested, 2378 ipsec_policy_set, B_TRUE, verify_dst); 2379 if (error != 0) 2380 goto bad_addr; 2381 connp->conn_pkt_isv6 = B_TRUE; 2382 } 2383 if (protocol == IPPROTO_TCP) 2384 connp->conn_recv = tcp_input; 2385 } 2386 /* Update qinfo if v4/v6 changed */ 2387 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2388 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2389 if (connp->conn_pkt_isv6) 2390 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2391 else 2392 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2393 } 2394 2395 /* 2396 * Pass the IPSEC headers size in ire_ipsec_overhead. 2397 * We can't do this in ip_bind_insert_ire because the policy 2398 * may not have been inherited at that point in time and hence 2399 * conn_out_enforce_policy may not be set. 
2400 */ 2401 mp1 = mp->b_cont; 2402 if (ire_requested && connp->conn_out_enforce_policy && 2403 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2404 ire_t *ire = (ire_t *)mp1->b_rptr; 2405 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2406 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2407 } 2408 2409 /* Send it home. */ 2410 mp->b_datap->db_type = M_PCPROTO; 2411 tbr->PRIM_type = T_BIND_ACK; 2412 return (mp); 2413 2414 bad_addr: 2415 if (error == EINPROGRESS) 2416 return (NULL); 2417 if (error > 0) 2418 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2419 else 2420 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2421 return (mp); 2422 } 2423 2424 /* 2425 * Here address is verified to be a valid local address. 2426 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2427 * address is also considered a valid local address. 2428 * In the case of a multicast address, however, the 2429 * upper protocol is expected to reset the src address 2430 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2431 * no packets are emitted with multicast address as 2432 * source address. 2433 * The addresses valid for bind are: 2434 * (1) - in6addr_any 2435 * (2) - IP address of an UP interface 2436 * (3) - IP address of a DOWN interface 2437 * (4) - a multicast address. In this case 2438 * the conn will only receive packets destined to 2439 * the specified multicast address. Note: the 2440 * application still has to issue an 2441 * IPV6_JOIN_GROUP socket option. 2442 * 2443 * In all the above cases, the bound address must be valid in the current zone. 2444 * When the address is loopback or multicast, there might be many matching IREs 2445 * so bind has to look up based on the zone. 
2446 */ 2447 static int 2448 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2449 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2450 boolean_t fanout_insert) 2451 { 2452 int error = 0; 2453 ire_t *src_ire = NULL; 2454 ipif_t *ipif = NULL; 2455 mblk_t *policy_mp; 2456 zoneid_t zoneid; 2457 2458 if (ipsec_policy_set) 2459 policy_mp = mp->b_cont; 2460 2461 /* 2462 * If it was previously connected, conn_fully_bound would have 2463 * been set. 2464 */ 2465 connp->conn_fully_bound = B_FALSE; 2466 2467 zoneid = connp->conn_zoneid; 2468 2469 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2470 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2471 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2472 /* 2473 * If an address other than in6addr_any is requested, 2474 * we verify that it is a valid address for bind 2475 * Note: Following code is in if-else-if form for 2476 * readability compared to a condition check. 2477 */ 2478 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2479 if (IRE_IS_LOCAL(src_ire)) { 2480 /* 2481 * (2) Bind to address of local UP interface 2482 */ 2483 ipif = src_ire->ire_ipif; 2484 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2485 ipif_t *multi_ipif = NULL; 2486 ire_t *save_ire; 2487 /* 2488 * (4) bind to multicast address. 2489 * Fake out the IRE returned to upper 2490 * layer to be a broadcast IRE in 2491 * ip_bind_insert_ire_v6(). 2492 * Pass other information that matches 2493 * the ipif (e.g. the source address). 
2494 * conn_multicast_ill is only used for 2495 * IPv6 packets 2496 */ 2497 mutex_enter(&connp->conn_lock); 2498 if (connp->conn_multicast_ill != NULL) { 2499 (void) ipif_lookup_zoneid( 2500 connp->conn_multicast_ill, zoneid, 0, 2501 &multi_ipif); 2502 } else { 2503 /* 2504 * Look for default like 2505 * ip_wput_v6 2506 */ 2507 multi_ipif = ipif_lookup_group_v6( 2508 &ipv6_unspecified_group, zoneid); 2509 } 2510 mutex_exit(&connp->conn_lock); 2511 save_ire = src_ire; 2512 src_ire = NULL; 2513 if (multi_ipif == NULL || !ire_requested || 2514 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2515 src_ire = save_ire; 2516 error = EADDRNOTAVAIL; 2517 } else { 2518 ASSERT(src_ire != NULL); 2519 if (save_ire != NULL) 2520 ire_refrele(save_ire); 2521 } 2522 if (multi_ipif != NULL) 2523 ipif_refrele(multi_ipif); 2524 } else { 2525 *mp->b_wptr++ = (char)connp->conn_ulp; 2526 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2527 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2528 if (ipif == NULL) { 2529 if (error == EINPROGRESS) { 2530 if (src_ire != NULL) 2531 ire_refrele(src_ire); 2532 return (error); 2533 } 2534 /* 2535 * Not a valid address for bind 2536 */ 2537 error = EADDRNOTAVAIL; 2538 } else { 2539 ipif_refrele(ipif); 2540 } 2541 /* 2542 * Just to keep it consistent with the processing in 2543 * ip_bind_v6(). 2544 */ 2545 mp->b_wptr--; 2546 } 2547 2548 if (error != 0) { 2549 /* Red Alert! Attempting to be a bogon! */ 2550 if (ip_debug > 2) { 2551 /* ip1dbg */ 2552 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2553 " address %s\n", AF_INET6, v6src); 2554 } 2555 goto bad_addr; 2556 } 2557 } 2558 2559 /* 2560 * Allow setting new policies. For example, disconnects come 2561 * down as ipa_t bind. As we would have set conn_policy_cached 2562 * to B_TRUE before, we should set it to B_FALSE, so that policy 2563 * can change after the disconnect. 
2564 */ 2565 connp->conn_policy_cached = B_FALSE; 2566 2567 /* If not fanout_insert this was just an address verification */ 2568 if (fanout_insert) { 2569 /* 2570 * The addresses have been verified. Time to insert in 2571 * the correct fanout list. 2572 */ 2573 connp->conn_srcv6 = *v6src; 2574 connp->conn_remv6 = ipv6_all_zeros; 2575 connp->conn_lport = lport; 2576 connp->conn_fport = 0; 2577 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2578 } 2579 if (error == 0) { 2580 if (ire_requested) { 2581 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2582 error = -1; 2583 goto bad_addr; 2584 } 2585 } else if (ipsec_policy_set) { 2586 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2587 error = -1; 2588 goto bad_addr; 2589 } 2590 } 2591 } 2592 bad_addr: 2593 if (error != 0) { 2594 if (connp->conn_anon_port) { 2595 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2596 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2597 B_FALSE); 2598 } 2599 connp->conn_mlp_type = mlptSingle; 2600 } 2601 2602 if (src_ire != NULL) 2603 ire_refrele(src_ire); 2604 2605 if (ipsec_policy_set) { 2606 ASSERT(policy_mp != NULL); 2607 freeb(policy_mp); 2608 /* 2609 * As of now assume that nothing else accompanies 2610 * IPSEC_POLICY_SET. 
2611 */ 2612 mp->b_cont = NULL; 2613 } 2614 return (error); 2615 } 2616 2617 /* ARGSUSED */ 2618 static void 2619 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2620 void *dummy_arg) 2621 { 2622 conn_t *connp = NULL; 2623 t_scalar_t prim; 2624 2625 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2626 2627 if (CONN_Q(q)) 2628 connp = Q_TO_CONN(q); 2629 ASSERT(connp != NULL); 2630 2631 prim = ((union T_primitives *)mp->b_rptr)->type; 2632 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2633 2634 if (IPCL_IS_TCP(connp)) { 2635 /* Pass sticky_ipp for scope_id and pktinfo */ 2636 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2637 } else { 2638 /* For UDP and ICMP */ 2639 mp = ip_bind_v6(q, mp, connp, NULL); 2640 } 2641 if (mp != NULL) { 2642 if (IPCL_IS_TCP(connp)) { 2643 CONN_INC_REF(connp); 2644 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2645 connp, SQTAG_TCP_RPUTOTHER); 2646 } else if (IPCL_IS_UDP(connp)) { 2647 udp_resume_bind(connp, mp); 2648 } else { 2649 qreply(q, mp); 2650 CONN_OPER_PENDING_DONE(connp); 2651 } 2652 } 2653 } 2654 2655 /* 2656 * Verify that both the source and destination addresses 2657 * are valid. If verify_dst, then destination address must also be reachable, 2658 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2659 * It takes ip6_pkt_t * as one of the arguments to determine correct 2660 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2661 * destination address. Note that parameter ipp is only useful for TCP connect 2662 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2663 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2664 * 2665 */ 2666 static int 2667 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2668 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2669 boolean_t ire_requested, boolean_t ipsec_policy_set, 2670 boolean_t fanout_insert, boolean_t verify_dst) 2671 { 2672 ire_t *src_ire; 2673 ire_t *dst_ire; 2674 int error = 0; 2675 int protocol; 2676 mblk_t *policy_mp; 2677 ire_t *sire = NULL; 2678 ire_t *md_dst_ire = NULL; 2679 ill_t *md_ill = NULL; 2680 ill_t *dst_ill = NULL; 2681 ipif_t *src_ipif = NULL; 2682 zoneid_t zoneid; 2683 boolean_t ill_held = B_FALSE; 2684 2685 src_ire = dst_ire = NULL; 2686 /* 2687 * NOTE: The protocol is beyond the wptr because that's how 2688 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2689 */ 2690 protocol = *mp->b_wptr & 0xFF; 2691 2692 /* 2693 * If we never got a disconnect before, clear it now. 2694 */ 2695 connp->conn_fully_bound = B_FALSE; 2696 2697 if (ipsec_policy_set) { 2698 policy_mp = mp->b_cont; 2699 } 2700 2701 zoneid = connp->conn_zoneid; 2702 2703 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2704 ipif_t *ipif; 2705 2706 /* 2707 * Use an "emulated" IRE_BROADCAST to tell the transport it 2708 * is a multicast. 2709 * Pass other information that matches 2710 * the ipif (e.g. the source address). 
2711 * 2712 * conn_multicast_ill is only used for IPv6 packets 2713 */ 2714 mutex_enter(&connp->conn_lock); 2715 if (connp->conn_multicast_ill != NULL) { 2716 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2717 zoneid, 0, &ipif); 2718 } else { 2719 /* Look for default like ip_wput_v6 */ 2720 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2721 } 2722 mutex_exit(&connp->conn_lock); 2723 if (ipif == NULL || !ire_requested || 2724 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2725 if (ipif != NULL) 2726 ipif_refrele(ipif); 2727 if (ip_debug > 2) { 2728 /* ip1dbg */ 2729 pr_addr_dbg("ip_bind_connected_v6: bad " 2730 "connected multicast %s\n", AF_INET6, 2731 v6dst); 2732 } 2733 error = ENETUNREACH; 2734 goto bad_addr; 2735 } 2736 if (ipif != NULL) 2737 ipif_refrele(ipif); 2738 } else { 2739 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2740 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2741 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2742 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2743 /* 2744 * We also prevent ire's with src address INADDR_ANY to 2745 * be used, which are created temporarily for 2746 * sending out packets from endpoints that have 2747 * conn_unspec_src set. 2748 */ 2749 if (dst_ire == NULL || 2750 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2751 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2752 /* 2753 * When verifying destination reachability, we always 2754 * complain. 2755 * 2756 * When not verifying destination reachability but we 2757 * found an IRE, i.e. the destination is reachable, 2758 * then the other tests still apply and we complain. 
2759 */ 2760 if (verify_dst || (dst_ire != NULL)) { 2761 if (ip_debug > 2) { 2762 /* ip1dbg */ 2763 pr_addr_dbg("ip_bind_connected_v6: bad" 2764 " connected dst %s\n", AF_INET6, 2765 v6dst); 2766 } 2767 if (dst_ire == NULL || 2768 !(dst_ire->ire_type & IRE_HOST)) { 2769 error = ENETUNREACH; 2770 } else { 2771 error = EHOSTUNREACH; 2772 } 2773 goto bad_addr; 2774 } 2775 } 2776 } 2777 2778 /* 2779 * We now know that routing will allow us to reach the destination. 2780 * Check whether Trusted Solaris policy allows communication with this 2781 * host, and pretend that the destination is unreachable if not. 2782 * 2783 * This is never a problem for TCP, since that transport is known to 2784 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2785 * handling. If the remote is unreachable, it will be detected at that 2786 * point, so there's no reason to check it here. 2787 * 2788 * Note that for sendto (and other datagram-oriented friends), this 2789 * check is done as part of the data path label computation instead. 2790 * The check here is just to make non-TCP connect() report the right 2791 * error. 2792 */ 2793 if (dst_ire != NULL && is_system_labeled() && 2794 !IPCL_IS_TCP(connp) && 2795 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2796 connp->conn_mac_exempt) != 0) { 2797 error = EHOSTUNREACH; 2798 if (ip_debug > 2) { 2799 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2800 AF_INET6, v6dst); 2801 } 2802 goto bad_addr; 2803 } 2804 2805 /* 2806 * If the app does a connect(), it means that it will most likely 2807 * send more than 1 packet to the destination. It makes sense 2808 * to clear the temporary flag. 
2809 */ 2810 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2811 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2812 irb_t *irb = dst_ire->ire_bucket; 2813 2814 rw_enter(&irb->irb_lock, RW_WRITER); 2815 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2816 irb->irb_tmp_ire_cnt--; 2817 rw_exit(&irb->irb_lock); 2818 } 2819 2820 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2821 2822 /* 2823 * See if we should notify ULP about MDT; we do this whether or not 2824 * ire_requested is TRUE, in order to handle active connects; MDT 2825 * eligibility tests for passive connects are handled separately 2826 * through tcp_adapt_ire(). We do this before the source address 2827 * selection, because dst_ire may change after a call to 2828 * ipif_select_source_v6(). This is a best-effort check, as the 2829 * packet for this connection may not actually go through 2830 * dst_ire->ire_stq, and the exact IRE can only be known after 2831 * calling ip_newroute_v6(). This is why we further check on the 2832 * IRE during Multidata packet transmission in tcp_multisend(). 
2833 */ 2834 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2835 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2836 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2837 ILL_MDT_CAPABLE(md_ill)) { 2838 md_dst_ire = dst_ire; 2839 IRE_REFHOLD(md_dst_ire); 2840 } 2841 2842 if (dst_ire != NULL && 2843 dst_ire->ire_type == IRE_LOCAL && 2844 dst_ire->ire_zoneid != zoneid && 2845 dst_ire->ire_zoneid != ALL_ZONES) { 2846 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2847 zoneid, 0, NULL, 2848 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2849 MATCH_IRE_RJ_BHOLE); 2850 if (src_ire == NULL) { 2851 error = EHOSTUNREACH; 2852 goto bad_addr; 2853 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2854 if (!(src_ire->ire_type & IRE_HOST)) 2855 error = ENETUNREACH; 2856 else 2857 error = EHOSTUNREACH; 2858 goto bad_addr; 2859 } 2860 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2861 src_ipif = src_ire->ire_ipif; 2862 ipif_refhold(src_ipif); 2863 *v6src = src_ipif->ipif_v6lcl_addr; 2864 } 2865 ire_refrele(src_ire); 2866 src_ire = NULL; 2867 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2868 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2869 *v6src = sire->ire_src_addr_v6; 2870 ire_refrele(dst_ire); 2871 dst_ire = sire; 2872 sire = NULL; 2873 } else if (dst_ire->ire_type == IRE_CACHE && 2874 (dst_ire->ire_flags & RTF_SETSRC)) { 2875 ASSERT(dst_ire->ire_zoneid == zoneid || 2876 dst_ire->ire_zoneid == ALL_ZONES); 2877 *v6src = dst_ire->ire_src_addr_v6; 2878 } else { 2879 /* 2880 * Pick a source address so that a proper inbound load 2881 * spreading would happen. Use dst_ill specified by the 2882 * app. when socket option or scopeid is set. 
2883 */ 2884 int err; 2885 2886 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2887 uint_t if_index; 2888 2889 /* 2890 * Scope id or IPV6_PKTINFO 2891 */ 2892 2893 if_index = ipp->ipp_ifindex; 2894 dst_ill = ill_lookup_on_ifindex( 2895 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2896 if (dst_ill == NULL) { 2897 ip1dbg(("ip_bind_connected_v6:" 2898 " bad ifindex %d\n", if_index)); 2899 error = EADDRNOTAVAIL; 2900 goto bad_addr; 2901 } 2902 ill_held = B_TRUE; 2903 } else if (connp->conn_outgoing_ill != NULL) { 2904 /* 2905 * For IPV6_BOUND_IF socket option, 2906 * conn_outgoing_ill should be set 2907 * already in TCP or UDP/ICMP. 2908 */ 2909 dst_ill = conn_get_held_ill(connp, 2910 &connp->conn_outgoing_ill, &err); 2911 if (err == ILL_LOOKUP_FAILED) { 2912 ip1dbg(("ip_bind_connected_v6:" 2913 "no ill for bound_if\n")); 2914 error = EADDRNOTAVAIL; 2915 goto bad_addr; 2916 } 2917 ill_held = B_TRUE; 2918 } else if (dst_ire->ire_stq != NULL) { 2919 /* No need to hold ill here */ 2920 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2921 } else { 2922 /* No need to hold ill here */ 2923 dst_ill = dst_ire->ire_ipif->ipif_ill; 2924 } 2925 if (!ip6_asp_can_lookup()) { 2926 *mp->b_wptr++ = (char)protocol; 2927 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2928 ip_bind_connected_resume_v6); 2929 error = EINPROGRESS; 2930 goto refrele_and_quit; 2931 } 2932 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2933 RESTRICT_TO_NONE, connp->conn_src_preferences, 2934 zoneid); 2935 ip6_asp_table_refrele(); 2936 if (src_ipif == NULL) { 2937 pr_addr_dbg("ip_bind_connected_v6: " 2938 "no usable source address for " 2939 "connection to %s\n", AF_INET6, v6dst); 2940 error = EADDRNOTAVAIL; 2941 goto bad_addr; 2942 } 2943 *v6src = src_ipif->ipif_v6lcl_addr; 2944 } 2945 } 2946 2947 /* 2948 * We do ire_route_lookup_v6() here (and not an interface lookup) 2949 * as we assert that v6src should only come from an 2950 * UP interface for hard binding. 
2951 */ 2952 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2953 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2954 2955 /* src_ire must be a local|loopback */ 2956 if (!IRE_IS_LOCAL(src_ire)) { 2957 if (ip_debug > 2) { 2958 /* ip1dbg */ 2959 pr_addr_dbg("ip_bind_connected_v6: bad " 2960 "connected src %s\n", AF_INET6, v6src); 2961 } 2962 error = EADDRNOTAVAIL; 2963 goto bad_addr; 2964 } 2965 2966 /* 2967 * If the source address is a loopback address, the 2968 * destination had best be local or multicast. 2969 * The transports that can't handle multicast will reject 2970 * those addresses. 2971 */ 2972 if (src_ire->ire_type == IRE_LOOPBACK && 2973 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2974 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2975 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2976 error = -1; 2977 goto bad_addr; 2978 } 2979 /* 2980 * Allow setting new policies. For example, disconnects come 2981 * down as ipa_t bind. As we would have set conn_policy_cached 2982 * to B_TRUE before, we should set it to B_FALSE, so that policy 2983 * can change after the disconnect. 2984 */ 2985 connp->conn_policy_cached = B_FALSE; 2986 2987 /* 2988 * The addresses have been verified. Initialize the conn 2989 * before calling the policy as they expect the conns 2990 * initialized. 2991 */ 2992 connp->conn_srcv6 = *v6src; 2993 connp->conn_remv6 = *v6dst; 2994 connp->conn_lport = lport; 2995 connp->conn_fport = fport; 2996 2997 ASSERT(!(ipsec_policy_set && ire_requested)); 2998 if (ire_requested) { 2999 iulp_t *ulp_info = NULL; 3000 3001 /* 3002 * Note that sire will not be NULL if this is an off-link 3003 * connection and there is not cache for that dest yet. 3004 * 3005 * XXX Because of an existing bug, if there are multiple 3006 * default routes, the IRE returned now may not be the actual 3007 * default route used (default routes are chosen in a 3008 * round robin fashion). 
So if the metrics for different 3009 * default routes are different, we may return the wrong 3010 * metrics. This will not be a problem if the existing 3011 * bug is fixed. 3012 */ 3013 if (sire != NULL) 3014 ulp_info = &(sire->ire_uinfo); 3015 3016 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3017 error = -1; 3018 goto bad_addr; 3019 } 3020 } else if (ipsec_policy_set) { 3021 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3022 error = -1; 3023 goto bad_addr; 3024 } 3025 } 3026 3027 /* 3028 * Cache IPsec policy in this conn. If we have per-socket policy, 3029 * we'll cache that. If we don't, we'll inherit global policy. 3030 * 3031 * We can't insert until the conn reflects the policy. Note that 3032 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3033 * connections where we don't have a policy. This is to prevent 3034 * global policy lookups in the inbound path. 3035 * 3036 * If we insert before we set conn_policy_cached, 3037 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3038 * because global policy cound be non-empty. We normally call 3039 * ipsec_check_policy() for conn_policy_cached connections only if 3040 * conn_in_enforce_policy is set. But in this case, 3041 * conn_policy_cached can get set anytime since we made the 3042 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3043 * is called, which will make the above assumption false. Thus, we 3044 * need to insert after we set conn_policy_cached. 3045 */ 3046 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3047 goto bad_addr; 3048 3049 /* If not fanout_insert this was just an address verification */ 3050 if (fanout_insert) { 3051 /* 3052 * The addresses have been verified. Time to insert in 3053 * the correct fanout list. 3054 */ 3055 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3056 connp->conn_ports, 3057 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3058 } 3059 if (error == 0) { 3060 connp->conn_fully_bound = B_TRUE; 3061 /* 3062 * Our initial checks for MDT have passed; the IRE is not 3063 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3064 * be supporting MDT. Pass the IRE, IPC and ILL into 3065 * ip_mdinfo_return(), which performs further checks 3066 * against them and upon success, returns the MDT info 3067 * mblk which we will attach to the bind acknowledgment. 3068 */ 3069 if (md_dst_ire != NULL) { 3070 mblk_t *mdinfo_mp; 3071 3072 ASSERT(md_ill != NULL); 3073 ASSERT(md_ill->ill_mdt_capab != NULL); 3074 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3075 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3076 linkb(mp, mdinfo_mp); 3077 } 3078 } 3079 bad_addr: 3080 if (ipsec_policy_set) { 3081 ASSERT(policy_mp != NULL); 3082 freeb(policy_mp); 3083 /* 3084 * As of now assume that nothing else accompanies 3085 * IPSEC_POLICY_SET. 3086 */ 3087 mp->b_cont = NULL; 3088 } 3089 refrele_and_quit: 3090 if (src_ire != NULL) 3091 IRE_REFRELE(src_ire); 3092 if (dst_ire != NULL) 3093 IRE_REFRELE(dst_ire); 3094 if (sire != NULL) 3095 IRE_REFRELE(sire); 3096 if (src_ipif != NULL) 3097 ipif_refrele(src_ipif); 3098 if (md_dst_ire != NULL) 3099 IRE_REFRELE(md_dst_ire); 3100 if (ill_held && dst_ill != NULL) 3101 ill_refrele(dst_ill); 3102 return (error); 3103 } 3104 3105 /* 3106 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3107 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3108 */ 3109 static boolean_t 3110 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3111 iulp_t *ulp_info) 3112 { 3113 mblk_t *mp1; 3114 ire_t *ret_ire; 3115 3116 mp1 = mp->b_cont; 3117 ASSERT(mp1 != NULL); 3118 3119 if (ire != NULL) { 3120 /* 3121 * mp1 initialized above to IRE_DB_REQ_TYPE 3122 * appended mblk. Its <upper protocol>'s 3123 * job to make sure there is room. 
3124 */ 3125 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3126 return (B_FALSE); 3127 3128 mp1->b_datap->db_type = IRE_DB_TYPE; 3129 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3130 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3131 ret_ire = (ire_t *)mp1->b_rptr; 3132 if (IN6_IS_ADDR_MULTICAST(dst) || 3133 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3134 ret_ire->ire_type = IRE_BROADCAST; 3135 ret_ire->ire_addr_v6 = *dst; 3136 } 3137 if (ulp_info != NULL) { 3138 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3139 sizeof (iulp_t)); 3140 } 3141 ret_ire->ire_mp = mp1; 3142 } else { 3143 /* 3144 * No IRE was found. Remove IRE mblk. 3145 */ 3146 mp->b_cont = mp1->b_cont; 3147 freeb(mp1); 3148 } 3149 return (B_TRUE); 3150 } 3151 3152 /* 3153 * Add an ip6i_t header to the front of the mblk. 3154 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3155 * Returns NULL if allocation fails (and frees original message). 3156 * Used in outgoing path when going through ip_newroute_*v6(). 3157 * Used in incoming path to pass ifindex to transports. 
3158 */ 3159 mblk_t * 3160 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3161 { 3162 mblk_t *mp1; 3163 ip6i_t *ip6i; 3164 ip6_t *ip6h; 3165 3166 ip6h = (ip6_t *)mp->b_rptr; 3167 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3168 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3169 mp->b_datap->db_ref > 1) { 3170 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3171 if (mp1 == NULL) { 3172 freemsg(mp); 3173 return (NULL); 3174 } 3175 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3176 mp1->b_cont = mp; 3177 mp = mp1; 3178 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3179 } 3180 mp->b_rptr = (uchar_t *)ip6i; 3181 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3182 ip6i->ip6i_nxt = IPPROTO_RAW; 3183 if (ill != NULL) { 3184 ip6i->ip6i_flags = IP6I_IFINDEX; 3185 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3186 } else { 3187 ip6i->ip6i_flags = 0; 3188 } 3189 ip6i->ip6i_nexthop = *dst; 3190 return (mp); 3191 } 3192 3193 /* 3194 * Handle protocols with which IP is less intimate. There 3195 * can be more than one stream bound to a particular 3196 * protocol. When this is the case, normally each one gets a copy 3197 * of any incoming packets. 3198 * However, if the packet was tunneled and not multicast we only send to it 3199 * the first match. 3200 * 3201 * Zones notes: 3202 * Packets will be distributed to streams in all zones. This is really only 3203 * useful for ICMPv6 as only applications in the global zone can create raw 3204 * sockets for other protocols. 
3205 */ 3206 static void 3207 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3208 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3209 boolean_t mctl_present, zoneid_t zoneid) 3210 { 3211 queue_t *rq; 3212 mblk_t *mp1, *first_mp1; 3213 in6_addr_t dst = ip6h->ip6_dst; 3214 in6_addr_t src = ip6h->ip6_src; 3215 boolean_t one_only; 3216 mblk_t *first_mp = mp; 3217 boolean_t secure, shared_addr; 3218 conn_t *connp, *first_connp, *next_connp; 3219 connf_t *connfp; 3220 3221 if (mctl_present) { 3222 mp = first_mp->b_cont; 3223 secure = ipsec_in_is_secure(first_mp); 3224 ASSERT(mp != NULL); 3225 } else { 3226 secure = B_FALSE; 3227 } 3228 3229 /* 3230 * If the packet was tunneled and not multicast we only send to it 3231 * the first match. 3232 */ 3233 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3234 !IN6_IS_ADDR_MULTICAST(&dst)); 3235 3236 shared_addr = (zoneid == ALL_ZONES); 3237 if (shared_addr) { 3238 /* 3239 * We don't allow multilevel ports for raw IP, so no need to 3240 * check for that here. 3241 */ 3242 zoneid = tsol_packet_to_zoneid(mp); 3243 } 3244 3245 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3246 mutex_enter(&connfp->connf_lock); 3247 connp = connfp->connf_head; 3248 for (connp = connfp->connf_head; connp != NULL; 3249 connp = connp->conn_next) { 3250 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3251 zoneid) && 3252 (!is_system_labeled() || 3253 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3254 connp))) 3255 break; 3256 } 3257 3258 if (connp == NULL || connp->conn_upq == NULL) { 3259 /* 3260 * No one bound to this port. Is 3261 * there a client that wants all 3262 * unclaimed datagrams? 
3263 */ 3264 mutex_exit(&connfp->connf_lock); 3265 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3266 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3267 nexthdr_offset, mctl_present, zoneid)) { 3268 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3269 } 3270 3271 return; 3272 } 3273 3274 CONN_INC_REF(connp); 3275 first_connp = connp; 3276 3277 /* 3278 * XXX: Fix the multiple protocol listeners case. We should not 3279 * be walking the conn->next list here. 3280 */ 3281 if (one_only) { 3282 /* 3283 * Only send message to one tunnel driver by immediately 3284 * terminating the loop. 3285 */ 3286 connp = NULL; 3287 } else { 3288 connp = connp->conn_next; 3289 3290 } 3291 for (;;) { 3292 while (connp != NULL) { 3293 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3294 flags, zoneid) && 3295 (!is_system_labeled() || 3296 tsol_receive_local(mp, &dst, IPV6_VERSION, 3297 shared_addr, connp))) 3298 break; 3299 connp = connp->conn_next; 3300 } 3301 3302 /* 3303 * Just copy the data part alone. The mctl part is 3304 * needed just for verifying policy and it is never 3305 * sent up. 3306 */ 3307 if (connp == NULL || connp->conn_upq == NULL || 3308 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3309 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3310 /* 3311 * No more intested clients or memory 3312 * allocation failed 3313 */ 3314 connp = first_connp; 3315 break; 3316 } 3317 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3318 CONN_INC_REF(connp); 3319 mutex_exit(&connfp->connf_lock); 3320 rq = connp->conn_rq; 3321 /* 3322 * For link-local always add ifindex so that transport can set 3323 * sin6_scope_id. Avoid it for ICMP error fanout. 
3324 */ 3325 if ((connp->conn_ipv6_recvpktinfo || 3326 IN6_IS_ADDR_LINKLOCAL(&src)) && 3327 (flags & IP_FF_IP6INFO)) { 3328 /* Add header */ 3329 mp1 = ip_add_info_v6(mp1, inill, &dst); 3330 } 3331 if (mp1 == NULL) { 3332 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3333 } else if (!canputnext(rq)) { 3334 if (flags & IP_FF_RAWIP) { 3335 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3336 } else { 3337 BUMP_MIB(ill->ill_icmp6_mib, 3338 ipv6IfIcmpInOverflows); 3339 } 3340 3341 freemsg(mp1); 3342 } else { 3343 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3344 first_mp1 = ipsec_check_inbound_policy 3345 (first_mp1, connp, NULL, ip6h, 3346 mctl_present); 3347 } 3348 if (first_mp1 != NULL) { 3349 if (mctl_present) 3350 freeb(first_mp1); 3351 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3352 putnext(rq, mp1); 3353 } 3354 } 3355 mutex_enter(&connfp->connf_lock); 3356 /* Follow the next pointer before releasing the conn. */ 3357 next_connp = connp->conn_next; 3358 CONN_DEC_REF(connp); 3359 connp = next_connp; 3360 } 3361 3362 /* Last one. Send it upstream. */ 3363 mutex_exit(&connfp->connf_lock); 3364 3365 /* Initiate IPPF processing */ 3366 if (IP6_IN_IPP(flags)) { 3367 uint_t ifindex; 3368 3369 mutex_enter(&ill->ill_lock); 3370 ifindex = ill->ill_phyint->phyint_ifindex; 3371 mutex_exit(&ill->ill_lock); 3372 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3373 if (mp == NULL) { 3374 CONN_DEC_REF(connp); 3375 if (mctl_present) 3376 freeb(first_mp); 3377 return; 3378 } 3379 } 3380 3381 /* 3382 * For link-local always add ifindex so that transport can set 3383 * sin6_scope_id. Avoid it for ICMP error fanout. 
3384 */ 3385 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3386 (flags & IP_FF_IP6INFO)) { 3387 /* Add header */ 3388 mp = ip_add_info_v6(mp, inill, &dst); 3389 if (mp == NULL) { 3390 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3391 CONN_DEC_REF(connp); 3392 if (mctl_present) 3393 freeb(first_mp); 3394 return; 3395 } else if (mctl_present) { 3396 first_mp->b_cont = mp; 3397 } else { 3398 first_mp = mp; 3399 } 3400 } 3401 3402 rq = connp->conn_rq; 3403 if (!canputnext(rq)) { 3404 if (flags & IP_FF_RAWIP) { 3405 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3406 } else { 3407 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3408 } 3409 3410 freemsg(first_mp); 3411 } else { 3412 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3413 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3414 NULL, ip6h, mctl_present); 3415 if (first_mp == NULL) { 3416 CONN_DEC_REF(connp); 3417 return; 3418 } 3419 } 3420 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3421 putnext(rq, mp); 3422 if (mctl_present) 3423 freeb(first_mp); 3424 } 3425 CONN_DEC_REF(connp); 3426 } 3427 3428 /* 3429 * Send an ICMP error after patching up the packet appropriately. Returns 3430 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3431 */ 3432 int 3433 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3434 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3435 boolean_t mctl_present, zoneid_t zoneid) 3436 { 3437 ip6_t *ip6h; 3438 mblk_t *first_mp; 3439 boolean_t secure; 3440 unsigned char db_type; 3441 3442 first_mp = mp; 3443 if (mctl_present) { 3444 mp = mp->b_cont; 3445 secure = ipsec_in_is_secure(first_mp); 3446 ASSERT(mp != NULL); 3447 } else { 3448 /* 3449 * If this is an ICMP error being reported - which goes 3450 * up as M_CTLs, we need to convert them to M_DATA till 3451 * we finish checking with global policy because 3452 * ipsec_check_global_policy() assumes M_DATA as clear 3453 * and M_CTL as secure. 
3454 */ 3455 db_type = mp->b_datap->db_type; 3456 mp->b_datap->db_type = M_DATA; 3457 secure = B_FALSE; 3458 } 3459 /* 3460 * We are generating an icmp error for some inbound packet. 3461 * Called from all ip_fanout_(udp, tcp, proto) functions. 3462 * Before we generate an error, check with global policy 3463 * to see whether this is allowed to enter the system. As 3464 * there is no "conn", we are checking with global policy. 3465 */ 3466 ip6h = (ip6_t *)mp->b_rptr; 3467 if (secure || ipsec_inbound_v6_policy_present) { 3468 first_mp = ipsec_check_global_policy(first_mp, NULL, 3469 NULL, ip6h, mctl_present); 3470 if (first_mp == NULL) 3471 return (0); 3472 } 3473 3474 if (!mctl_present) 3475 mp->b_datap->db_type = db_type; 3476 3477 if (flags & IP_FF_SEND_ICMP) { 3478 if (flags & IP_FF_HDR_COMPLETE) { 3479 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3480 freemsg(first_mp); 3481 return (1); 3482 } 3483 } 3484 switch (icmp_type) { 3485 case ICMP6_DST_UNREACH: 3486 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3487 B_FALSE, B_FALSE); 3488 break; 3489 case ICMP6_PARAM_PROB: 3490 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3491 nexthdr_offset, B_FALSE, B_FALSE); 3492 break; 3493 default: 3494 #ifdef DEBUG 3495 panic("ip_fanout_send_icmp_v6: wrong type"); 3496 /*NOTREACHED*/ 3497 #else 3498 freemsg(first_mp); 3499 break; 3500 #endif 3501 } 3502 } else { 3503 freemsg(first_mp); 3504 return (0); 3505 } 3506 3507 return (1); 3508 } 3509 3510 3511 /* 3512 * Fanout for TCP packets 3513 * The caller puts <fport, lport> in the ports parameter. 
3514 */ 3515 static void 3516 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3517 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3518 { 3519 mblk_t *first_mp; 3520 boolean_t secure; 3521 conn_t *connp; 3522 tcph_t *tcph; 3523 boolean_t syn_present = B_FALSE; 3524 3525 first_mp = mp; 3526 if (mctl_present) { 3527 mp = first_mp->b_cont; 3528 secure = ipsec_in_is_secure(first_mp); 3529 ASSERT(mp != NULL); 3530 } else { 3531 secure = B_FALSE; 3532 } 3533 3534 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3535 3536 if (connp == NULL || 3537 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3538 /* 3539 * No hard-bound match. Send Reset. 3540 */ 3541 dblk_t *dp = mp->b_datap; 3542 uint32_t ill_index; 3543 3544 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3545 3546 /* Initiate IPPf processing, if needed. */ 3547 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3548 ill_index = ill->ill_phyint->phyint_ifindex; 3549 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3550 if (first_mp == NULL) { 3551 if (connp != NULL) 3552 CONN_DEC_REF(connp); 3553 return; 3554 } 3555 } 3556 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3557 tcp_xmit_listeners_reset(first_mp, hdr_len); 3558 if (connp != NULL) 3559 CONN_DEC_REF(connp); 3560 return; 3561 } 3562 3563 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3564 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3565 if (connp->conn_flags & IPCL_TCP) { 3566 squeue_t *sqp; 3567 3568 /* 3569 * For fused tcp loopback, assign the eager's 3570 * squeue to be that of the active connect's. 
3571 */ 3572 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3573 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3574 !IP6_IN_IPP(flags)) { 3575 ASSERT(Q_TO_CONN(q) != NULL); 3576 sqp = Q_TO_CONN(q)->conn_sqp; 3577 } else { 3578 sqp = IP_SQUEUE_GET(lbolt); 3579 } 3580 3581 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3582 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3583 3584 /* 3585 * db_cksumstuff is unused in the incoming 3586 * path; Thus store the ifindex here. It will 3587 * be cleared in tcp_conn_create_v6(). 3588 */ 3589 DB_CKSUMSTUFF(mp) = 3590 (intptr_t)ill->ill_phyint->phyint_ifindex; 3591 syn_present = B_TRUE; 3592 } 3593 } 3594 3595 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3596 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3597 if ((flags & TH_RST) || (flags & TH_URG)) { 3598 CONN_DEC_REF(connp); 3599 freemsg(first_mp); 3600 return; 3601 } 3602 if (flags & TH_ACK) { 3603 tcp_xmit_listeners_reset(first_mp, hdr_len); 3604 CONN_DEC_REF(connp); 3605 return; 3606 } 3607 3608 CONN_DEC_REF(connp); 3609 freemsg(first_mp); 3610 return; 3611 } 3612 3613 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3614 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3615 NULL, ip6h, mctl_present); 3616 if (first_mp == NULL) { 3617 CONN_DEC_REF(connp); 3618 return; 3619 } 3620 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3621 ASSERT(syn_present); 3622 if (mctl_present) { 3623 ASSERT(first_mp != mp); 3624 first_mp->b_datap->db_struioflag |= 3625 STRUIO_POLICY; 3626 } else { 3627 ASSERT(first_mp == mp); 3628 mp->b_datap->db_struioflag &= 3629 ~STRUIO_EAGER; 3630 mp->b_datap->db_struioflag |= 3631 STRUIO_POLICY; 3632 } 3633 } else { 3634 /* 3635 * Discard first_mp early since we're dealing with a 3636 * fully-connected conn_t and tcp doesn't do policy in 3637 * this case. Also, if someone is bound to IPPROTO_TCP 3638 * over raw IP, they don't expect to see a M_CTL. 
3639 */ 3640 if (mctl_present) { 3641 freeb(first_mp); 3642 mctl_present = B_FALSE; 3643 } 3644 first_mp = mp; 3645 } 3646 } 3647 3648 /* Initiate IPPF processing */ 3649 if (IP6_IN_IPP(flags)) { 3650 uint_t ifindex; 3651 3652 mutex_enter(&ill->ill_lock); 3653 ifindex = ill->ill_phyint->phyint_ifindex; 3654 mutex_exit(&ill->ill_lock); 3655 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3656 if (mp == NULL) { 3657 CONN_DEC_REF(connp); 3658 if (mctl_present) { 3659 freeb(first_mp); 3660 } 3661 return; 3662 } else if (mctl_present) { 3663 /* 3664 * ip_add_info_v6 might return a new mp. 3665 */ 3666 ASSERT(first_mp != mp); 3667 first_mp->b_cont = mp; 3668 } else { 3669 first_mp = mp; 3670 } 3671 } 3672 3673 /* 3674 * For link-local always add ifindex so that TCP can bind to that 3675 * interface. Avoid it for ICMP error fanout. 3676 */ 3677 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3678 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3679 (flags & IP_FF_IP6INFO))) { 3680 /* Add header */ 3681 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3682 if (mp == NULL) { 3683 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3684 CONN_DEC_REF(connp); 3685 if (mctl_present) 3686 freeb(first_mp); 3687 return; 3688 } else if (mctl_present) { 3689 ASSERT(first_mp != mp); 3690 first_mp->b_cont = mp; 3691 } else { 3692 first_mp = mp; 3693 } 3694 } 3695 3696 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3697 if (IPCL_IS_TCP(connp)) { 3698 (*ip_input_proc)(connp->conn_sqp, first_mp, 3699 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3700 } else { 3701 putnext(connp->conn_rq, first_mp); 3702 CONN_DEC_REF(connp); 3703 } 3704 } 3705 3706 /* 3707 * Fanout for UDP packets. 3708 * The caller puts <fport, lport> in the ports parameter. 3709 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3710 * 3711 * If SO_REUSEADDR is set all multicast and broadcast packets 3712 * will be delivered to all streams bound to the same port. 
3713 * 3714 * Zones notes: 3715 * Multicast packets will be distributed to streams in all zones. 3716 */ 3717 static void 3718 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3719 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3720 zoneid_t zoneid) 3721 { 3722 uint32_t dstport, srcport; 3723 in6_addr_t dst; 3724 mblk_t *first_mp; 3725 boolean_t secure; 3726 conn_t *connp; 3727 connf_t *connfp; 3728 conn_t *first_conn; 3729 conn_t *next_conn; 3730 mblk_t *mp1, *first_mp1; 3731 in6_addr_t src; 3732 boolean_t shared_addr; 3733 3734 first_mp = mp; 3735 if (mctl_present) { 3736 mp = first_mp->b_cont; 3737 secure = ipsec_in_is_secure(first_mp); 3738 ASSERT(mp != NULL); 3739 } else { 3740 secure = B_FALSE; 3741 } 3742 3743 /* Extract ports in net byte order */ 3744 dstport = htons(ntohl(ports) & 0xFFFF); 3745 srcport = htons(ntohl(ports) >> 16); 3746 dst = ip6h->ip6_dst; 3747 src = ip6h->ip6_src; 3748 3749 shared_addr = (zoneid == ALL_ZONES); 3750 if (shared_addr) { 3751 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3752 /* 3753 * If no shared MLP is found, tsol_mlp_findzone returns 3754 * ALL_ZONES. In that case, we assume it's SLP, and 3755 * search for the zone based on the packet label. 3756 * That will also return ALL_ZONES on failure, but 3757 * we never allow conn_zoneid to be set to ALL_ZONES. 3758 */ 3759 if (zoneid == ALL_ZONES) 3760 zoneid = tsol_packet_to_zoneid(mp); 3761 } 3762 3763 /* Attempt to find a client stream based on destination port. */ 3764 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3765 mutex_enter(&connfp->connf_lock); 3766 connp = connfp->connf_head; 3767 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3768 /* 3769 * Not multicast. Send to the one (first) client we find. 
3770 */ 3771 while (connp != NULL) { 3772 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3773 src) && connp->conn_zoneid == zoneid && 3774 conn_wantpacket_v6(connp, ill, ip6h, 3775 flags, zoneid)) { 3776 break; 3777 } 3778 connp = connp->conn_next; 3779 } 3780 if (connp == NULL || connp->conn_upq == NULL) 3781 goto notfound; 3782 3783 if (is_system_labeled() && 3784 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3785 connp)) 3786 goto notfound; 3787 3788 /* Found a client */ 3789 CONN_INC_REF(connp); 3790 mutex_exit(&connfp->connf_lock); 3791 3792 if (CONN_UDP_FLOWCTLD(connp)) { 3793 freemsg(first_mp); 3794 CONN_DEC_REF(connp); 3795 return; 3796 } 3797 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3798 first_mp = ipsec_check_inbound_policy(first_mp, 3799 connp, NULL, ip6h, mctl_present); 3800 if (first_mp == NULL) { 3801 CONN_DEC_REF(connp); 3802 return; 3803 } 3804 } 3805 /* Initiate IPPF processing */ 3806 if (IP6_IN_IPP(flags)) { 3807 uint_t ifindex; 3808 3809 mutex_enter(&ill->ill_lock); 3810 ifindex = ill->ill_phyint->phyint_ifindex; 3811 mutex_exit(&ill->ill_lock); 3812 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3813 if (mp == NULL) { 3814 CONN_DEC_REF(connp); 3815 if (mctl_present) 3816 freeb(first_mp); 3817 return; 3818 } 3819 } 3820 /* 3821 * For link-local always add ifindex so that 3822 * transport can set sin6_scope_id. Avoid it for 3823 * ICMP error fanout. 
3824 */ 3825 if ((connp->conn_ipv6_recvpktinfo || 3826 IN6_IS_ADDR_LINKLOCAL(&src)) && 3827 (flags & IP_FF_IP6INFO)) { 3828 /* Add header */ 3829 mp = ip_add_info_v6(mp, inill, &dst); 3830 if (mp == NULL) { 3831 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3832 CONN_DEC_REF(connp); 3833 if (mctl_present) 3834 freeb(first_mp); 3835 return; 3836 } else if (mctl_present) { 3837 first_mp->b_cont = mp; 3838 } else { 3839 first_mp = mp; 3840 } 3841 } 3842 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3843 3844 /* Send it upstream */ 3845 CONN_UDP_RECV(connp, mp); 3846 3847 IP6_STAT(ip6_udp_fannorm); 3848 CONN_DEC_REF(connp); 3849 if (mctl_present) 3850 freeb(first_mp); 3851 return; 3852 } 3853 3854 while (connp != NULL) { 3855 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3856 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3857 (!is_system_labeled() || 3858 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3859 connp))) 3860 break; 3861 connp = connp->conn_next; 3862 } 3863 3864 if (connp == NULL || connp->conn_upq == NULL) 3865 goto notfound; 3866 3867 first_conn = connp; 3868 3869 CONN_INC_REF(connp); 3870 connp = connp->conn_next; 3871 for (;;) { 3872 while (connp != NULL) { 3873 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3874 src) && conn_wantpacket_v6(connp, ill, ip6h, 3875 flags, zoneid) && 3876 (!is_system_labeled() || 3877 tsol_receive_local(mp, &dst, IPV6_VERSION, 3878 shared_addr, connp))) 3879 break; 3880 connp = connp->conn_next; 3881 } 3882 /* 3883 * Just copy the data part alone. The mctl part is 3884 * needed just for verifying policy and it is never 3885 * sent up. 3886 */ 3887 if (connp == NULL || 3888 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3889 ((first_mp1 = ip_copymsg(first_mp)) 3890 == NULL))) { 3891 /* 3892 * No more interested clients or memory 3893 * allocation failed 3894 */ 3895 connp = first_conn; 3896 break; 3897 } 3898 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3899 CONN_INC_REF(connp); 3900 mutex_exit(&connfp->connf_lock); 3901 /* 3902 * For link-local always add ifindex so that transport 3903 * can set sin6_scope_id. Avoid it for ICMP error 3904 * fanout. 3905 */ 3906 if ((connp->conn_ipv6_recvpktinfo || 3907 IN6_IS_ADDR_LINKLOCAL(&src)) && 3908 (flags & IP_FF_IP6INFO)) { 3909 /* Add header */ 3910 mp1 = ip_add_info_v6(mp1, inill, &dst); 3911 } 3912 /* mp1 could have changed */ 3913 if (mctl_present) 3914 first_mp1->b_cont = mp1; 3915 else 3916 first_mp1 = mp1; 3917 if (mp1 == NULL) { 3918 if (mctl_present) 3919 freeb(first_mp1); 3920 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3921 goto next_one; 3922 } 3923 if (CONN_UDP_FLOWCTLD(connp)) { 3924 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3925 freemsg(first_mp1); 3926 goto next_one; 3927 } 3928 3929 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3930 secure) { 3931 first_mp1 = ipsec_check_inbound_policy 3932 (first_mp1, connp, NULL, ip6h, 3933 mctl_present); 3934 } 3935 if (first_mp1 != NULL) { 3936 if (mctl_present) 3937 freeb(first_mp1); 3938 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3939 3940 /* Send it upstream */ 3941 CONN_UDP_RECV(connp, mp1); 3942 } 3943 next_one: 3944 mutex_enter(&connfp->connf_lock); 3945 /* Follow the next pointer before releasing the conn. */ 3946 next_conn = connp->conn_next; 3947 IP6_STAT(ip6_udp_fanmb); 3948 CONN_DEC_REF(connp); 3949 connp = next_conn; 3950 } 3951 3952 /* Last one. Send it upstream. 
*/ 3953 mutex_exit(&connfp->connf_lock); 3954 3955 /* Initiate IPPF processing */ 3956 if (IP6_IN_IPP(flags)) { 3957 uint_t ifindex; 3958 3959 mutex_enter(&ill->ill_lock); 3960 ifindex = ill->ill_phyint->phyint_ifindex; 3961 mutex_exit(&ill->ill_lock); 3962 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3963 if (mp == NULL) { 3964 CONN_DEC_REF(connp); 3965 if (mctl_present) { 3966 freeb(first_mp); 3967 } 3968 return; 3969 } 3970 } 3971 3972 /* 3973 * For link-local always add ifindex so that transport can set 3974 * sin6_scope_id. Avoid it for ICMP error fanout. 3975 */ 3976 if ((connp->conn_ipv6_recvpktinfo || 3977 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3978 /* Add header */ 3979 mp = ip_add_info_v6(mp, inill, &dst); 3980 if (mp == NULL) { 3981 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3982 CONN_DEC_REF(connp); 3983 if (mctl_present) 3984 freeb(first_mp); 3985 return; 3986 } else if (mctl_present) { 3987 first_mp->b_cont = mp; 3988 } else { 3989 first_mp = mp; 3990 } 3991 } 3992 if (CONN_UDP_FLOWCTLD(connp)) { 3993 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3994 freemsg(mp); 3995 } else { 3996 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3997 first_mp = ipsec_check_inbound_policy(first_mp, 3998 connp, NULL, ip6h, mctl_present); 3999 if (first_mp == NULL) { 4000 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 4001 CONN_DEC_REF(connp); 4002 return; 4003 } 4004 } 4005 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 4006 4007 /* Send it upstream */ 4008 CONN_UDP_RECV(connp, mp); 4009 } 4010 IP6_STAT(ip6_udp_fanmb); 4011 CONN_DEC_REF(connp); 4012 if (mctl_present) 4013 freeb(first_mp); 4014 return; 4015 4016 notfound: 4017 mutex_exit(&connfp->connf_lock); 4018 /* 4019 * No one bound to this port. Is 4020 * there a client that wants all 4021 * unclaimed datagrams? 
4022 */ 4023 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4024 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4025 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 4026 zoneid); 4027 } else { 4028 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4029 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4030 mctl_present, zoneid)) { 4031 BUMP_MIB(&ip_mib, udpNoPorts); 4032 } 4033 } 4034 } 4035 4036 /* 4037 * int ip_find_hdr_v6() 4038 * 4039 * This routine is used by the upper layer protocols and the IP tunnel 4040 * module to: 4041 * - Set extension header pointers to appropriate locations 4042 * - Determine IPv6 header length and return it 4043 * - Return a pointer to the last nexthdr value 4044 * 4045 * The caller must initialize ipp_fields. 4046 * 4047 * NOTE: If multiple extension headers of the same type are present, 4048 * ip_find_hdr_v6() will set the respective extension header pointers 4049 * to the first one that it encounters in the IPv6 header. It also 4050 * skips fragment headers. This routine deals with malformed packets 4051 * of various sorts in which case the returned length is up to the 4052 * malformed part. 4053 */ 4054 int 4055 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4056 { 4057 uint_t length, ehdrlen; 4058 uint8_t nexthdr; 4059 uint8_t *whereptr, *endptr; 4060 ip6_dest_t *tmpdstopts; 4061 ip6_rthdr_t *tmprthdr; 4062 ip6_hbh_t *tmphopopts; 4063 ip6_frag_t *tmpfraghdr; 4064 4065 length = IPV6_HDR_LEN; 4066 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4067 endptr = mp->b_wptr; 4068 4069 nexthdr = ip6h->ip6_nxt; 4070 while (whereptr < endptr) { 4071 /* Is there enough left for len + nexthdr? 
*/ 4072 if (whereptr + MIN_EHDR_LEN > endptr) 4073 goto done; 4074 4075 switch (nexthdr) { 4076 case IPPROTO_HOPOPTS: 4077 tmphopopts = (ip6_hbh_t *)whereptr; 4078 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4079 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4080 goto done; 4081 nexthdr = tmphopopts->ip6h_nxt; 4082 /* return only 1st hbh */ 4083 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4084 ipp->ipp_fields |= IPPF_HOPOPTS; 4085 ipp->ipp_hopopts = tmphopopts; 4086 ipp->ipp_hopoptslen = ehdrlen; 4087 } 4088 break; 4089 case IPPROTO_DSTOPTS: 4090 tmpdstopts = (ip6_dest_t *)whereptr; 4091 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4092 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4093 goto done; 4094 nexthdr = tmpdstopts->ip6d_nxt; 4095 /* 4096 * ipp_dstopts is set to the destination header after a 4097 * routing header. 4098 * Assume it is a post-rthdr destination header 4099 * and adjust when we find an rthdr. 4100 */ 4101 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4102 ipp->ipp_fields |= IPPF_DSTOPTS; 4103 ipp->ipp_dstopts = tmpdstopts; 4104 ipp->ipp_dstoptslen = ehdrlen; 4105 } 4106 break; 4107 case IPPROTO_ROUTING: 4108 tmprthdr = (ip6_rthdr_t *)whereptr; 4109 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4110 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4111 goto done; 4112 nexthdr = tmprthdr->ip6r_nxt; 4113 /* return only 1st rthdr */ 4114 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4115 ipp->ipp_fields |= IPPF_RTHDR; 4116 ipp->ipp_rthdr = tmprthdr; 4117 ipp->ipp_rthdrlen = ehdrlen; 4118 } 4119 /* 4120 * Make any destination header we've seen be a 4121 * pre-rthdr destination header. 4122 */ 4123 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4124 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4125 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4126 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4127 ipp->ipp_dstopts = NULL; 4128 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4129 ipp->ipp_dstoptslen = 0; 4130 } 4131 break; 4132 case IPPROTO_FRAGMENT: 4133 /* 4134 * Fragment headers are skipped. 
Currently, only 4135 * IP cares for their existence. If anyone other 4136 * than IP ever has the need to know about the 4137 * location of fragment headers, support can be 4138 * added to the ip6_pkt_t at that time. 4139 */ 4140 tmpfraghdr = (ip6_frag_t *)whereptr; 4141 ehdrlen = sizeof (ip6_frag_t); 4142 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4143 goto done; 4144 nexthdr = tmpfraghdr->ip6f_nxt; 4145 break; 4146 case IPPROTO_NONE: 4147 default: 4148 goto done; 4149 } 4150 length += ehdrlen; 4151 whereptr += ehdrlen; 4152 } 4153 done: 4154 if (nexthdrp != NULL) 4155 *nexthdrp = nexthdr; 4156 return (length); 4157 } 4158 4159 int 4160 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4161 { 4162 ire_t *ire; 4163 4164 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4165 ire = ire_lookup_local_v6(zoneid); 4166 if (ire == NULL) { 4167 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4168 return (1); 4169 } 4170 ip6h->ip6_src = ire->ire_addr_v6; 4171 ire_refrele(ire); 4172 } 4173 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4174 ip6h->ip6_hops = ipv6_def_hops; 4175 return (0); 4176 } 4177 4178 /* 4179 * Try to determine where and what are the IPv6 header length and 4180 * pointer to nexthdr value for the upper layer protocol (or an 4181 * unknown next hdr). 4182 * 4183 * Parameters returns a pointer to the nexthdr value; 4184 * Must handle malformed packets of various sorts. 4185 * Function returns failure for malformed cases. 
4186 */ 4187 boolean_t 4188 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4189 uint8_t **nexthdrpp) 4190 { 4191 uint16_t length; 4192 uint_t ehdrlen; 4193 uint8_t *nexthdrp; 4194 uint8_t *whereptr; 4195 uint8_t *endptr; 4196 ip6_dest_t *desthdr; 4197 ip6_rthdr_t *rthdr; 4198 ip6_frag_t *fraghdr; 4199 4200 length = IPV6_HDR_LEN; 4201 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4202 endptr = mp->b_wptr; 4203 4204 nexthdrp = &ip6h->ip6_nxt; 4205 while (whereptr < endptr) { 4206 /* Is there enough left for len + nexthdr? */ 4207 if (whereptr + MIN_EHDR_LEN > endptr) 4208 break; 4209 4210 switch (*nexthdrp) { 4211 case IPPROTO_HOPOPTS: 4212 case IPPROTO_DSTOPTS: 4213 /* Assumes the headers are identical for hbh and dst */ 4214 desthdr = (ip6_dest_t *)whereptr; 4215 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4216 if ((uchar_t *)desthdr + ehdrlen > endptr) 4217 return (B_FALSE); 4218 nexthdrp = &desthdr->ip6d_nxt; 4219 break; 4220 case IPPROTO_ROUTING: 4221 rthdr = (ip6_rthdr_t *)whereptr; 4222 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4223 if ((uchar_t *)rthdr + ehdrlen > endptr) 4224 return (B_FALSE); 4225 nexthdrp = &rthdr->ip6r_nxt; 4226 break; 4227 case IPPROTO_FRAGMENT: 4228 fraghdr = (ip6_frag_t *)whereptr; 4229 ehdrlen = sizeof (ip6_frag_t); 4230 if ((uchar_t *)&fraghdr[1] > endptr) 4231 return (B_FALSE); 4232 nexthdrp = &fraghdr->ip6f_nxt; 4233 break; 4234 case IPPROTO_NONE: 4235 /* No next header means we're finished */ 4236 default: 4237 *hdr_length_ptr = length; 4238 *nexthdrpp = nexthdrp; 4239 return (B_TRUE); 4240 } 4241 length += ehdrlen; 4242 whereptr += ehdrlen; 4243 *hdr_length_ptr = length; 4244 *nexthdrpp = nexthdrp; 4245 } 4246 switch (*nexthdrp) { 4247 case IPPROTO_HOPOPTS: 4248 case IPPROTO_DSTOPTS: 4249 case IPPROTO_ROUTING: 4250 case IPPROTO_FRAGMENT: 4251 /* 4252 * If any know extension headers are still to be processed, 4253 * the packet's malformed (or at least all the IP header(s) are 4254 * not in the same 
mblk - and that should never happen. 4255 */ 4256 return (B_FALSE); 4257 4258 default: 4259 /* 4260 * If we get here, we know that all of the IP headers were in 4261 * the same mblk, even if the ULP header is in the next mblk. 4262 */ 4263 *hdr_length_ptr = length; 4264 *nexthdrpp = nexthdrp; 4265 return (B_TRUE); 4266 } 4267 } 4268 4269 /* 4270 * Return the length of the IPv6 related headers (including extension headers) 4271 * Returns a length even if the packet is malformed. 4272 */ 4273 int 4274 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4275 { 4276 uint16_t hdr_len; 4277 uint8_t *nexthdrp; 4278 4279 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4280 return (hdr_len); 4281 } 4282 4283 /* 4284 * Select an ill for the packet by considering load spreading across 4285 * a different ill in the group if dst_ill is part of some group. 4286 */ 4287 static ill_t * 4288 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4289 { 4290 ill_t *ill; 4291 4292 /* 4293 * We schedule irrespective of whether the source address is 4294 * INADDR_UNSPECIED or not. 4295 */ 4296 ill = illgrp_scheduler(dst_ill); 4297 if (ill == NULL) 4298 return (NULL); 4299 4300 /* 4301 * For groups with names ip_sioctl_groupname ensures that all 4302 * ills are of same type. For groups without names, ifgrp_insert 4303 * ensures this. 4304 */ 4305 ASSERT(dst_ill->ill_type == ill->ill_type); 4306 4307 return (ill); 4308 } 4309 4310 /* 4311 * IPv6 - 4312 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4313 * to send out a packet to a destination address for which we do not have 4314 * specific routing information. 4315 * 4316 * Handle non-multicast packets. If ill is non-NULL the match is done 4317 * for that ill. 4318 * 4319 * When a specific ill is specified (using IPV6_PKTINFO, 4320 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4321 * on routing entries (ftable and ctable) that have a matching 4322 * ire->ire_ipif->ipif_ill. 
 * Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4356 */ 4357 /* ARGSUSED */ 4358 void 4359 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4360 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4361 { 4362 in6_addr_t v6gw; 4363 in6_addr_t dst; 4364 ire_t *ire = NULL; 4365 ipif_t *src_ipif = NULL; 4366 ill_t *dst_ill = NULL; 4367 ire_t *sire = NULL; 4368 ire_t *save_ire; 4369 mblk_t *dlureq_mp; 4370 ip6_t *ip6h; 4371 int err = 0; 4372 mblk_t *first_mp; 4373 ipsec_out_t *io; 4374 ill_t *attach_ill = NULL; 4375 ushort_t ire_marks = 0; 4376 int match_flags; 4377 boolean_t ip6i_present; 4378 ire_t *first_sire = NULL; 4379 mblk_t *copy_mp = NULL; 4380 mblk_t *xmit_mp = NULL; 4381 in6_addr_t save_dst; 4382 uint32_t multirt_flags = 4383 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4384 boolean_t multirt_is_resolvable; 4385 boolean_t multirt_resolve_next; 4386 boolean_t need_rele = B_FALSE; 4387 boolean_t do_attach_ill = B_FALSE; 4388 boolean_t ip6_asp_table_held = B_FALSE; 4389 tsol_ire_gw_secattr_t *attrp = NULL; 4390 tsol_gcgrp_t *gcgrp = NULL; 4391 tsol_gcgrp_addr_t ga; 4392 4393 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4394 4395 first_mp = mp; 4396 if (mp->b_datap->db_type == M_CTL) { 4397 mp = mp->b_cont; 4398 io = (ipsec_out_t *)first_mp->b_rptr; 4399 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4400 } else { 4401 io = NULL; 4402 } 4403 4404 /* 4405 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4406 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4407 * could be NULL. 4408 * 4409 * This information can appear either in an ip6i_t or an IPSEC_OUT 4410 * message. 4411 */ 4412 ip6h = (ip6_t *)mp->b_rptr; 4413 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4414 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4415 if (!ip6i_present || 4416 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4417 attach_ill = ip_grab_attach_ill(ill, first_mp, 4418 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4419 io->ipsec_out_ill_index), B_TRUE); 4420 /* Failure case frees things for us. */ 4421 if (attach_ill == NULL) 4422 return; 4423 4424 /* 4425 * Check if we need an ire that will not be 4426 * looked up by anybody else i.e. HIDDEN. 4427 */ 4428 if (ill_is_probeonly(attach_ill)) 4429 ire_marks = IRE_MARK_HIDDEN; 4430 } 4431 } 4432 4433 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4434 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4435 goto icmp_err_ret; 4436 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4437 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4438 goto icmp_err_ret; 4439 } 4440 4441 /* 4442 * If this IRE is created for forwarding or it is not for 4443 * TCP traffic, mark it as temporary. 4444 * 4445 * Is it sufficient just to check the next header?? 4446 */ 4447 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4448 ire_marks |= IRE_MARK_TEMPORARY; 4449 4450 /* 4451 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4452 * chain until it gets the most specific information available. 4453 * For example, we know that there is no IRE_CACHE for this dest, 4454 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4455 * ire_ftable_lookup_v6 will look up the gateway, etc. 4456 */ 4457 4458 if (ill == NULL) { 4459 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4460 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4461 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4462 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4463 match_flags); 4464 /* 4465 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4466 * in a NULL ill, but the packet could be a neighbor 4467 * solicitation/advertisment and could have a valid attach_ill. 4468 */ 4469 if (attach_ill != NULL) 4470 ill_refrele(attach_ill); 4471 } else { 4472 if (attach_ill != NULL) { 4473 /* 4474 * attach_ill is set only for communicating with 4475 * on-link hosts. So, don't look for DEFAULT. 
4476 * ip_wput_v6 passes the right ill in this case and 4477 * hence we can assert. 4478 */ 4479 ASSERT(ill == attach_ill); 4480 ill_refrele(attach_ill); 4481 do_attach_ill = B_TRUE; 4482 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4483 } else { 4484 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4485 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4486 } 4487 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4488 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4489 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4490 } 4491 4492 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4493 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4494 4495 if (zoneid == ALL_ZONES && ire != NULL) { 4496 /* 4497 * In the forwarding case, we can use a route from any zone 4498 * since we won't change the source address. We can easily 4499 * assert that the source address is already set when there's no 4500 * ip6_info header - otherwise we'd have to call pullupmsg(). 4501 */ 4502 ASSERT(ip6i_present || 4503 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4504 zoneid = ire->ire_zoneid; 4505 } 4506 4507 /* 4508 * We enter a loop that will be run only once in most cases. 4509 * The loop is re-entered in the case where the destination 4510 * can be reached through multiple RTF_MULTIRT-flagged routes. 4511 * The intention is to compute multiple routes to a single 4512 * destination in a single ip_newroute_v6 call. 4513 * The information is contained in sire->ire_flags. 
4514 */ 4515 do { 4516 multirt_resolve_next = B_FALSE; 4517 4518 if (dst_ill != NULL) { 4519 ill_refrele(dst_ill); 4520 dst_ill = NULL; 4521 } 4522 if (src_ipif != NULL) { 4523 ipif_refrele(src_ipif); 4524 src_ipif = NULL; 4525 } 4526 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4527 ip3dbg(("ip_newroute_v6: starting new resolution " 4528 "with first_mp %p, tag %d\n", 4529 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4530 4531 /* 4532 * We check if there are trailing unresolved routes for 4533 * the destination contained in sire. 4534 */ 4535 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4536 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4537 4538 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4539 "ire %p, sire %p\n", 4540 multirt_is_resolvable, (void *)ire, (void *)sire)); 4541 4542 if (!multirt_is_resolvable) { 4543 /* 4544 * No more multirt routes to resolve; give up 4545 * (all routes resolved or no more resolvable 4546 * routes). 4547 */ 4548 if (ire != NULL) { 4549 ire_refrele(ire); 4550 ire = NULL; 4551 } 4552 } else { 4553 ASSERT(sire != NULL); 4554 ASSERT(ire != NULL); 4555 /* 4556 * We simply use first_sire as a flag that 4557 * indicates if a resolvable multirt route has 4558 * already been found during the preceding 4559 * loops. If it is not the case, we may have 4560 * to send an ICMP error to report that the 4561 * destination is unreachable. We do not 4562 * IRE_REFHOLD first_sire. 4563 */ 4564 if (first_sire == NULL) { 4565 first_sire = sire; 4566 } 4567 } 4568 } 4569 if ((ire == NULL) || (ire == sire)) { 4570 /* 4571 * either ire == NULL (the destination cannot be 4572 * resolved) or ire == sire (the gateway cannot be 4573 * resolved). At this point, there are no more routes 4574 * to resolve for the destination, thus we exit. 
4575 */ 4576 if (ip_debug > 3) { 4577 /* ip2dbg */ 4578 pr_addr_dbg("ip_newroute_v6: " 4579 "can't resolve %s\n", AF_INET6, v6dstp); 4580 } 4581 ip3dbg(("ip_newroute_v6: " 4582 "ire %p, sire %p, first_sire %p\n", 4583 (void *)ire, (void *)sire, (void *)first_sire)); 4584 4585 if (sire != NULL) { 4586 ire_refrele(sire); 4587 sire = NULL; 4588 } 4589 4590 if (first_sire != NULL) { 4591 /* 4592 * At least one multirt route has been found 4593 * in the same ip_newroute() call; there is no 4594 * need to report an ICMP error. 4595 * first_sire was not IRE_REFHOLDed. 4596 */ 4597 MULTIRT_DEBUG_UNTAG(first_mp); 4598 freemsg(first_mp); 4599 return; 4600 } 4601 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4602 RTA_DST); 4603 goto icmp_err_ret; 4604 } 4605 4606 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4607 4608 /* 4609 * Verify that the returned IRE does not have either the 4610 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4611 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4612 */ 4613 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4614 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4615 goto icmp_err_ret; 4616 4617 /* 4618 * Increment the ire_ob_pkt_count field for ire if it is an 4619 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4620 * increment the same for the parent IRE, sire, if it is some 4621 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4622 * and HOST_REDIRECT). 4623 */ 4624 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4625 UPDATE_OB_PKT_COUNT(ire); 4626 ire->ire_last_used_time = lbolt; 4627 } 4628 4629 if (sire != NULL) { 4630 mutex_enter(&sire->ire_lock); 4631 v6gw = sire->ire_gateway_addr_v6; 4632 mutex_exit(&sire->ire_lock); 4633 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4634 IRE_INTERFACE)) == 0); 4635 UPDATE_OB_PKT_COUNT(sire); 4636 sire->ire_last_used_time = lbolt; 4637 } else { 4638 v6gw = ipv6_all_zeros; 4639 } 4640 4641 /* 4642 * We have a route to reach the destination. 
4643 * 4644 * 1) If the interface is part of ill group, try to get a new 4645 * ill taking load spreading into account. 4646 * 4647 * 2) After selecting the ill, get a source address that might 4648 * create good inbound load spreading and that matches the 4649 * right scope. ipif_select_source_v6 does this for us. 4650 * 4651 * If the application specified the ill (ifindex), we still 4652 * load spread. Only if the packets needs to go out specifically 4653 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4654 * IPV6_BOUND_PIF we don't try to use a different ill for load 4655 * spreading. 4656 */ 4657 if (!do_attach_ill) { 4658 /* 4659 * If the interface belongs to an interface group, 4660 * make sure the next possible interface in the group 4661 * is used. This encourages load spreading among 4662 * peers in an interface group. However, in the case 4663 * of multirouting, load spreading is not used, as we 4664 * actually want to replicate outgoing packets through 4665 * particular interfaces. 4666 * 4667 * Note: While we pick a dst_ill we are really only 4668 * interested in the ill for load spreading. 4669 * The source ipif is determined by source address 4670 * selection below. 4671 */ 4672 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4673 dst_ill = ire->ire_ipif->ipif_ill; 4674 /* For uniformity do a refhold */ 4675 ill_refhold(dst_ill); 4676 } else { 4677 /* 4678 * If we are here trying to create an IRE_CACHE 4679 * for an offlink destination and have the 4680 * IRE_CACHE for the next hop and the latter is 4681 * using virtual IP source address selection i.e 4682 * it's ire->ire_ipif is pointing to a virtual 4683 * network interface (vni) then 4684 * ip_newroute_get_dst_ll() will return the vni 4685 * interface as the dst_ill. 
Since the vni is 4686 * virtual i.e not associated with any physical 4687 * interface, it cannot be the dst_ill, hence 4688 * in such a case call ip_newroute_get_dst_ll() 4689 * with the stq_ill instead of the ire_ipif ILL. 4690 * The function returns a refheld ill. 4691 */ 4692 if ((ire->ire_type == IRE_CACHE) && 4693 IS_VNI(ire->ire_ipif->ipif_ill)) 4694 dst_ill = ip_newroute_get_dst_ill_v6( 4695 ire->ire_stq->q_ptr); 4696 else 4697 dst_ill = ip_newroute_get_dst_ill_v6( 4698 ire->ire_ipif->ipif_ill); 4699 } 4700 if (dst_ill == NULL) { 4701 if (ip_debug > 2) { 4702 pr_addr_dbg("ip_newroute_v6 : no dst " 4703 "ill for dst %s\n", 4704 AF_INET6, v6dstp); 4705 } 4706 goto icmp_err_ret; 4707 } else if (dst_ill->ill_group == NULL && ill != NULL && 4708 dst_ill != ill) { 4709 /* 4710 * If "ill" is not part of any group, we should 4711 * have found a route matching "ill" as we 4712 * called ire_ftable_lookup_v6 with 4713 * MATCH_IRE_ILL_GROUP. 4714 * Rather than asserting when there is a 4715 * mismatch, we just drop the packet. 4716 */ 4717 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4718 "dst_ill %s ill %s\n", 4719 dst_ill->ill_name, 4720 ill->ill_name)); 4721 goto icmp_err_ret; 4722 } 4723 } else { 4724 dst_ill = ire->ire_ipif->ipif_ill; 4725 /* For uniformity do refhold */ 4726 ill_refhold(dst_ill); 4727 /* 4728 * We should have found a route matching ill as we 4729 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4730 * Rather than asserting, while there is a mismatch, 4731 * we just drop the packet. 4732 */ 4733 if (dst_ill != ill) { 4734 ip0dbg(("ip_newroute_v6: Packet dropped as " 4735 "IP6I_ATTACH_IF ill is %s, " 4736 "ire->ire_ipif->ipif_ill is %s\n", 4737 ill->ill_name, 4738 dst_ill->ill_name)); 4739 goto icmp_err_ret; 4740 } 4741 } 4742 /* 4743 * Pick a source address which matches the scope of the 4744 * destination address. 4745 * For RTF_SETSRC routes, the source address is imposed by the 4746 * parent ire (sire). 
4747 */ 4748 ASSERT(src_ipif == NULL); 4749 if (ire->ire_type == IRE_IF_RESOLVER && 4750 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4751 ip6_asp_can_lookup()) { 4752 /* 4753 * The ire cache entry we're adding is for the 4754 * gateway itself. The source address in this case 4755 * is relative to the gateway's address. 4756 */ 4757 ip6_asp_table_held = B_TRUE; 4758 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4759 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4760 if (src_ipif != NULL) 4761 ire_marks |= IRE_MARK_USESRC_CHECK; 4762 } else { 4763 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4764 /* 4765 * Check that the ipif matching the requested 4766 * source address still exists. 4767 */ 4768 src_ipif = ipif_lookup_addr_v6( 4769 &sire->ire_src_addr_v6, NULL, zoneid, 4770 NULL, NULL, NULL, NULL); 4771 } 4772 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4773 uint_t restrict_ill = RESTRICT_TO_NONE; 4774 4775 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4776 & IP6I_ATTACH_IF) 4777 restrict_ill = RESTRICT_TO_ILL; 4778 ip6_asp_table_held = B_TRUE; 4779 src_ipif = ipif_select_source_v6(dst_ill, 4780 v6dstp, restrict_ill, 4781 IPV6_PREFER_SRC_DEFAULT, zoneid); 4782 if (src_ipif != NULL) 4783 ire_marks |= IRE_MARK_USESRC_CHECK; 4784 } 4785 } 4786 4787 if (src_ipif == NULL) { 4788 if (ip_debug > 2) { 4789 /* ip1dbg */ 4790 pr_addr_dbg("ip_newroute_v6: no src for " 4791 "dst %s\n, ", AF_INET6, v6dstp); 4792 printf("ip_newroute_v6: interface name %s\n", 4793 dst_ill->ill_name); 4794 } 4795 goto icmp_err_ret; 4796 } 4797 4798 if (ip_debug > 3) { 4799 /* ip2dbg */ 4800 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4801 AF_INET6, &v6gw); 4802 } 4803 ip2dbg(("\tire type %s (%d)\n", 4804 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4805 4806 /* 4807 * At this point in ip_newroute_v6(), ire is either the 4808 * IRE_CACHE of the next-hop gateway for an off-subnet 4809 * destination or an IRE_INTERFACE type that should be used 4810 * to resolve an 
on-subnet destination or an on-subnet 4811 * next-hop gateway. 4812 * 4813 * In the IRE_CACHE case, we have the following : 4814 * 4815 * 1) src_ipif - used for getting a source address. 4816 * 4817 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4818 * means packets using this IRE_CACHE will go out on dst_ill. 4819 * 4820 * 3) The IRE sire will point to the prefix that is the longest 4821 * matching route for the destination. These prefix types 4822 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4823 * IRE_HOST_REDIRECT. 4824 * 4825 * The newly created IRE_CACHE entry for the off-subnet 4826 * destination is tied to both the prefix route and the 4827 * interface route used to resolve the next-hop gateway 4828 * via the ire_phandle and ire_ihandle fields, respectively. 4829 * 4830 * In the IRE_INTERFACE case, we have the following : 4831 * 4832 * 1) src_ipif - used for getting a source address. 4833 * 4834 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4835 * means packets using the IRE_CACHE that we will build 4836 * here will go out on dst_ill. 4837 * 4838 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4839 * to be created will only be tied to the IRE_INTERFACE that 4840 * was derived from the ire_ihandle field. 4841 * 4842 * If sire is non-NULL, it means the destination is off-link 4843 * and we will first create the IRE_CACHE for the gateway. 4844 * Next time through ip_newroute_v6, we will create the 4845 * IRE_CACHE for the final destination as described above. 4846 */ 4847 save_ire = ire; 4848 switch (ire->ire_type) { 4849 case IRE_CACHE: { 4850 ire_t *ipif_ire; 4851 4852 ASSERT(sire != NULL); 4853 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4854 mutex_enter(&ire->ire_lock); 4855 v6gw = ire->ire_gateway_addr_v6; 4856 mutex_exit(&ire->ire_lock); 4857 } 4858 /* 4859 * We need 3 ire's to create a new cache ire for an 4860 * off-link destination from the cache ire of the 4861 * gateway. 4862 * 4863 * 1. 
The prefix ire 'sire' 4864 * 2. The cache ire of the gateway 'ire' 4865 * 3. The interface ire 'ipif_ire' 4866 * 4867 * We have (1) and (2). We lookup (3) below. 4868 * 4869 * If there is no interface route to the gateway, 4870 * it is a race condition, where we found the cache 4871 * but the inteface route has been deleted. 4872 */ 4873 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4874 if (ipif_ire == NULL) { 4875 ip1dbg(("ip_newroute_v6:" 4876 "ire_ihandle_lookup_offlink_v6 failed\n")); 4877 goto icmp_err_ret; 4878 } 4879 /* 4880 * Assume DL_UNITDATA_REQ is same for all physical 4881 * interfaces in the ifgrp. If it isn't, this code will 4882 * have to be seriously rewhacked to allow the 4883 * fastpath probing (such that I cache the link 4884 * header in the IRE_CACHE) to work over ifgrps. 4885 * We have what we need to build an IRE_CACHE. 4886 */ 4887 /* 4888 * Note: the new ire inherits RTF_SETSRC 4889 * and RTF_MULTIRT to propagate these flags from prefix 4890 * to cache. 4891 */ 4892 4893 /* 4894 * Check cached gateway IRE for any security 4895 * attributes; if found, associate the gateway 4896 * credentials group to the destination IRE. 
4897 */ 4898 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4899 mutex_enter(&attrp->igsa_lock); 4900 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4901 GCGRP_REFHOLD(gcgrp); 4902 mutex_exit(&attrp->igsa_lock); 4903 } 4904 4905 ire = ire_create_v6( 4906 v6dstp, /* dest address */ 4907 &ipv6_all_ones, /* mask */ 4908 &src_ipif->ipif_v6src_addr, /* source address */ 4909 &v6gw, /* gateway address */ 4910 &save_ire->ire_max_frag, 4911 NULL, /* Fast Path header */ 4912 dst_ill->ill_rq, /* recv-from queue */ 4913 dst_ill->ill_wq, /* send-to queue */ 4914 IRE_CACHE, 4915 NULL, 4916 src_ipif, 4917 &sire->ire_mask_v6, /* Parent mask */ 4918 sire->ire_phandle, /* Parent handle */ 4919 ipif_ire->ire_ihandle, /* Interface handle */ 4920 sire->ire_flags & /* flags if any */ 4921 (RTF_SETSRC | RTF_MULTIRT), 4922 &(sire->ire_uinfo), 4923 NULL, 4924 gcgrp); 4925 4926 if (ire == NULL) { 4927 if (gcgrp != NULL) { 4928 GCGRP_REFRELE(gcgrp); 4929 gcgrp = NULL; 4930 } 4931 ire_refrele(save_ire); 4932 ire_refrele(ipif_ire); 4933 break; 4934 } 4935 4936 /* reference now held by IRE */ 4937 gcgrp = NULL; 4938 4939 ire->ire_marks |= ire_marks; 4940 4941 /* 4942 * Prevent sire and ipif_ire from getting deleted. The 4943 * newly created ire is tied to both of them via the 4944 * phandle and ihandle respectively. 4945 */ 4946 IRB_REFHOLD(sire->ire_bucket); 4947 /* Has it been removed already ? */ 4948 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4949 IRB_REFRELE(sire->ire_bucket); 4950 ire_refrele(ipif_ire); 4951 ire_refrele(save_ire); 4952 break; 4953 } 4954 4955 IRB_REFHOLD(ipif_ire->ire_bucket); 4956 /* Has it been removed already ? 
*/ 4957 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4958 IRB_REFRELE(ipif_ire->ire_bucket); 4959 IRB_REFRELE(sire->ire_bucket); 4960 ire_refrele(ipif_ire); 4961 ire_refrele(save_ire); 4962 break; 4963 } 4964 4965 xmit_mp = first_mp; 4966 if (ire->ire_flags & RTF_MULTIRT) { 4967 copy_mp = copymsg(first_mp); 4968 if (copy_mp != NULL) { 4969 xmit_mp = copy_mp; 4970 MULTIRT_DEBUG_TAG(first_mp); 4971 } 4972 } 4973 ire_add_then_send(q, ire, xmit_mp); 4974 if (ip6_asp_table_held) { 4975 ip6_asp_table_refrele(); 4976 ip6_asp_table_held = B_FALSE; 4977 } 4978 ire_refrele(save_ire); 4979 4980 /* Assert that sire is not deleted yet. */ 4981 ASSERT(sire->ire_ptpn != NULL); 4982 IRB_REFRELE(sire->ire_bucket); 4983 4984 /* Assert that ipif_ire is not deleted yet. */ 4985 ASSERT(ipif_ire->ire_ptpn != NULL); 4986 IRB_REFRELE(ipif_ire->ire_bucket); 4987 ire_refrele(ipif_ire); 4988 4989 if (copy_mp != NULL) { 4990 /* 4991 * Search for the next unresolved 4992 * multirt route. 4993 */ 4994 copy_mp = NULL; 4995 ipif_ire = NULL; 4996 ire = NULL; 4997 /* re-enter the loop */ 4998 multirt_resolve_next = B_TRUE; 4999 continue; 5000 } 5001 ire_refrele(sire); 5002 ill_refrele(dst_ill); 5003 ipif_refrele(src_ipif); 5004 return; 5005 } 5006 case IRE_IF_NORESOLVER: 5007 /* 5008 * We have what we need to build an IRE_CACHE. 5009 * 5010 * Create a new dlureq_mp with the IPv6 gateway 5011 * address in destination address in the DLPI hdr 5012 * if the physical length is exactly 16 bytes. 
5013 */ 5014 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5015 const in6_addr_t *addr; 5016 5017 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5018 addr = &v6gw; 5019 else 5020 addr = v6dstp; 5021 5022 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5023 dst_ill->ill_phys_addr_length, 5024 dst_ill->ill_sap, 5025 dst_ill->ill_sap_length); 5026 } else { 5027 dlureq_mp = ill_dlur_gen(NULL, 5028 dst_ill->ill_phys_addr_length, 5029 dst_ill->ill_sap, 5030 dst_ill->ill_sap_length); 5031 } 5032 if (dlureq_mp == NULL) 5033 break; 5034 /* 5035 * TSol note: We are creating the ire cache for the 5036 * destination 'dst'. If 'dst' is offlink, going 5037 * through the first hop 'gw', the security attributes 5038 * of 'dst' must be set to point to the gateway 5039 * credentials of gateway 'gw'. If 'dst' is onlink, it 5040 * is possible that 'dst' is a potential gateway that is 5041 * referenced by some route that has some security 5042 * attributes. Thus in the former case, we need to do a 5043 * gcgrp_lookup of 'gw' while in the latter case we 5044 * need to do gcgrp_lookup of 'dst' itself. 5045 */ 5046 ga.ga_af = AF_INET6; 5047 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5048 ga.ga_addr = v6gw; 5049 else 5050 ga.ga_addr = *v6dstp; 5051 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5052 5053 /* 5054 * Note: the new ire inherits sire flags RTF_SETSRC 5055 * and RTF_MULTIRT to propagate those rules from prefix 5056 * to cache. 5057 */ 5058 ire = ire_create_v6( 5059 v6dstp, /* dest address */ 5060 &ipv6_all_ones, /* mask */ 5061 &src_ipif->ipif_v6src_addr, /* source address */ 5062 &v6gw, /* gateway address */ 5063 &save_ire->ire_max_frag, 5064 NULL, /* Fast Path header */ 5065 dst_ill->ill_rq, /* recv-from queue */ 5066 dst_ill->ill_wq, /* send-to queue */ 5067 IRE_CACHE, 5068 dlureq_mp, 5069 src_ipif, 5070 &save_ire->ire_mask_v6, /* Parent mask */ 5071 (sire != NULL) ? /* Parent handle */ 5072 sire->ire_phandle : 0, 5073 save_ire->ire_ihandle, /* Interface handle */ 5074 (sire != NULL) ? 
/* flags if any */ 5075 sire->ire_flags & 5076 (RTF_SETSRC | RTF_MULTIRT) : 0, 5077 &(save_ire->ire_uinfo), 5078 NULL, 5079 gcgrp); 5080 5081 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5082 freeb(dlureq_mp); 5083 5084 if (ire == NULL) { 5085 if (gcgrp != NULL) { 5086 GCGRP_REFRELE(gcgrp); 5087 gcgrp = NULL; 5088 } 5089 ire_refrele(save_ire); 5090 break; 5091 } 5092 5093 /* reference now held by IRE */ 5094 gcgrp = NULL; 5095 5096 ire->ire_marks |= ire_marks; 5097 5098 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5099 dst = v6gw; 5100 else 5101 dst = *v6dstp; 5102 err = ndp_noresolver(dst_ill, &dst); 5103 if (err != 0) { 5104 ire_refrele(save_ire); 5105 break; 5106 } 5107 5108 /* Prevent save_ire from getting deleted */ 5109 IRB_REFHOLD(save_ire->ire_bucket); 5110 /* Has it been removed already ? */ 5111 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5112 IRB_REFRELE(save_ire->ire_bucket); 5113 ire_refrele(save_ire); 5114 break; 5115 } 5116 5117 xmit_mp = first_mp; 5118 /* 5119 * In case of MULTIRT, a copy of the current packet 5120 * to send is made to further re-enter the 5121 * loop and attempt another route resolution 5122 */ 5123 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5124 copy_mp = copymsg(first_mp); 5125 if (copy_mp != NULL) { 5126 xmit_mp = copy_mp; 5127 MULTIRT_DEBUG_TAG(first_mp); 5128 } 5129 } 5130 ire_add_then_send(q, ire, xmit_mp); 5131 if (ip6_asp_table_held) { 5132 ip6_asp_table_refrele(); 5133 ip6_asp_table_held = B_FALSE; 5134 } 5135 5136 /* Assert that it is not deleted yet. */ 5137 ASSERT(save_ire->ire_ptpn != NULL); 5138 IRB_REFRELE(save_ire->ire_bucket); 5139 ire_refrele(save_ire); 5140 5141 if (copy_mp != NULL) { 5142 /* 5143 * If we found a (no)resolver, we ignore any 5144 * trailing top priority IRE_CACHE in 5145 * further loops. This ensures that we do not 5146 * omit any (no)resolver despite the priority 5147 * in this call. 
5148 * IRE_CACHE, if any, will be processed 5149 * by another thread entering ip_newroute(), 5150 * (on resolver response, for example). 5151 * We use this to force multiple parallel 5152 * resolution as soon as a packet needs to be 5153 * sent. The result is, after one packet 5154 * emission all reachable routes are generally 5155 * resolved. 5156 * Otherwise, complete resolution of MULTIRT 5157 * routes would require several emissions as 5158 * side effect. 5159 */ 5160 multirt_flags &= ~MULTIRT_CACHEGW; 5161 5162 /* 5163 * Search for the next unresolved multirt 5164 * route. 5165 */ 5166 copy_mp = NULL; 5167 save_ire = NULL; 5168 ire = NULL; 5169 /* re-enter the loop */ 5170 multirt_resolve_next = B_TRUE; 5171 continue; 5172 } 5173 5174 /* Don't need sire anymore */ 5175 if (sire != NULL) 5176 ire_refrele(sire); 5177 ill_refrele(dst_ill); 5178 ipif_refrele(src_ipif); 5179 return; 5180 5181 case IRE_IF_RESOLVER: 5182 /* 5183 * We can't build an IRE_CACHE yet, but at least we 5184 * found a resolver that can help. 5185 */ 5186 dst = *v6dstp; 5187 5188 /* 5189 * To be at this point in the code with a non-zero gw 5190 * means that dst is reachable through a gateway that 5191 * we have never resolved. By changing dst to the gw 5192 * addr we resolve the gateway first. When 5193 * ire_add_then_send() tries to put the IP dg to dst, 5194 * it will reenter ip_newroute() at which time we will 5195 * find the IRE_CACHE for the gw and create another 5196 * IRE_CACHE above (for dst itself). 5197 */ 5198 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5199 save_dst = dst; 5200 dst = v6gw; 5201 v6gw = ipv6_all_zeros; 5202 } 5203 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5204 /* 5205 * Ask the external resolver to do its thing. 
5206 * Make an mblk chain in the following form: 5207 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5208 */ 5209 mblk_t *ire_mp; 5210 mblk_t *areq_mp; 5211 areq_t *areq; 5212 in6_addr_t *addrp; 5213 5214 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5215 if (ip6_asp_table_held) { 5216 ip6_asp_table_refrele(); 5217 ip6_asp_table_held = B_FALSE; 5218 } 5219 ire = ire_create_mp_v6( 5220 &dst, /* dest address */ 5221 &ipv6_all_ones, /* mask */ 5222 &src_ipif->ipif_v6src_addr, 5223 /* source address */ 5224 &v6gw, /* gateway address */ 5225 NULL, /* Fast Path header */ 5226 dst_ill->ill_rq, /* recv-from queue */ 5227 dst_ill->ill_wq, /* send-to queue */ 5228 IRE_CACHE, 5229 NULL, 5230 src_ipif, 5231 &save_ire->ire_mask_v6, 5232 /* Parent mask */ 5233 0, 5234 save_ire->ire_ihandle, 5235 /* Interface handle */ 5236 0, /* flags if any */ 5237 &(save_ire->ire_uinfo), 5238 NULL, 5239 NULL); 5240 5241 ire_refrele(save_ire); 5242 if (ire == NULL) { 5243 ip1dbg(("ip_newroute_v6:" 5244 "ire is NULL\n")); 5245 break; 5246 } 5247 5248 if ((sire != NULL) && 5249 (sire->ire_flags & RTF_MULTIRT)) { 5250 /* 5251 * processing a copy of the packet to 5252 * send for further resolution loops 5253 */ 5254 copy_mp = copymsg(first_mp); 5255 if (copy_mp != NULL) 5256 MULTIRT_DEBUG_TAG(copy_mp); 5257 } 5258 ire->ire_marks |= ire_marks; 5259 ire_mp = ire->ire_mp; 5260 /* 5261 * Now create or find an nce for this interface. 5262 * The hw addr will need to to be set from 5263 * the reply to the AR_ENTRY_QUERY that 5264 * we're about to send. This will be done in 5265 * ire_add_v6(). 5266 */ 5267 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5268 switch (err) { 5269 case 0: 5270 /* 5271 * New cache entry created. 5272 * Break, then ask the external 5273 * resolver. 5274 */ 5275 break; 5276 case EINPROGRESS: 5277 /* 5278 * Resolution in progress; 5279 * packet has been queued by 5280 * ndp_resolver(). 
5281 */ 5282 ire_delete(ire); 5283 ire = NULL; 5284 /* 5285 * Check if another multirt 5286 * route must be resolved. 5287 */ 5288 if (copy_mp != NULL) { 5289 /* 5290 * If we found a resolver, we 5291 * ignore any trailing top 5292 * priority IRE_CACHE in 5293 * further loops. The reason is 5294 * the same as for noresolver. 5295 */ 5296 multirt_flags &= 5297 ~MULTIRT_CACHEGW; 5298 /* 5299 * Search for the next 5300 * unresolved multirt route. 5301 */ 5302 first_mp = copy_mp; 5303 copy_mp = NULL; 5304 mp = first_mp; 5305 if (mp->b_datap->db_type == 5306 M_CTL) { 5307 mp = mp->b_cont; 5308 } 5309 ASSERT(sire != NULL); 5310 dst = save_dst; 5311 /* 5312 * re-enter the loop 5313 */ 5314 multirt_resolve_next = 5315 B_TRUE; 5316 continue; 5317 } 5318 5319 if (sire != NULL) 5320 ire_refrele(sire); 5321 ill_refrele(dst_ill); 5322 ipif_refrele(src_ipif); 5323 return; 5324 default: 5325 /* 5326 * Transient error; packet will be 5327 * freed. 5328 */ 5329 ire_delete(ire); 5330 ire = NULL; 5331 break; 5332 } 5333 if (err != 0) 5334 break; 5335 /* 5336 * Now set up the AR_ENTRY_QUERY and send it. 5337 */ 5338 areq_mp = ill_arp_alloc(dst_ill, 5339 (uchar_t *)&ipv6_areq_template, 5340 (caddr_t)&dst); 5341 if (areq_mp == NULL) { 5342 ip1dbg(("ip_newroute_v6:" 5343 "areq_mp is NULL\n")); 5344 freemsg(ire_mp); 5345 break; 5346 } 5347 areq = (areq_t *)areq_mp->b_rptr; 5348 addrp = (in6_addr_t *)((char *)areq + 5349 areq->areq_target_addr_offset); 5350 *addrp = dst; 5351 addrp = (in6_addr_t *)((char *)areq + 5352 areq->areq_sender_addr_offset); 5353 *addrp = src_ipif->ipif_v6src_addr; 5354 /* 5355 * link the chain, then send up to the resolver. 5356 */ 5357 linkb(areq_mp, ire_mp); 5358 linkb(areq_mp, mp); 5359 ip1dbg(("ip_newroute_v6:" 5360 "putnext to resolver\n")); 5361 putnext(dst_ill->ill_rq, areq_mp); 5362 /* 5363 * Check if another multirt route 5364 * must be resolved. 
5365 */ 5366 ire = NULL; 5367 if (copy_mp != NULL) { 5368 /* 5369 * If we find a resolver, we ignore any 5370 * trailing top priority IRE_CACHE in 5371 * further loops. The reason is the 5372 * same as for noresolver. 5373 */ 5374 multirt_flags &= ~MULTIRT_CACHEGW; 5375 /* 5376 * Search for the next unresolved 5377 * multirt route. 5378 */ 5379 first_mp = copy_mp; 5380 copy_mp = NULL; 5381 mp = first_mp; 5382 if (mp->b_datap->db_type == M_CTL) { 5383 mp = mp->b_cont; 5384 } 5385 ASSERT(sire != NULL); 5386 dst = save_dst; 5387 /* 5388 * re-enter the loop 5389 */ 5390 multirt_resolve_next = B_TRUE; 5391 continue; 5392 } 5393 5394 if (sire != NULL) 5395 ire_refrele(sire); 5396 ill_refrele(dst_ill); 5397 ipif_refrele(src_ipif); 5398 return; 5399 } 5400 /* 5401 * Non-external resolver case. 5402 * 5403 * TSol note: Please see the note above the 5404 * IRE_IF_NORESOLVER case. 5405 */ 5406 ga.ga_af = AF_INET6; 5407 ga.ga_addr = dst; 5408 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5409 5410 ire = ire_create_v6( 5411 &dst, /* dest address */ 5412 &ipv6_all_ones, /* mask */ 5413 &src_ipif->ipif_v6src_addr, /* source address */ 5414 &v6gw, /* gateway address */ 5415 &save_ire->ire_max_frag, 5416 NULL, /* Fast Path header */ 5417 dst_ill->ill_rq, /* recv-from queue */ 5418 dst_ill->ill_wq, /* send-to queue */ 5419 IRE_CACHE, 5420 NULL, 5421 src_ipif, 5422 &save_ire->ire_mask_v6, /* Parent mask */ 5423 0, 5424 save_ire->ire_ihandle, /* Interface handle */ 5425 0, /* flags if any */ 5426 &(save_ire->ire_uinfo), 5427 NULL, 5428 gcgrp); 5429 5430 if (ire == NULL) { 5431 if (gcgrp != NULL) { 5432 GCGRP_REFRELE(gcgrp); 5433 gcgrp = NULL; 5434 } 5435 ire_refrele(save_ire); 5436 break; 5437 } 5438 5439 /* reference now held by IRE */ 5440 gcgrp = NULL; 5441 5442 if ((sire != NULL) && 5443 (sire->ire_flags & RTF_MULTIRT)) { 5444 copy_mp = copymsg(first_mp); 5445 if (copy_mp != NULL) 5446 MULTIRT_DEBUG_TAG(copy_mp); 5447 } 5448 5449 ire->ire_marks |= ire_marks; 5450 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5451 switch (err) { 5452 case 0: 5453 /* Prevent save_ire from getting deleted */ 5454 IRB_REFHOLD(save_ire->ire_bucket); 5455 /* Has it been removed already ? */ 5456 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5457 IRB_REFRELE(save_ire->ire_bucket); 5458 ire_refrele(save_ire); 5459 break; 5460 } 5461 5462 /* 5463 * We have a resolved cache entry, 5464 * add in the IRE. 5465 */ 5466 ire_add_then_send(q, ire, first_mp); 5467 if (ip6_asp_table_held) { 5468 ip6_asp_table_refrele(); 5469 ip6_asp_table_held = B_FALSE; 5470 } 5471 5472 /* Assert that it is not deleted yet. */ 5473 ASSERT(save_ire->ire_ptpn != NULL); 5474 IRB_REFRELE(save_ire->ire_bucket); 5475 ire_refrele(save_ire); 5476 /* 5477 * Check if another multirt route 5478 * must be resolved. 5479 */ 5480 ire = NULL; 5481 if (copy_mp != NULL) { 5482 /* 5483 * If we find a resolver, we ignore any 5484 * trailing top priority IRE_CACHE in 5485 * further loops. The reason is the 5486 * same as for noresolver. 5487 */ 5488 multirt_flags &= ~MULTIRT_CACHEGW; 5489 /* 5490 * Search for the next unresolved 5491 * multirt route. 5492 */ 5493 first_mp = copy_mp; 5494 copy_mp = NULL; 5495 mp = first_mp; 5496 if (mp->b_datap->db_type == M_CTL) { 5497 mp = mp->b_cont; 5498 } 5499 ASSERT(sire != NULL); 5500 dst = save_dst; 5501 /* 5502 * re-enter the loop 5503 */ 5504 multirt_resolve_next = B_TRUE; 5505 continue; 5506 } 5507 5508 if (sire != NULL) 5509 ire_refrele(sire); 5510 ill_refrele(dst_ill); 5511 ipif_refrele(src_ipif); 5512 return; 5513 5514 case EINPROGRESS: 5515 /* 5516 * mp was consumed - presumably queued. 5517 * No need for ire, presumably resolution is 5518 * in progress, and ire will be added when the 5519 * address is resolved. 
5520 */ 5521 if (ip6_asp_table_held) { 5522 ip6_asp_table_refrele(); 5523 ip6_asp_table_held = B_FALSE; 5524 } 5525 ASSERT(ire->ire_nce == NULL); 5526 ire_delete(ire); 5527 ire_refrele(save_ire); 5528 /* 5529 * Check if another multirt route 5530 * must be resolved. 5531 */ 5532 ire = NULL; 5533 if (copy_mp != NULL) { 5534 /* 5535 * If we find a resolver, we ignore any 5536 * trailing top priority IRE_CACHE in 5537 * further loops. The reason is the 5538 * same as for noresolver. 5539 */ 5540 multirt_flags &= ~MULTIRT_CACHEGW; 5541 /* 5542 * Search for the next unresolved 5543 * multirt route. 5544 */ 5545 first_mp = copy_mp; 5546 copy_mp = NULL; 5547 mp = first_mp; 5548 if (mp->b_datap->db_type == M_CTL) { 5549 mp = mp->b_cont; 5550 } 5551 ASSERT(sire != NULL); 5552 dst = save_dst; 5553 /* 5554 * re-enter the loop 5555 */ 5556 multirt_resolve_next = B_TRUE; 5557 continue; 5558 } 5559 if (sire != NULL) 5560 ire_refrele(sire); 5561 ill_refrele(dst_ill); 5562 ipif_refrele(src_ipif); 5563 return; 5564 default: 5565 /* Some transient error */ 5566 ASSERT(ire->ire_nce == NULL); 5567 ire_refrele(save_ire); 5568 break; 5569 } 5570 break; 5571 default: 5572 break; 5573 } 5574 if (ip6_asp_table_held) { 5575 ip6_asp_table_refrele(); 5576 ip6_asp_table_held = B_FALSE; 5577 } 5578 } while (multirt_resolve_next); 5579 5580 err_ret: 5581 ip1dbg(("ip_newroute_v6: dropped\n")); 5582 if (src_ipif != NULL) 5583 ipif_refrele(src_ipif); 5584 if (dst_ill != NULL) { 5585 need_rele = B_TRUE; 5586 ill = dst_ill; 5587 } 5588 if (ill != NULL) { 5589 if (mp->b_prev != NULL) { 5590 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5591 } else { 5592 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5593 } 5594 5595 if (need_rele) 5596 ill_refrele(ill); 5597 } else { 5598 if (mp->b_prev != NULL) { 5599 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5600 } else { 5601 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5602 } 5603 } 5604 /* Did this packet originate externally? 
*/ 5605 if (mp->b_prev) { 5606 mp->b_next = NULL; 5607 mp->b_prev = NULL; 5608 } 5609 if (copy_mp != NULL) { 5610 MULTIRT_DEBUG_UNTAG(copy_mp); 5611 freemsg(copy_mp); 5612 } 5613 MULTIRT_DEBUG_UNTAG(first_mp); 5614 freemsg(first_mp); 5615 if (ire != NULL) 5616 ire_refrele(ire); 5617 if (sire != NULL) 5618 ire_refrele(sire); 5619 return; 5620 5621 icmp_err_ret: 5622 if (ip6_asp_table_held) 5623 ip6_asp_table_refrele(); 5624 if (src_ipif != NULL) 5625 ipif_refrele(src_ipif); 5626 if (dst_ill != NULL) { 5627 need_rele = B_TRUE; 5628 ill = dst_ill; 5629 } 5630 ip1dbg(("ip_newroute_v6: no route\n")); 5631 if (sire != NULL) 5632 ire_refrele(sire); 5633 /* 5634 * We need to set sire to NULL to avoid double freeing if we 5635 * ever goto err_ret from below. 5636 */ 5637 sire = NULL; 5638 ip6h = (ip6_t *)mp->b_rptr; 5639 /* Skip ip6i_t header if present */ 5640 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5641 /* Make sure the IPv6 header is present */ 5642 if ((mp->b_wptr - (uchar_t *)ip6h) < 5643 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5644 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5645 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5646 goto err_ret; 5647 } 5648 } 5649 mp->b_rptr += sizeof (ip6i_t); 5650 ip6h = (ip6_t *)mp->b_rptr; 5651 } 5652 /* Did this packet originate externally? 
*/ 5653 if (mp->b_prev) { 5654 if (ill != NULL) { 5655 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5656 } else { 5657 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5658 } 5659 mp->b_next = NULL; 5660 mp->b_prev = NULL; 5661 q = WR(q); 5662 } else { 5663 if (ill != NULL) { 5664 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5665 } else { 5666 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5667 } 5668 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5669 /* Failed */ 5670 if (copy_mp != NULL) { 5671 MULTIRT_DEBUG_UNTAG(copy_mp); 5672 freemsg(copy_mp); 5673 } 5674 MULTIRT_DEBUG_UNTAG(first_mp); 5675 freemsg(first_mp); 5676 if (ire != NULL) 5677 ire_refrele(ire); 5678 if (need_rele) 5679 ill_refrele(ill); 5680 return; 5681 } 5682 } 5683 5684 if (need_rele) 5685 ill_refrele(ill); 5686 5687 /* 5688 * At this point we will have ire only if RTF_BLACKHOLE 5689 * or RTF_REJECT flags are set on the IRE. It will not 5690 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5691 */ 5692 if (ire != NULL) { 5693 if (ire->ire_flags & RTF_BLACKHOLE) { 5694 ire_refrele(ire); 5695 if (copy_mp != NULL) { 5696 MULTIRT_DEBUG_UNTAG(copy_mp); 5697 freemsg(copy_mp); 5698 } 5699 MULTIRT_DEBUG_UNTAG(first_mp); 5700 freemsg(first_mp); 5701 return; 5702 } 5703 ire_refrele(ire); 5704 } 5705 if (ip_debug > 3) { 5706 /* ip2dbg */ 5707 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5708 AF_INET6, v6dstp); 5709 } 5710 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5711 B_FALSE, B_FALSE); 5712 } 5713 5714 /* 5715 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5716 * we need to send out a packet to a destination address for which we do not 5717 * have specific routing information. It is only used for multicast packets. 5718 * 5719 * If unspec_src we allow creating an IRE with source address zero. 5720 * ire_send_v6() will delete it after the packet is sent. 
5721 */
/*
 * Arguments, as used by the code below:
 *	q		passed through to ire_add_then_send()
 *	mp		the packet; may be an M_CTL (IPSEC_OUT) mblk followed
 *			by the data mblk
 *	ipif		interface the packet is to leave through; must not be
 *			NULL
 *	v6dst		destination address (must not be v4-mapped)
 *	unspec_src	non-zero to allow an IRE with an unspecified source
 *	zoneid		zone used for source lookup and resolution
 *
 * On every path visible in this function the packet is either handed to
 * ire_add_then_send()/ndp_resolver()/ip_grab_attach_ill() or freed at
 * err_ret; nothing is returned to the caller.
 *
 * Reference handling: dst_ill and src_ipif are released at the top of each
 * loop iteration and on every exit.  ipif is released only when ipif_held is
 * set, i.e. when it was obtained here through ipif_lookup_group_v6().  ire is
 * set to NULL as soon as it has been handed to ire_add_then_send() or
 * ire_delete(), so that err_ret never releases a stale pointer, and any
 * outstanding multirt copy of the packet is freed at err_ret.
 */
void
ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif,
    in6_addr_t v6dst, int unspec_src, zoneid_t zoneid)
{
	ire_t	*ire = NULL;
	ipif_t	*src_ipif = NULL;
	int	err = 0;
	ill_t	*dst_ill = NULL;
	ire_t	*save_ire;
	ushort_t ire_marks = 0;
	ipsec_out_t *io;
	ill_t *attach_ill = NULL;
	ill_t	*ill;
	ip6_t *ip6h;
	mblk_t	*first_mp;
	boolean_t ip6i_present;
	ire_t *fire = NULL;
	mblk_t	*copy_mp = NULL;
	boolean_t multirt_resolve_next;
	in6_addr_t *v6dstp = &v6dst;
	boolean_t ipif_held = B_FALSE;
	boolean_t ill_held = B_FALSE;
	boolean_t ip6_asp_table_held = B_FALSE;

	/*
	 * This loop is run only once in most cases.
	 * We loop to resolve further routes only when the destination
	 * can be reached through multiple RTF_MULTIRT-flagged ires.
	 */
	do {
		multirt_resolve_next = B_FALSE;
		if (dst_ill != NULL) {
			ill_refrele(dst_ill);
			dst_ill = NULL;
		}

		if (src_ipif != NULL) {
			ipif_refrele(src_ipif);
			src_ipif = NULL;
		}
		ASSERT(ipif != NULL);
		ill = ipif->ipif_ill;

		ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp));
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n",
			    AF_INET6, v6dstp);
			printf("ip_newroute_ipif_v6: if %s, v6 %d\n",
			    ill->ill_name, ipif->ipif_isv6);
		}

		first_mp = mp;
		if (mp->b_datap->db_type == M_CTL) {
			mp = mp->b_cont;
			io = (ipsec_out_t *)first_mp->b_rptr;
			ASSERT(io->ipsec_out_type == IPSEC_OUT);
		} else {
			io = NULL;
		}

		/*
		 * This routine only serves multicast destinations; drop the
		 * packet if the interface is not multicast capable.
		 */
		if (!(ill->ill_flags & ILLF_MULTICAST)) {
			goto err_ret;
		}
		/*
		 * If this end point is bound to IPIF_NOFAILOVER, grab the
		 * ill the packet must be attached to. We can't use conn to
		 * determine this as it could be NULL.
		 *
		 * This information can appear either in an ip6i_t or an
		 * IPSEC_OUT message.
		 */
		ip6h = (ip6_t *)mp->b_rptr;
		ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW);
		if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) {
			if (!ip6i_present ||
			    ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) {
				attach_ill = ip_grab_attach_ill(ill, first_mp,
				    (ip6i_present ?
				    ((ip6i_t *)ip6h)->ip6i_ifindex :
				    io->ipsec_out_ill_index), B_TRUE);
				/*
				 * Failure case frees the packet for us, but
				 * an ipif picked up by a previous multirt
				 * iteration is still ours to release.
				 */
				if (attach_ill == NULL) {
					if (ipif_held)
						ipif_refrele(ipif);
					return;
				}

				/*
				 * Check if we need an ire that will not be
				 * looked up by anybody else i.e. HIDDEN.
				 */
				if (ill_is_probeonly(attach_ill))
					ire_marks = IRE_MARK_HIDDEN;
			}
		}

		/*
		 * We check if an IRE_OFFSUBNET for the addr that goes through
		 * ipif exists. We need it to determine if the RTF_SETSRC and/or
		 * RTF_MULTIRT flags must be honored.
		 */
		fire = ipif_lookup_multi_ire_v6(ipif, v6dstp);
		ip2dbg(("ip_newroute_ipif_v6: "
		    "ipif_lookup_multi_ire_v6("
		    "ipif %p, dst %08x) = fire %p\n",
		    (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))),
		    (void *)fire));

		/*
		 * If the application specified the ill (ifindex), we still
		 * load spread. Only if the packets needs to go out specifically
		 * on a given ill e.g. binding to IPIF_NOFAILOVER address or
		 * IPV6_BOUND_PIF, or there is a parent ire entry that specified
		 * multirouting, then we don't try to use a different ill for
		 * load spreading.
		 */
		if (attach_ill == NULL) {
			/*
			 * If the interface belongs to an interface group,
			 * make sure the next possible interface in the group
			 * is used.  This encourages load spreading among peers
			 * in an interface group.
			 *
			 * Note: While we pick a dst_ill we are really only
			 * interested in the ill for load spreading. The source
			 * ipif is determined by source address selection below.
			 */
			if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) {
				dst_ill = ipif->ipif_ill;
				/* For uniformity do a refhold */
				ill_refhold(dst_ill);
			} else {
				/* refheld by ip_newroute_get_dst_ill_v6 */
				dst_ill =
				    ip_newroute_get_dst_ill_v6(ipif->ipif_ill);
			}
			if (dst_ill == NULL) {
				if (ip_debug > 2) {
					pr_addr_dbg("ip_newroute_ipif_v6: "
					    "no dst ill for dst %s\n",
					    AF_INET6, v6dstp);
				}
				goto err_ret;
			}
		} else {
			dst_ill = ipif->ipif_ill;
			/*
			 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER
			 * and IPV6_BOUND_PIF case.
			 */
			ASSERT(dst_ill == attach_ill);
			/* attach_ill is already refheld */
		}
		/*
		 * Pick a source address which matches the scope of the
		 * destination address.
		 * For RTF_SETSRC routes, the source address is imposed by the
		 * parent ire (fire).
		 */
		ASSERT(src_ipif == NULL);
		if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) {
			/*
			 * Check that the ipif matching the requested source
			 * address still exists.
			 */
			src_ipif =
			    ipif_lookup_addr_v6(&fire->ire_src_addr_v6,
			    NULL, zoneid, NULL, NULL, NULL, NULL);
		}
		if (src_ipif == NULL && ip6_asp_can_lookup()) {
			ip6_asp_table_held = B_TRUE;
			src_ipif = ipif_select_source_v6(dst_ill, v6dstp,
			    RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid);
		}

		if (src_ipif == NULL) {
			if (!unspec_src) {
				if (ip_debug > 2) {
					/* ip1dbg */
					pr_addr_dbg("ip_newroute_ipif_v6: "
					    "no src for dst %s\n,",
					    AF_INET6, v6dstp);
					printf(" through interface %s\n",
					    dst_ill->ill_name);
				}
				goto err_ret;
			}
			/* Use any ipif for source */
			for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL;
			    src_ipif = src_ipif->ipif_next) {
				if ((src_ipif->ipif_flags & IPIF_UP) &&
				    IN6_IS_ADDR_UNSPECIFIED(
				    &src_ipif->ipif_v6src_addr))
					break;
			}
			if (src_ipif == NULL) {
				if (ip_debug > 2) {
					/* ip1dbg */
					pr_addr_dbg("ip_newroute_ipif_v6: "
					    "no src for dst %s\n ",
					    AF_INET6, v6dstp);
					printf("ip_newroute_ipif_v6: if %s"
					    "(UNSPEC_SRC)\n",
					    dst_ill->ill_name);
				}
				goto err_ret;
			}
			src_ipif = ipif;
			ipif_refhold(src_ipif);
		}
		ire = ipif_to_ire_v6(ipif);
		if (ire == NULL) {
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n",
				    AF_INET6, &ipif->ipif_v6lcl_addr);
				printf("ip_newroute_ipif_v6: "
				    "if %s\n", dst_ill->ill_name);
			}
			goto err_ret;
		}
		if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))
			goto err_ret;

		ASSERT(ire->ire_ipversion == IPV6_VERSION);

		ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),",
		    ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type));
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg(" address %s\n",
			    AF_INET6, &ire->ire_src_addr_v6);
		}
		save_ire = ire;
		ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n",
		    (void *)ire, (void *)ipif));

		if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) {
			/*
			 * an IRE_OFFSUBNET was looked up
			 * on that interface.
			 * this ire has RTF_MULTIRT flag,
			 * so the resolution loop
			 * will be re-entered to resolve
			 * additional routes on other
			 * interfaces. For that purpose,
			 * a copy of the packet is
			 * made at this point.
			 */
			fire->ire_last_used_time = lbolt;
			copy_mp = copymsg(first_mp);
			if (copy_mp) {
				MULTIRT_DEBUG_TAG(copy_mp);
			}
		}

		ASSERT((attach_ill == NULL) || (dst_ill == attach_ill));
		switch (ire->ire_type) {
		case IRE_IF_NORESOLVER: {
			/* We have what we need to build an IRE_CACHE. */
			mblk_t	*dlureq_mp;

			/*
			 * Create a new dlureq_mp with the
			 * IPv6 gateway address in destination address in the
			 * DLPI hdr if the physical length is exactly 16 bytes.
			 */
			ASSERT(dst_ill->ill_isv6);
			if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) {
				dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp,
				    dst_ill->ill_phys_addr_length,
				    dst_ill->ill_sap,
				    dst_ill->ill_sap_length);
			} else {
				dlureq_mp = ill_dlur_gen(NULL,
				    dst_ill->ill_phys_addr_length,
				    dst_ill->ill_sap,
				    dst_ill->ill_sap_length);
			}

			/* ire still equals save_ire; err_ret releases it. */
			if (dlureq_mp == NULL)
				break;
			/*
			 * The newly created ire will inherit the flags of the
			 * parent ire, if any.
			 */
			ire = ire_create_v6(
			    v6dstp,			/* dest address */
			    &ipv6_all_ones,		/* mask */
			    &src_ipif->ipif_v6src_addr, /* source address */
			    NULL,			/* gateway address */
			    &save_ire->ire_max_frag,
			    NULL,			/* Fast Path header */
			    dst_ill->ill_rq,		/* recv-from queue */
			    dst_ill->ill_wq,		/* send-to queue */
			    IRE_CACHE,
			    dlureq_mp,
			    src_ipif,
			    NULL,
			    (fire != NULL) ?		/* Parent handle */
			    fire->ire_phandle : 0,
			    save_ire->ire_ihandle,	/* Interface handle */
			    (fire != NULL) ?
			    (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) :
			    0,
			    &ire_uinfo_null,
			    NULL,
			    NULL);

			if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN)
				freeb(dlureq_mp);

			if (ire == NULL) {
				ire_refrele(save_ire);
				break;
			}

			ire->ire_marks |= ire_marks;

			err = ndp_noresolver(dst_ill, v6dstp);
			if (err != 0) {
				ire_refrele(save_ire);
				break;
			}

			/* Prevent save_ire from getting deleted */
			IRB_REFHOLD(save_ire->ire_bucket);
			/* Has it been removed already ? */
			if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
				IRB_REFRELE(save_ire->ire_bucket);
				ire_refrele(save_ire);
				break;
			}

			ire_add_then_send(q, ire, first_mp);
			/*
			 * The ire has been handed off; forget it so that a
			 * later goto err_ret (after re-entering the loop)
			 * cannot release it again.
			 */
			ire = NULL;
			if (ip6_asp_table_held) {
				ip6_asp_table_refrele();
				ip6_asp_table_held = B_FALSE;
			}

			/* Assert that it is not deleted yet. */
			ASSERT(save_ire->ire_ptpn != NULL);
			IRB_REFRELE(save_ire->ire_bucket);
			ire_refrele(save_ire);
			if (fire != NULL) {
				ire_refrele(fire);
				fire = NULL;
			}

			/*
			 * The resolution loop is re-entered if we
			 * actually are in a multirouting case.
			 */
			if (copy_mp != NULL) {
				boolean_t need_resolve =
				    ire_multirt_need_resolve_v6(v6dstp,
					MBLK_GETLABEL(copy_mp));
				if (!need_resolve) {
					MULTIRT_DEBUG_UNTAG(copy_mp);
					freemsg(copy_mp);
					copy_mp = NULL;
				} else {
					/*
					 * ipif_lookup_group_v6() calls
					 * ire_lookup_multi_v6() that uses
					 * ire_ftable_lookup_v6() to find
					 * an IRE_INTERFACE for the group.
					 * In the multirt case,
					 * ire_lookup_multi_v6() then invokes
					 * ire_multirt_lookup_v6() to find
					 * the next resolvable ire.
					 * As a result, we obtain a new
					 * interface, derived from the
					 * next ire.
					 */
					if (ipif_held) {
						ipif_refrele(ipif);
						ipif_held = B_FALSE;
					}
					ipif = ipif_lookup_group_v6(v6dstp,
					    zoneid);
					ip2dbg(("ip_newroute_ipif: "
					    "multirt dst %08x, ipif %p\n",
					    ntohl(V4_PART_OF_V6((*v6dstp))),
					    (void *)ipif));
					if (ipif != NULL) {
						ipif_held = B_TRUE;
						mp = copy_mp;
						copy_mp = NULL;
						multirt_resolve_next =
						    B_TRUE;
						continue;
					} else {
						MULTIRT_DEBUG_UNTAG(copy_mp);
						freemsg(copy_mp);
						copy_mp = NULL;
					}
				}
			}
			ill_refrele(dst_ill);
			if (ipif_held) {
				ipif_refrele(ipif);
				ipif_held = B_FALSE;
			}
			if (src_ipif != NULL)
				ipif_refrele(src_ipif);
			return;
		}
		case IRE_IF_RESOLVER: {

			ASSERT(dst_ill->ill_isv6);

			/*
			 * We obtain a partial IRE_CACHE which we will pass
			 * along with the resolver query.  When the response
			 * comes back it will be there ready for us to add.
			 */
			/*
			 * the newly created ire will inherit the flags of the
			 * parent ire, if any.
			 */
			ire = ire_create_v6(
			    v6dstp,			/* dest address */
			    &ipv6_all_ones,		/* mask */
			    &src_ipif->ipif_v6src_addr, /* source address */
			    NULL,			/* gateway address */
			    &save_ire->ire_max_frag,
			    NULL,			/* Fast Path header */
			    dst_ill->ill_rq,		/* recv-from queue */
			    dst_ill->ill_wq,		/* send-to queue */
			    IRE_CACHE,
			    NULL,
			    src_ipif,
			    NULL,
			    (fire != NULL) ?		/* Parent handle */
			    fire->ire_phandle : 0,
			    save_ire->ire_ihandle,	/* Interface handle */
			    (fire != NULL) ?
			    (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) :
			    0,
			    &ire_uinfo_null,
			    NULL,
			    NULL);

			if (ire == NULL) {
				ire_refrele(save_ire);
				break;
			}

			ire->ire_marks |= ire_marks;

			/* Resolve and add ire to the ctable */
			err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid);
			switch (err) {
			case 0:
				/* Prevent save_ire from getting deleted */
				IRB_REFHOLD(save_ire->ire_bucket);
				/* Has it been removed already ? */
				if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
					IRB_REFRELE(save_ire->ire_bucket);
					ire_refrele(save_ire);
					break;
				}
				/*
				 * We have a resolved cache entry,
				 * add in the IRE.
				 */
				ire_add_then_send(q, ire, first_mp);
				/* Handed off; see IRE_IF_NORESOLVER above. */
				ire = NULL;
				if (ip6_asp_table_held) {
					ip6_asp_table_refrele();
					ip6_asp_table_held = B_FALSE;
				}

				/* Assert that it is not deleted yet. */
				ASSERT(save_ire->ire_ptpn != NULL);
				IRB_REFRELE(save_ire->ire_bucket);
				ire_refrele(save_ire);
				if (fire != NULL) {
					ire_refrele(fire);
					fire = NULL;
				}

				/*
				 * The resolution loop is re-entered if we
				 * actually are in a multirouting case.
				 */
				if (copy_mp != NULL) {
					boolean_t need_resolve =
					    ire_multirt_need_resolve_v6(v6dstp,
						MBLK_GETLABEL(copy_mp));
					if (!need_resolve) {
						MULTIRT_DEBUG_UNTAG(copy_mp);
						freemsg(copy_mp);
						copy_mp = NULL;
					} else {
						/*
						 * ipif_lookup_group_v6() calls
						 * ire_lookup_multi_v6() that
						 * uses ire_ftable_lookup_v6()
						 * to find an IRE_INTERFACE for
						 * the group. In the multirt
						 * case, ire_lookup_multi_v6()
						 * then invokes
						 * ire_multirt_lookup_v6() to
						 * find the next resolvable ire.
						 * As a result, we obtain a new
						 * interface, derived from the
						 * next ire.
						 */
						if (ipif_held) {
							ipif_refrele(ipif);
							ipif_held = B_FALSE;
						}
						ipif = ipif_lookup_group_v6(
						    v6dstp, zoneid);
						ip2dbg(("ip_newroute_ipif: "
						    "multirt dst %08x, "
						    "ipif %p\n",
						    ntohl(V4_PART_OF_V6(
							(*v6dstp))),
						    (void *)ipif));
						if (ipif != NULL) {
							ipif_held = B_TRUE;
							mp = copy_mp;
							copy_mp = NULL;
							multirt_resolve_next =
							    B_TRUE;
							continue;
						} else {
							MULTIRT_DEBUG_UNTAG(
							    copy_mp);
							freemsg(copy_mp);
							copy_mp = NULL;
						}
					}
				}
				ill_refrele(dst_ill);
				if (ipif_held) {
					ipif_refrele(ipif);
					ipif_held = B_FALSE;
				}
				if (src_ipif != NULL)
					ipif_refrele(src_ipif);
				return;

			case EINPROGRESS:
				/*
				 * mp was consumed - presumably queued.
				 * No need for ire, presumably resolution is
				 * in progress, and ire will be added when the
				 * address is resolved.
				 */
				if (ip6_asp_table_held) {
					ip6_asp_table_refrele();
					ip6_asp_table_held = B_FALSE;
				}
				ire_delete(ire);
				/* Deleted; must not be released at err_ret. */
				ire = NULL;
				ire_refrele(save_ire);
				if (fire != NULL) {
					ire_refrele(fire);
					fire = NULL;
				}

				/*
				 * The resolution loop is re-entered if we
				 * actually are in a multirouting case.
				 */
				if (copy_mp != NULL) {
					boolean_t need_resolve =
					    ire_multirt_need_resolve_v6(v6dstp,
						MBLK_GETLABEL(copy_mp));
					if (!need_resolve) {
						MULTIRT_DEBUG_UNTAG(copy_mp);
						freemsg(copy_mp);
						copy_mp = NULL;
					} else {
						/*
						 * ipif_lookup_group_v6() calls
						 * ire_lookup_multi_v6() that
						 * uses ire_ftable_lookup_v6()
						 * to find an IRE_INTERFACE for
						 * the group. In the multirt
						 * case, ire_lookup_multi_v6()
						 * then invokes
						 * ire_multirt_lookup_v6() to
						 * find the next resolvable ire.
						 * As a result, we obtain a new
						 * interface, derived from the
						 * next ire.
						 */
						if (ipif_held) {
							ipif_refrele(ipif);
							ipif_held = B_FALSE;
						}
						ipif = ipif_lookup_group_v6(
						    v6dstp, zoneid);
						ip2dbg(("ip_newroute_ipif: "
						    "multirt dst %08x, "
						    "ipif %p\n",
						    ntohl(V4_PART_OF_V6(
							(*v6dstp))),
						    (void *)ipif));
						if (ipif != NULL) {
							ipif_held = B_TRUE;
							mp = copy_mp;
							copy_mp = NULL;
							multirt_resolve_next =
							    B_TRUE;
							continue;
						} else {
							MULTIRT_DEBUG_UNTAG(
							    copy_mp);
							freemsg(copy_mp);
							copy_mp = NULL;
						}
					}
				}
				ill_refrele(dst_ill);
				if (ipif_held) {
					ipif_refrele(ipif);
					ipif_held = B_FALSE;
				}
				if (src_ipif != NULL)
					ipif_refrele(src_ipif);
				return;
			default:
				/* Some transient error */
				ire_refrele(save_ire);
				break;
			}
			break;
		}
		default:
			break;
		}
		if (ip6_asp_table_held) {
			ip6_asp_table_refrele();
			ip6_asp_table_held = B_FALSE;
		}
	} while (multirt_resolve_next);

err_ret:
	if (ip6_asp_table_held)
		ip6_asp_table_refrele();
	if (ire != NULL)
		ire_refrele(ire);
	if (fire != NULL)
		ire_refrele(fire);
	if (ipif != NULL && ipif_held)
		ipif_refrele(ipif);
	if (src_ipif != NULL)
		ipif_refrele(src_ipif);
	/* Multicast - no point in trying to generate ICMP error */
	ASSERT((attach_ill == NULL) || (dst_ill == attach_ill));
	if (dst_ill != NULL) {
		ill = dst_ill;
		ill_held = B_TRUE;
	}
	if (mp->b_prev || mp->b_next) {
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
	} else {
		BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards);
	}
	ip1dbg(("ip_newroute_ipif_v6: dropped\n"));
	mp->b_next = NULL;
	mp->b_prev = NULL;
	/* Drop the multirt copy too, if one is still outstanding. */
	if (copy_mp != NULL) {
		MULTIRT_DEBUG_UNTAG(copy_mp);
		freemsg(copy_mp);
	}
	freemsg(first_mp);
	if (ill_held)
		ill_refrele(ill);
}
 6388 6389 /* 6390 * Parse and process any hop-by-hop or destination options. 6391 * 6392 * Assumes that q is an ill read queue so that ICMP errors for link-local 6393 * destinations are sent out the correct interface.
6394 * 6395 * Returns -1 if there was an error and mp has been consumed. 6396 * Returns 0 if no special action is needed. 6397 * Returns 1 if the packet contained a router alert option for this node 6398 * which is verified to be "interesting/known" for our implementation. 6399 * 6400 * XXX Note: In future as more hbh or dest options are defined, 6401 * it may be better to have different routines for hbh and dest 6402 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6403 * may have same value in different namespaces. Or is it same namespace ?? 6404 * Current code checks for each opt_type (other than pads) if it is in 6405 * the expected nexthdr (hbh or dest) 6406 */ 6407 static int 6408 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6409 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6410 { 6411 uint8_t opt_type; 6412 uint_t optused; 6413 int ret = 0; 6414 mblk_t *first_mp; 6415 const char *errtype; 6416 6417 first_mp = mp; 6418 if (mp->b_datap->db_type == M_CTL) { 6419 mp = mp->b_cont; 6420 } 6421 6422 while (optlen != 0) { 6423 opt_type = *optptr; 6424 if (opt_type == IP6OPT_PAD1) { 6425 optused = 1; 6426 } else { 6427 if (optlen < 2) 6428 goto bad_opt; 6429 errtype = "malformed"; 6430 if (opt_type == ip6opt_ls) { 6431 optused = 2 + optptr[1]; 6432 if (optused > optlen) 6433 goto bad_opt; 6434 } else switch (opt_type) { 6435 case IP6OPT_PADN: 6436 /* 6437 * Note:We don't verify that (N-2) pad octets 6438 * are zero as required by spec. Adhere to 6439 * "be liberal in what you accept..." part of 6440 * implementation philosophy (RFC791,RFC1122) 6441 */ 6442 optused = 2 + optptr[1]; 6443 if (optused > optlen) 6444 goto bad_opt; 6445 break; 6446 6447 case IP6OPT_JUMBO: 6448 if (hdr_type != IPPROTO_HOPOPTS) 6449 goto opt_error; 6450 goto opt_error; /* XXX Not implemented! 
*/ 6451 6452 case IP6OPT_ROUTER_ALERT: { 6453 struct ip6_opt_router *or; 6454 6455 if (hdr_type != IPPROTO_HOPOPTS) 6456 goto opt_error; 6457 optused = 2 + optptr[1]; 6458 if (optused > optlen) 6459 goto bad_opt; 6460 or = (struct ip6_opt_router *)optptr; 6461 /* Check total length and alignment */ 6462 if (optused != sizeof (*or) || 6463 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6464 goto opt_error; 6465 /* Check value */ 6466 switch (*((uint16_t *)or->ip6or_value)) { 6467 case IP6_ALERT_MLD: 6468 case IP6_ALERT_RSVP: 6469 ret = 1; 6470 } 6471 break; 6472 } 6473 case IP6OPT_HOME_ADDRESS: { 6474 /* 6475 * Minimal support for the home address option 6476 * (which is required by all IPv6 nodes). 6477 * Implement by just swapping the home address 6478 * and source address. 6479 * XXX Note: this has IPsec implications since 6480 * AH needs to take this into account. 6481 * Also, when IPsec is used we need to ensure 6482 * that this is only processed once 6483 * in the received packet (to avoid swapping 6484 * back and forth). 6485 * NOTE:This option processing is considered 6486 * to be unsafe and prone to a denial of 6487 * service attack. 6488 * The current processing is not safe even with 6489 * IPsec secured IP packets. Since the home 6490 * address option processing requirement still 6491 * is in the IETF draft and in the process of 6492 * being redefined for its usage, it has been 6493 * decided to turn off the option by default. 6494 * If this section of code needs to be executed, 6495 * ndd variable ip6_ignore_home_address_opt 6496 * should be set to 0 at the user's own risk. 6497 */ 6498 struct ip6_opt_home_address *oh; 6499 in6_addr_t tmp; 6500 6501 if (ipv6_ignore_home_address_opt) 6502 goto opt_error; 6503 6504 if (hdr_type != IPPROTO_DSTOPTS) 6505 goto opt_error; 6506 optused = 2 + optptr[1]; 6507 if (optused > optlen) 6508 goto bad_opt; 6509 6510 /* 6511 * We did this dest. opt the first time 6512 * around (i.e. before AH processing). 
6513 * If we've done AH... stop now. 6514 */ 6515 if (first_mp != mp) { 6516 ipsec_in_t *ii; 6517 6518 ii = (ipsec_in_t *)first_mp->b_rptr; 6519 if (ii->ipsec_in_ah_sa != NULL) 6520 break; 6521 } 6522 6523 oh = (struct ip6_opt_home_address *)optptr; 6524 /* Check total length and alignment */ 6525 if (optused < sizeof (*oh) || 6526 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6527 goto opt_error; 6528 /* Swap ip6_src and the home address */ 6529 tmp = ip6h->ip6_src; 6530 /* XXX Note: only 8 byte alignment option */ 6531 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6532 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6533 break; 6534 } 6535 6536 case IP6OPT_TUNNEL_LIMIT: 6537 if (hdr_type != IPPROTO_DSTOPTS) { 6538 goto opt_error; 6539 } 6540 optused = 2 + optptr[1]; 6541 if (optused > optlen) { 6542 goto bad_opt; 6543 } 6544 if (optused != 3) { 6545 goto opt_error; 6546 } 6547 break; 6548 6549 default: 6550 errtype = "unknown"; 6551 /* FALLTHROUGH */ 6552 opt_error: 6553 switch (IP6OPT_TYPE(opt_type)) { 6554 case IP6OPT_TYPE_SKIP: 6555 optused = 2 + optptr[1]; 6556 if (optused > optlen) 6557 goto bad_opt; 6558 ip1dbg(("ip_process_options_v6: %s " 6559 "opt 0x%x skipped\n", 6560 errtype, opt_type)); 6561 break; 6562 case IP6OPT_TYPE_DISCARD: 6563 ip1dbg(("ip_process_options_v6: %s " 6564 "opt 0x%x; packet dropped\n", 6565 errtype, opt_type)); 6566 freemsg(first_mp); 6567 return (-1); 6568 case IP6OPT_TYPE_ICMP: 6569 icmp_param_problem_v6(WR(q), first_mp, 6570 ICMP6_PARAMPROB_OPTION, 6571 (uint32_t)(optptr - 6572 (uint8_t *)ip6h), 6573 B_FALSE, B_FALSE); 6574 return (-1); 6575 case IP6OPT_TYPE_FORCEICMP: 6576 icmp_param_problem_v6(WR(q), first_mp, 6577 ICMP6_PARAMPROB_OPTION, 6578 (uint32_t)(optptr - 6579 (uint8_t *)ip6h), 6580 B_FALSE, B_TRUE); 6581 return (-1); 6582 default: 6583 ASSERT(0); 6584 } 6585 } 6586 } 6587 optlen -= optused; 6588 optptr += optused; 6589 } 6590 return (ret); 6591 6592 bad_opt: 6593 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6594 
(uint32_t)(optptr - (uint8_t *)ip6h), 6595 B_FALSE, B_FALSE); 6596 return (-1); 6597 } 6598 6599 /* 6600 * Process a routing header that is not yet empty. 6601 * Only handles type 0 routing headers. 6602 */ 6603 static void 6604 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6605 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6606 { 6607 ip6_rthdr0_t *rthdr; 6608 uint_t ehdrlen; 6609 uint_t numaddr; 6610 in6_addr_t *addrptr; 6611 in6_addr_t tmp; 6612 6613 ASSERT(rth->ip6r_segleft != 0); 6614 6615 if (!ipv6_forward_src_routed) { 6616 /* XXX Check for source routed out same interface? */ 6617 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6618 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6619 freemsg(hada_mp); 6620 freemsg(mp); 6621 return; 6622 } 6623 6624 if (rth->ip6r_type != 0) { 6625 if (hada_mp != NULL) 6626 goto hada_drop; 6627 icmp_param_problem_v6(WR(q), mp, 6628 ICMP6_PARAMPROB_HEADER, 6629 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6630 B_FALSE, B_FALSE); 6631 return; 6632 } 6633 rthdr = (ip6_rthdr0_t *)rth; 6634 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6635 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6636 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6637 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6638 if (rthdr->ip6r0_len & 0x1) { 6639 /* An odd length is impossible */ 6640 if (hada_mp != NULL) 6641 goto hada_drop; 6642 icmp_param_problem_v6(WR(q), mp, 6643 ICMP6_PARAMPROB_HEADER, 6644 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6645 B_FALSE, B_FALSE); 6646 return; 6647 } 6648 numaddr = rthdr->ip6r0_len / 2; 6649 if (rthdr->ip6r0_segleft > numaddr) { 6650 /* segleft exceeds number of addresses in routing header */ 6651 if (hada_mp != NULL) 6652 goto hada_drop; 6653 icmp_param_problem_v6(WR(q), mp, 6654 ICMP6_PARAMPROB_HEADER, 6655 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6656 (uchar_t *)ip6h), 6657 B_FALSE, B_FALSE); 6658 return; 6659 } 6660 addrptr += (numaddr 
- rthdr->ip6r0_segleft); 6661 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6662 IN6_IS_ADDR_MULTICAST(addrptr)) { 6663 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6664 freemsg(hada_mp); 6665 freemsg(mp); 6666 return; 6667 } 6668 /* Swap */ 6669 tmp = *addrptr; 6670 *addrptr = ip6h->ip6_dst; 6671 ip6h->ip6_dst = tmp; 6672 rthdr->ip6r0_segleft--; 6673 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6674 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6675 if (hada_mp != NULL) 6676 goto hada_drop; 6677 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6678 B_FALSE, B_FALSE); 6679 return; 6680 } 6681 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6682 return; 6683 hada_drop: 6684 /* IPsec kstats: bean counter? */ 6685 freemsg(hada_mp); 6686 freemsg(mp); 6687 } 6688 6689 /* 6690 * Read side put procedure for IPv6 module. 6691 */ 6692 static void 6693 ip_rput_v6(queue_t *q, mblk_t *mp) 6694 { 6695 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6696 ip6_t *ip6h; 6697 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6698 ill_t *ill; 6699 struct iocblk *iocp; 6700 uint_t flags = 0; 6701 6702 ill = (ill_t *)q->q_ptr; 6703 if (ill->ill_state_flags & ILL_CONDEMNED) { 6704 union DL_primitives *dl; 6705 6706 dl = (union DL_primitives *)mp->b_rptr; 6707 /* 6708 * Things are opening or closing - only accept DLPI 6709 * ack messages. If the stream is closing and ip_wsrv 6710 * has completed, ip_close is out of the qwait, but has 6711 * not yet completed qprocsoff. Don't proceed any further 6712 * because the ill has been cleaned up and things hanging 6713 * off the ill have been freed. 
6714 */ 6715 if ((mp->b_datap->db_type != M_PCPROTO) || 6716 (dl->dl_primitive == DL_UNITDATA_IND)) { 6717 inet_freemsg(mp); 6718 return; 6719 } 6720 } 6721 6722 switch (mp->b_datap->db_type) { 6723 case M_DATA: 6724 break; 6725 6726 case M_PROTO: 6727 case M_PCPROTO: 6728 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6729 DL_UNITDATA_IND) { 6730 /* Go handle anything other than data elsewhere. */ 6731 ip_rput_dlpi(q, mp); 6732 return; 6733 } 6734 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6735 ll_multicast = dlur->dl_group_address; 6736 #undef dlur 6737 /* Ditch the DLPI header. */ 6738 mp1 = mp; 6739 mp = mp->b_cont; 6740 freeb(mp1); 6741 break; 6742 case M_BREAK: 6743 panic("ip_rput_v6: got an M_BREAK"); 6744 /*NOTREACHED*/ 6745 case M_IOCACK: 6746 iocp = (struct iocblk *)mp->b_rptr; 6747 switch (iocp->ioc_cmd) { 6748 case DL_IOC_HDR_INFO: 6749 ill = (ill_t *)q->q_ptr; 6750 ill_fastpath_ack(ill, mp); 6751 return; 6752 case SIOCSTUNPARAM: 6753 case SIOCGTUNPARAM: 6754 case OSIOCSTUNPARAM: 6755 case OSIOCGTUNPARAM: 6756 /* Go through qwriter */ 6757 break; 6758 default: 6759 putnext(q, mp); 6760 return; 6761 } 6762 /* FALLTHRU */ 6763 case M_ERROR: 6764 case M_HANGUP: 6765 mutex_enter(&ill->ill_lock); 6766 if (ill->ill_state_flags & ILL_CONDEMNED) { 6767 mutex_exit(&ill->ill_lock); 6768 freemsg(mp); 6769 return; 6770 } 6771 ill_refhold_locked(ill); 6772 mutex_exit(&ill->ill_lock); 6773 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6774 return; 6775 case M_CTL: { 6776 if ((MBLKL(mp) > sizeof (int)) && 6777 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6778 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6779 mctl_present = B_TRUE; 6780 break; 6781 } 6782 putnext(q, mp); 6783 return; 6784 } 6785 case M_IOCNAK: 6786 iocp = (struct iocblk *)mp->b_rptr; 6787 switch (iocp->ioc_cmd) { 6788 case DL_IOC_HDR_INFO: 6789 case SIOCSTUNPARAM: 6790 case SIOCGTUNPARAM: 6791 case OSIOCSTUNPARAM: 6792 case OSIOCGTUNPARAM: 6793 
mutex_enter(&ill->ill_lock); 6794 if (ill->ill_state_flags & ILL_CONDEMNED) { 6795 mutex_exit(&ill->ill_lock); 6796 freemsg(mp); 6797 return; 6798 } 6799 ill_refhold_locked(ill); 6800 mutex_exit(&ill->ill_lock); 6801 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6802 B_FALSE); 6803 return; 6804 default: 6805 break; 6806 } 6807 /* FALLTHRU */ 6808 default: 6809 putnext(q, mp); 6810 return; 6811 } 6812 6813 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6814 /* 6815 * if db_ref > 1 then copymsg and free original. Packet may be 6816 * changed and do not want other entity who has a reference to this 6817 * message to trip over the changes. This is a blind change because 6818 * trying to catch all places that might change packet is too 6819 * difficult (since it may be a module above this one). 6820 */ 6821 if (mp->b_datap->db_ref > 1) { 6822 mblk_t *mp1; 6823 6824 mp1 = copymsg(mp); 6825 freemsg(mp); 6826 if (mp1 == NULL) { 6827 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6828 return; 6829 } 6830 mp = mp1; 6831 } 6832 first_mp = mp; 6833 if (mctl_present) { 6834 hada_mp = first_mp; 6835 mp = first_mp->b_cont; 6836 } 6837 6838 ip6h = (ip6_t *)mp->b_rptr; 6839 6840 /* check for alignment and full IPv6 header */ 6841 if (!OK_32PTR((uchar_t *)ip6h) || 6842 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6843 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6844 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6845 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6846 freemsg(first_mp); 6847 return; 6848 } 6849 ip6h = (ip6_t *)mp->b_rptr; 6850 } 6851 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6852 IPV6_DEFAULT_VERS_AND_FLOW) { 6853 /* 6854 * It may be a bit too expensive to do this mapped address 6855 * check here, but in the interest of robustness, it seems 6856 * like the correct place. 6857 * TODO: Avoid this check for e.g. 
connected TCP sockets 6858 */ 6859 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6860 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6861 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6862 freemsg(first_mp); 6863 return; 6864 } 6865 6866 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6867 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6868 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6869 freemsg(first_mp); 6870 return; 6871 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6872 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6873 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6874 freemsg(first_mp); 6875 return; 6876 } 6877 6878 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6879 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6880 } else { 6881 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6882 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6883 freemsg(first_mp); 6884 } 6885 } 6886 6887 /* 6888 * Walk through the IPv6 packet in mp and see if there's an AH header 6889 * in it. See if the AH header needs to get done before other headers in 6890 * the packet. (Worker function for ipsec_early_ah_v6().) 6891 */ 6892 #define IPSEC_HDR_DONT_PROCESS 0 6893 #define IPSEC_HDR_PROCESS 1 6894 #define IPSEC_MEMORY_ERROR 2 6895 static int 6896 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6897 { 6898 uint_t length; 6899 uint_t ehdrlen; 6900 uint8_t *whereptr; 6901 uint8_t *endptr; 6902 uint8_t *nexthdrp; 6903 ip6_dest_t *desthdr; 6904 ip6_rthdr_t *rthdr; 6905 ip6_t *ip6h; 6906 6907 /* 6908 * For now just pullup everything. In general, the less pullups, 6909 * the better, but there's so much squirrelling through anyway, 6910 * it's just easier this way. 
6911 */ 6912 if (!pullupmsg(mp, -1)) { 6913 return (IPSEC_MEMORY_ERROR); 6914 } 6915 6916 ip6h = (ip6_t *)mp->b_rptr; 6917 length = IPV6_HDR_LEN; 6918 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6919 endptr = mp->b_wptr; 6920 6921 /* 6922 * We can't just use the argument nexthdr in the place 6923 * of nexthdrp becaue we don't dereference nexthdrp 6924 * till we confirm whether it is a valid address. 6925 */ 6926 nexthdrp = &ip6h->ip6_nxt; 6927 while (whereptr < endptr) { 6928 /* Is there enough left for len + nexthdr? */ 6929 if (whereptr + MIN_EHDR_LEN > endptr) 6930 return (IPSEC_MEMORY_ERROR); 6931 6932 switch (*nexthdrp) { 6933 case IPPROTO_HOPOPTS: 6934 case IPPROTO_DSTOPTS: 6935 /* Assumes the headers are identical for hbh and dst */ 6936 desthdr = (ip6_dest_t *)whereptr; 6937 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6938 if ((uchar_t *)desthdr + ehdrlen > endptr) 6939 return (IPSEC_MEMORY_ERROR); 6940 /* 6941 * Return DONT_PROCESS because of potential Mobile IPv6 6942 * cruft for destination options. 6943 */ 6944 if (*nexthdrp == IPPROTO_DSTOPTS) 6945 return (IPSEC_HDR_DONT_PROCESS); 6946 nexthdrp = &desthdr->ip6d_nxt; 6947 break; 6948 case IPPROTO_ROUTING: 6949 rthdr = (ip6_rthdr_t *)whereptr; 6950 6951 /* 6952 * If there's more hops left on the routing header, 6953 * return now with DON'T PROCESS. 
6954 */ 6955 if (rthdr->ip6r_segleft > 0) 6956 return (IPSEC_HDR_DONT_PROCESS); 6957 6958 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6959 if ((uchar_t *)rthdr + ehdrlen > endptr) 6960 return (IPSEC_MEMORY_ERROR); 6961 nexthdrp = &rthdr->ip6r_nxt; 6962 break; 6963 case IPPROTO_FRAGMENT: 6964 /* Wait for reassembly */ 6965 return (IPSEC_HDR_DONT_PROCESS); 6966 case IPPROTO_AH: 6967 *nexthdr = IPPROTO_AH; 6968 return (IPSEC_HDR_PROCESS); 6969 case IPPROTO_NONE: 6970 /* No next header means we're finished */ 6971 default: 6972 return (IPSEC_HDR_DONT_PROCESS); 6973 } 6974 length += ehdrlen; 6975 whereptr += ehdrlen; 6976 } 6977 panic("ipsec_needs_processing_v6"); 6978 /*NOTREACHED*/ 6979 } 6980 6981 /* 6982 * Path for AH if options are present. If this is the first time we are 6983 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6984 * Otherwise, just fanout. Return value answers the boolean question: 6985 * "Did I consume the mblk you sent me?" 6986 * 6987 * Sometimes AH needs to be done before other IPv6 headers for security 6988 * reasons. This function (and its ipsec_needs_processing_v6() above) 6989 * indicates if that is so, and fans out to the appropriate IPsec protocol 6990 * for the datagram passed in. 6991 */ 6992 static boolean_t 6993 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6994 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6995 { 6996 mblk_t *mp; 6997 uint8_t nexthdr; 6998 ipsec_in_t *ii = NULL; 6999 ah_t *ah; 7000 ipsec_status_t ipsec_rc; 7001 7002 ASSERT((hada_mp == NULL) || (!mctl_present)); 7003 7004 switch (ipsec_needs_processing_v6( 7005 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7006 case IPSEC_MEMORY_ERROR: 7007 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7008 freemsg(hada_mp); 7009 freemsg(first_mp); 7010 return (B_TRUE); 7011 case IPSEC_HDR_DONT_PROCESS: 7012 return (B_FALSE); 7013 } 7014 7015 /* Default means send it to AH! 
*/ 7016 ASSERT(nexthdr == IPPROTO_AH); 7017 if (!mctl_present) { 7018 mp = first_mp; 7019 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7020 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7021 "allocation failure.\n")); 7022 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7023 freemsg(hada_mp); 7024 freemsg(mp); 7025 return (B_TRUE); 7026 } 7027 /* 7028 * Store the ill_index so that when we come back 7029 * from IPSEC we ride on the same queue. 7030 */ 7031 ii = (ipsec_in_t *)first_mp->b_rptr; 7032 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7033 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7034 first_mp->b_cont = mp; 7035 } 7036 /* 7037 * Cache hardware acceleration info. 7038 */ 7039 if (hada_mp != NULL) { 7040 ASSERT(ii != NULL); 7041 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7042 "caching data attr.\n")); 7043 ii->ipsec_in_accelerated = B_TRUE; 7044 ii->ipsec_in_da = hada_mp; 7045 } 7046 7047 if (!ipsec_loaded()) { 7048 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7049 return (B_TRUE); 7050 } 7051 7052 ah = ipsec_inbound_ah_sa(first_mp); 7053 if (ah == NULL) 7054 return (B_TRUE); 7055 ASSERT(ii->ipsec_in_ah_sa != NULL); 7056 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7057 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7058 7059 switch (ipsec_rc) { 7060 case IPSEC_STATUS_SUCCESS: 7061 /* we're done with IPsec processing, send it up */ 7062 ip_fanout_proto_again(first_mp, ill, ill, ire); 7063 break; 7064 case IPSEC_STATUS_FAILED: 7065 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7066 break; 7067 case IPSEC_STATUS_PENDING: 7068 /* no action needed */ 7069 break; 7070 } 7071 return (B_TRUE); 7072 } 7073 7074 /* 7075 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7076 * ip_rput_v6 has already verified alignment, the min length, the version, 7077 * and db_ref = 1. 7078 * 7079 * The ill passed in (the arg named inill) is the ill that the packet 7080 * actually arrived on. 
We need to remember this when saving the 7081 * input interface index into potential IPV6_PKTINFO data in 7082 * ip_add_info_v6(). 7083 */ 7084 void 7085 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7086 uint_t flags, mblk_t *hada_mp) 7087 { 7088 ire_t *ire = NULL; 7089 queue_t *rq; 7090 ill_t *ill = inill; 7091 ipif_t *ipif; 7092 uint8_t *whereptr; 7093 uint8_t nexthdr; 7094 uint16_t remlen; 7095 uint_t prev_nexthdr_offset; 7096 uint_t used; 7097 size_t pkt_len; 7098 uint16_t ip6_len; 7099 uint_t hdr_len; 7100 boolean_t mctl_present; 7101 mblk_t *first_mp; 7102 mblk_t *first_mp1; 7103 boolean_t no_forward; 7104 ip6_hbh_t *hbhhdr; 7105 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7106 conn_t *connp; 7107 ilm_t *ilm; 7108 uint32_t ports; 7109 uint_t ipif_id = 0; 7110 zoneid_t zoneid = GLOBAL_ZONEID; 7111 uint16_t hck_flags, reass_hck_flags; 7112 uint32_t reass_sum; 7113 boolean_t cksum_err; 7114 mblk_t *mp1; 7115 7116 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7117 7118 if (hada_mp != NULL) { 7119 /* 7120 * It's an IPsec accelerated packet. 7121 * Keep a pointer to the data attributes around until 7122 * we allocate the ipsecinfo structure. 7123 */ 7124 IPSECHW_DEBUG(IPSECHW_PKT, 7125 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7126 hada_mp->b_cont = NULL; 7127 /* 7128 * Since it is accelerated, it came directly from 7129 * the ill. 7130 */ 7131 ASSERT(mctl_present == B_FALSE); 7132 ASSERT(mp->b_datap->db_type != M_CTL); 7133 } 7134 7135 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7136 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7137 7138 if (mp->b_cont == NULL) 7139 pkt_len = mp->b_wptr - mp->b_rptr; 7140 else 7141 pkt_len = msgdsize(mp); 7142 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7143 7144 /* 7145 * Check for bogus (too short packet) and packet which 7146 * was padded by the link layer. 
7147 */ 7148 if (ip6_len != pkt_len) { 7149 ssize_t diff; 7150 7151 if (ip6_len > pkt_len) { 7152 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 7153 ip6_len, pkt_len)); 7154 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7155 freemsg(hada_mp); 7156 freemsg(first_mp); 7157 return; 7158 } 7159 diff = (ssize_t)(pkt_len - ip6_len); 7160 7161 if (!adjmsg(mp, -diff)) { 7162 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7163 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7164 freemsg(hada_mp); 7165 freemsg(first_mp); 7166 return; 7167 } 7168 pkt_len -= diff; 7169 } 7170 7171 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7172 hck_flags = DB_CKSUMFLAGS(mp); 7173 else 7174 hck_flags = 0; 7175 7176 /* Clear checksum flags in case we need to forward */ 7177 DB_CKSUMFLAGS(mp) = 0; 7178 reass_sum = reass_hck_flags = 0; 7179 7180 nexthdr = ip6h->ip6_nxt; 7181 7182 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7183 (uchar_t *)ip6h); 7184 whereptr = (uint8_t *)&ip6h[1]; 7185 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7186 7187 /* Process hop by hop header options */ 7188 if (nexthdr == IPPROTO_HOPOPTS) { 7189 uint_t ehdrlen; 7190 uint8_t *optptr; 7191 7192 if (remlen < MIN_EHDR_LEN) 7193 goto pkt_too_short; 7194 if (mp->b_cont != NULL && 7195 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7196 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7197 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7198 freemsg(hada_mp); 7199 freemsg(first_mp); 7200 return; 7201 } 7202 ip6h = (ip6_t *)mp->b_rptr; 7203 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7204 } 7205 hbhhdr = (ip6_hbh_t *)whereptr; 7206 nexthdr = hbhhdr->ip6h_nxt; 7207 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7208 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7209 7210 if (remlen < ehdrlen) 7211 goto pkt_too_short; 7212 if (mp->b_cont != NULL && 7213 whereptr + ehdrlen > mp->b_wptr) { 7214 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7215 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 7216 freemsg(hada_mp); 7217 freemsg(first_mp); 7218 return; 7219 } 7220 ip6h = (ip6_t *)mp->b_rptr; 7221 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7222 hbhhdr = (ip6_hbh_t *)whereptr; 7223 } 7224 7225 optptr = whereptr + 2; 7226 whereptr += ehdrlen; 7227 remlen -= ehdrlen; 7228 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7229 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7230 case -1: 7231 /* 7232 * Packet has been consumed and any 7233 * needed ICMP messages sent. 7234 */ 7235 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7236 freemsg(hada_mp); 7237 return; 7238 case 0: 7239 /* no action needed */ 7240 break; 7241 case 1: 7242 /* Known router alert */ 7243 goto ipv6forus; 7244 } 7245 } 7246 7247 /* 7248 * Attach any necessary label information to this packet. 7249 */ 7250 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7251 if (ip6opt_ls != 0) 7252 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7253 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7254 freemsg(hada_mp); 7255 freemsg(first_mp); 7256 return; 7257 } 7258 7259 /* 7260 * On incoming v6 multicast packets we will bypass the ire table, 7261 * and assume that the read queue corresponds to the targetted 7262 * interface. 7263 * 7264 * The effect of this is the same as the IPv4 original code, but is 7265 * much cleaner I think. See ip_rput for how that was done. 7266 */ 7267 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7268 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7269 /* 7270 * XXX TODO Give to mrouted to for multicast forwarding. 
7271 */ 7272 ILM_WALKER_HOLD(ill); 7273 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7274 ILM_WALKER_RELE(ill); 7275 if (ilm == NULL) { 7276 if (ip_debug > 3) { 7277 /* ip2dbg */ 7278 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7279 " which is not for us: %s\n", AF_INET6, 7280 &ip6h->ip6_dst); 7281 } 7282 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7283 freemsg(hada_mp); 7284 freemsg(first_mp); 7285 return; 7286 } 7287 if (ip_debug > 3) { 7288 /* ip2dbg */ 7289 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7290 AF_INET6, &ip6h->ip6_dst); 7291 } 7292 rq = ill->ill_rq; 7293 zoneid = GLOBAL_ZONEID; 7294 goto ipv6forus; 7295 } 7296 7297 ipif = ill->ill_ipif; 7298 7299 /* 7300 * If a packet was received on an interface that is a 6to4 tunnel, 7301 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7302 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7303 * the 6to4 prefix of the address configured on the receiving interface. 7304 * Otherwise, the packet was delivered to this interface in error and 7305 * the packet must be dropped. 7306 */ 7307 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7308 7309 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7310 &ip6h->ip6_dst)) { 7311 if (ip_debug > 2) { 7312 /* ip1dbg */ 7313 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7314 "addressed packet which is not for us: " 7315 "%s\n", AF_INET6, &ip6h->ip6_dst); 7316 } 7317 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7318 freemsg(first_mp); 7319 return; 7320 } 7321 } 7322 7323 /* 7324 * Find an ire that matches destination. For link-local addresses 7325 * we have to match the ill. 7326 * TBD for site local addresses. 
7327 */ 7328 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7329 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7330 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7331 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7332 } else { 7333 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7334 MBLK_GETLABEL(mp)); 7335 } 7336 if (ire == NULL) { 7337 /* 7338 * No matching IRE found. Mark this packet as having 7339 * originated externally. 7340 */ 7341 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7342 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7343 if (!(ill->ill_flags & ILLF_ROUTER)) 7344 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7345 freemsg(hada_mp); 7346 freemsg(first_mp); 7347 return; 7348 } 7349 if (ip6h->ip6_hops <= 1) { 7350 if (hada_mp != NULL) 7351 goto hada_drop; 7352 icmp_time_exceeded_v6(WR(q), first_mp, 7353 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7354 return; 7355 } 7356 /* 7357 * Per RFC 3513 section 2.5.2, we must not forward packets with 7358 * an unspecified source address. 7359 */ 7360 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7361 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7362 freemsg(hada_mp); 7363 freemsg(first_mp); 7364 return; 7365 } 7366 mp->b_prev = (mblk_t *)(uintptr_t) 7367 ill->ill_phyint->phyint_ifindex; 7368 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7369 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7370 ALL_ZONES); 7371 return; 7372 } 7373 ipif_id = ire->ire_ipif->ipif_seqid; 7374 /* we have a matching IRE */ 7375 if (ire->ire_stq != NULL) { 7376 ill_group_t *ill_group; 7377 ill_group_t *ire_group; 7378 7379 /* 7380 * To be quicker, we may wish not to chase pointers 7381 * (ire->ire_ipif->ipif_ill...) and instead store the 7382 * forwarding policy in the ire. An unfortunate side- 7383 * effect of this would be requiring an ire flush whenever 7384 * the ILLF_ROUTER flag changes. For now, chase pointers 7385 * once and store in the boolean no_forward. 
7386 * 7387 * This appears twice to keep it out of the non-forwarding, 7388 * yes-it's-for-us-on-the-right-interface case. 7389 */ 7390 no_forward = ((ill->ill_flags & 7391 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7392 7393 7394 ASSERT(first_mp == mp); 7395 /* 7396 * This ire has a send-to queue - forward the packet. 7397 */ 7398 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7399 freemsg(hada_mp); 7400 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7401 if (no_forward) 7402 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7403 freemsg(mp); 7404 ire_refrele(ire); 7405 return; 7406 } 7407 if (ip6h->ip6_hops <= 1) { 7408 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7409 icmp_time_exceeded_v6(WR(q), mp, 7410 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7411 ire_refrele(ire); 7412 return; 7413 } 7414 /* 7415 * Per RFC 3513 section 2.5.2, we must not forward packets with 7416 * an unspecified source address. 7417 */ 7418 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7419 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7420 freemsg(mp); 7421 ire_refrele(ire); 7422 return; 7423 } 7424 7425 if (is_system_labeled()) { 7426 mblk_t *mp1; 7427 7428 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7429 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7430 freemsg(mp); 7431 ire_refrele(ire); 7432 return; 7433 } 7434 /* Size may have changed */ 7435 mp = mp1; 7436 ip6h = (ip6_t *)mp->b_rptr; 7437 pkt_len = msgdsize(mp); 7438 } 7439 7440 if (pkt_len > ire->ire_max_frag) { 7441 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7442 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7443 ll_multicast, B_TRUE); 7444 ire_refrele(ire); 7445 return; 7446 } 7447 7448 /* 7449 * Check to see if we're forwarding the packet to a 7450 * different link from which it came. If so, check the 7451 * source and destination addresses since routers must not 7452 * forward any packets with link-local source or 7453 * destination addresses to other links. 
Otherwise (if 7454 * we're forwarding onto the same link), conditionally send 7455 * a redirect message. 7456 */ 7457 ill_group = ill->ill_group; 7458 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7459 if (ire->ire_rfq != q && (ill_group == NULL || 7460 ill_group != ire_group)) { 7461 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7462 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7463 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7464 freemsg(mp); 7465 ire_refrele(ire); 7466 return; 7467 } 7468 /* TBD add site-local check at site boundary? */ 7469 } else if (ipv6_send_redirects) { 7470 in6_addr_t *v6targ; 7471 in6_addr_t gw_addr_v6; 7472 ire_t *src_ire_v6 = NULL; 7473 7474 /* 7475 * Don't send a redirect when forwarding a source 7476 * routed packet. 7477 */ 7478 if (ip_source_routed_v6(ip6h, mp)) 7479 goto forward; 7480 7481 mutex_enter(&ire->ire_lock); 7482 gw_addr_v6 = ire->ire_gateway_addr_v6; 7483 mutex_exit(&ire->ire_lock); 7484 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7485 v6targ = &gw_addr_v6; 7486 /* 7487 * We won't send redirects to a router 7488 * that doesn't have a link local 7489 * address, but will forward. 7490 */ 7491 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7492 BUMP_MIB(ill->ill_ip6_mib, 7493 ipv6InAddrErrors); 7494 goto forward; 7495 } 7496 } else { 7497 v6targ = &ip6h->ip6_dst; 7498 } 7499 7500 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7501 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7502 ALL_ZONES, 0, NULL, 7503 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7504 7505 if (src_ire_v6 != NULL) { 7506 /* 7507 * The source is directly connected. 
7508 */ 7509 mp1 = copymsg(mp); 7510 if (mp1 != NULL) { 7511 icmp_send_redirect_v6(WR(q), 7512 mp1, v6targ, &ip6h->ip6_dst, 7513 ill, B_FALSE); 7514 } 7515 ire_refrele(src_ire_v6); 7516 } 7517 } 7518 7519 forward: 7520 /* Hoplimit verified above */ 7521 ip6h->ip6_hops--; 7522 UPDATE_IB_PKT_COUNT(ire); 7523 ire->ire_last_used_time = lbolt; 7524 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7525 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7526 IRE_REFRELE(ire); 7527 return; 7528 } 7529 rq = ire->ire_rfq; 7530 7531 /* 7532 * Need to put on correct queue for reassembly to find it. 7533 * No need to use put() since reassembly has its own locks. 7534 * Note: multicast packets and packets destined to addresses 7535 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7536 * the arriving ill. 7537 */ 7538 if (rq != q) { 7539 boolean_t check_multi = B_TRUE; 7540 ill_group_t *ill_group = NULL; 7541 ill_group_t *ire_group = NULL; 7542 ill_t *ire_ill = NULL; 7543 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7544 7545 /* 7546 * To be quicker, we may wish not to chase pointers 7547 * (ire->ire_ipif->ipif_ill...) and instead store the 7548 * forwarding policy in the ire. An unfortunate side- 7549 * effect of this would be requiring an ire flush whenever 7550 * the ILLF_ROUTER flag changes. For now, chase pointers 7551 * once and store in the boolean no_forward. 7552 */ 7553 no_forward = ((ill->ill_flags & 7554 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7555 7556 ill_group = ill->ill_group; 7557 if (rq != NULL) { 7558 ire_ill = (ill_t *)(rq->q_ptr); 7559 ire_group = ire_ill->ill_group; 7560 } 7561 7562 /* 7563 * If it's part of the same IPMP group, or if it's a legal 7564 * address on the 'usesrc' interface, then bypass strict 7565 * checks. 
7566 */ 7567 if (ill_group != NULL && ill_group == ire_group) { 7568 check_multi = B_FALSE; 7569 } else if (ill_ifindex != 0 && ire_ill != NULL && 7570 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7571 check_multi = B_FALSE; 7572 } 7573 7574 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7575 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7576 /* 7577 * This packet came in on an interface other than the 7578 * one associated with the destination address 7579 * and we are strict about matches. 7580 * 7581 * As long as the ills belong to the same group, 7582 * we don't consider them to arriving on the wrong 7583 * interface. Thus, when the switch is doing inbound 7584 * load spreading, we won't drop packets when we 7585 * are doing strict multihoming checks. 7586 */ 7587 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7588 freemsg(hada_mp); 7589 freemsg(first_mp); 7590 ire_refrele(ire); 7591 return; 7592 } 7593 7594 if (rq != NULL) 7595 q = rq; 7596 7597 ill = (ill_t *)q->q_ptr; 7598 ASSERT(ill); 7599 } 7600 7601 zoneid = ire->ire_zoneid; 7602 UPDATE_IB_PKT_COUNT(ire); 7603 ire->ire_last_used_time = lbolt; 7604 /* Don't use the ire after this point. */ 7605 ire_refrele(ire); 7606 ipv6forus: 7607 /* 7608 * Looks like this packet is for us one way or another. 7609 * This is where we'll process destination headers etc. 
7610 */ 7611 for (; ; ) { 7612 switch (nexthdr) { 7613 case IPPROTO_TCP: { 7614 uint16_t *up; 7615 uint32_t sum; 7616 int offset; 7617 7618 hdr_len = pkt_len - remlen; 7619 7620 if (hada_mp != NULL) { 7621 ip0dbg(("tcp hada drop\n")); 7622 goto hada_drop; 7623 } 7624 7625 7626 /* TCP needs all of the TCP header */ 7627 if (remlen < TCP_MIN_HEADER_LENGTH) 7628 goto pkt_too_short; 7629 if (mp->b_cont != NULL && 7630 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7631 if (!pullupmsg(mp, 7632 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7633 BUMP_MIB(ill->ill_ip6_mib, 7634 ipv6InDiscards); 7635 freemsg(first_mp); 7636 return; 7637 } 7638 hck_flags = 0; 7639 ip6h = (ip6_t *)mp->b_rptr; 7640 whereptr = (uint8_t *)ip6h + hdr_len; 7641 } 7642 /* 7643 * Extract the offset field from the TCP header. 7644 */ 7645 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7646 if (offset != 5) { 7647 if (offset < 5) { 7648 ip1dbg(("ip_rput_data_v6: short " 7649 "TCP data offset")); 7650 BUMP_MIB(ill->ill_ip6_mib, 7651 ipv6InDiscards); 7652 freemsg(first_mp); 7653 return; 7654 } 7655 /* 7656 * There must be TCP options. 7657 * Make sure we can grab them. 7658 */ 7659 offset <<= 2; 7660 if (remlen < offset) 7661 goto pkt_too_short; 7662 if (mp->b_cont != NULL && 7663 whereptr + offset > mp->b_wptr) { 7664 if (!pullupmsg(mp, 7665 hdr_len + offset)) { 7666 BUMP_MIB(ill->ill_ip6_mib, 7667 ipv6InDiscards); 7668 freemsg(first_mp); 7669 return; 7670 } 7671 hck_flags = 0; 7672 ip6h = (ip6_t *)mp->b_rptr; 7673 whereptr = (uint8_t *)ip6h + hdr_len; 7674 } 7675 } 7676 7677 up = (uint16_t *)&ip6h->ip6_src; 7678 /* 7679 * TCP checksum calculation. 
First sum up the 7680 * pseudo-header fields: 7681 * - Source IPv6 address 7682 * - Destination IPv6 address 7683 * - TCP payload length 7684 * - TCP protocol ID 7685 */ 7686 sum = htons(IPPROTO_TCP + remlen) + 7687 up[0] + up[1] + up[2] + up[3] + 7688 up[4] + up[5] + up[6] + up[7] + 7689 up[8] + up[9] + up[10] + up[11] + 7690 up[12] + up[13] + up[14] + up[15]; 7691 7692 /* Fold initial sum */ 7693 sum = (sum & 0xffff) + (sum >> 16); 7694 7695 mp1 = mp->b_cont; 7696 7697 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7698 IP6_STAT(ip6_in_sw_cksum); 7699 7700 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7701 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7702 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7703 mp, mp1, cksum_err); 7704 7705 if (cksum_err) { 7706 BUMP_MIB(&ip_mib, tcpInErrs); 7707 7708 if (hck_flags & HCK_FULLCKSUM) 7709 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7710 else if (hck_flags & HCK_PARTIALCKSUM) 7711 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7712 else 7713 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7714 7715 freemsg(first_mp); 7716 return; 7717 } 7718 tcp_fanout: 7719 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7720 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7721 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7722 return; 7723 } 7724 case IPPROTO_SCTP: 7725 { 7726 sctp_hdr_t *sctph; 7727 uint32_t calcsum, pktsum; 7728 uint_t hdr_len = pkt_len - remlen; 7729 7730 /* SCTP needs all of the SCTP header */ 7731 if (remlen < sizeof (*sctph)) { 7732 goto pkt_too_short; 7733 } 7734 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7735 ASSERT(mp->b_cont != NULL); 7736 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7737 BUMP_MIB(ill->ill_ip6_mib, 7738 ipv6InDiscards); 7739 freemsg(mp); 7740 return; 7741 } 7742 ip6h = (ip6_t *)mp->b_rptr; 7743 whereptr = (uint8_t *)ip6h + hdr_len; 7744 } 7745 7746 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7747 /* checksum */ 7748 pktsum = sctph->sh_chksum; 7749 sctph->sh_chksum = 0; 7750 calcsum = sctp_cksum(mp, 
hdr_len); 7751 if (calcsum != pktsum) { 7752 BUMP_MIB(&sctp_mib, sctpChecksumError); 7753 freemsg(mp); 7754 return; 7755 } 7756 sctph->sh_chksum = pktsum; 7757 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7758 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7759 ports, ipif_id, zoneid, mp)) == NULL) { 7760 ip_fanout_sctp_raw(first_mp, ill, 7761 (ipha_t *)ip6h, B_FALSE, ports, 7762 mctl_present, 7763 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7764 B_TRUE, ipif_id, zoneid); 7765 return; 7766 } 7767 BUMP_MIB(&ip_mib, ipInDelivers); 7768 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7769 B_FALSE, mctl_present); 7770 return; 7771 } 7772 case IPPROTO_UDP: { 7773 uint16_t *up; 7774 uint32_t sum; 7775 7776 hdr_len = pkt_len - remlen; 7777 7778 if (hada_mp != NULL) { 7779 ip0dbg(("udp hada drop\n")); 7780 goto hada_drop; 7781 } 7782 7783 /* Verify that at least the ports are present */ 7784 if (remlen < UDPH_SIZE) 7785 goto pkt_too_short; 7786 if (mp->b_cont != NULL && 7787 whereptr + UDPH_SIZE > mp->b_wptr) { 7788 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7789 BUMP_MIB(ill->ill_ip6_mib, 7790 ipv6InDiscards); 7791 freemsg(first_mp); 7792 return; 7793 } 7794 hck_flags = 0; 7795 ip6h = (ip6_t *)mp->b_rptr; 7796 whereptr = (uint8_t *)ip6h + hdr_len; 7797 } 7798 7799 /* 7800 * Before going through the regular checksum 7801 * calculation, make sure the received checksum 7802 * is non-zero. RFC 2460 says, a 0x0000 checksum 7803 * in a UDP packet (within IPv6 packet) is invalid 7804 * and should be replaced by 0xffff. This makes 7805 * sense as regular checksum calculation will 7806 * pass for both the cases i.e. 0x0000 and 0xffff. 7807 * Removing one of the case makes error detection 7808 * stronger. 
7809 */ 7810 7811 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7812 /* 0x0000 checksum is invalid */ 7813 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7814 "checksum value 0x0000\n")); 7815 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7816 freemsg(first_mp); 7817 return; 7818 } 7819 7820 up = (uint16_t *)&ip6h->ip6_src; 7821 7822 /* 7823 * UDP checksum calculation. First sum up the 7824 * pseudo-header fields: 7825 * - Source IPv6 address 7826 * - Destination IPv6 address 7827 * - UDP payload length 7828 * - UDP protocol ID 7829 */ 7830 7831 sum = htons(IPPROTO_UDP + remlen) + 7832 up[0] + up[1] + up[2] + up[3] + 7833 up[4] + up[5] + up[6] + up[7] + 7834 up[8] + up[9] + up[10] + up[11] + 7835 up[12] + up[13] + up[14] + up[15]; 7836 7837 /* Fold initial sum */ 7838 sum = (sum & 0xffff) + (sum >> 16); 7839 7840 if (reass_hck_flags != 0) { 7841 hck_flags = reass_hck_flags; 7842 7843 IP_CKSUM_RECV_REASS(hck_flags, 7844 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7845 sum, reass_sum, cksum_err); 7846 } else { 7847 mp1 = mp->b_cont; 7848 7849 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7850 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7851 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7852 mp, mp1, cksum_err); 7853 } 7854 7855 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7856 IP6_STAT(ip6_in_sw_cksum); 7857 7858 if (cksum_err) { 7859 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7860 7861 if (hck_flags & HCK_FULLCKSUM) 7862 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7863 else if (hck_flags & HCK_PARTIALCKSUM) 7864 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7865 else 7866 IP6_STAT(ip6_udp_in_sw_cksum_err); 7867 7868 freemsg(first_mp); 7869 return; 7870 } 7871 goto udp_fanout; 7872 } 7873 case IPPROTO_ICMPV6: { 7874 uint16_t *up; 7875 uint32_t sum; 7876 uint_t hdr_len = pkt_len - remlen; 7877 7878 if (hada_mp != NULL) { 7879 ip0dbg(("icmp hada drop\n")); 7880 goto hada_drop; 7881 } 7882 7883 up = (uint16_t *)&ip6h->ip6_src; 7884 sum = htons(IPPROTO_ICMPV6 + remlen) + 7885 
up[0] + up[1] + up[2] + up[3] + 7886 up[4] + up[5] + up[6] + up[7] + 7887 up[8] + up[9] + up[10] + up[11] + 7888 up[12] + up[13] + up[14] + up[15]; 7889 sum = (sum & 0xffff) + (sum >> 16); 7890 sum = IP_CSUM(mp, hdr_len, sum); 7891 if (sum != 0) { 7892 /* IPv6 ICMP checksum failed */ 7893 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7894 "failed %x\n", 7895 sum)); 7896 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7897 BUMP_MIB(ill->ill_icmp6_mib, 7898 ipv6IfIcmpInErrors); 7899 freemsg(first_mp); 7900 return; 7901 } 7902 7903 icmp_fanout: 7904 /* Check variable for testing applications */ 7905 if (ipv6_drop_inbound_icmpv6) { 7906 freemsg(first_mp); 7907 return; 7908 } 7909 /* 7910 * Assume that there is always at least one conn for 7911 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7912 * where there is no conn. 7913 */ 7914 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7915 ASSERT(!(ill->ill_phyint->phyint_flags & 7916 PHYI_LOOPBACK)); 7917 /* 7918 * In the multicast case, applications may have 7919 * joined the group from different zones, so we 7920 * need to deliver the packet to each of them. 7921 * Loop through the multicast memberships 7922 * structures (ilm) on the receive ill and send 7923 * a copy of the packet up each matching one. 
7924 */ 7925 ILM_WALKER_HOLD(ill); 7926 for (ilm = ill->ill_ilm; ilm != NULL; 7927 ilm = ilm->ilm_next) { 7928 if (ilm->ilm_flags & ILM_DELETED) 7929 continue; 7930 if (!IN6_ARE_ADDR_EQUAL( 7931 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7932 continue; 7933 if (!ipif_lookup_zoneid(ill, 7934 ilm->ilm_zoneid, IPIF_UP, NULL)) 7935 continue; 7936 7937 first_mp1 = ip_copymsg(first_mp); 7938 if (first_mp1 == NULL) 7939 continue; 7940 icmp_inbound_v6(q, first_mp1, ill, 7941 hdr_len, mctl_present, 0, 7942 ilm->ilm_zoneid); 7943 } 7944 ILM_WALKER_RELE(ill); 7945 } else { 7946 first_mp1 = ip_copymsg(first_mp); 7947 if (first_mp1 != NULL) 7948 icmp_inbound_v6(q, first_mp1, ill, 7949 hdr_len, mctl_present, 0, zoneid); 7950 } 7951 } 7952 /* FALLTHRU */ 7953 default: { 7954 /* 7955 * Handle protocols with which IPv6 is less intimate. 7956 */ 7957 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7958 7959 if (hada_mp != NULL) { 7960 ip0dbg(("default hada drop\n")); 7961 goto hada_drop; 7962 } 7963 7964 /* 7965 * Enable sending ICMP for "Unknown" nexthdr 7966 * case. i.e. where we did not FALLTHRU from 7967 * IPPROTO_ICMPV6 processing case above. 7968 * If we did FALLTHRU, then the packet has already been 7969 * processed for IPPF, don't process it again in 7970 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7971 * flags 7972 */ 7973 if (nexthdr != IPPROTO_ICMPV6) 7974 proto_flags |= IP_FF_SEND_ICMP; 7975 else 7976 proto_flags |= IP6_NO_IPPOLICY; 7977 7978 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7979 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7980 mctl_present, zoneid); 7981 return; 7982 } 7983 7984 case IPPROTO_DSTOPTS: { 7985 uint_t ehdrlen; 7986 uint8_t *optptr; 7987 ip6_dest_t *desthdr; 7988 7989 /* Check if AH is present. */ 7990 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7991 ire, hada_mp, zoneid)) { 7992 ip0dbg(("dst early hada drop\n")); 7993 return; 7994 } 7995 7996 /* 7997 * Reinitialize pointers, as ipsec_early_ah_v6() does 7998 * complete pullups. 
We don't have to do more pullups 7999 * as a result. 8000 */ 8001 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8002 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8003 ip6h = (ip6_t *)mp->b_rptr; 8004 8005 if (remlen < MIN_EHDR_LEN) 8006 goto pkt_too_short; 8007 8008 desthdr = (ip6_dest_t *)whereptr; 8009 nexthdr = desthdr->ip6d_nxt; 8010 prev_nexthdr_offset = (uint_t)(whereptr - 8011 (uint8_t *)ip6h); 8012 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8013 if (remlen < ehdrlen) 8014 goto pkt_too_short; 8015 optptr = whereptr + 2; 8016 /* 8017 * Note: XXX This code does not seem to make 8018 * distinction between Destination Options Header 8019 * being before/after Routing Header which can 8020 * happen if we are at the end of source route. 8021 * This may become significant in future. 8022 * (No real significant Destination Options are 8023 * defined/implemented yet ). 8024 */ 8025 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8026 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8027 case -1: 8028 /* 8029 * Packet has been consumed and any needed 8030 * ICMP errors sent. 
8031 */ 8032 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8033 freemsg(hada_mp); 8034 return; 8035 case 0: 8036 /* No action needed continue */ 8037 break; 8038 case 1: 8039 /* 8040 * Unnexpected return value 8041 * (Router alert is a Hop-by-Hop option) 8042 */ 8043 #ifdef DEBUG 8044 panic("ip_rput_data_v6: router " 8045 "alert hbh opt indication in dest opt"); 8046 /*NOTREACHED*/ 8047 #else 8048 freemsg(hada_mp); 8049 freemsg(first_mp); 8050 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8051 return; 8052 #endif 8053 } 8054 used = ehdrlen; 8055 break; 8056 } 8057 case IPPROTO_FRAGMENT: { 8058 ip6_frag_t *fraghdr; 8059 size_t no_frag_hdr_len; 8060 8061 if (hada_mp != NULL) { 8062 ip0dbg(("frag hada drop\n")); 8063 goto hada_drop; 8064 } 8065 8066 ASSERT(first_mp == mp); 8067 if (remlen < sizeof (ip6_frag_t)) 8068 goto pkt_too_short; 8069 8070 if (mp->b_cont != NULL && 8071 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8072 if (!pullupmsg(mp, 8073 pkt_len - remlen + sizeof (ip6_frag_t))) { 8074 BUMP_MIB(ill->ill_ip6_mib, 8075 ipv6InDiscards); 8076 freemsg(mp); 8077 return; 8078 } 8079 hck_flags = 0; 8080 ip6h = (ip6_t *)mp->b_rptr; 8081 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8082 } 8083 8084 fraghdr = (ip6_frag_t *)whereptr; 8085 used = (uint_t)sizeof (ip6_frag_t); 8086 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8087 8088 /* 8089 * Invoke the CGTP (multirouting) filtering module to 8090 * process the incoming packet. Packets identified as 8091 * duplicates must be discarded. Filtering is active 8092 * only if the the ip_cgtp_filter ndd variable is 8093 * non-zero. 
8094 */ 8095 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8096 int cgtp_flt_pkt = 8097 ip_cgtp_filter_ops->cfo_filter_v6( 8098 inill->ill_rq, ip6h, fraghdr); 8099 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8100 freemsg(mp); 8101 return; 8102 } 8103 } 8104 8105 /* Restore the flags */ 8106 DB_CKSUMFLAGS(mp) = hck_flags; 8107 8108 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8109 remlen - used, &prev_nexthdr_offset, 8110 &reass_sum, &reass_hck_flags); 8111 if (mp == NULL) { 8112 /* Reassembly is still pending */ 8113 return; 8114 } 8115 /* The first mblk are the headers before the frag hdr */ 8116 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8117 8118 first_mp = mp; /* mp has most likely changed! */ 8119 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8120 ip6h = (ip6_t *)mp->b_rptr; 8121 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8122 whereptr = mp->b_rptr + no_frag_hdr_len; 8123 remlen = ntohs(ip6h->ip6_plen) + 8124 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8125 pkt_len = msgdsize(mp); 8126 used = 0; 8127 break; 8128 } 8129 case IPPROTO_HOPOPTS: 8130 if (hada_mp != NULL) { 8131 ip0dbg(("hop hada drop\n")); 8132 goto hada_drop; 8133 } 8134 /* 8135 * Illegal header sequence. 8136 * (Hop-by-hop headers are processed above 8137 * and required to immediately follow IPv6 header) 8138 */ 8139 icmp_param_problem_v6(WR(q), first_mp, 8140 ICMP6_PARAMPROB_NEXTHEADER, 8141 prev_nexthdr_offset, 8142 B_FALSE, B_FALSE); 8143 return; 8144 8145 case IPPROTO_ROUTING: { 8146 uint_t ehdrlen; 8147 ip6_rthdr_t *rthdr; 8148 8149 /* Check if AH is present. */ 8150 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8151 ire, hada_mp, zoneid)) { 8152 ip0dbg(("routing hada drop\n")); 8153 return; 8154 } 8155 8156 /* 8157 * Reinitialize pointers, as ipsec_early_ah_v6() does 8158 * complete pullups. We don't have to do more pullups 8159 * as a result. 
8160 */ 8161 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8162 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8163 ip6h = (ip6_t *)mp->b_rptr; 8164 8165 if (remlen < MIN_EHDR_LEN) 8166 goto pkt_too_short; 8167 rthdr = (ip6_rthdr_t *)whereptr; 8168 nexthdr = rthdr->ip6r_nxt; 8169 prev_nexthdr_offset = (uint_t)(whereptr - 8170 (uint8_t *)ip6h); 8171 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8172 if (remlen < ehdrlen) 8173 goto pkt_too_short; 8174 if (rthdr->ip6r_segleft != 0) { 8175 /* Not end of source route */ 8176 if (ll_multicast) { 8177 BUMP_MIB(ill->ill_ip6_mib, 8178 ipv6ForwProhibits); 8179 freemsg(hada_mp); 8180 freemsg(mp); 8181 return; 8182 } 8183 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8184 flags, hada_mp); 8185 return; 8186 } 8187 used = ehdrlen; 8188 break; 8189 } 8190 case IPPROTO_AH: 8191 case IPPROTO_ESP: { 8192 /* 8193 * Fast path for AH/ESP. If this is the first time 8194 * we are sending a datagram to AH/ESP, allocate 8195 * a IPSEC_IN message and prepend it. Otherwise, 8196 * just fanout. 8197 */ 8198 8199 ipsec_in_t *ii; 8200 int ipsec_rc; 8201 8202 if (!mctl_present) { 8203 ASSERT(first_mp == mp); 8204 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8205 NULL) { 8206 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8207 "allocation failure.\n")); 8208 BUMP_MIB(ill->ill_ip6_mib, 8209 ipv6InDiscards); 8210 freemsg(mp); 8211 return; 8212 } 8213 /* 8214 * Store the ill_index so that when we come back 8215 * from IPSEC we ride on the same queue. 8216 */ 8217 ii = (ipsec_in_t *)first_mp->b_rptr; 8218 ii->ipsec_in_ill_index = 8219 ill->ill_phyint->phyint_ifindex; 8220 ii->ipsec_in_rill_index = 8221 ii->ipsec_in_ill_index; 8222 first_mp->b_cont = mp; 8223 /* 8224 * Cache hardware acceleration info. 
8225 */ 8226 if (hada_mp != NULL) { 8227 IPSECHW_DEBUG(IPSECHW_PKT, 8228 ("ip_rput_data_v6: " 8229 "caching data attr.\n")); 8230 ii->ipsec_in_accelerated = B_TRUE; 8231 ii->ipsec_in_da = hada_mp; 8232 hada_mp = NULL; 8233 } 8234 } else { 8235 ii = (ipsec_in_t *)first_mp->b_rptr; 8236 } 8237 8238 if (!ipsec_loaded()) { 8239 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8240 ire->ire_zoneid); 8241 return; 8242 } 8243 8244 /* select inbound SA and have IPsec process the pkt */ 8245 if (nexthdr == IPPROTO_ESP) { 8246 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8247 if (esph == NULL) 8248 return; 8249 ASSERT(ii->ipsec_in_esp_sa != NULL); 8250 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8251 NULL); 8252 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8253 first_mp, esph); 8254 } else { 8255 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8256 if (ah == NULL) 8257 return; 8258 ASSERT(ii->ipsec_in_ah_sa != NULL); 8259 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8260 NULL); 8261 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8262 first_mp, ah); 8263 } 8264 8265 switch (ipsec_rc) { 8266 case IPSEC_STATUS_SUCCESS: 8267 break; 8268 case IPSEC_STATUS_FAILED: 8269 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8270 /* FALLTHRU */ 8271 case IPSEC_STATUS_PENDING: 8272 return; 8273 } 8274 /* we're done with IPsec processing, send it up */ 8275 ip_fanout_proto_again(first_mp, ill, inill, ire); 8276 return; 8277 } 8278 case IPPROTO_NONE: 8279 /* All processing is done. Count as "delivered". 
*/ 8280 freemsg(hada_mp); 8281 freemsg(first_mp); 8282 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8283 return; 8284 } 8285 whereptr += used; 8286 ASSERT(remlen >= used); 8287 remlen -= used; 8288 } 8289 /* NOTREACHED */ 8290 8291 pkt_too_short: 8292 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8293 ip6_len, pkt_len, remlen)); 8294 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8295 freemsg(hada_mp); 8296 freemsg(first_mp); 8297 return; 8298 udp_fanout: 8299 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8300 connp = NULL; 8301 } else { 8302 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8303 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8304 CONN_DEC_REF(connp); 8305 connp = NULL; 8306 } 8307 } 8308 8309 if (connp == NULL) { 8310 uint32_t ports; 8311 8312 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8313 UDP_PORTS_OFFSET); 8314 IP6_STAT(ip6_udp_slow_path); 8315 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8316 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8317 zoneid); 8318 return; 8319 } 8320 8321 if (CONN_UDP_FLOWCTLD(connp)) { 8322 freemsg(first_mp); 8323 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8324 CONN_DEC_REF(connp); 8325 return; 8326 } 8327 8328 /* Initiate IPPF processing */ 8329 if (IP6_IN_IPP(flags)) { 8330 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8331 if (mp == NULL) { 8332 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8333 CONN_DEC_REF(connp); 8334 return; 8335 } 8336 } 8337 8338 if (connp->conn_ipv6_recvpktinfo || 8339 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8340 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8341 if (mp == NULL) { 8342 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8343 CONN_DEC_REF(connp); 8344 return; 8345 } 8346 } 8347 8348 IP6_STAT(ip6_udp_fast_path); 8349 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8350 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8351 8352 /* Send it upstream */ 8353 CONN_UDP_RECV(connp, mp); 8354 8355 CONN_DEC_REF(connp); 
8356 freemsg(hada_mp); 8357 return; 8358 8359 hada_drop: 8360 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8361 /* IPsec kstats: bump counter here */ 8362 freemsg(hada_mp); 8363 freemsg(first_mp); 8364 } 8365 8366 /* 8367 * Reassemble fragment. 8368 * When it returns a completed message the first mblk will only contain 8369 * the headers prior to the fragment header. 8370 * 8371 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8372 * of the preceding header. This is needed to patch the previous header's 8373 * nexthdr field when reassembly completes. 8374 */ 8375 static mblk_t * 8376 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8377 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8378 uint32_t *cksum_val, uint16_t *cksum_flags) 8379 { 8380 ill_t *ill = (ill_t *)q->q_ptr; 8381 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8382 uint16_t offset; 8383 boolean_t more_frags; 8384 uint8_t nexthdr = fraghdr->ip6f_nxt; 8385 in6_addr_t *v6dst_ptr; 8386 in6_addr_t *v6src_ptr; 8387 uint_t end; 8388 uint_t hdr_length; 8389 size_t count; 8390 ipf_t *ipf; 8391 ipf_t **ipfp; 8392 ipfb_t *ipfb; 8393 mblk_t *mp1; 8394 uint8_t ecn_info = 0; 8395 size_t msg_len; 8396 mblk_t *tail_mp; 8397 mblk_t *t_mp; 8398 boolean_t pruned = B_FALSE; 8399 uint32_t sum_val; 8400 uint16_t sum_flags; 8401 8402 8403 if (cksum_val != NULL) 8404 *cksum_val = 0; 8405 if (cksum_flags != NULL) 8406 *cksum_flags = 0; 8407 8408 /* 8409 * We utilize hardware computed checksum info only for UDP since 8410 * IP fragmentation is a normal occurence for the protocol. In 8411 * addition, checksum offload support for IP fragments carrying 8412 * UDP payload is commonly implemented across network adapters. 
8413 */ 8414 ASSERT(ill != NULL); 8415 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8416 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8417 mblk_t *mp1 = mp->b_cont; 8418 int32_t len; 8419 8420 /* Record checksum information from the packet */ 8421 sum_val = (uint32_t)DB_CKSUM16(mp); 8422 sum_flags = DB_CKSUMFLAGS(mp); 8423 8424 /* fragmented payload offset from beginning of mblk */ 8425 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8426 8427 if ((sum_flags & HCK_PARTIALCKSUM) && 8428 (mp1 == NULL || mp1->b_cont == NULL) && 8429 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8430 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8431 uint32_t adj; 8432 /* 8433 * Partial checksum has been calculated by hardware 8434 * and attached to the packet; in addition, any 8435 * prepended extraneous data is even byte aligned. 8436 * If any such data exists, we adjust the checksum; 8437 * this would also handle any postpended data. 8438 */ 8439 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8440 mp, mp1, len, adj); 8441 8442 /* One's complement subtract extraneous checksum */ 8443 if (adj >= sum_val) 8444 sum_val = ~(adj - sum_val) & 0xFFFF; 8445 else 8446 sum_val -= adj; 8447 } 8448 } else { 8449 sum_val = 0; 8450 sum_flags = 0; 8451 } 8452 8453 /* Clear hardware checksumming flag */ 8454 DB_CKSUMFLAGS(mp) = 0; 8455 8456 /* 8457 * Note: Fragment offset in header is in 8-octet units. 8458 * Clearing least significant 3 bits not only extracts 8459 * it but also gets it in units of octets. 8460 */ 8461 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8462 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8463 8464 /* 8465 * Is the more frags flag on and the payload length not a multiple 8466 * of eight? 
8467 */ 8468 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8469 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8470 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8471 (uint32_t)((char *)&ip6h->ip6_plen - 8472 (char *)ip6h), B_FALSE, B_FALSE); 8473 return (NULL); 8474 } 8475 8476 v6src_ptr = &ip6h->ip6_src; 8477 v6dst_ptr = &ip6h->ip6_dst; 8478 end = remlen; 8479 8480 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8481 end += offset; 8482 8483 /* 8484 * Would fragment cause reassembled packet to have a payload length 8485 * greater than IP_MAXPACKET - the max payload size? 8486 */ 8487 if (end > IP_MAXPACKET) { 8488 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8489 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8490 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8491 (char *)ip6h), B_FALSE, B_FALSE); 8492 return (NULL); 8493 } 8494 8495 /* 8496 * This packet just has one fragment. Reassembly not 8497 * needed. 8498 */ 8499 if (!more_frags && offset == 0) { 8500 goto reass_done; 8501 } 8502 8503 /* 8504 * Drop the fragmented as early as possible, if 8505 * we don't have resource(s) to re-assemble. 8506 */ 8507 if (ip_reass_queue_bytes == 0) { 8508 freemsg(mp); 8509 return (NULL); 8510 } 8511 8512 /* Record the ECN field info. */ 8513 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8514 /* 8515 * If this is not the first fragment, dump the unfragmentable 8516 * portion of the packet. 8517 */ 8518 if (offset) 8519 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8520 8521 /* 8522 * Fragmentation reassembly. Each ILL has a hash table for 8523 * queueing packets undergoing reassembly for all IPIFs 8524 * associated with the ILL. The hash is based on the packet 8525 * IP ident field. The ILL frag hash table was allocated 8526 * as a timer block at the time the ILL was created. Whenever 8527 * there is anything on the reassembly queue, the timer will 8528 * be running. 
8529 */ 8530 msg_len = MBLKSIZE(mp); 8531 tail_mp = mp; 8532 while (tail_mp->b_cont != NULL) { 8533 tail_mp = tail_mp->b_cont; 8534 msg_len += MBLKSIZE(tail_mp); 8535 } 8536 /* 8537 * If the reassembly list for this ILL will get too big 8538 * prune it. 8539 */ 8540 8541 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8542 ip_reass_queue_bytes) { 8543 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8544 : (ip_reass_queue_bytes - msg_len)); 8545 pruned = B_TRUE; 8546 } 8547 8548 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8549 mutex_enter(&ipfb->ipfb_lock); 8550 8551 ipfp = &ipfb->ipfb_ipf; 8552 /* Try to find an existing fragment queue for this packet. */ 8553 for (;;) { 8554 ipf = ipfp[0]; 8555 if (ipf) { 8556 /* 8557 * It has to match on ident, source address, and 8558 * dest address. 8559 */ 8560 if (ipf->ipf_ident == ident && 8561 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8562 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8563 8564 /* 8565 * If we have received too many 8566 * duplicate fragments for this packet 8567 * free it. 8568 */ 8569 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8570 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8571 freemsg(mp); 8572 mutex_exit(&ipfb->ipfb_lock); 8573 return (NULL); 8574 } 8575 8576 break; 8577 } 8578 ipfp = &ipf->ipf_hash_next; 8579 continue; 8580 } 8581 8582 8583 /* 8584 * If we pruned the list, do we want to store this new 8585 * fragment?. We apply an optimization here based on the 8586 * fact that most fragments will be received in order. 8587 * So if the offset of this incoming fragment is zero, 8588 * it is the first fragment of a new packet. We will 8589 * keep it. Otherwise drop the fragment, as we have 8590 * probably pruned the packet already (since the 8591 * packet cannot be found). 8592 */ 8593 8594 if (pruned && offset != 0) { 8595 mutex_exit(&ipfb->ipfb_lock); 8596 freemsg(mp); 8597 return (NULL); 8598 } 8599 8600 /* New guy. Allocate a frag message. 
*/ 8601 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8602 if (!mp1) { 8603 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8604 freemsg(mp); 8605 partial_reass_done: 8606 mutex_exit(&ipfb->ipfb_lock); 8607 return (NULL); 8608 } 8609 8610 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8611 /* 8612 * Too many fragmented packets in this hash bucket. 8613 * Free the oldest. 8614 */ 8615 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8616 } 8617 8618 mp1->b_cont = mp; 8619 8620 /* Initialize the fragment header. */ 8621 ipf = (ipf_t *)mp1->b_rptr; 8622 ipf->ipf_mp = mp1; 8623 ipf->ipf_ptphn = ipfp; 8624 ipfp[0] = ipf; 8625 ipf->ipf_hash_next = NULL; 8626 ipf->ipf_ident = ident; 8627 ipf->ipf_v6src = *v6src_ptr; 8628 ipf->ipf_v6dst = *v6dst_ptr; 8629 /* Record reassembly start time. */ 8630 ipf->ipf_timestamp = gethrestime_sec(); 8631 /* Record ipf generation and account for frag header */ 8632 ipf->ipf_gen = ill->ill_ipf_gen++; 8633 ipf->ipf_count = MBLKSIZE(mp1); 8634 ipf->ipf_protocol = nexthdr; 8635 ipf->ipf_nf_hdr_len = 0; 8636 ipf->ipf_prev_nexthdr_offset = 0; 8637 ipf->ipf_last_frag_seen = B_FALSE; 8638 ipf->ipf_ecn = ecn_info; 8639 ipf->ipf_num_dups = 0; 8640 ipfb->ipfb_frag_pkts++; 8641 ipf->ipf_checksum = 0; 8642 ipf->ipf_checksum_flags = 0; 8643 8644 /* Store checksum value in fragment header */ 8645 if (sum_flags != 0) { 8646 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8647 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8648 ipf->ipf_checksum = sum_val; 8649 ipf->ipf_checksum_flags = sum_flags; 8650 } 8651 8652 /* 8653 * We handle reassembly two ways. In the easy case, 8654 * where all the fragments show up in order, we do 8655 * minimal bookkeeping, and just clip new pieces on 8656 * the end. If we ever see a hole, then we go off 8657 * to ip_reassemble which has to mark the pieces and 8658 * keep track of the number of holes, etc. Obviously, 8659 * the point of having both mechanisms is so we can 8660 * handle the easy case as efficiently as possible. 
8661 */ 8662 if (offset == 0) { 8663 /* Easy case, in-order reassembly so far. */ 8664 /* Update the byte count */ 8665 ipf->ipf_count += msg_len; 8666 ipf->ipf_tail_mp = tail_mp; 8667 /* 8668 * Keep track of next expected offset in 8669 * ipf_end. 8670 */ 8671 ipf->ipf_end = end; 8672 ipf->ipf_nf_hdr_len = hdr_length; 8673 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8674 } else { 8675 /* Hard case, hole at the beginning. */ 8676 ipf->ipf_tail_mp = NULL; 8677 /* 8678 * ipf_end == 0 means that we have given up 8679 * on easy reassembly. 8680 */ 8681 ipf->ipf_end = 0; 8682 8683 /* Forget checksum offload from now on */ 8684 ipf->ipf_checksum_flags = 0; 8685 8686 /* 8687 * ipf_hole_cnt is set by ip_reassemble. 8688 * ipf_count is updated by ip_reassemble. 8689 * No need to check for return value here 8690 * as we don't expect reassembly to complete or 8691 * fail for the first fragment itself. 8692 */ 8693 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8694 msg_len); 8695 } 8696 /* Update per ipfb and ill byte counts */ 8697 ipfb->ipfb_count += ipf->ipf_count; 8698 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8699 ill->ill_frag_count += ipf->ipf_count; 8700 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8701 /* If the frag timer wasn't already going, start it. */ 8702 mutex_enter(&ill->ill_lock); 8703 ill_frag_timer_start(ill); 8704 mutex_exit(&ill->ill_lock); 8705 goto partial_reass_done; 8706 } 8707 8708 /* 8709 * If the packet's flag has changed (it could be coming up 8710 * from an interface different than the previous, therefore 8711 * possibly different checksum capability), then forget about 8712 * any stored checksum states. Otherwise add the value to 8713 * the existing one stored in the fragment header. 
8714 */ 8715 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8716 sum_val += ipf->ipf_checksum; 8717 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8718 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8719 ipf->ipf_checksum = sum_val; 8720 } else if (ipf->ipf_checksum_flags != 0) { 8721 /* Forget checksum offload from now on */ 8722 ipf->ipf_checksum_flags = 0; 8723 } 8724 8725 /* 8726 * We have a new piece of a datagram which is already being 8727 * reassembled. Update the ECN info if all IP fragments 8728 * are ECN capable. If there is one which is not, clear 8729 * all the info. If there is at least one which has CE 8730 * code point, IP needs to report that up to transport. 8731 */ 8732 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8733 if (ecn_info == IPH_ECN_CE) 8734 ipf->ipf_ecn = IPH_ECN_CE; 8735 } else { 8736 ipf->ipf_ecn = IPH_ECN_NECT; 8737 } 8738 8739 if (offset && ipf->ipf_end == offset) { 8740 /* The new fragment fits at the end */ 8741 ipf->ipf_tail_mp->b_cont = mp; 8742 /* Update the byte count */ 8743 ipf->ipf_count += msg_len; 8744 /* Update per ipfb and ill byte counts */ 8745 ipfb->ipfb_count += msg_len; 8746 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8747 ill->ill_frag_count += msg_len; 8748 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8749 if (more_frags) { 8750 /* More to come. */ 8751 ipf->ipf_end = end; 8752 ipf->ipf_tail_mp = tail_mp; 8753 goto partial_reass_done; 8754 } 8755 } else { 8756 /* 8757 * Go do the hard cases. 8758 * Call ip_reassemble(). 
8759 */ 8760 int ret; 8761 8762 if (offset == 0) { 8763 if (ipf->ipf_prev_nexthdr_offset == 0) { 8764 ipf->ipf_nf_hdr_len = hdr_length; 8765 ipf->ipf_prev_nexthdr_offset = 8766 *prev_nexthdr_offset; 8767 } 8768 } 8769 /* Save current byte count */ 8770 count = ipf->ipf_count; 8771 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8772 8773 /* Count of bytes added and subtracted (freeb()ed) */ 8774 count = ipf->ipf_count - count; 8775 if (count) { 8776 /* Update per ipfb and ill byte counts */ 8777 ipfb->ipfb_count += count; 8778 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8779 ill->ill_frag_count += count; 8780 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8781 } 8782 if (ret == IP_REASS_PARTIAL) { 8783 goto partial_reass_done; 8784 } else if (ret == IP_REASS_FAILED) { 8785 /* Reassembly failed. Free up all resources */ 8786 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8787 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8788 IP_REASS_SET_START(t_mp, 0); 8789 IP_REASS_SET_END(t_mp, 0); 8790 } 8791 freemsg(mp); 8792 goto partial_reass_done; 8793 } 8794 8795 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8796 } 8797 /* 8798 * We have completed reassembly. Unhook the frag header from 8799 * the reassembly list. 8800 * 8801 * Grab the unfragmentable header length next header value out 8802 * of the first fragment 8803 */ 8804 ASSERT(ipf->ipf_nf_hdr_len != 0); 8805 hdr_length = ipf->ipf_nf_hdr_len; 8806 8807 /* 8808 * Before we free the frag header, record the ECN info 8809 * to report back to the transport. 
8810 */ 8811 ecn_info = ipf->ipf_ecn; 8812 8813 /* 8814 * Store the nextheader field in the header preceding the fragment 8815 * header 8816 */ 8817 nexthdr = ipf->ipf_protocol; 8818 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8819 ipfp = ipf->ipf_ptphn; 8820 8821 /* We need to supply these to caller */ 8822 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8823 sum_val = ipf->ipf_checksum; 8824 else 8825 sum_val = 0; 8826 8827 mp1 = ipf->ipf_mp; 8828 count = ipf->ipf_count; 8829 ipf = ipf->ipf_hash_next; 8830 if (ipf) 8831 ipf->ipf_ptphn = ipfp; 8832 ipfp[0] = ipf; 8833 ill->ill_frag_count -= count; 8834 ASSERT(ipfb->ipfb_count >= count); 8835 ipfb->ipfb_count -= count; 8836 ipfb->ipfb_frag_pkts--; 8837 mutex_exit(&ipfb->ipfb_lock); 8838 /* Ditch the frag header. */ 8839 mp = mp1->b_cont; 8840 freeb(mp1); 8841 8842 /* 8843 * Make sure the packet is good by doing some sanity 8844 * check. If bad we can silentely drop the packet. 8845 */ 8846 reass_done: 8847 if (hdr_length < sizeof (ip6_frag_t)) { 8848 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8849 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8850 freemsg(mp); 8851 return (NULL); 8852 } 8853 8854 /* 8855 * Remove the fragment header from the initial header by 8856 * splitting the mblk into the non-fragmentable header and 8857 * everthing after the fragment extension header. This has the 8858 * side effect of putting all the headers that need destination 8859 * processing into the b_cont block-- on return this fact is 8860 * used in order to avoid having to look at the extensions 8861 * already processed. 8862 * 8863 * Note that this code assumes that the unfragmentable portion 8864 * of the header is in the first mblk and increments 8865 * the read pointer past it. If this assumption is broken 8866 * this code fails badly. 
8867 */ 8868 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8869 mblk_t *nmp; 8870 8871 if (!(nmp = dupb(mp))) { 8872 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8873 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8874 freemsg(mp); 8875 return (NULL); 8876 } 8877 nmp->b_cont = mp->b_cont; 8878 mp->b_cont = nmp; 8879 nmp->b_rptr += hdr_length; 8880 } 8881 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8882 8883 ip6h = (ip6_t *)mp->b_rptr; 8884 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8885 8886 /* Restore original IP length in header. */ 8887 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8888 /* Record the ECN info. */ 8889 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8890 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8891 8892 /* Reassembly is successful; return checksum information if needed */ 8893 if (cksum_val != NULL) 8894 *cksum_val = sum_val; 8895 if (cksum_flags != NULL) 8896 *cksum_flags = sum_flags; 8897 8898 return (mp); 8899 } 8900 8901 /* 8902 * Walk through the options to see if there is a routing header. 8903 * If present get the destination which is the last address of 8904 * the option. 
8905 */ 8906 in6_addr_t 8907 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8908 { 8909 uint8_t nexthdr; 8910 uint8_t *whereptr; 8911 ip6_hbh_t *hbhhdr; 8912 ip6_dest_t *dsthdr; 8913 ip6_rthdr0_t *rthdr; 8914 ip6_frag_t *fraghdr; 8915 int ehdrlen; 8916 int left; 8917 in6_addr_t *ap, rv; 8918 8919 if (is_fragment != NULL) 8920 *is_fragment = B_FALSE; 8921 8922 rv = ip6h->ip6_dst; 8923 8924 nexthdr = ip6h->ip6_nxt; 8925 whereptr = (uint8_t *)&ip6h[1]; 8926 for (;;) { 8927 8928 ASSERT(nexthdr != IPPROTO_RAW); 8929 switch (nexthdr) { 8930 case IPPROTO_HOPOPTS: 8931 hbhhdr = (ip6_hbh_t *)whereptr; 8932 nexthdr = hbhhdr->ip6h_nxt; 8933 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8934 break; 8935 case IPPROTO_DSTOPTS: 8936 dsthdr = (ip6_dest_t *)whereptr; 8937 nexthdr = dsthdr->ip6d_nxt; 8938 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8939 break; 8940 case IPPROTO_ROUTING: 8941 rthdr = (ip6_rthdr0_t *)whereptr; 8942 nexthdr = rthdr->ip6r0_nxt; 8943 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8944 8945 left = rthdr->ip6r0_segleft; 8946 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8947 rv = *(ap + left - 1); 8948 /* 8949 * If the caller doesn't care whether the packet 8950 * is a fragment or not, we can stop here since 8951 * we have our destination. 8952 */ 8953 if (is_fragment == NULL) 8954 goto done; 8955 break; 8956 case IPPROTO_FRAGMENT: 8957 fraghdr = (ip6_frag_t *)whereptr; 8958 nexthdr = fraghdr->ip6f_nxt; 8959 ehdrlen = sizeof (ip6_frag_t); 8960 if (is_fragment != NULL) 8961 *is_fragment = B_TRUE; 8962 goto done; 8963 default : 8964 goto done; 8965 } 8966 whereptr += ehdrlen; 8967 } 8968 8969 done: 8970 return (rv); 8971 } 8972 8973 /* 8974 * ip_source_routed_v6: 8975 * This function is called by redirect code in ip_rput_data_v6 to 8976 * know whether this packet is source routed through this node i.e 8977 * whether this node (router) is part of the journey. 
This 8978 * function is called under two cases : 8979 * 8980 * case 1 : Routing header was processed by this node and 8981 * ip_process_rthdr replaced ip6_dst with the next hop 8982 * and we are forwarding the packet to the next hop. 8983 * 8984 * case 2 : Routing header was not processed by this node and we 8985 * are just forwarding the packet. 8986 * 8987 * For case (1) we don't want to send redirects. For case(2) we 8988 * want to send redirects. 8989 */ 8990 static boolean_t 8991 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8992 { 8993 uint8_t nexthdr; 8994 in6_addr_t *addrptr; 8995 ip6_rthdr0_t *rthdr; 8996 uint8_t numaddr; 8997 ip6_hbh_t *hbhhdr; 8998 uint_t ehdrlen; 8999 uint8_t *byteptr; 9000 9001 ip2dbg(("ip_source_routed_v6\n")); 9002 nexthdr = ip6h->ip6_nxt; 9003 ehdrlen = IPV6_HDR_LEN; 9004 9005 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9006 while (nexthdr == IPPROTO_HOPOPTS || 9007 nexthdr == IPPROTO_DSTOPTS) { 9008 byteptr = (uint8_t *)ip6h + ehdrlen; 9009 /* 9010 * Check if we have already processed 9011 * packets or we are just a forwarding 9012 * router which only pulled up msgs up 9013 * to IPV6HDR and one HBH ext header 9014 */ 9015 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9016 ip2dbg(("ip_source_routed_v6: Extension" 9017 " headers not processed\n")); 9018 return (B_FALSE); 9019 } 9020 hbhhdr = (ip6_hbh_t *)byteptr; 9021 nexthdr = hbhhdr->ip6h_nxt; 9022 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9023 } 9024 switch (nexthdr) { 9025 case IPPROTO_ROUTING: 9026 byteptr = (uint8_t *)ip6h + ehdrlen; 9027 /* 9028 * If for some reason, we haven't pulled up 9029 * the routing hdr data mblk, then we must 9030 * not have processed it at all. So for sure 9031 * we are not part of the source routed journey. 
9032 */ 9033 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9034 ip2dbg(("ip_source_routed_v6: Routing" 9035 " header not processed\n")); 9036 return (B_FALSE); 9037 } 9038 rthdr = (ip6_rthdr0_t *)byteptr; 9039 /* 9040 * Either we are an intermediate router or the 9041 * last hop before destination and we have 9042 * already processed the routing header. 9043 * If segment_left is greater than or equal to zero, 9044 * then we must be the (numaddr - segleft) entry 9045 * of the routing header. Although ip6r0_segleft 9046 * is a unit8_t variable, we still check for zero 9047 * or greater value, if in case the data type 9048 * is changed someday in future. 9049 */ 9050 if (rthdr->ip6r0_segleft > 0 || 9051 rthdr->ip6r0_segleft == 0) { 9052 ire_t *ire = NULL; 9053 9054 numaddr = rthdr->ip6r0_len / 2; 9055 addrptr = (in6_addr_t *)((char *)rthdr + 9056 sizeof (*rthdr)); 9057 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9058 if (addrptr != NULL) { 9059 ire = ire_ctable_lookup_v6(addrptr, NULL, 9060 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9061 MATCH_IRE_TYPE); 9062 if (ire != NULL) { 9063 ire_refrele(ire); 9064 return (B_TRUE); 9065 } 9066 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9067 } 9068 } 9069 /* FALLTHRU */ 9070 default: 9071 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9072 return (B_FALSE); 9073 } 9074 } 9075 9076 /* 9077 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9078 * Assumes that the following set of headers appear in the first 9079 * mblk: 9080 * ip6i_t (if present) CAN also appear as a separate mblk. 9081 * ip6_t 9082 * Any extension headers 9083 * TCP/UDP/SCTP header (if present) 9084 * The routine can handle an ICMPv6 header that is not in the first mblk. 9085 * 9086 * The order to determine the outgoing interface is as follows: 9087 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9088 * 2. If conn_nofailover_ill is set then use that ill. 9089 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9090 * 4. If q is an ill queue and (link local or multicast destination) then 9091 * use that ill. 9092 * 5. If IPV6_BOUND_IF has been set use that ill. 9093 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9094 * look for the best IRE match for the unspecified group to determine 9095 * the ill. 9096 * 7. For unicast: Just do an IRE lookup for the best match. 9097 */ 9098 void 9099 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9100 { 9101 conn_t *connp = NULL; 9102 queue_t *q = (queue_t *)arg2; 9103 ire_t *ire = NULL; 9104 ire_t *sctp_ire = NULL; 9105 ip6_t *ip6h; 9106 in6_addr_t *v6dstp; 9107 ill_t *ill = NULL; 9108 ipif_t *ipif; 9109 ip6i_t *ip6i; 9110 int cksum_request; /* -1 => normal. */ 9111 /* 1 => Skip TCP/UDP/SCTP checksum */ 9112 /* Otherwise contains insert offset for checksum */ 9113 int unspec_src; 9114 boolean_t do_outrequests; /* Increment OutRequests? */ 9115 mib2_ipv6IfStatsEntry_t *mibptr; 9116 int match_flags = MATCH_IRE_ILL_GROUP; 9117 boolean_t attach_if = B_FALSE; 9118 mblk_t *first_mp; 9119 boolean_t mctl_present; 9120 ipsec_out_t *io; 9121 boolean_t drop_if_delayed = B_FALSE; 9122 boolean_t multirt_need_resolve = B_FALSE; 9123 mblk_t *copy_mp = NULL; 9124 int err; 9125 int ip6i_flags = 0; 9126 zoneid_t zoneid; 9127 ill_t *saved_ill = NULL; 9128 boolean_t conn_lock_held; 9129 boolean_t need_decref = B_FALSE; 9130 9131 /* 9132 * Highest bit in version field is Reachability Confirmation bit 9133 * used by NUD in ip_xmit_v6(). 9134 */ 9135 #ifdef _BIG_ENDIAN 9136 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9137 #else 9138 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9139 #endif 9140 9141 /* 9142 * M_CTL comes from 5 places 9143 * 9144 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9145 * both V4 and V6 datagrams. 9146 * 9147 * 2) AH/ESP sends down M_CTL after doing their job with both 9148 * V4 and V6 datagrams. 
9149 * 9150 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9151 * attached. 9152 * 9153 * 4) Notifications from an external resolver (for XRESOLV ifs) 9154 * 9155 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9156 * IPsec hardware acceleration support. 9157 * 9158 * We need to handle (1)'s IPv6 case and (3) here. For the 9159 * IPv4 case in (1), and (2), IPSEC processing has already 9160 * started. The code in ip_wput() already knows how to handle 9161 * continuing IPSEC processing (for IPv4 and IPv6). All other 9162 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9163 * for handling. 9164 */ 9165 first_mp = mp; 9166 mctl_present = B_FALSE; 9167 io = NULL; 9168 9169 /* Multidata transmit? */ 9170 if (DB_TYPE(mp) == M_MULTIDATA) { 9171 /* 9172 * We should never get here, since all Multidata messages 9173 * originating from tcp should have been directed over to 9174 * tcp_multisend() in the first place. 9175 */ 9176 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9177 freemsg(mp); 9178 return; 9179 } else if (DB_TYPE(mp) == M_CTL) { 9180 uint32_t mctltype = 0; 9181 uint32_t mlen = MBLKL(first_mp); 9182 9183 mp = mp->b_cont; 9184 mctl_present = B_TRUE; 9185 io = (ipsec_out_t *)first_mp->b_rptr; 9186 9187 /* 9188 * Validate this M_CTL message. The only three types of 9189 * M_CTL messages we expect to see in this code path are 9190 * ipsec_out_t or ipsec_in_t structures (allocated as 9191 * ipsec_info_t unions), or ipsec_ctl_t structures. 9192 * The ipsec_out_type and ipsec_in_type overlap in the two 9193 * data structures, and they are either set to IPSEC_OUT 9194 * or IPSEC_IN depending on which data structure it is. 9195 * ipsec_ctl_t is an IPSEC_CTL. 9196 * 9197 * All other M_CTL messages are sent to ip_wput_nondata() 9198 * for handling. 
9199 */ 9200 if (mlen >= sizeof (io->ipsec_out_type)) 9201 mctltype = io->ipsec_out_type; 9202 9203 if ((mlen == sizeof (ipsec_ctl_t)) && 9204 (mctltype == IPSEC_CTL)) { 9205 ip_output(Q_TO_CONN(q), first_mp, q, caller); 9206 return; 9207 } 9208 9209 if ((mlen < sizeof (ipsec_info_t)) || 9210 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9211 mp == NULL) { 9212 ip_wput_nondata(NULL, q, first_mp, NULL); 9213 return; 9214 } 9215 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9216 if (q->q_next == NULL) { 9217 ip6h = (ip6_t *)mp->b_rptr; 9218 /* 9219 * For a freshly-generated TCP dgram that needs IPV6 9220 * processing, don't call ip_wput immediately. We can 9221 * tell this by the ipsec_out_proc_begin. In-progress 9222 * IPSEC_OUT messages have proc_begin set to TRUE, 9223 * and we want to send all IPSEC_IN messages to 9224 * ip_wput() for IPsec processing or finishing. 9225 */ 9226 if (mctltype == IPSEC_IN || 9227 IPVER(ip6h) != IPV6_VERSION || 9228 io->ipsec_out_proc_begin) { 9229 mibptr = &ip6_mib; 9230 goto notv6; 9231 } 9232 } 9233 } else if (DB_TYPE(mp) != M_DATA) { 9234 ip_wput_nondata(NULL, q, mp, NULL); 9235 return; 9236 } 9237 9238 ip6h = (ip6_t *)mp->b_rptr; 9239 9240 if (IPVER(ip6h) != IPV6_VERSION) { 9241 mibptr = &ip6_mib; 9242 goto notv6; 9243 } 9244 9245 if (q->q_next != NULL) { 9246 ill = (ill_t *)q->q_ptr; 9247 /* 9248 * We don't know if this ill will be used for IPv6 9249 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9250 * ipif_set_values() sets the ill_isv6 flag to true if 9251 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9252 * just drop the packet. 
9253 */ 9254 if (!ill->ill_isv6) { 9255 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9256 "ILLF_IPV6 was set\n")); 9257 freemsg(first_mp); 9258 return; 9259 } 9260 /* For uniformity do a refhold */ 9261 mutex_enter(&ill->ill_lock); 9262 if (!ILL_CAN_LOOKUP(ill)) { 9263 mutex_exit(&ill->ill_lock); 9264 freemsg(first_mp); 9265 return; 9266 } 9267 ill_refhold_locked(ill); 9268 mutex_exit(&ill->ill_lock); 9269 mibptr = ill->ill_ip6_mib; 9270 /* 9271 * ill_ip6_mib is allocated by ipif_set_values() when 9272 * ill_isv6 is set. Thus if ill_isv6 is true, 9273 * ill_ip6_mib had better not be NULL. 9274 */ 9275 ASSERT(mibptr != NULL); 9276 unspec_src = 0; 9277 BUMP_MIB(mibptr, ipv6OutRequests); 9278 do_outrequests = B_FALSE; 9279 } else { 9280 connp = (conn_t *)arg; 9281 ASSERT(connp != NULL); 9282 9283 /* is queue flow controlled? */ 9284 if ((q->q_first || connp->conn_draining) && 9285 (caller == IP_WPUT)) { 9286 /* 9287 * 1) TCP sends down M_CTL for detached connections. 9288 * 2) AH/ESP sends down M_CTL. 9289 * 9290 * We don't flow control either of the above. Only 9291 * UDP and others are flow controlled for which we 9292 * can't have a M_CTL. 9293 */ 9294 ASSERT(first_mp == mp); 9295 (void) putq(q, mp); 9296 return; 9297 } 9298 mibptr = &ip6_mib; 9299 unspec_src = connp->conn_unspec_src; 9300 do_outrequests = B_TRUE; 9301 if (mp->b_flag & MSGHASREF) { 9302 mp->b_flag &= ~MSGHASREF; 9303 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9304 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9305 need_decref = B_TRUE; 9306 } 9307 9308 /* 9309 * If there is a policy, try to attach an ipsec_out in 9310 * the front. At the end, first_mp either points to a 9311 * M_DATA message or IPSEC_OUT message linked to a 9312 * M_DATA message. We have to do it now as we might 9313 * lose the "conn" if we go through ip_newroute. 
9314 */ 9315 if (!mctl_present && 9316 (connp->conn_out_enforce_policy || 9317 connp->conn_latch != NULL)) { 9318 ASSERT(first_mp == mp); 9319 /* XXX Any better way to get the protocol fast ? */ 9320 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9321 connp->conn_ulp)) == NULL)) { 9322 if (need_decref) 9323 CONN_DEC_REF(connp); 9324 return; 9325 } else { 9326 ASSERT(mp->b_datap->db_type == M_CTL); 9327 first_mp = mp; 9328 mp = mp->b_cont; 9329 mctl_present = B_TRUE; 9330 io = (ipsec_out_t *)first_mp->b_rptr; 9331 } 9332 } 9333 } 9334 9335 /* check for alignment and full IPv6 header */ 9336 if (!OK_32PTR((uchar_t *)ip6h) || 9337 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9338 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9339 if (do_outrequests) 9340 BUMP_MIB(mibptr, ipv6OutRequests); 9341 BUMP_MIB(mibptr, ipv6OutDiscards); 9342 freemsg(first_mp); 9343 if (ill != NULL) 9344 ill_refrele(ill); 9345 if (need_decref) 9346 CONN_DEC_REF(connp); 9347 return; 9348 } 9349 v6dstp = &ip6h->ip6_dst; 9350 cksum_request = -1; 9351 ip6i = NULL; 9352 9353 /* 9354 * Once neighbor discovery has completed, ndp_process() will provide 9355 * locally generated packets for which processing can be reattempted. 9356 * In these cases, connp is NULL and the original zone is part of a 9357 * prepended ipsec_out_t. 9358 */ 9359 if (io != NULL) { 9360 zoneid = io->ipsec_out_zoneid; 9361 ASSERT(zoneid != ALL_ZONES); 9362 } else { 9363 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9364 } 9365 9366 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9367 /* 9368 * This is an ip6i_t header followed by an ip6_hdr. 9369 * Check which fields are set. 9370 * 9371 * When the packet comes from a transport we should have 9372 * all needed headers in the first mblk. However, when 9373 * going through ip_newroute*_v6 the ip6i might be in 9374 * a separate mblk when we return here. 
In that case 9375 * we pullup everything to ensure that extension and transport 9376 * headers "stay" in the first mblk. 9377 */ 9378 ip6i = (ip6i_t *)ip6h; 9379 ip6i_flags = ip6i->ip6i_flags; 9380 9381 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9382 ((mp->b_wptr - (uchar_t *)ip6i) >= 9383 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9384 9385 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9386 if (!pullupmsg(mp, -1)) { 9387 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9388 if (do_outrequests) 9389 BUMP_MIB(mibptr, ipv6OutRequests); 9390 BUMP_MIB(mibptr, ipv6OutDiscards); 9391 freemsg(first_mp); 9392 if (ill != NULL) 9393 ill_refrele(ill); 9394 if (need_decref) 9395 CONN_DEC_REF(connp); 9396 return; 9397 } 9398 ip6h = (ip6_t *)mp->b_rptr; 9399 v6dstp = &ip6h->ip6_dst; 9400 ip6i = (ip6i_t *)ip6h; 9401 } 9402 ip6h = (ip6_t *)&ip6i[1]; 9403 9404 /* 9405 * Advance rptr past the ip6i_t to get ready for 9406 * transmitting the packet. However, if the packet gets 9407 * passed to ip_newroute*_v6 then rptr is moved back so 9408 * that the ip6i_t header can be inspected when the 9409 * packet comes back here after passing through 9410 * ire_add_then_send. 9411 */ 9412 mp->b_rptr = (uchar_t *)ip6h; 9413 9414 /* 9415 * IP6I_ATTACH_IF is set in this function when we had a 9416 * conn and it was either bound to the IPFF_NOFAILOVER address 9417 * or IPV6_BOUND_PIF was set. These options override other 9418 * options that set the ifindex. We come here with 9419 * IP6I_ATTACH_IF set when we can't find the ire and 9420 * ip_newroute_v6 is feeding the packet for second time. 
9421 */ 9422 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9423 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9424 ASSERT(ip6i->ip6i_ifindex != 0); 9425 if (ill != NULL) 9426 ill_refrele(ill); 9427 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9428 NULL, NULL, NULL, NULL); 9429 if (ill == NULL) { 9430 if (do_outrequests) 9431 BUMP_MIB(mibptr, ipv6OutRequests); 9432 BUMP_MIB(mibptr, ipv6OutDiscards); 9433 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9434 ip6i->ip6i_ifindex)); 9435 if (need_decref) 9436 CONN_DEC_REF(connp); 9437 freemsg(first_mp); 9438 return; 9439 } 9440 mibptr = ill->ill_ip6_mib; 9441 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9442 /* 9443 * Preserve the index so that when we return 9444 * from IPSEC processing, we know where to 9445 * send the packet. 9446 */ 9447 if (mctl_present) { 9448 ASSERT(io != NULL); 9449 io->ipsec_out_ill_index = 9450 ip6i->ip6i_ifindex; 9451 } 9452 } 9453 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9454 /* 9455 * This is a multipathing probe packet that has 9456 * been delayed in ND resolution. 
Drop the 9457 * packet for the reasons mentioned in 9458 * nce_queue_mp() 9459 */ 9460 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9461 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9462 freemsg(first_mp); 9463 ill_refrele(ill); 9464 if (need_decref) 9465 CONN_DEC_REF(connp); 9466 return; 9467 } 9468 } 9469 } 9470 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9471 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9472 9473 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9474 if (secpolicy_net_rawaccess(cr) != 0) { 9475 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9476 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9477 NULL, zoneid, NULL, 9478 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9479 if (ire == NULL) { 9480 if (do_outrequests) 9481 BUMP_MIB(mibptr, 9482 ipv6OutRequests); 9483 BUMP_MIB(mibptr, ipv6OutDiscards); 9484 ip1dbg(("ip_wput_v6: bad source " 9485 "addr\n")); 9486 freemsg(first_mp); 9487 if (ill != NULL) 9488 ill_refrele(ill); 9489 if (need_decref) 9490 CONN_DEC_REF(connp); 9491 return; 9492 } 9493 ire_refrele(ire); 9494 } 9495 /* No need to verify again when using ip_newroute */ 9496 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9497 } 9498 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9499 /* 9500 * Make sure they match since ip_newroute*_v6 etc might 9501 * (unknown to them) inspect ip6i_nexthop when 9502 * they think they access ip6_dst. 9503 */ 9504 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9505 } 9506 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9507 cksum_request = 1; 9508 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9509 cksum_request = ip6i->ip6i_checksum_off; 9510 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9511 unspec_src = 1; 9512 9513 if (do_outrequests && ill != NULL) { 9514 BUMP_MIB(mibptr, ipv6OutRequests); 9515 do_outrequests = B_FALSE; 9516 } 9517 /* 9518 * Store ip6i_t info that we need after we come back 9519 * from IPSEC processing. 
9520 */ 9521 if (mctl_present) { 9522 ASSERT(io != NULL); 9523 io->ipsec_out_unspec_src = unspec_src; 9524 } 9525 } 9526 if (connp != NULL && connp->conn_dontroute) 9527 ip6h->ip6_hops = 1; 9528 9529 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9530 goto ipv6multicast; 9531 9532 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9533 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9534 ill_t *conn_outgoing_pill; 9535 9536 conn_outgoing_pill = conn_get_held_ill(connp, 9537 &connp->conn_outgoing_pill, &err); 9538 if (err == ILL_LOOKUP_FAILED) { 9539 if (ill != NULL) 9540 ill_refrele(ill); 9541 if (need_decref) 9542 CONN_DEC_REF(connp); 9543 freemsg(first_mp); 9544 return; 9545 } 9546 if (conn_outgoing_pill != NULL) { 9547 if (ill != NULL) 9548 ill_refrele(ill); 9549 ill = conn_outgoing_pill; 9550 attach_if = B_TRUE; 9551 match_flags = MATCH_IRE_ILL; 9552 mibptr = ill->ill_ip6_mib; 9553 9554 /* 9555 * Check if we need an ire that will not be 9556 * looked up by anybody else i.e. HIDDEN. 9557 */ 9558 if (ill_is_probeonly(ill)) 9559 match_flags |= MATCH_IRE_MARK_HIDDEN; 9560 goto send_from_ill; 9561 } 9562 } 9563 9564 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9565 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9566 ill_t *conn_nofailover_ill; 9567 9568 conn_nofailover_ill = conn_get_held_ill(connp, 9569 &connp->conn_nofailover_ill, &err); 9570 if (err == ILL_LOOKUP_FAILED) { 9571 if (ill != NULL) 9572 ill_refrele(ill); 9573 if (need_decref) 9574 CONN_DEC_REF(connp); 9575 freemsg(first_mp); 9576 return; 9577 } 9578 if (conn_nofailover_ill != NULL) { 9579 if (ill != NULL) 9580 ill_refrele(ill); 9581 ill = conn_nofailover_ill; 9582 attach_if = B_TRUE; 9583 /* 9584 * Assumes that ipc_nofailover_ill is used only for 9585 * multipathing probe packets. These packets are better 9586 * dropped, if they are delayed in ND resolution, for 9587 * the reasons described in nce_queue_mp(). 
9588 * IP6I_DROP_IFDELAYED will be set later on in this 9589 * function for this packet. 9590 */ 9591 drop_if_delayed = B_TRUE; 9592 match_flags = MATCH_IRE_ILL; 9593 mibptr = ill->ill_ip6_mib; 9594 9595 /* 9596 * Check if we need an ire that will not be 9597 * looked up by anybody else i.e. HIDDEN. 9598 */ 9599 if (ill_is_probeonly(ill)) 9600 match_flags |= MATCH_IRE_MARK_HIDDEN; 9601 goto send_from_ill; 9602 } 9603 } 9604 9605 /* 9606 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9607 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9608 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9609 */ 9610 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9611 ASSERT(ip6i->ip6i_ifindex != 0); 9612 attach_if = B_TRUE; 9613 ASSERT(ill != NULL); 9614 match_flags = MATCH_IRE_ILL; 9615 9616 /* 9617 * Check if we need an ire that will not be 9618 * looked up by anybody else i.e. HIDDEN. 9619 */ 9620 if (ill_is_probeonly(ill)) 9621 match_flags |= MATCH_IRE_MARK_HIDDEN; 9622 goto send_from_ill; 9623 } 9624 9625 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9626 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9627 ASSERT(ill != NULL); 9628 goto send_from_ill; 9629 } 9630 9631 /* 9632 * 4. If q is an ill queue and (link local or multicast destination) 9633 * then use that ill. 9634 */ 9635 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9636 goto send_from_ill; 9637 } 9638 9639 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9640 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9641 ill_t *conn_outgoing_ill; 9642 9643 conn_outgoing_ill = conn_get_held_ill(connp, 9644 &connp->conn_outgoing_ill, &err); 9645 if (err == ILL_LOOKUP_FAILED) { 9646 if (ill != NULL) 9647 ill_refrele(ill); 9648 if (need_decref) 9649 CONN_DEC_REF(connp); 9650 freemsg(first_mp); 9651 return; 9652 } 9653 if (ill != NULL) 9654 ill_refrele(ill); 9655 ill = conn_outgoing_ill; 9656 mibptr = ill->ill_ip6_mib; 9657 goto send_from_ill; 9658 } 9659 9660 /* 9661 * 6. For unicast: Just do an IRE lookup for the best match. 9662 * If we get here for a link-local address it is rather random 9663 * what interface we pick on a multihomed host. 9664 * *If* there is an IRE_CACHE (and the link-local address 9665 * isn't duplicated on multi links) this will find the IRE_CACHE. 9666 * Otherwise it will use one of the matching IRE_INTERFACE routes 9667 * for the link-local prefix. Hence, applications 9668 * *should* be encouraged to specify an outgoing interface when sending 9669 * to a link local address. 9670 */ 9671 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9672 !connp->conn_fully_bound)) { 9673 /* 9674 * We cache IRE_CACHEs to avoid lookups. We don't do 9675 * this for the tcp global queue and listen end point 9676 * as it does not really have a real destination to 9677 * talk to. 9678 */ 9679 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9680 } else { 9681 /* 9682 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9683 * grab a lock here to check for CONDEMNED as it is okay 9684 * to send a packet or two with the IRE_CACHE that is going 9685 * away. 9686 */ 9687 mutex_enter(&connp->conn_lock); 9688 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9689 if (ire != NULL && 9690 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9691 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9692 9693 IRE_REFHOLD(ire); 9694 mutex_exit(&connp->conn_lock); 9695 9696 } else { 9697 boolean_t cached = B_FALSE; 9698 9699 connp->conn_ire_cache = NULL; 9700 mutex_exit(&connp->conn_lock); 9701 /* Release the old ire */ 9702 if (ire != NULL && sctp_ire == NULL) 9703 IRE_REFRELE_NOTR(ire); 9704 9705 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9706 MBLK_GETLABEL(mp)); 9707 if (ire != NULL) { 9708 IRE_REFHOLD_NOTR(ire); 9709 9710 mutex_enter(&connp->conn_lock); 9711 if (!(connp->conn_state_flags & CONN_CLOSING) && 9712 (connp->conn_ire_cache == NULL)) { 9713 rw_enter(&ire->ire_bucket->irb_lock, 9714 RW_READER); 9715 if (!(ire->ire_marks & 9716 IRE_MARK_CONDEMNED)) { 9717 connp->conn_ire_cache = ire; 9718 cached = B_TRUE; 9719 } 9720 rw_exit(&ire->ire_bucket->irb_lock); 9721 } 9722 mutex_exit(&connp->conn_lock); 9723 9724 /* 9725 * We can continue to use the ire but since it 9726 * was not cached, we should drop the extra 9727 * reference. 9728 */ 9729 if (!cached) 9730 IRE_REFRELE_NOTR(ire); 9731 } 9732 } 9733 } 9734 9735 if (ire != NULL) { 9736 if (do_outrequests) { 9737 /* Handle IRE_LOCAL's that might appear here */ 9738 if (ire->ire_type == IRE_CACHE) { 9739 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9740 ill_ip6_mib; 9741 } else { 9742 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9743 } 9744 BUMP_MIB(mibptr, ipv6OutRequests); 9745 } 9746 ASSERT(!attach_if); 9747 9748 /* 9749 * Check if the ire has the RTF_MULTIRT flag, inherited 9750 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9751 */ 9752 if (ire->ire_flags & RTF_MULTIRT) { 9753 /* 9754 * Force hop limit of multirouted packets if required. 9755 * The hop limit of such packets is bounded by the 9756 * ip_multirt_ttl ndd variable. 9757 * NDP packets must have a hop limit of 255; don't 9758 * change the hop limit in that case. 
9759 */ 9760 if ((ip_multirt_ttl > 0) && 9761 (ip6h->ip6_hops > ip_multirt_ttl) && 9762 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9763 if (ip_debug > 3) { 9764 ip2dbg(("ip_wput_v6: forcing multirt " 9765 "hop limit to %d (was %d) ", 9766 ip_multirt_ttl, ip6h->ip6_hops)); 9767 pr_addr_dbg("v6dst %s\n", AF_INET6, 9768 &ire->ire_addr_v6); 9769 } 9770 ip6h->ip6_hops = ip_multirt_ttl; 9771 } 9772 9773 /* 9774 * We look at this point if there are pending 9775 * unresolved routes. ire_multirt_need_resolve_v6() 9776 * checks in O(n) that all IRE_OFFSUBNET ire 9777 * entries for the packet's destination and 9778 * flagged RTF_MULTIRT are currently resolved. 9779 * If some remain unresolved, we do a copy 9780 * of the current message. It will be used 9781 * to initiate additional route resolutions. 9782 */ 9783 multirt_need_resolve = 9784 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9785 MBLK_GETLABEL(first_mp)); 9786 ip2dbg(("ip_wput_v6: ire %p, " 9787 "multirt_need_resolve %d, first_mp %p\n", 9788 (void *)ire, multirt_need_resolve, 9789 (void *)first_mp)); 9790 if (multirt_need_resolve) { 9791 copy_mp = copymsg(first_mp); 9792 if (copy_mp != NULL) { 9793 MULTIRT_DEBUG_TAG(copy_mp); 9794 } 9795 } 9796 } 9797 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9798 connp, caller, 0, ip6i_flags); 9799 if (need_decref) { 9800 CONN_DEC_REF(connp); 9801 connp = NULL; 9802 } 9803 IRE_REFRELE(ire); 9804 9805 /* 9806 * Try to resolve another multiroute if 9807 * ire_multirt_need_resolve_v6() deemed it necessary. 9808 * copy_mp will be consumed (sent or freed) by 9809 * ip_newroute_v6(). 9810 */ 9811 if (copy_mp != NULL) { 9812 if (mctl_present) { 9813 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9814 } else { 9815 ip6h = (ip6_t *)copy_mp->b_rptr; 9816 } 9817 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9818 &ip6h->ip6_src, NULL, zoneid); 9819 } 9820 if (ill != NULL) 9821 ill_refrele(ill); 9822 return; 9823 } 9824 9825 /* 9826 * No full IRE for this destination. 
Send it to 9827 * ip_newroute_v6 to see if anything else matches. 9828 * Mark this packet as having originated on this 9829 * machine. 9830 * Update rptr if there was an ip6i_t header. 9831 */ 9832 mp->b_prev = NULL; 9833 mp->b_next = NULL; 9834 if (ip6i != NULL) 9835 mp->b_rptr -= sizeof (ip6i_t); 9836 9837 if (unspec_src) { 9838 if (ip6i == NULL) { 9839 /* 9840 * Add ip6i_t header to carry unspec_src 9841 * until the packet comes back in ip_wput_v6. 9842 */ 9843 mp = ip_add_info_v6(mp, NULL, v6dstp); 9844 if (mp == NULL) { 9845 if (do_outrequests) 9846 BUMP_MIB(mibptr, ipv6OutRequests); 9847 BUMP_MIB(mibptr, ipv6OutDiscards); 9848 if (mctl_present) 9849 freeb(first_mp); 9850 if (ill != NULL) 9851 ill_refrele(ill); 9852 if (need_decref) 9853 CONN_DEC_REF(connp); 9854 return; 9855 } 9856 ip6i = (ip6i_t *)mp->b_rptr; 9857 9858 if (mctl_present) { 9859 ASSERT(first_mp != mp); 9860 first_mp->b_cont = mp; 9861 } else { 9862 first_mp = mp; 9863 } 9864 9865 if ((mp->b_wptr - (uchar_t *)ip6i) == 9866 sizeof (ip6i_t)) { 9867 /* 9868 * ndp_resolver called from ip_newroute_v6 9869 * expects pulled up message. 
9870 */ 9871 if (!pullupmsg(mp, -1)) { 9872 ip1dbg(("ip_wput_v6: pullupmsg" 9873 " failed\n")); 9874 if (do_outrequests) { 9875 BUMP_MIB(mibptr, 9876 ipv6OutRequests); 9877 } 9878 BUMP_MIB(mibptr, ipv6OutDiscards); 9879 freemsg(first_mp); 9880 if (ill != NULL) 9881 ill_refrele(ill); 9882 if (need_decref) 9883 CONN_DEC_REF(connp); 9884 return; 9885 } 9886 ip6i = (ip6i_t *)mp->b_rptr; 9887 } 9888 ip6h = (ip6_t *)&ip6i[1]; 9889 v6dstp = &ip6h->ip6_dst; 9890 } 9891 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9892 if (mctl_present) { 9893 ASSERT(io != NULL); 9894 io->ipsec_out_unspec_src = unspec_src; 9895 } 9896 } 9897 if (do_outrequests) 9898 BUMP_MIB(mibptr, ipv6OutRequests); 9899 if (need_decref) 9900 CONN_DEC_REF(connp); 9901 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9902 if (ill != NULL) 9903 ill_refrele(ill); 9904 return; 9905 9906 9907 /* 9908 * Handle multicast packets with or without an conn. 9909 * Assumes that the transports set ip6_hops taking 9910 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9911 * into account. 9912 */ 9913 ipv6multicast: 9914 ip2dbg(("ip_wput_v6: multicast\n")); 9915 9916 /* 9917 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9918 * 2. If conn_nofailover_ill is set then use that ill. 9919 * 9920 * Hold the conn_lock till we refhold the ill of interest that is 9921 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9922 * while holding any locks, postpone the refrele until after the 9923 * conn_lock is dropped. 
9924 */ 9925 if (connp != NULL) { 9926 mutex_enter(&connp->conn_lock); 9927 conn_lock_held = B_TRUE; 9928 } else { 9929 conn_lock_held = B_FALSE; 9930 } 9931 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9932 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9933 if (err == ILL_LOOKUP_FAILED) { 9934 ip1dbg(("ip_output_v6: multicast" 9935 " conn_outgoing_pill no ipif\n")); 9936 multicast_discard: 9937 ASSERT(saved_ill == NULL); 9938 if (conn_lock_held) 9939 mutex_exit(&connp->conn_lock); 9940 if (ill != NULL) 9941 ill_refrele(ill); 9942 freemsg(first_mp); 9943 if (do_outrequests) 9944 BUMP_MIB(mibptr, ipv6OutDiscards); 9945 if (need_decref) 9946 CONN_DEC_REF(connp); 9947 return; 9948 } 9949 saved_ill = ill; 9950 ill = connp->conn_outgoing_pill; 9951 attach_if = B_TRUE; 9952 match_flags = MATCH_IRE_ILL; 9953 mibptr = ill->ill_ip6_mib; 9954 9955 /* 9956 * Check if we need an ire that will not be 9957 * looked up by anybody else i.e. HIDDEN. 9958 */ 9959 if (ill_is_probeonly(ill)) 9960 match_flags |= MATCH_IRE_MARK_HIDDEN; 9961 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9962 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9963 if (err == ILL_LOOKUP_FAILED) { 9964 ip1dbg(("ip_output_v6: multicast" 9965 " conn_nofailover_ill no ipif\n")); 9966 goto multicast_discard; 9967 } 9968 saved_ill = ill; 9969 ill = connp->conn_nofailover_ill; 9970 attach_if = B_TRUE; 9971 match_flags = MATCH_IRE_ILL; 9972 9973 /* 9974 * Check if we need an ire that will not be 9975 * looked up by anybody else i.e. HIDDEN. 9976 */ 9977 if (ill_is_probeonly(ill)) 9978 match_flags |= MATCH_IRE_MARK_HIDDEN; 9979 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9980 /* 9981 * Redo 1. If we did not find an IRE_CACHE the first time, 9982 * we should have an ip6i_t with IP6I_ATTACH_IF if 9983 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9984 * used on this endpoint. 
9985 */ 9986 ASSERT(ip6i->ip6i_ifindex != 0); 9987 attach_if = B_TRUE; 9988 ASSERT(ill != NULL); 9989 match_flags = MATCH_IRE_ILL; 9990 9991 /* 9992 * Check if we need an ire that will not be 9993 * looked up by anybody else i.e. HIDDEN. 9994 */ 9995 if (ill_is_probeonly(ill)) 9996 match_flags |= MATCH_IRE_MARK_HIDDEN; 9997 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9998 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9999 10000 ASSERT(ill != NULL); 10001 } else if (ill != NULL) { 10002 /* 10003 * 4. If q is an ill queue and (link local or multicast 10004 * destination) then use that ill. 10005 * We don't need the ipif initialization here. 10006 * This useless assert below is just to prevent lint from 10007 * reporting a null body if statement. 10008 */ 10009 ASSERT(ill != NULL); 10010 } else if (connp != NULL) { 10011 /* 10012 * 5. If IPV6_BOUND_IF has been set use that ill. 10013 * 10014 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10015 * Otherwise look for the best IRE match for the unspecified 10016 * group to determine the ill. 10017 * 10018 * conn_multicast_ill is used for only IPv6 packets. 10019 * conn_multicast_ipif is used for only IPv4 packets. 10020 * Thus a PF_INET6 socket send both IPv4 and IPv6 10021 * multicast packets using different IP*_MULTICAST_IF 10022 * interfaces. 
10023 */ 10024 if (connp->conn_outgoing_ill != NULL) { 10025 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10026 if (err == ILL_LOOKUP_FAILED) { 10027 ip1dbg(("ip_output_v6: multicast" 10028 " conn_outgoing_ill no ipif\n")); 10029 goto multicast_discard; 10030 } 10031 ill = connp->conn_outgoing_ill; 10032 } else if (connp->conn_multicast_ill != NULL) { 10033 err = ill_check_and_refhold(connp->conn_multicast_ill); 10034 if (err == ILL_LOOKUP_FAILED) { 10035 ip1dbg(("ip_output_v6: multicast" 10036 " conn_multicast_ill no ipif\n")); 10037 goto multicast_discard; 10038 } 10039 ill = connp->conn_multicast_ill; 10040 } else { 10041 mutex_exit(&connp->conn_lock); 10042 conn_lock_held = B_FALSE; 10043 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10044 if (ipif == NULL) { 10045 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10046 goto multicast_discard; 10047 } 10048 /* 10049 * We have a ref to this ipif, so we can safely 10050 * access ipif_ill. 10051 */ 10052 ill = ipif->ipif_ill; 10053 mutex_enter(&ill->ill_lock); 10054 if (!ILL_CAN_LOOKUP(ill)) { 10055 mutex_exit(&ill->ill_lock); 10056 ipif_refrele(ipif); 10057 ill = NULL; 10058 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10059 goto multicast_discard; 10060 } 10061 ill_refhold_locked(ill); 10062 mutex_exit(&ill->ill_lock); 10063 ipif_refrele(ipif); 10064 /* 10065 * Save binding until IPV6_MULTICAST_IF 10066 * changes it 10067 */ 10068 mutex_enter(&connp->conn_lock); 10069 connp->conn_multicast_ill = ill; 10070 connp->conn_orig_multicast_ifindex = 10071 ill->ill_phyint->phyint_ifindex; 10072 mutex_exit(&connp->conn_lock); 10073 } 10074 } 10075 if (conn_lock_held) 10076 mutex_exit(&connp->conn_lock); 10077 10078 if (saved_ill != NULL) 10079 ill_refrele(saved_ill); 10080 10081 ASSERT(ill != NULL); 10082 /* 10083 * For multicast loopback interfaces replace the multicast address 10084 * with a unicast address for the ire lookup. 
10085 */ 10086 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10087 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10088 10089 mibptr = ill->ill_ip6_mib; 10090 if (do_outrequests) { 10091 BUMP_MIB(mibptr, ipv6OutRequests); 10092 do_outrequests = B_FALSE; 10093 } 10094 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10095 10096 /* 10097 * As we may lose the conn by the time we reach ip_wput_ire_v6 10098 * we copy conn_multicast_loop and conn_dontroute on to an 10099 * ipsec_out. In case if this datagram goes out secure, 10100 * we need the ill_index also. Copy that also into the 10101 * ipsec_out. 10102 */ 10103 if (mctl_present) { 10104 io = (ipsec_out_t *)first_mp->b_rptr; 10105 ASSERT(first_mp->b_datap->db_type == M_CTL); 10106 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10107 } else { 10108 ASSERT(mp == first_mp); 10109 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10110 BUMP_MIB(mibptr, ipv6OutDiscards); 10111 freemsg(mp); 10112 if (ill != NULL) 10113 ill_refrele(ill); 10114 if (need_decref) 10115 CONN_DEC_REF(connp); 10116 return; 10117 } 10118 io = (ipsec_out_t *)first_mp->b_rptr; 10119 /* This is not a secure packet */ 10120 io->ipsec_out_secure = B_FALSE; 10121 io->ipsec_out_use_global_policy = B_TRUE; 10122 io->ipsec_out_zoneid = 10123 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10124 first_mp->b_cont = mp; 10125 mctl_present = B_TRUE; 10126 } 10127 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10128 io->ipsec_out_unspec_src = unspec_src; 10129 if (connp != NULL) 10130 io->ipsec_out_dontroute = connp->conn_dontroute; 10131 10132 send_from_ill: 10133 ASSERT(ill != NULL); 10134 ASSERT(mibptr == ill->ill_ip6_mib); 10135 if (do_outrequests) { 10136 BUMP_MIB(mibptr, ipv6OutRequests); 10137 do_outrequests = B_FALSE; 10138 } 10139 10140 if (io != NULL) 10141 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10142 10143 /* 10144 * When a specific ill is specified (using IPV6_PKTINFO, 10145 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10146 * on routing entries (ftable and ctable) that have a matching 10147 * ire->ire_ipif->ipif_ill. Thus this can only be used 10148 * for destinations that are on-link for the specific ill 10149 * and that can appear on multiple links. Thus it is useful 10150 * for multicast destinations, link-local destinations, and 10151 * at some point perhaps for site-local destinations (if the 10152 * node sits at a site boundary). 10153 * We create the cache entries in the regular ctable since 10154 * it can not "confuse" things for other destinations. 10155 * table. 10156 * 10157 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10158 * It is used only when ire_cache_lookup is used above. 10159 */ 10160 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10161 zoneid, MBLK_GETLABEL(mp), match_flags); 10162 if (ire != NULL) { 10163 /* 10164 * Check if the ire has the RTF_MULTIRT flag, inherited 10165 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10166 */ 10167 if (ire->ire_flags & RTF_MULTIRT) { 10168 /* 10169 * Force hop limit of multirouted packets if required. 10170 * The hop limit of such packets is bounded by the 10171 * ip_multirt_ttl ndd variable. 
10172 * NDP packets must have a hop limit of 255; don't 10173 * change the hop limit in that case. 10174 */ 10175 if ((ip_multirt_ttl > 0) && 10176 (ip6h->ip6_hops > ip_multirt_ttl) && 10177 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10178 if (ip_debug > 3) { 10179 ip2dbg(("ip_wput_v6: forcing multirt " 10180 "hop limit to %d (was %d) ", 10181 ip_multirt_ttl, ip6h->ip6_hops)); 10182 pr_addr_dbg("v6dst %s\n", AF_INET6, 10183 &ire->ire_addr_v6); 10184 } 10185 ip6h->ip6_hops = ip_multirt_ttl; 10186 } 10187 10188 /* 10189 * We look at this point if there are pending 10190 * unresolved routes. ire_multirt_need_resolve_v6() 10191 * checks in O(n) that all IRE_OFFSUBNET ire 10192 * entries for the packet's destination and 10193 * flagged RTF_MULTIRT are currently resolved. 10194 * If some remain unresolved, we make a copy 10195 * of the current message. It will be used 10196 * to initiate additional route resolutions. 10197 */ 10198 multirt_need_resolve = 10199 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10200 MBLK_GETLABEL(first_mp)); 10201 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10202 "multirt_need_resolve %d, first_mp %p\n", 10203 (void *)ire, multirt_need_resolve, 10204 (void *)first_mp)); 10205 if (multirt_need_resolve) { 10206 copy_mp = copymsg(first_mp); 10207 if (copy_mp != NULL) { 10208 MULTIRT_DEBUG_TAG(copy_mp); 10209 } 10210 } 10211 } 10212 10213 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10214 ill->ill_name, (void *)ire, 10215 ill->ill_phyint->phyint_ifindex)); 10216 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10217 connp, caller, 10218 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10219 ip6i_flags); 10220 ire_refrele(ire); 10221 if (need_decref) { 10222 CONN_DEC_REF(connp); 10223 connp = NULL; 10224 } 10225 10226 /* 10227 * Try to resolve another multiroute if 10228 * ire_multirt_need_resolve_v6() deemed it necessary. 10229 * copy_mp will be consumed (sent or freed) by 10230 * ip_newroute_[ipif_]v6(). 
10231 */ 10232 if (copy_mp != NULL) { 10233 if (mctl_present) { 10234 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10235 } else { 10236 ip6h = (ip6_t *)copy_mp->b_rptr; 10237 } 10238 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10239 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10240 zoneid); 10241 if (ipif == NULL) { 10242 ip1dbg(("ip_wput_v6: No ipif for " 10243 "multicast\n")); 10244 MULTIRT_DEBUG_UNTAG(copy_mp); 10245 freemsg(copy_mp); 10246 return; 10247 } 10248 ip_newroute_ipif_v6(q, copy_mp, ipif, 10249 ip6h->ip6_dst, unspec_src, zoneid); 10250 ipif_refrele(ipif); 10251 } else { 10252 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10253 &ip6h->ip6_src, ill, zoneid); 10254 } 10255 } 10256 if (ill != NULL) 10257 ill_refrele(ill); 10258 return; 10259 } 10260 if (need_decref) { 10261 CONN_DEC_REF(connp); 10262 connp = NULL; 10263 } 10264 10265 /* Update rptr if there was an ip6i_t header. */ 10266 if (ip6i != NULL) 10267 mp->b_rptr -= sizeof (ip6i_t); 10268 if (unspec_src || attach_if) { 10269 if (ip6i == NULL) { 10270 /* 10271 * Add ip6i_t header to carry unspec_src 10272 * or attach_if until the packet comes back in 10273 * ip_wput_v6. 10274 */ 10275 if (mctl_present) { 10276 first_mp->b_cont = 10277 ip_add_info_v6(mp, NULL, v6dstp); 10278 mp = first_mp->b_cont; 10279 if (mp == NULL) 10280 freeb(first_mp); 10281 } else { 10282 first_mp = mp = ip_add_info_v6(mp, NULL, 10283 v6dstp); 10284 } 10285 if (mp == NULL) { 10286 BUMP_MIB(mibptr, ipv6OutDiscards); 10287 if (ill != NULL) 10288 ill_refrele(ill); 10289 return; 10290 } 10291 ip6i = (ip6i_t *)mp->b_rptr; 10292 if ((mp->b_wptr - (uchar_t *)ip6i) == 10293 sizeof (ip6i_t)) { 10294 /* 10295 * ndp_resolver called from ip_newroute_v6 10296 * expects a pulled up message. 
10297 */ 10298 if (!pullupmsg(mp, -1)) { 10299 ip1dbg(("ip_wput_v6: pullupmsg" 10300 " failed\n")); 10301 BUMP_MIB(mibptr, ipv6OutDiscards); 10302 freemsg(first_mp); 10303 return; 10304 } 10305 ip6i = (ip6i_t *)mp->b_rptr; 10306 } 10307 ip6h = (ip6_t *)&ip6i[1]; 10308 v6dstp = &ip6h->ip6_dst; 10309 } 10310 if (unspec_src) 10311 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10312 if (attach_if) { 10313 /* 10314 * Bind to nofailover/BOUND_PIF overrides ifindex. 10315 */ 10316 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10317 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10318 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10319 if (drop_if_delayed) { 10320 /* This is a multipathing probe packet */ 10321 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10322 } 10323 } 10324 if (mctl_present) { 10325 ASSERT(io != NULL); 10326 io->ipsec_out_unspec_src = unspec_src; 10327 } 10328 } 10329 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10330 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10331 unspec_src, zoneid); 10332 } else { 10333 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10334 zoneid); 10335 } 10336 if (ill != NULL) 10337 ill_refrele(ill); 10338 return; 10339 10340 notv6: 10341 /* 10342 * XXX implement a IPv4 and IPv6 packet counter per conn and 10343 * switch when ratio exceeds e.g. 
10:1 10344 */ 10345 if (q->q_next == NULL) { 10346 connp = Q_TO_CONN(q); 10347 10348 if (IPCL_IS_TCP(connp)) { 10349 /* change conn_send for the tcp_v4_connections */ 10350 connp->conn_send = ip_output; 10351 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10352 /* The 'q' is the default SCTP queue */ 10353 connp = (conn_t *)arg; 10354 } else { 10355 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10356 } 10357 } 10358 BUMP_MIB(mibptr, ipv6OutIPv4); 10359 (void) ip_output(connp, first_mp, q, caller); 10360 if (ill != NULL) 10361 ill_refrele(ill); 10362 } 10363 10364 static void 10365 ip_wput_v6(queue_t *q, mblk_t *mp) 10366 { 10367 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10368 } 10369 10370 static void 10371 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10372 { 10373 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10374 io->ipsec_out_attach_if = B_TRUE; 10375 io->ipsec_out_ill_index = attach_index; 10376 } 10377 10378 /* 10379 * NULL send-to queue - packet is to be delivered locally. 10380 */ 10381 void 10382 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10383 ire_t *ire, int fanout_flags) 10384 { 10385 uint32_t ports; 10386 mblk_t *mp = first_mp, *first_mp1; 10387 boolean_t mctl_present; 10388 uint8_t nexthdr; 10389 uint16_t hdr_length; 10390 ipsec_out_t *io; 10391 mib2_ipv6IfStatsEntry_t *mibptr; 10392 ilm_t *ilm; 10393 uint_t nexthdr_offset; 10394 10395 if (DB_TYPE(mp) == M_CTL) { 10396 io = (ipsec_out_t *)mp->b_rptr; 10397 if (!io->ipsec_out_secure) { 10398 mp = mp->b_cont; 10399 freeb(first_mp); 10400 first_mp = mp; 10401 mctl_present = B_FALSE; 10402 } else { 10403 mctl_present = B_TRUE; 10404 mp = first_mp->b_cont; 10405 ipsec_out_to_in(first_mp); 10406 } 10407 } else { 10408 mctl_present = B_FALSE; 10409 } 10410 10411 nexthdr = ip6h->ip6_nxt; 10412 mibptr = ill->ill_ip6_mib; 10413 10414 /* Fastpath */ 10415 switch (nexthdr) { 10416 case IPPROTO_TCP: 10417 case IPPROTO_UDP: 10418 case IPPROTO_ICMPV6: 10419 case IPPROTO_SCTP: 10420 
hdr_length = IPV6_HDR_LEN; 10421 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10422 (uchar_t *)ip6h); 10423 break; 10424 default: { 10425 uint8_t *nexthdrp; 10426 10427 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10428 &hdr_length, &nexthdrp)) { 10429 /* Malformed packet */ 10430 BUMP_MIB(mibptr, ipv6OutDiscards); 10431 freemsg(first_mp); 10432 return; 10433 } 10434 nexthdr = *nexthdrp; 10435 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10436 break; 10437 } 10438 } 10439 10440 10441 UPDATE_OB_PKT_COUNT(ire); 10442 ire->ire_last_used_time = lbolt; 10443 10444 /* 10445 * Remove reacability confirmation bit from version field 10446 * before looping back the packet. 10447 */ 10448 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10449 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10450 } 10451 10452 switch (nexthdr) { 10453 case IPPROTO_TCP: 10454 if (DB_TYPE(mp) == M_DATA) { 10455 /* 10456 * M_DATA mblk, so init mblk (chain) for 10457 * no struio(). 10458 */ 10459 mblk_t *mp1 = mp; 10460 10461 do { 10462 mp1->b_datap->db_struioflag = 0; 10463 } while ((mp1 = mp1->b_cont) != NULL); 10464 } 10465 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10466 TCP_PORTS_OFFSET); 10467 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10468 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10469 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10470 hdr_length, mctl_present, ire->ire_zoneid); 10471 return; 10472 10473 case IPPROTO_UDP: 10474 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10475 UDP_PORTS_OFFSET); 10476 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10477 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10478 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10479 return; 10480 10481 case IPPROTO_SCTP: 10482 { 10483 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10484 10485 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10486 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10487 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10488 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10489 
ire->ire_zoneid); 10490 return; 10491 } 10492 case IPPROTO_ICMPV6: { 10493 icmp6_t *icmp6; 10494 10495 /* check for full IPv6+ICMPv6 header */ 10496 if ((mp->b_wptr - mp->b_rptr) < 10497 (hdr_length + ICMP6_MINLEN)) { 10498 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10499 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10500 " failed\n")); 10501 BUMP_MIB(mibptr, ipv6OutDiscards); 10502 freemsg(first_mp); 10503 return; 10504 } 10505 ip6h = (ip6_t *)mp->b_rptr; 10506 } 10507 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10508 10509 /* Update output mib stats */ 10510 icmp_update_out_mib_v6(ill, icmp6); 10511 10512 /* Check variable for testing applications */ 10513 if (ipv6_drop_inbound_icmpv6) { 10514 freemsg(first_mp); 10515 return; 10516 } 10517 /* 10518 * Assume that there is always at least one conn for 10519 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10520 * where there is no conn. 10521 */ 10522 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10523 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10524 /* 10525 * In the multicast case, applications may have 10526 * joined the group from different zones, so we 10527 * need to deliver the packet to each of them. 10528 * Loop through the multicast memberships 10529 * structures (ilm) on the receive ill and send 10530 * a copy of the packet up each matching one. 10531 * However, we don't do this for multicasts sent 10532 * on the loopback interface (PHYI_LOOPBACK flag 10533 * set) as they must stay in the sender's zone. 
10534 */ 10535 ILM_WALKER_HOLD(ill); 10536 for (ilm = ill->ill_ilm; ilm != NULL; 10537 ilm = ilm->ilm_next) { 10538 if (ilm->ilm_flags & ILM_DELETED) 10539 continue; 10540 if (!IN6_ARE_ADDR_EQUAL( 10541 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10542 continue; 10543 if ((fanout_flags & 10544 IP_FF_NO_MCAST_LOOP) && 10545 ilm->ilm_zoneid == ire->ire_zoneid) 10546 continue; 10547 if (!ipif_lookup_zoneid(ill, 10548 ilm->ilm_zoneid, IPIF_UP, NULL)) 10549 continue; 10550 10551 first_mp1 = ip_copymsg(first_mp); 10552 if (first_mp1 == NULL) 10553 continue; 10554 icmp_inbound_v6(q, first_mp1, ill, 10555 hdr_length, mctl_present, 10556 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10557 } 10558 ILM_WALKER_RELE(ill); 10559 } else { 10560 first_mp1 = ip_copymsg(first_mp); 10561 if (first_mp1 != NULL) 10562 icmp_inbound_v6(q, first_mp1, ill, 10563 hdr_length, mctl_present, 10564 IP6_NO_IPPOLICY, ire->ire_zoneid); 10565 } 10566 } 10567 /* FALLTHRU */ 10568 default: { 10569 /* 10570 * Handle protocols with which IPv6 is less intimate. 10571 */ 10572 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10573 10574 /* 10575 * Enable sending ICMP for "Unknown" nexthdr 10576 * case. i.e. where we did not FALLTHRU from 10577 * IPPROTO_ICMPV6 processing case above. 10578 */ 10579 if (nexthdr != IPPROTO_ICMPV6) 10580 fanout_flags |= IP_FF_SEND_ICMP; 10581 /* 10582 * Note: There can be more than one stream bound 10583 * to a particular protocol. When this is the case, 10584 * each one gets a copy of any incoming packets. 10585 */ 10586 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10587 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10588 mctl_present, ire->ire_zoneid); 10589 return; 10590 } 10591 } 10592 } 10593 10594 /* 10595 * Send packet using IRE. 10596 * Checksumming is controlled by cksum_request: 10597 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10598 * 1 => Skip TCP/UDP/SCTP checksum 10599 * Otherwise => checksum_request contains insert offset for checksum 10600 * 10601 * Assumes that the following set of headers appear in the first 10602 * mblk: 10603 * ip6_t 10604 * Any extension headers 10605 * TCP/UDP/SCTP header (if present) 10606 * The routine can handle an ICMPv6 header that is not in the first mblk. 10607 * 10608 * NOTE : This function does not ire_refrele the ire passed in as the 10609 * argument unlike ip_wput_ire where the REFRELE is done. 10610 * Refer to ip_wput_ire for more on this. 10611 */ 10612 static void 10613 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10614 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10615 { 10616 ip6_t *ip6h; 10617 uint8_t nexthdr; 10618 uint16_t hdr_length; 10619 uint_t reachable = 0x0; 10620 ill_t *ill; 10621 mib2_ipv6IfStatsEntry_t *mibptr; 10622 mblk_t *first_mp; 10623 boolean_t mctl_present; 10624 ipsec_out_t *io; 10625 boolean_t conn_dontroute; /* conn value for multicast */ 10626 boolean_t conn_multicast_loop; /* conn value for multicast */ 10627 boolean_t multicast_forward; /* Should we forward ? */ 10628 int max_frag; 10629 zoneid_t zoneid; 10630 10631 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10632 ill = ire_to_ill(ire); 10633 first_mp = mp; 10634 multicast_forward = B_FALSE; 10635 10636 if (mp->b_datap->db_type != M_CTL) { 10637 ip6h = (ip6_t *)first_mp->b_rptr; 10638 } else { 10639 io = (ipsec_out_t *)first_mp->b_rptr; 10640 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10641 /* 10642 * Grab the zone id now because the M_CTL can be discarded by 10643 * ip_wput_ire_parse_ipsec_out() below. 10644 */ 10645 zoneid = io->ipsec_out_zoneid; 10646 ASSERT(zoneid != ALL_ZONES); 10647 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10648 /* 10649 * For the multicast case, ipsec_out carries conn_dontroute and 10650 * conn_multicast_loop as conn may not be available here. 
We 10651 * need this for multicast loopback and forwarding which is done 10652 * later in the code. 10653 */ 10654 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10655 conn_dontroute = io->ipsec_out_dontroute; 10656 conn_multicast_loop = io->ipsec_out_multicast_loop; 10657 /* 10658 * If conn_dontroute is not set or conn_multicast_loop 10659 * is set, we need to do forwarding/loopback. For 10660 * datagrams from ip_wput_multicast, conn_dontroute is 10661 * set to B_TRUE and conn_multicast_loop is set to 10662 * B_FALSE so that we neither do forwarding nor 10663 * loopback. 10664 */ 10665 if (!conn_dontroute || conn_multicast_loop) 10666 multicast_forward = B_TRUE; 10667 } 10668 } 10669 10670 /* 10671 * If the sender didn't supply the hop limit and there is a default 10672 * unicast hop limit associated with the output interface, we use 10673 * that if the packet is unicast. Interface specific unicast hop 10674 * limits as set via the SIOCSLIFLNKINFO ioctl. 10675 */ 10676 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10677 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10678 ip6h->ip6_hops = ill->ill_max_hops; 10679 } 10680 10681 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10682 ire->ire_zoneid != ALL_ZONES) { 10683 /* 10684 * When a zone sends a packet to another zone, we try to deliver 10685 * the packet under the same conditions as if the destination 10686 * was a real node on the network. To do so, we look for a 10687 * matching route in the forwarding table. 10688 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10689 * ip_newroute_v6() does. 
10690 */ 10691 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10692 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10693 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10694 if (src_ire != NULL && 10695 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10696 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10697 !unspec_src) { 10698 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10699 } 10700 ire_refrele(src_ire); 10701 } else { 10702 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10703 if (src_ire != NULL) { 10704 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10705 ire_refrele(src_ire); 10706 freemsg(first_mp); 10707 return; 10708 } 10709 ire_refrele(src_ire); 10710 } 10711 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10712 /* Failed */ 10713 freemsg(first_mp); 10714 return; 10715 } 10716 icmp_unreachable_v6(q, first_mp, 10717 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10718 return; 10719 } 10720 } 10721 10722 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10723 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10724 connp, unspec_src); 10725 if (mp == NULL) { 10726 return; 10727 } 10728 } 10729 10730 first_mp = mp; 10731 if (mp->b_datap->db_type == M_CTL) { 10732 io = (ipsec_out_t *)mp->b_rptr; 10733 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10734 mp = mp->b_cont; 10735 mctl_present = B_TRUE; 10736 } else { 10737 mctl_present = B_FALSE; 10738 } 10739 10740 ip6h = (ip6_t *)mp->b_rptr; 10741 nexthdr = ip6h->ip6_nxt; 10742 mibptr = ill->ill_ip6_mib; 10743 10744 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10745 ipif_t *ipif; 10746 10747 /* 10748 * Select the source address using ipif_select_source_v6. 
10749 */ 10750 if (attach_index != 0) { 10751 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10752 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10753 } else { 10754 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10755 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10756 } 10757 if (ipif == NULL) { 10758 if (ip_debug > 2) { 10759 /* ip1dbg */ 10760 pr_addr_dbg("ip_wput_ire_v6: no src for " 10761 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10762 printf("ip_wput_ire_v6: interface name %s\n", 10763 ill->ill_name); 10764 } 10765 freemsg(first_mp); 10766 return; 10767 } 10768 ip6h->ip6_src = ipif->ipif_v6src_addr; 10769 ipif_refrele(ipif); 10770 } 10771 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10772 if ((connp != NULL && connp->conn_multicast_loop) || 10773 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10774 ilm_t *ilm; 10775 10776 ILM_WALKER_HOLD(ill); 10777 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10778 ILM_WALKER_RELE(ill); 10779 if (ilm != NULL) { 10780 mblk_t *nmp; 10781 int fanout_flags = 0; 10782 10783 if (connp != NULL && 10784 !connp->conn_multicast_loop) { 10785 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10786 } 10787 ip1dbg(("ip_wput_ire_v6: " 10788 "Loopback multicast\n")); 10789 nmp = ip_copymsg(first_mp); 10790 if (nmp != NULL) { 10791 ip6_t *nip6h; 10792 10793 if (mctl_present) { 10794 nip6h = (ip6_t *) 10795 nmp->b_cont->b_rptr; 10796 } else { 10797 nip6h = (ip6_t *)nmp->b_rptr; 10798 } 10799 /* 10800 * Deliver locally and to every local 10801 * zone, except the sending zone when 10802 * IPV6_MULTICAST_LOOP is disabled. 
10803 */ 10804 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10805 ire, fanout_flags); 10806 } else { 10807 BUMP_MIB(mibptr, ipv6OutDiscards); 10808 ip1dbg(("ip_wput_ire_v6: " 10809 "copymsg failed\n")); 10810 } 10811 } 10812 } 10813 if (ip6h->ip6_hops == 0 || 10814 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10815 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10816 /* 10817 * Local multicast or just loopback on loopback 10818 * interface. 10819 */ 10820 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10821 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10822 freemsg(first_mp); 10823 return; 10824 } 10825 } 10826 10827 if (ire->ire_stq != NULL) { 10828 uint32_t sum; 10829 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10830 ill_phyint->phyint_ifindex; 10831 queue_t *dev_q = ire->ire_stq->q_next; 10832 10833 /* 10834 * non-NULL send-to queue - packet is to be sent 10835 * out an interface. 10836 */ 10837 10838 /* Driver is flow-controlling? */ 10839 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10840 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10841 /* 10842 * Queue packet if we have an conn to give back 10843 * pressure. We can't queue packets intended for 10844 * hardware acceleration since we've tossed that 10845 * state already. If the packet is being fed back 10846 * from ire_send_v6, we don't know the position in 10847 * the queue to enqueue the packet and we discard 10848 * the packet. 10849 */ 10850 if (ip_output_queue && connp != NULL && 10851 !mctl_present && caller != IRE_SEND) { 10852 if (caller == IP_WSRV) { 10853 connp->conn_did_putbq = 1; 10854 (void) putbq(connp->conn_wq, mp); 10855 conn_drain_insert(connp); 10856 /* 10857 * caller == IP_WSRV implies we are 10858 * the service thread, and the 10859 * queue is already noenabled. 10860 * The check for canput and 10861 * the putbq is not atomic. 10862 * So we need to check again. 
10863 */ 10864 if (canput(dev_q)) 10865 connp->conn_did_putbq = 0; 10866 } else { 10867 (void) putq(connp->conn_wq, mp); 10868 } 10869 return; 10870 } 10871 BUMP_MIB(mibptr, ipv6OutDiscards); 10872 freemsg(first_mp); 10873 return; 10874 } 10875 10876 /* 10877 * Look for reachability confirmations from the transport. 10878 */ 10879 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10880 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10881 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10882 if (mctl_present) 10883 io->ipsec_out_reachable = B_TRUE; 10884 } 10885 /* Fastpath */ 10886 switch (nexthdr) { 10887 case IPPROTO_TCP: 10888 case IPPROTO_UDP: 10889 case IPPROTO_ICMPV6: 10890 case IPPROTO_SCTP: 10891 hdr_length = IPV6_HDR_LEN; 10892 break; 10893 default: { 10894 uint8_t *nexthdrp; 10895 10896 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10897 &hdr_length, &nexthdrp)) { 10898 /* Malformed packet */ 10899 BUMP_MIB(mibptr, ipv6OutDiscards); 10900 freemsg(first_mp); 10901 return; 10902 } 10903 nexthdr = *nexthdrp; 10904 break; 10905 } 10906 } 10907 10908 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10909 uint16_t *up; 10910 uint16_t *insp; 10911 10912 /* 10913 * The packet header is processed once for all, even 10914 * in the multirouting case. We disable hardware 10915 * checksum if the packet is multirouted, as it will be 10916 * replicated via several interfaces, and not all of 10917 * them may have this capability. 10918 */ 10919 if (cksum_request == 1 && 10920 !(ire->ire_flags & RTF_MULTIRT)) { 10921 /* Skip the transport checksum */ 10922 goto cksum_done; 10923 } 10924 /* 10925 * Do user-configured raw checksum. 
10926 * Compute checksum and insert at offset "cksum_request" 10927 */ 10928 10929 /* check for enough headers for checksum */ 10930 cksum_request += hdr_length; /* offset from rptr */ 10931 if ((mp->b_wptr - mp->b_rptr) < 10932 (cksum_request + sizeof (int16_t))) { 10933 if (!pullupmsg(mp, 10934 cksum_request + sizeof (int16_t))) { 10935 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10936 " failed\n")); 10937 BUMP_MIB(mibptr, ipv6OutDiscards); 10938 freemsg(first_mp); 10939 return; 10940 } 10941 ip6h = (ip6_t *)mp->b_rptr; 10942 } 10943 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10944 ASSERT(((uintptr_t)insp & 0x1) == 0); 10945 up = (uint16_t *)&ip6h->ip6_src; 10946 /* 10947 * icmp has placed length and routing 10948 * header adjustment in *insp. 10949 */ 10950 sum = htons(nexthdr) + 10951 up[0] + up[1] + up[2] + up[3] + 10952 up[4] + up[5] + up[6] + up[7] + 10953 up[8] + up[9] + up[10] + up[11] + 10954 up[12] + up[13] + up[14] + up[15]; 10955 sum = (sum & 0xffff) + (sum >> 16); 10956 *insp = IP_CSUM(mp, hdr_length, sum); 10957 if (*insp == 0) 10958 *insp = 0xFFFF; 10959 } else if (nexthdr == IPPROTO_TCP) { 10960 uint16_t *up; 10961 10962 /* 10963 * Check for full IPv6 header + enough TCP header 10964 * to get at the checksum field. 10965 */ 10966 if ((mp->b_wptr - mp->b_rptr) < 10967 (hdr_length + TCP_CHECKSUM_OFFSET + 10968 TCP_CHECKSUM_SIZE)) { 10969 if (!pullupmsg(mp, hdr_length + 10970 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10971 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10972 " failed\n")); 10973 BUMP_MIB(mibptr, ipv6OutDiscards); 10974 freemsg(first_mp); 10975 return; 10976 } 10977 ip6h = (ip6_t *)mp->b_rptr; 10978 } 10979 10980 up = (uint16_t *)&ip6h->ip6_src; 10981 /* 10982 * Note: The TCP module has stored the length value 10983 * into the tcp checksum field, so we don't 10984 * need to explicitly sum it in here. 
10985 */ 10986 sum = up[0] + up[1] + up[2] + up[3] + 10987 up[4] + up[5] + up[6] + up[7] + 10988 up[8] + up[9] + up[10] + up[11] + 10989 up[12] + up[13] + up[14] + up[15]; 10990 10991 /* Fold the initial sum */ 10992 sum = (sum & 0xffff) + (sum >> 16); 10993 10994 up = (uint16_t *)(((uchar_t *)ip6h) + 10995 hdr_length + TCP_CHECKSUM_OFFSET); 10996 10997 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10998 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10999 ire->ire_max_frag, mctl_present, sum); 11000 11001 /* Software checksum? */ 11002 if (DB_CKSUMFLAGS(mp) == 0) { 11003 IP6_STAT(ip6_out_sw_cksum); 11004 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11005 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11006 hdr_length); 11007 } 11008 } else if (nexthdr == IPPROTO_UDP) { 11009 uint16_t *up; 11010 11011 /* 11012 * check for full IPv6 header + enough UDP header 11013 * to get at the UDP checksum field 11014 */ 11015 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11016 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11017 if (!pullupmsg(mp, hdr_length + 11018 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11019 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11020 " failed\n")); 11021 BUMP_MIB(mibptr, ipv6OutDiscards); 11022 freemsg(first_mp); 11023 return; 11024 } 11025 ip6h = (ip6_t *)mp->b_rptr; 11026 } 11027 up = (uint16_t *)&ip6h->ip6_src; 11028 /* 11029 * Note: The UDP module has stored the length value 11030 * into the udp checksum field, so we don't 11031 * need to explicitly sum it in here. 
11032 */ 11033 sum = up[0] + up[1] + up[2] + up[3] + 11034 up[4] + up[5] + up[6] + up[7] + 11035 up[8] + up[9] + up[10] + up[11] + 11036 up[12] + up[13] + up[14] + up[15]; 11037 11038 /* Fold the initial sum */ 11039 sum = (sum & 0xffff) + (sum >> 16); 11040 11041 up = (uint16_t *)(((uchar_t *)ip6h) + 11042 hdr_length + UDP_CHECKSUM_OFFSET); 11043 11044 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11045 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11046 ire->ire_max_frag, mctl_present, sum); 11047 11048 /* Software checksum? */ 11049 if (DB_CKSUMFLAGS(mp) == 0) { 11050 IP6_STAT(ip6_out_sw_cksum); 11051 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11052 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11053 hdr_length); 11054 } 11055 } else if (nexthdr == IPPROTO_ICMPV6) { 11056 uint16_t *up; 11057 icmp6_t *icmp6; 11058 11059 /* check for full IPv6+ICMPv6 header */ 11060 if ((mp->b_wptr - mp->b_rptr) < 11061 (hdr_length + ICMP6_MINLEN)) { 11062 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11063 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11064 " failed\n")); 11065 BUMP_MIB(mibptr, ipv6OutDiscards); 11066 freemsg(first_mp); 11067 return; 11068 } 11069 ip6h = (ip6_t *)mp->b_rptr; 11070 } 11071 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11072 up = (uint16_t *)&ip6h->ip6_src; 11073 /* 11074 * icmp has placed length and routing 11075 * header adjustment in icmp6_cksum. 
11076 */ 11077 sum = htons(IPPROTO_ICMPV6) + 11078 up[0] + up[1] + up[2] + up[3] + 11079 up[4] + up[5] + up[6] + up[7] + 11080 up[8] + up[9] + up[10] + up[11] + 11081 up[12] + up[13] + up[14] + up[15]; 11082 sum = (sum & 0xffff) + (sum >> 16); 11083 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11084 if (icmp6->icmp6_cksum == 0) 11085 icmp6->icmp6_cksum = 0xFFFF; 11086 11087 /* Update output mib stats */ 11088 icmp_update_out_mib_v6(ill, icmp6); 11089 } else if (nexthdr == IPPROTO_SCTP) { 11090 sctp_hdr_t *sctph; 11091 11092 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11093 if (!pullupmsg(mp, hdr_length + 11094 sizeof (*sctph))) { 11095 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11096 " failed\n")); 11097 BUMP_MIB(ill->ill_ip6_mib, 11098 ipv6OutDiscards); 11099 freemsg(mp); 11100 return; 11101 } 11102 ip6h = (ip6_t *)mp->b_rptr; 11103 } 11104 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11105 sctph->sh_chksum = 0; 11106 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11107 } 11108 11109 cksum_done: 11110 /* 11111 * We force the insertion of a fragment header using the 11112 * IPH_FRAG_HDR flag in two cases: 11113 * - after reception of an ICMPv6 "packet too big" message 11114 * with a MTU < 1280 (cf. RFC 2460 section 5) 11115 * - for multirouted IPv6 packets, so that the receiver can 11116 * discard duplicates according to their fragment identifier 11117 * 11118 * Two flags modifed from the API can modify this behavior. 11119 * The first is IPV6_USE_MIN_MTU. With this API the user 11120 * can specify how to manage PMTUD for unicast and multicast. 11121 * 11122 * IPV6_DONTFRAG disallows fragmentation. 
11123 */ 11124 max_frag = ire->ire_max_frag; 11125 switch (IP6I_USE_MIN_MTU_API(flags)) { 11126 case IPV6_USE_MIN_MTU_DEFAULT: 11127 case IPV6_USE_MIN_MTU_UNICAST: 11128 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11129 max_frag = IPV6_MIN_MTU; 11130 } 11131 break; 11132 11133 case IPV6_USE_MIN_MTU_NEVER: 11134 max_frag = IPV6_MIN_MTU; 11135 break; 11136 } 11137 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11138 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11139 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11140 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11141 max_frag, B_FALSE, B_TRUE); 11142 return; 11143 } 11144 11145 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11146 (mp->b_cont ? msgdsize(mp) : 11147 mp->b_wptr - (uchar_t *)ip6h)) { 11148 ip0dbg(("Packet length mismatch: %d, %ld\n", 11149 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11150 msgdsize(mp))); 11151 freemsg(first_mp); 11152 return; 11153 } 11154 /* Do IPSEC processing first */ 11155 if (mctl_present) { 11156 if (attach_index != 0) 11157 ipsec_out_attach_if(io, attach_index); 11158 ipsec_out_process(q, first_mp, ire, ill_index); 11159 return; 11160 } 11161 ASSERT(mp->b_prev == NULL); 11162 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11163 ntohs(ip6h->ip6_plen) + 11164 IPV6_HDR_LEN, max_frag)); 11165 ASSERT(mp == first_mp); 11166 /* Initiate IPPF processing */ 11167 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11168 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11169 if (mp == NULL) { 11170 return; 11171 } 11172 } 11173 ip_wput_frag_v6(mp, ire, reachable, connp, 11174 caller, max_frag); 11175 return; 11176 } 11177 /* Do IPSEC processing first */ 11178 if (mctl_present) { 11179 int extra_len = ipsec_out_extra_length(first_mp); 11180 11181 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11182 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11183 /* 11184 * IPsec headers will push the packet over the 11185 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11186 * message for this packet if the upper-layer 11187 * that issued this packet will be able to 11188 * react to the icmp_pkt2big_v6() that we'll 11189 * generate. 11190 */ 11191 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11192 max_frag, B_FALSE, B_TRUE); 11193 return; 11194 } 11195 if (attach_index != 0) 11196 ipsec_out_attach_if(io, attach_index); 11197 ipsec_out_process(q, first_mp, ire, ill_index); 11198 return; 11199 } 11200 /* 11201 * XXX multicast: add ip_mforward_v6() here. 11202 * Check conn_dontroute 11203 */ 11204 #ifdef lint 11205 /* 11206 * XXX The only purpose of this statement is to avoid lint 11207 * errors. See the above "XXX multicast". When that gets 11208 * fixed, remove this whole #ifdef lint section. 11209 */ 11210 ip3dbg(("multicast forward is %s.\n", 11211 (multicast_forward ? "TRUE" : "FALSE"))); 11212 #endif 11213 11214 UPDATE_OB_PKT_COUNT(ire); 11215 ire->ire_last_used_time = lbolt; 11216 ASSERT(mp == first_mp); 11217 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11218 } else { 11219 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11220 } 11221 } 11222 11223 /* 11224 * Outbound IPv6 fragmentation routine using MDT. 11225 */ 11226 static void 11227 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11228 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11229 { 11230 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11231 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11232 mblk_t *hdr_mp, *md_mp = NULL; 11233 int i1; 11234 multidata_t *mmd; 11235 unsigned char *hdr_ptr, *pld_ptr; 11236 ip_pdescinfo_t pdi; 11237 uint32_t ident; 11238 size_t len; 11239 uint16_t offset; 11240 queue_t *stq = ire->ire_stq; 11241 ill_t *ill = (ill_t *)stq->q_ptr; 11242 11243 ASSERT(DB_TYPE(mp) == M_DATA); 11244 ASSERT(MBLKL(mp) > unfragmentable_len); 11245 11246 /* 11247 * Move read ptr past unfragmentable portion, we don't want this part 11248 * of the data in our fragments. 
11249 */ 11250 mp->b_rptr += unfragmentable_len; 11251 11252 /* Calculate how many packets we will send out */ 11253 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11254 pkts = (i1 + max_chunk - 1) / max_chunk; 11255 ASSERT(pkts > 1); 11256 11257 /* Allocate a message block which will hold all the IP Headers. */ 11258 wroff = ip_wroff_extra; 11259 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11260 11261 i1 = pkts * hdr_chunk_len; 11262 /* 11263 * Create the header buffer, Multidata and destination address 11264 * and SAP attribute that should be associated with it. 11265 */ 11266 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11267 ((hdr_mp->b_wptr += i1), 11268 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11269 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11270 freemsg(mp); 11271 if (md_mp == NULL) { 11272 freemsg(hdr_mp); 11273 } else { 11274 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11275 freemsg(md_mp); 11276 } 11277 IP6_STAT(ip6_frag_mdt_allocfail); 11278 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11279 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11280 return; 11281 } 11282 IP6_STAT(ip6_frag_mdt_allocd); 11283 11284 /* 11285 * Add a payload buffer to the Multidata; this operation must not 11286 * fail, or otherwise our logic in this routine is broken. There 11287 * is no memory allocation done by the routine, so any returned 11288 * failure simply tells us that we've done something wrong. 11289 * 11290 * A failure tells us that either we're adding the same payload 11291 * buffer more than once, or we're trying to add more buffers than 11292 * allowed. None of the above cases should happen, and we panic 11293 * because either there's horrible heap corruption, and/or 11294 * programming mistake. 
11295 */ 11296 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11297 goto pbuf_panic; 11298 } 11299 11300 hdr_ptr = hdr_mp->b_rptr; 11301 pld_ptr = mp->b_rptr; 11302 11303 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11304 11305 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11306 11307 /* 11308 * len is the total length of the fragmentable data in this 11309 * datagram. For each fragment sent, we will decrement len 11310 * by the amount of fragmentable data sent in that fragment 11311 * until len reaches zero. 11312 */ 11313 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11314 11315 offset = 0; 11316 prev_nexthdr_offset += wroff; 11317 11318 while (len != 0) { 11319 size_t mlen; 11320 ip6_t *fip6h; 11321 ip6_frag_t *fraghdr; 11322 int error; 11323 11324 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11325 mlen = MIN(len, max_chunk); 11326 len -= mlen; 11327 11328 fip6h = (ip6_t *)(hdr_ptr + wroff); 11329 ASSERT(OK_32PTR(fip6h)); 11330 bcopy(ip6h, fip6h, unfragmentable_len); 11331 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11332 11333 fip6h->ip6_plen = htons((uint16_t)(mlen + 11334 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11335 11336 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11337 unfragmentable_len); 11338 fraghdr->ip6f_nxt = nexthdr; 11339 fraghdr->ip6f_reserved = 0; 11340 fraghdr->ip6f_offlg = htons(offset) | 11341 ((len != 0) ? IP6F_MORE_FRAG : 0); 11342 fraghdr->ip6f_ident = ident; 11343 11344 /* 11345 * Record offset and size of header and data of the next packet 11346 * in the multidata message. 
11347 */ 11348 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11349 unfragmentable_len + sizeof (ip6_frag_t), 0); 11350 PDESC_PLD_INIT(&pdi); 11351 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11352 ASSERT(i1 > 0); 11353 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11354 if (i1 == mlen) { 11355 pld_ptr += mlen; 11356 } else { 11357 i1 = mlen - i1; 11358 mp = mp->b_cont; 11359 ASSERT(mp != NULL); 11360 ASSERT(MBLKL(mp) >= i1); 11361 /* 11362 * Attach the next payload message block to the 11363 * multidata message. 11364 */ 11365 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11366 goto pbuf_panic; 11367 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11368 pld_ptr = mp->b_rptr + i1; 11369 } 11370 11371 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11372 KM_NOSLEEP)) == NULL) { 11373 /* 11374 * Any failure other than ENOMEM indicates that we 11375 * have passed in invalid pdesc info or parameters 11376 * to mmd_addpdesc, which must not happen. 11377 * 11378 * EINVAL is a result of failure on boundary checks 11379 * against the pdesc info contents. It should not 11380 * happen, and we panic because either there's 11381 * horrible heap corruption, and/or programming 11382 * mistake. 11383 */ 11384 if (error != ENOMEM) { 11385 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11386 "pdesc logic error detected for " 11387 "mmd %p pinfo %p (%d)\n", 11388 (void *)mmd, (void *)&pdi, error); 11389 /* NOTREACHED */ 11390 } 11391 IP6_STAT(ip6_frag_mdt_addpdescfail); 11392 /* Free unattached payload message blocks as well */ 11393 md_mp->b_cont = mp->b_cont; 11394 goto free_mmd; 11395 } 11396 11397 /* Advance fragment offset. */ 11398 offset += mlen; 11399 11400 /* Advance to location for next header in the buffer. */ 11401 hdr_ptr += hdr_chunk_len; 11402 11403 /* Did we reach the next payload message block? */ 11404 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11405 mp = mp->b_cont; 11406 /* 11407 * Attach the next message block with payload 11408 * data to the multidata message. 
11409 */ 11410 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11411 goto pbuf_panic; 11412 pld_ptr = mp->b_rptr; 11413 } 11414 } 11415 11416 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11417 ASSERT(mp->b_wptr == pld_ptr); 11418 11419 /* Update IP statistics */ 11420 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11421 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11422 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11423 11424 ire->ire_ob_pkt_count += pkts; 11425 if (ire->ire_ipif != NULL) 11426 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11427 11428 ire->ire_last_used_time = lbolt; 11429 /* Send it down */ 11430 putnext(stq, md_mp); 11431 return; 11432 11433 pbuf_panic: 11434 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11435 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11436 pbuf_idx); 11437 /* NOTREACHED */ 11438 } 11439 11440 /* 11441 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11442 * We have not optimized this in terms of number of mblks 11443 * allocated. For instance, for each fragment sent we always allocate a 11444 * mblk to hold the IPv6 header and fragment header. 11445 * 11446 * Assumes that all the extension headers are contained in the first mblk. 11447 * 11448 * The fragment header is inserted after an hop-by-hop options header 11449 * and after [an optional destinations header followed by] a routing header. 11450 * 11451 * NOTE : This function does not ire_refrele the ire passed in as 11452 * the argument. 
11453 */ 11454 void 11455 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11456 int caller, int max_frag) 11457 { 11458 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11459 ip6_t *fip6h; 11460 mblk_t *hmp; 11461 mblk_t *hmp0; 11462 mblk_t *dmp; 11463 ip6_frag_t *fraghdr; 11464 size_t unfragmentable_len; 11465 size_t len; 11466 size_t mlen; 11467 size_t max_chunk; 11468 uint32_t ident; 11469 uint16_t off_flags; 11470 uint16_t offset = 0; 11471 ill_t *ill; 11472 uint8_t nexthdr; 11473 uint_t prev_nexthdr_offset; 11474 uint8_t *ptr; 11475 11476 ASSERT(ire->ire_type == IRE_CACHE); 11477 ill = (ill_t *)ire->ire_stq->q_ptr; 11478 11479 /* 11480 * Determine the length of the unfragmentable portion of this 11481 * datagram. This consists of the IPv6 header, a potential 11482 * hop-by-hop options header, a potential pre-routing-header 11483 * destination options header, and a potential routing header. 11484 */ 11485 nexthdr = ip6h->ip6_nxt; 11486 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11487 ptr = (uint8_t *)&ip6h[1]; 11488 11489 if (nexthdr == IPPROTO_HOPOPTS) { 11490 ip6_hbh_t *hbh_hdr; 11491 uint_t hdr_len; 11492 11493 hbh_hdr = (ip6_hbh_t *)ptr; 11494 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11495 nexthdr = hbh_hdr->ip6h_nxt; 11496 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11497 - (uint8_t *)ip6h; 11498 ptr += hdr_len; 11499 } 11500 if (nexthdr == IPPROTO_DSTOPTS) { 11501 ip6_dest_t *dest_hdr; 11502 uint_t hdr_len; 11503 11504 dest_hdr = (ip6_dest_t *)ptr; 11505 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11506 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11507 nexthdr = dest_hdr->ip6d_nxt; 11508 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11509 - (uint8_t *)ip6h; 11510 ptr += hdr_len; 11511 } 11512 } 11513 if (nexthdr == IPPROTO_ROUTING) { 11514 ip6_rthdr_t *rthdr; 11515 uint_t hdr_len; 11516 11517 rthdr = (ip6_rthdr_t *)ptr; 11518 nexthdr = rthdr->ip6r_nxt; 11519 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11520 - (uint8_t *)ip6h; 11521 hdr_len = 8 * (rthdr->ip6r_len + 1); 11522 ptr += hdr_len; 11523 } 11524 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11525 11526 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11527 sizeof (ip6_frag_t)) & ~7; 11528 11529 /* Check if we can use MDT to send out the frags. */ 11530 ASSERT(!IRE_IS_LOCAL(ire)); 11531 if (ip_multidata_outbound && reachable == 0 && 11532 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11533 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11534 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11535 nexthdr, prev_nexthdr_offset); 11536 return; 11537 } 11538 11539 /* 11540 * Allocate an mblk with enough room for the link-layer 11541 * header, the unfragmentable part of the datagram, and the 11542 * fragment header. This (or a copy) will be used as the 11543 * first mblk for each fragment we send. 11544 */ 11545 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11546 BPRI_HI); 11547 if (hmp == NULL) { 11548 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11549 freemsg(mp); 11550 return; 11551 } 11552 hmp->b_rptr += ip_wroff_extra; 11553 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11554 11555 fip6h = (ip6_t *)hmp->b_rptr; 11556 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11557 11558 bcopy(ip6h, fip6h, unfragmentable_len); 11559 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11560 11561 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11562 11563 fraghdr->ip6f_nxt = nexthdr; 11564 fraghdr->ip6f_reserved = 0; 11565 fraghdr->ip6f_offlg = 0; 11566 fraghdr->ip6f_ident = htonl(ident); 11567 11568 /* 11569 * len is the total length of the fragmentable data in this 11570 * datagram. For each fragment sent, we will decrement len 11571 * by the amount of fragmentable data sent in that fragment 11572 * until len reaches zero. 
11573 */ 11574 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11575 11576 /* 11577 * Move read ptr past unfragmentable portion, we don't want this part 11578 * of the data in our fragments. 11579 */ 11580 mp->b_rptr += unfragmentable_len; 11581 11582 while (len != 0) { 11583 mlen = MIN(len, max_chunk); 11584 len -= mlen; 11585 if (len != 0) { 11586 /* Not last */ 11587 hmp0 = copyb(hmp); 11588 if (hmp0 == NULL) { 11589 freeb(hmp); 11590 freemsg(mp); 11591 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11592 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11593 return; 11594 } 11595 off_flags = IP6F_MORE_FRAG; 11596 } else { 11597 /* Last fragment */ 11598 hmp0 = hmp; 11599 hmp = NULL; 11600 off_flags = 0; 11601 } 11602 fip6h = (ip6_t *)(hmp0->b_rptr); 11603 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11604 11605 fip6h->ip6_plen = htons((uint16_t)(mlen + 11606 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11607 /* 11608 * Note: Optimization alert. 11609 * In IPv6 (and IPv4) protocol header, Fragment Offset 11610 * ("offset") is 13 bits wide and in 8-octet units. 11611 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11612 * it occupies the most significant 13 bits. 11613 * (least significant 13 bits in IPv4). 11614 * We do not do any shifts here. Not shifting is same effect 11615 * as taking offset value in octet units, dividing by 8 and 11616 * then shifting 3 bits left to line it up in place in proper 11617 * place protocol header. 
11618 */ 11619 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11620 11621 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11622 /* mp has already been freed by ip_carve_mp() */ 11623 if (hmp != NULL) 11624 freeb(hmp); 11625 freeb(hmp0); 11626 ip1dbg(("ip_carve_mp: failed\n")); 11627 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11628 return; 11629 } 11630 hmp0->b_cont = dmp; 11631 /* Get the priority marking, if any */ 11632 hmp0->b_band = dmp->b_band; 11633 UPDATE_OB_PKT_COUNT(ire); 11634 ire->ire_last_used_time = lbolt; 11635 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11636 caller, NULL); 11637 reachable = 0; /* No need to redo state machine in loop */ 11638 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11639 offset += mlen; 11640 } 11641 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11642 } 11643 11644 /* 11645 * Determine if the ill and multicast aspects of that packets 11646 * "matches" the conn. 11647 */ 11648 boolean_t 11649 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11650 zoneid_t zoneid) 11651 { 11652 ill_t *in_ill; 11653 boolean_t wantpacket = B_TRUE; 11654 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11655 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11656 11657 /* 11658 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11659 * unicast and multicast reception to conn_incoming_ill. 11660 * conn_wantpacket_v6 is called both for unicast and 11661 * multicast. 11662 * 11663 * 1) The unicast copy of the packet can come anywhere in 11664 * the ill group if it is part of the group. Thus, we 11665 * need to check to see whether the ill group matches 11666 * if in_ill is part of a group. 11667 * 11668 * 2) ip_rput does not suppress duplicate multicast packets. 
11669 * If there are two interfaces in a ill group and we have 11670 * 2 applications (conns) joined a multicast group G on 11671 * both the interfaces, ilm_lookup_ill filter in ip_rput 11672 * will give us two packets because we join G on both the 11673 * interfaces rather than nominating just one interface 11674 * for receiving multicast like broadcast above. So, 11675 * we have to call ilg_lookup_ill to filter out duplicate 11676 * copies, if ill is part of a group, to supress duplicates. 11677 */ 11678 in_ill = connp->conn_incoming_ill; 11679 if (in_ill != NULL) { 11680 mutex_enter(&connp->conn_lock); 11681 in_ill = connp->conn_incoming_ill; 11682 mutex_enter(&ill->ill_lock); 11683 /* 11684 * No IPMP, and the packet did not arrive on conn_incoming_ill 11685 * OR, IPMP in use and the packet arrived on an IPMP group 11686 * different from the conn_incoming_ill's IPMP group. 11687 * Reject the packet. 11688 */ 11689 if ((in_ill->ill_group == NULL && in_ill != ill) || 11690 (in_ill->ill_group != NULL && 11691 in_ill->ill_group != ill->ill_group)) { 11692 wantpacket = B_FALSE; 11693 } 11694 mutex_exit(&ill->ill_lock); 11695 mutex_exit(&connp->conn_lock); 11696 if (!wantpacket) 11697 return (B_FALSE); 11698 } 11699 11700 if (connp->conn_multi_router) 11701 return (B_TRUE); 11702 11703 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11704 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11705 /* 11706 * Unicast case: we match the conn only if it's in the specified 11707 * zone. 11708 */ 11709 return (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES); 11710 } 11711 11712 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11713 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11714 /* 11715 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11716 * disabled, therefore we don't dispatch the multicast packet to 11717 * the sending zone. 
11718 */ 11719 return (B_FALSE); 11720 } 11721 11722 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11723 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11724 /* 11725 * Multicast packet on the loopback interface: we only match 11726 * conns who joined the group in the specified zone. 11727 */ 11728 return (B_FALSE); 11729 } 11730 11731 mutex_enter(&connp->conn_lock); 11732 wantpacket = 11733 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11734 mutex_exit(&connp->conn_lock); 11735 11736 return (wantpacket); 11737 } 11738 11739 11740 /* 11741 * Transmit a packet and update any NUD state based on the flags 11742 * XXX need to "recover" any ip6i_t when doing putq! 11743 * 11744 * NOTE : This function does not ire_refrele the ire passed in as the 11745 * argument. 11746 */ 11747 void 11748 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11749 int caller, ipsec_out_t *io) 11750 { 11751 mblk_t *mp1; 11752 nce_t *nce = ire->ire_nce; 11753 ill_t *ill; 11754 uint64_t delta; 11755 ip6_t *ip6h; 11756 queue_t *stq = ire->ire_stq; 11757 ire_t *ire1 = NULL; 11758 ire_t *save_ire = ire; 11759 boolean_t multirt_send = B_FALSE; 11760 mblk_t *next_mp = NULL; 11761 11762 ip6h = (ip6_t *)mp->b_rptr; 11763 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11764 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11765 ASSERT(nce != NULL); 11766 ASSERT(mp->b_datap->db_type == M_DATA); 11767 ASSERT(stq != NULL); 11768 11769 ill = ire_to_ill(ire); 11770 if (!ill) { 11771 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11772 freemsg(mp); 11773 return; 11774 } 11775 11776 /* 11777 * If a packet is to be sent out an interface that is a 6to4 11778 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11779 * destination, must be checked to have a 6to4 prefix 11780 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11781 * address configured on the sending interface. 
Otherwise, 11782 * the packet was delivered to this interface in error and the 11783 * packet must be dropped. 11784 */ 11785 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11786 ipif_t *ipif = ill->ill_ipif; 11787 11788 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11789 &ip6h->ip6_dst)) { 11790 if (ip_debug > 2) { 11791 /* ip1dbg */ 11792 pr_addr_dbg("ip_xmit_v6: attempting to " 11793 "send 6to4 addressed IPv6 " 11794 "destination (%s) out the wrong " 11795 "interface.\n", AF_INET6, 11796 &ip6h->ip6_dst); 11797 } 11798 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11799 freemsg(mp); 11800 return; 11801 } 11802 } 11803 11804 /* Flow-control check has been done in ip_wput_ire_v6 */ 11805 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11806 caller == IP_WSRV || canput(stq->q_next)) { 11807 uint32_t ill_index; 11808 11809 /* 11810 * In most cases, the emission loop below is entered only 11811 * once. Only in the case where the ire holds the 11812 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11813 * flagged ires in the bucket, and send the packet 11814 * through all crossed RTF_MULTIRT routes. 11815 */ 11816 if (ire->ire_flags & RTF_MULTIRT) { 11817 /* 11818 * Multirouting case. The bucket where ire is stored 11819 * probably holds other RTF_MULTIRT flagged ires 11820 * to the destination. In this call to ip_xmit_v6, 11821 * we attempt to send the packet through all 11822 * those ires. Thus, we first ensure that ire is the 11823 * first RTF_MULTIRT ire in the bucket, 11824 * before walking the ire list. 11825 */ 11826 ire_t *first_ire; 11827 irb_t *irb = ire->ire_bucket; 11828 ASSERT(irb != NULL); 11829 multirt_send = B_TRUE; 11830 11831 /* Make sure we do not omit any multiroute ire. 
*/ 11832 IRB_REFHOLD(irb); 11833 for (first_ire = irb->irb_ire; 11834 first_ire != NULL; 11835 first_ire = first_ire->ire_next) { 11836 if ((first_ire->ire_flags & RTF_MULTIRT) && 11837 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11838 &ire->ire_addr_v6)) && 11839 !(first_ire->ire_marks & 11840 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11841 break; 11842 } 11843 11844 if ((first_ire != NULL) && (first_ire != ire)) { 11845 IRE_REFHOLD(first_ire); 11846 /* ire will be released by the caller */ 11847 ire = first_ire; 11848 nce = ire->ire_nce; 11849 stq = ire->ire_stq; 11850 ill = ire_to_ill(ire); 11851 } 11852 IRB_REFRELE(irb); 11853 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11854 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11855 ILL_MDT_USABLE(ill)) { 11856 /* 11857 * This tcp connection was marked as MDT-capable, but 11858 * it has been turned off due changes in the interface. 11859 * Now that the interface support is back, turn it on 11860 * by notifying tcp. We don't directly modify tcp_mdt, 11861 * since we leave all the details to the tcp code that 11862 * knows better. 11863 */ 11864 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11865 11866 if (mdimp == NULL) { 11867 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11868 "connp %p (ENOMEM)\n", (void *)connp)); 11869 } else { 11870 CONN_INC_REF(connp); 11871 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11872 connp, SQTAG_TCP_INPUT_MCTL); 11873 } 11874 } 11875 11876 do { 11877 boolean_t qos_done = B_FALSE; 11878 11879 if (multirt_send) { 11880 irb_t *irb; 11881 /* 11882 * We are in a multiple send case, need to get 11883 * the next ire and make a duplicate of the 11884 * packet. ire1 holds here the next ire to 11885 * process in the bucket. If multirouting is 11886 * expected, any non-RTF_MULTIRT ire that has 11887 * the right destination address is ignored. 
11888 */ 11889 irb = ire->ire_bucket; 11890 ASSERT(irb != NULL); 11891 11892 IRB_REFHOLD(irb); 11893 for (ire1 = ire->ire_next; 11894 ire1 != NULL; 11895 ire1 = ire1->ire_next) { 11896 if (!(ire1->ire_flags & RTF_MULTIRT)) 11897 continue; 11898 if (!IN6_ARE_ADDR_EQUAL( 11899 &ire1->ire_addr_v6, 11900 &ire->ire_addr_v6)) 11901 continue; 11902 if (ire1->ire_marks & 11903 (IRE_MARK_CONDEMNED| 11904 IRE_MARK_HIDDEN)) 11905 continue; 11906 11907 /* Got one */ 11908 if (ire1 != save_ire) { 11909 IRE_REFHOLD(ire1); 11910 } 11911 break; 11912 } 11913 IRB_REFRELE(irb); 11914 11915 if (ire1 != NULL) { 11916 next_mp = copyb(mp); 11917 if ((next_mp == NULL) || 11918 ((mp->b_cont != NULL) && 11919 ((next_mp->b_cont = 11920 dupmsg(mp->b_cont)) == 11921 NULL))) { 11922 freemsg(next_mp); 11923 next_mp = NULL; 11924 ire_refrele(ire1); 11925 ire1 = NULL; 11926 } 11927 } 11928 11929 /* Last multiroute ire; don't loop anymore. */ 11930 if (ire1 == NULL) { 11931 multirt_send = B_FALSE; 11932 } 11933 } 11934 11935 ill_index = 11936 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11937 11938 /* 11939 * Check for fastpath, we need to hold nce_lock to 11940 * prevent fastpath update from chaining nce_fp_mp. 11941 */ 11942 11943 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11944 mutex_enter(&nce->nce_lock); 11945 if ((mp1 = nce->nce_fp_mp) != NULL) { 11946 uint32_t hlen; 11947 uchar_t *rptr; 11948 11949 /* Initiate IPPF processing */ 11950 if (IP6_OUT_IPP(flags)) { 11951 /* 11952 * We have to release the nce lock since 11953 * IPPF components use 11954 * ill_lookup_on_ifindex(), 11955 * which takes the ill_g_lock and the 11956 * ill_lock locks. 
11957 */ 11958 mutex_exit(&nce->nce_lock); 11959 ip_process(IPP_LOCAL_OUT, &mp, 11960 ill_index); 11961 if (mp == NULL) { 11962 BUMP_MIB( 11963 ill->ill_ip6_mib, 11964 ipv6OutDiscards); 11965 if (next_mp != NULL) 11966 freemsg(next_mp); 11967 if (ire != save_ire) { 11968 ire_refrele(ire); 11969 } 11970 return; 11971 } 11972 mutex_enter(&nce->nce_lock); 11973 if ((mp1 = nce->nce_fp_mp) == NULL) { 11974 /* 11975 * Probably disappeared during 11976 * IPQoS processing. 11977 */ 11978 qos_done = B_TRUE; 11979 goto prepend_unitdata; 11980 } 11981 } 11982 hlen = MBLKL(mp1); 11983 rptr = mp->b_rptr - hlen; 11984 /* 11985 * make sure there is room for the fastpath 11986 * datalink header 11987 */ 11988 if (rptr < mp->b_datap->db_base) { 11989 mp1 = copyb(mp1); 11990 if (mp1 == NULL) { 11991 mutex_exit(&nce->nce_lock); 11992 BUMP_MIB(ill->ill_ip6_mib, 11993 ipv6OutDiscards); 11994 freemsg(mp); 11995 if (next_mp != NULL) 11996 freemsg(next_mp); 11997 if (ire != save_ire) { 11998 ire_refrele(ire); 11999 } 12000 return; 12001 } 12002 mp1->b_cont = mp; 12003 12004 /* Get the priority marking, if any */ 12005 mp1->b_band = mp->b_band; 12006 mp = mp1; 12007 } else { 12008 mp->b_rptr = rptr; 12009 /* 12010 * fastpath - pre-pend datalink 12011 * header 12012 */ 12013 bcopy(mp1->b_rptr, rptr, hlen); 12014 } 12015 12016 mutex_exit(&nce->nce_lock); 12017 12018 } else { 12019 prepend_unitdata: 12020 mutex_exit(&nce->nce_lock); 12021 mp1 = nce->nce_res_mp; 12022 if (mp1 == NULL) { 12023 ip1dbg(("ip_xmit_v6: No resolution " 12024 "block ire = %p\n", (void *)ire)); 12025 freemsg(mp); 12026 if (next_mp != NULL) 12027 freemsg(next_mp); 12028 if (ire != save_ire) { 12029 ire_refrele(ire); 12030 } 12031 return; 12032 } 12033 /* 12034 * Prepend the DL_UNITDATA_REQ. 
12035 */ 12036 mp1 = copyb(mp1); 12037 if (mp1 == NULL) { 12038 BUMP_MIB(ill->ill_ip6_mib, 12039 ipv6OutDiscards); 12040 freemsg(mp); 12041 if (next_mp != NULL) 12042 freemsg(next_mp); 12043 if (ire != save_ire) { 12044 ire_refrele(ire); 12045 } 12046 return; 12047 } 12048 mp1->b_cont = mp; 12049 mp = mp1; 12050 /* 12051 * Initiate IPPF processing, if it is 12052 * already done, bypass. 12053 */ 12054 if (!qos_done && IP6_OUT_IPP(flags)) { 12055 ip_process(IPP_LOCAL_OUT, &mp, 12056 ill_index); 12057 if (mp == NULL) { 12058 BUMP_MIB(ill->ill_ip6_mib, 12059 ipv6OutDiscards); 12060 if (next_mp != NULL) 12061 freemsg(next_mp); 12062 if (ire != save_ire) { 12063 ire_refrele(ire); 12064 } 12065 return; 12066 } 12067 } 12068 } 12069 12070 /* 12071 * Update ire counters; for save_ire, this has been 12072 * done by the caller. 12073 */ 12074 if (ire != save_ire) { 12075 UPDATE_OB_PKT_COUNT(ire); 12076 ire->ire_last_used_time = lbolt; 12077 } 12078 12079 /* 12080 * Send it down. XXX Do we want to flow control AH/ESP 12081 * packets that carry TCP payloads? We don't flow 12082 * control TCP packets, but we should also not 12083 * flow-control TCP packets that have been protected. 12084 * We don't have an easy way to find out if an AH/ESP 12085 * packet was originally TCP or not currently. 12086 */ 12087 if (io == NULL) { 12088 putnext(stq, mp); 12089 } else { 12090 /* 12091 * Safety Pup says: make sure this is 12092 * going to the right interface! 12093 */ 12094 if (io->ipsec_out_capab_ill_index != 12095 ill_index) { 12096 /* IPsec kstats: bump lose counter */ 12097 freemsg(mp1); 12098 } else { 12099 ipsec_hw_putnext(stq, mp); 12100 } 12101 } 12102 12103 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12104 if (ire != save_ire) { 12105 ire_refrele(ire); 12106 } 12107 if (multirt_send) { 12108 ASSERT(ire1 != NULL); 12109 /* 12110 * Proceed with the next RTF_MULTIRT 12111 * ire, also set up the send-to queue 12112 * accordingly. 
12113 */ 12114 ire = ire1; 12115 ire1 = NULL; 12116 stq = ire->ire_stq; 12117 nce = ire->ire_nce; 12118 ill = ire_to_ill(ire); 12119 mp = next_mp; 12120 next_mp = NULL; 12121 continue; 12122 } 12123 ASSERT(next_mp == NULL); 12124 ASSERT(ire1 == NULL); 12125 return; 12126 } 12127 12128 ASSERT(nce->nce_state != ND_INCOMPLETE); 12129 12130 /* 12131 * Check for upper layer advice 12132 */ 12133 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12134 /* 12135 * It should be o.k. to check the state without 12136 * a lock here, at most we lose an advice. 12137 */ 12138 nce->nce_last = TICK_TO_MSEC(lbolt64); 12139 if (nce->nce_state != ND_REACHABLE) { 12140 12141 mutex_enter(&nce->nce_lock); 12142 nce->nce_state = ND_REACHABLE; 12143 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12144 mutex_exit(&nce->nce_lock); 12145 (void) untimeout(nce->nce_timeout_id); 12146 if (ip_debug > 2) { 12147 /* ip1dbg */ 12148 pr_addr_dbg("ip_xmit_v6: state" 12149 " for %s changed to" 12150 " REACHABLE\n", AF_INET6, 12151 &ire->ire_addr_v6); 12152 } 12153 } 12154 if (ire != save_ire) { 12155 ire_refrele(ire); 12156 } 12157 if (multirt_send) { 12158 ASSERT(ire1 != NULL); 12159 /* 12160 * Proceed with the next RTF_MULTIRT 12161 * ire, also set up the send-to queue 12162 * accordingly. 12163 */ 12164 ire = ire1; 12165 ire1 = NULL; 12166 stq = ire->ire_stq; 12167 nce = ire->ire_nce; 12168 ill = ire_to_ill(ire); 12169 mp = next_mp; 12170 next_mp = NULL; 12171 continue; 12172 } 12173 ASSERT(next_mp == NULL); 12174 ASSERT(ire1 == NULL); 12175 return; 12176 } 12177 12178 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12179 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12180 " ill_reachable_time = %d \n", delta, 12181 ill->ill_reachable_time)); 12182 if (delta > (uint64_t)ill->ill_reachable_time) { 12183 nce = ire->ire_nce; 12184 mutex_enter(&nce->nce_lock); 12185 switch (nce->nce_state) { 12186 case ND_REACHABLE: 12187 case ND_STALE: 12188 /* 12189 * ND_REACHABLE is identical to 12190 * ND_STALE in this specific case. 
If 12191 * reachable time has expired for this 12192 * neighbor (delta is greater than 12193 * reachable time), conceptually, the 12194 * neighbor cache is no longer in 12195 * REACHABLE state, but already in 12196 * STALE state. So the correct 12197 * transition here is to ND_DELAY. 12198 */ 12199 nce->nce_state = ND_DELAY; 12200 mutex_exit(&nce->nce_lock); 12201 NDP_RESTART_TIMER(nce, 12202 delay_first_probe_time); 12203 if (ip_debug > 3) { 12204 /* ip2dbg */ 12205 pr_addr_dbg("ip_xmit_v6: state" 12206 " for %s changed to" 12207 " DELAY\n", AF_INET6, 12208 &ire->ire_addr_v6); 12209 } 12210 break; 12211 case ND_DELAY: 12212 case ND_PROBE: 12213 mutex_exit(&nce->nce_lock); 12214 /* Timers have already started */ 12215 break; 12216 case ND_UNREACHABLE: 12217 /* 12218 * ndp timer has detected that this nce 12219 * is unreachable and initiated deleting 12220 * this nce and all its associated IREs. 12221 * This is a race where we found the 12222 * ire before it was deleted and have 12223 * just sent out a packet using this 12224 * unreachable nce. 12225 */ 12226 mutex_exit(&nce->nce_lock); 12227 break; 12228 default: 12229 ASSERT(0); 12230 } 12231 } 12232 12233 if (multirt_send) { 12234 ASSERT(ire1 != NULL); 12235 /* 12236 * Proceed with the next RTF_MULTIRT ire, 12237 * Also set up the send-to queue accordingly. 12238 */ 12239 if (ire != save_ire) { 12240 ire_refrele(ire); 12241 } 12242 ire = ire1; 12243 ire1 = NULL; 12244 stq = ire->ire_stq; 12245 nce = ire->ire_nce; 12246 ill = ire_to_ill(ire); 12247 mp = next_mp; 12248 next_mp = NULL; 12249 } 12250 } while (multirt_send); 12251 /* 12252 * In the multirouting case, release the last ire used for 12253 * emission. save_ire will be released by the caller. 12254 */ 12255 if (ire != save_ire) { 12256 ire_refrele(ire); 12257 } 12258 } else { 12259 /* 12260 * Queue packet if we have an conn to give back pressure. 
12261 * We can't queue packets intended for hardware acceleration 12262 * since we've tossed that state already. If the packet is 12263 * being fed back from ire_send_v6, we don't know the 12264 * position in the queue to enqueue the packet and we discard 12265 * the packet. 12266 */ 12267 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12268 (caller != IRE_SEND)) { 12269 if (caller == IP_WSRV) { 12270 connp->conn_did_putbq = 1; 12271 (void) putbq(connp->conn_wq, mp); 12272 conn_drain_insert(connp); 12273 /* 12274 * caller == IP_WSRV implies we are 12275 * the service thread, and the 12276 * queue is already noenabled. 12277 * The check for canput and 12278 * the putbq is not atomic. 12279 * So we need to check again. 12280 */ 12281 if (canput(stq->q_next)) 12282 connp->conn_did_putbq = 0; 12283 } else { 12284 (void) putq(connp->conn_wq, mp); 12285 } 12286 return; 12287 } 12288 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12289 freemsg(mp); 12290 return; 12291 } 12292 } 12293 12294 /* 12295 * pr_addr_dbg function provides the needed buffer space to call 12296 * inet_ntop() function's 3rd argument. This function should be 12297 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12298 * stack buffer space in it's own stack frame. This function uses 12299 * a buffer from it's own stack and prints the information. 12300 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12301 * 12302 * Note: This function can call inet_ntop() once. 12303 */ 12304 void 12305 pr_addr_dbg(char *fmt1, int af, const void *addr) 12306 { 12307 char buf[INET6_ADDRSTRLEN]; 12308 12309 if (fmt1 == NULL) { 12310 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12311 return; 12312 } 12313 12314 /* 12315 * This does not compare debug level and just prints 12316 * out. Thus it is the responsibility of the caller 12317 * to check the appropriate debug-level before calling 12318 * this function. 
12319 */ 12320 if (ip_debug > 0) { 12321 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12322 } 12323 12324 12325 } 12326 12327 12328 /* 12329 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12330 * if needed and extension headers) that will be needed based on the 12331 * ip6_pkt_t structure passed by the caller. 12332 * 12333 * The returned length does not include the length of the upper level 12334 * protocol (ULP) header. 12335 */ 12336 int 12337 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12338 { 12339 int len; 12340 12341 len = IPV6_HDR_LEN; 12342 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12343 len += sizeof (ip6i_t); 12344 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12345 ASSERT(ipp->ipp_hopoptslen != 0); 12346 len += ipp->ipp_hopoptslen; 12347 } 12348 if (ipp->ipp_fields & IPPF_RTHDR) { 12349 ASSERT(ipp->ipp_rthdrlen != 0); 12350 len += ipp->ipp_rthdrlen; 12351 } 12352 /* 12353 * En-route destination options 12354 * Only do them if there's a routing header as well 12355 */ 12356 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12357 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12358 ASSERT(ipp->ipp_rtdstoptslen != 0); 12359 len += ipp->ipp_rtdstoptslen; 12360 } 12361 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12362 ASSERT(ipp->ipp_dstoptslen != 0); 12363 len += ipp->ipp_dstoptslen; 12364 } 12365 return (len); 12366 } 12367 12368 /* 12369 * All-purpose routine to build a header chain of an IPv6 header 12370 * followed by any required extension headers and a proto header, 12371 * preceeded (where necessary) by an ip6i_t private header. 12372 * 12373 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12374 * will be filled in appropriately. 12375 * Thus the caller must fill in the rest of the IPv6 header, such as 12376 * traffic class/flowid, source address (if not set here), hoplimit (if not 12377 * set here) and destination address. 12378 * 12379 * The extension headers and ip6i_t header will all be fully filled in. 
12380 */ 12381 void 12382 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12383 ip6_pkt_t *ipp, uint8_t protocol) 12384 { 12385 uint8_t *nxthdr_ptr; 12386 uint8_t *cp; 12387 ip6i_t *ip6i; 12388 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12389 12390 /* 12391 * If sending private ip6i_t header down (checksum info, nexthop, 12392 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12393 * then fill it in. (The checksum info will be filled in by icmp). 12394 */ 12395 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12396 ip6i = (ip6i_t *)ip6h; 12397 ip6h = (ip6_t *)&ip6i[1]; 12398 12399 ip6i->ip6i_flags = 0; 12400 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12401 if (ipp->ipp_fields & IPPF_IFINDEX || 12402 ipp->ipp_fields & IPPF_SCOPE_ID) { 12403 ASSERT(ipp->ipp_ifindex != 0); 12404 ip6i->ip6i_flags |= IP6I_IFINDEX; 12405 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12406 } 12407 if (ipp->ipp_fields & IPPF_ADDR) { 12408 /* 12409 * Enable per-packet source address verification if 12410 * IPV6_PKTINFO specified the source address. 12411 * ip6_src is set in the transport's _wput function. 12412 */ 12413 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12414 &ipp->ipp_addr)); 12415 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12416 } 12417 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12418 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12419 /* 12420 * We need to set this flag so that IP doesn't 12421 * rewrite the IPv6 header's hoplimit with the 12422 * current default value. 
12423 */ 12424 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12425 } 12426 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12427 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12428 &ipp->ipp_nexthop)); 12429 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12430 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12431 } 12432 /* 12433 * tell IP this is an ip6i_t private header 12434 */ 12435 ip6i->ip6i_nxt = IPPROTO_RAW; 12436 } 12437 /* Initialize IPv6 header */ 12438 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12439 if (ipp->ipp_fields & IPPF_TCLASS) { 12440 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12441 (ipp->ipp_tclass << 20); 12442 } 12443 if (ipp->ipp_fields & IPPF_ADDR) 12444 ip6h->ip6_src = ipp->ipp_addr; 12445 12446 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12447 cp = (uint8_t *)&ip6h[1]; 12448 /* 12449 * Here's where we have to start stringing together 12450 * any extension headers in the right order: 12451 * Hop-by-hop, destination, routing, and final destination opts. 12452 */ 12453 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12454 /* Hop-by-hop options */ 12455 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12456 12457 *nxthdr_ptr = IPPROTO_HOPOPTS; 12458 nxthdr_ptr = &hbh->ip6h_nxt; 12459 12460 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12461 cp += ipp->ipp_hopoptslen; 12462 } 12463 /* 12464 * En-route destination options 12465 * Only do them if there's a routing header as well 12466 */ 12467 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12468 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12469 ip6_dest_t *dst = (ip6_dest_t *)cp; 12470 12471 *nxthdr_ptr = IPPROTO_DSTOPTS; 12472 nxthdr_ptr = &dst->ip6d_nxt; 12473 12474 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12475 cp += ipp->ipp_rtdstoptslen; 12476 } 12477 /* 12478 * Routing header next 12479 */ 12480 if (ipp->ipp_fields & IPPF_RTHDR) { 12481 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12482 12483 *nxthdr_ptr = IPPROTO_ROUTING; 12484 nxthdr_ptr = &rt->ip6r_nxt; 12485 12486 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12487 cp += ipp->ipp_rthdrlen; 12488 } 
12489 /* 12490 * Do ultimate destination options 12491 */ 12492 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12493 ip6_dest_t *dest = (ip6_dest_t *)cp; 12494 12495 *nxthdr_ptr = IPPROTO_DSTOPTS; 12496 nxthdr_ptr = &dest->ip6d_nxt; 12497 12498 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12499 cp += ipp->ipp_dstoptslen; 12500 } 12501 /* 12502 * Now set the last header pointer to the proto passed in 12503 */ 12504 *nxthdr_ptr = protocol; 12505 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12506 } 12507 12508 /* 12509 * Return a pointer to the routing header extension header 12510 * in the IPv6 header(s) chain passed in. 12511 * If none found, return NULL 12512 * Assumes that all extension headers are in same mblk as the v6 header 12513 */ 12514 ip6_rthdr_t * 12515 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12516 { 12517 ip6_dest_t *desthdr; 12518 ip6_frag_t *fraghdr; 12519 uint_t hdrlen; 12520 uint8_t nexthdr; 12521 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12522 12523 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12524 return ((ip6_rthdr_t *)ptr); 12525 12526 /* 12527 * The routing header will precede all extension headers 12528 * other than the hop-by-hop and destination options 12529 * extension headers, so if we see anything other than those, 12530 * we're done and didn't find it. 12531 * We could see a destination options header alone but no 12532 * routing header, in which case we'll return NULL as soon as 12533 * we see anything after that. 12534 * Hop-by-hop and destination option headers are identical, 12535 * so we can use either one we want as a template. 12536 */ 12537 nexthdr = ip6h->ip6_nxt; 12538 while (ptr < endptr) { 12539 /* Is there enough left for len + nexthdr? 
*/ 12540 if (ptr + MIN_EHDR_LEN > endptr) 12541 return (NULL); 12542 12543 switch (nexthdr) { 12544 case IPPROTO_HOPOPTS: 12545 case IPPROTO_DSTOPTS: 12546 /* Assumes the headers are identical for hbh and dst */ 12547 desthdr = (ip6_dest_t *)ptr; 12548 hdrlen = 8 * (desthdr->ip6d_len + 1); 12549 nexthdr = desthdr->ip6d_nxt; 12550 break; 12551 12552 case IPPROTO_ROUTING: 12553 return ((ip6_rthdr_t *)ptr); 12554 12555 case IPPROTO_FRAGMENT: 12556 fraghdr = (ip6_frag_t *)ptr; 12557 hdrlen = sizeof (ip6_frag_t); 12558 nexthdr = fraghdr->ip6f_nxt; 12559 break; 12560 12561 default: 12562 return (NULL); 12563 } 12564 ptr += hdrlen; 12565 } 12566 return (NULL); 12567 } 12568 12569 /* 12570 * Called for source-routed packets originating on this node. 12571 * Manipulates the original routing header by moving every entry up 12572 * one slot, placing the first entry in the v6 header's v6_dst field, 12573 * and placing the ultimate destination in the routing header's last 12574 * slot. 12575 * 12576 * Returns the checksum diference between the ultimate destination 12577 * (last hop in the routing header when the packet is sent) and 12578 * the first hop (ip6_dst when the packet is sent) 12579 */ 12580 uint32_t 12581 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12582 { 12583 uint_t numaddr; 12584 uint_t i; 12585 in6_addr_t *addrptr; 12586 in6_addr_t tmp; 12587 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12588 uint32_t cksm; 12589 uint32_t addrsum = 0; 12590 uint16_t *ptr; 12591 12592 /* 12593 * Perform any processing needed for source routing. 12594 * We know that all extension headers will be in the same mblk 12595 * as the IPv6 header. 12596 */ 12597 12598 /* 12599 * If no segments left in header, or the header length field is zero, 12600 * don't move hop addresses around; 12601 * Checksum difference is zero. 
12602 */ 12603 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12604 return (0); 12605 12606 ptr = (uint16_t *)&ip6h->ip6_dst; 12607 cksm = 0; 12608 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12609 cksm += ptr[i]; 12610 } 12611 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12612 12613 /* 12614 * Here's where the fun begins - we have to 12615 * move all addresses up one spot, take the 12616 * first hop and make it our first ip6_dst, 12617 * and place the ultimate destination in the 12618 * newly-opened last slot. 12619 */ 12620 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12621 numaddr = rthdr->ip6r0_len / 2; 12622 tmp = *addrptr; 12623 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12624 *addrptr = addrptr[1]; 12625 } 12626 *addrptr = ip6h->ip6_dst; 12627 ip6h->ip6_dst = tmp; 12628 12629 /* 12630 * From the checksummed ultimate destination subtract the checksummed 12631 * current ip6_dst (the first hop address). Return that number. 12632 * (In the v4 case, the second part of this is done in each routine 12633 * that calls ip_massage_options(). We do it all in this one place 12634 * for v6). 12635 */ 12636 ptr = (uint16_t *)&ip6h->ip6_dst; 12637 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12638 addrsum += ptr[i]; 12639 } 12640 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12641 if ((int)cksm < 0) 12642 cksm--; 12643 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12644 12645 return (cksm); 12646 } 12647 12648 /* 12649 * See if the upper-level protocol indicated by 'proto' will be able 12650 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12651 * ICMP6_PACKET_TOO_BIG (IPv6). 12652 */ 12653 static boolean_t 12654 ip_ulp_cando_pkt2big(int proto) 12655 { 12656 /* 12657 * For now, only TCP can handle this. 12658 * Tunnels may be able to also, but since tun isn't working over 12659 * IPv6 yet, don't worry about it for now. 
12660 */ 12661 return (proto == IPPROTO_TCP); 12662 } 12663 12664 12665 /* 12666 * Propagate a multicast group membership operation (join/leave) (*fn) on 12667 * all interfaces crossed by the related multirt routes. 12668 * The call is considered successful if the operation succeeds 12669 * on at least one interface. 12670 * The function is called if the destination address in the packet to send 12671 * is multirouted. 12672 */ 12673 int 12674 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12675 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12676 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12677 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12678 { 12679 ire_t *ire_gw; 12680 irb_t *irb; 12681 int index, error = 0; 12682 opt_restart_t *or; 12683 12684 irb = ire->ire_bucket; 12685 ASSERT(irb != NULL); 12686 12687 ASSERT(DB_TYPE(first_mp) == M_CTL); 12688 or = (opt_restart_t *)first_mp->b_rptr; 12689 12690 IRB_REFHOLD(irb); 12691 for (; ire != NULL; ire = ire->ire_next) { 12692 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12693 continue; 12694 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12695 continue; 12696 12697 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12698 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12699 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12700 /* No resolver exists for the gateway; skip this ire. */ 12701 if (ire_gw == NULL) 12702 continue; 12703 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12704 /* 12705 * A resolver exists: we can get the interface on which we have 12706 * to apply the operation. 
12707 */ 12708 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12709 first_mp); 12710 if (error == 0) 12711 or->or_private = CGTP_MCAST_SUCCESS; 12712 12713 if (ip_debug > 0) { 12714 ulong_t off; 12715 char *ksym; 12716 12717 ksym = kobj_getsymname((uintptr_t)fn, &off); 12718 ip2dbg(("ip_multirt_apply_membership_v6: " 12719 "called %s, multirt group 0x%08x via itf 0x%08x, " 12720 "error %d [success %u]\n", 12721 ksym ? ksym : "?", 12722 ntohl(V4_PART_OF_V6((*v6grp))), 12723 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12724 error, or->or_private)); 12725 } 12726 12727 ire_refrele(ire_gw); 12728 if (error == EINPROGRESS) { 12729 IRB_REFRELE(irb); 12730 return (error); 12731 } 12732 } 12733 IRB_REFRELE(irb); 12734 /* 12735 * Consider the call as successful if we succeeded on at least 12736 * one interface. Otherwise, return the last encountered error. 12737 */ 12738 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12739 } 12740 12741 void 12742 ip6_kstat_init(void) 12743 { 12744 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12745 "net", KSTAT_TYPE_NAMED, 12746 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12747 KSTAT_FLAG_VIRTUAL)) != NULL) { 12748 ip6_kstat->ks_data = &ip6_statistics; 12749 kstat_install(ip6_kstat); 12750 } 12751 } 12752 12753 /* 12754 * The following two functions set and get the value for the 12755 * IPV6_SRC_PREFERENCES socket option. 12756 */ 12757 int 12758 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12759 { 12760 /* 12761 * We only support preferences that are covered by 12762 * IPV6_PREFER_SRC_MASK. 12763 */ 12764 if (prefs & ~IPV6_PREFER_SRC_MASK) 12765 return (EINVAL); 12766 12767 /* 12768 * Look for conflicting preferences or default preferences. If 12769 * both bits of a related pair are clear, the application wants the 12770 * system's default value for that pair. Both bits in a pair can't 12771 * be set. 
12772 */ 12773 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12774 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12775 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12776 IPV6_PREFER_SRC_MIPMASK) { 12777 return (EINVAL); 12778 } 12779 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12780 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12781 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12782 IPV6_PREFER_SRC_TMPMASK) { 12783 return (EINVAL); 12784 } 12785 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12786 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12787 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12788 IPV6_PREFER_SRC_CGAMASK) { 12789 return (EINVAL); 12790 } 12791 12792 connp->conn_src_preferences = prefs; 12793 return (0); 12794 } 12795 12796 size_t 12797 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12798 { 12799 *val = connp->conn_src_preferences; 12800 return (sizeof (connp->conn_src_preferences)); 12801 } 12802 12803 int 12804 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12805 { 12806 ill_t *ill; 12807 ire_t *ire; 12808 int error; 12809 12810 /* 12811 * Verify the source address and ifindex. Privileged users can use 12812 * any source address. For ancillary data the source address is 12813 * checked in ip_wput_v6. 12814 */ 12815 if (pkti->ipi6_ifindex != 0) { 12816 ASSERT(connp != NULL); 12817 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12818 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12819 if (ill == NULL) { 12820 /* 12821 * We just want to know if the interface exists, we 12822 * don't really care about the ill pointer itself. 
12823 */ 12824 if (error != EINPROGRESS) 12825 return (error); 12826 error = 0; /* Ensure we don't use it below */ 12827 } else { 12828 ill_refrele(ill); 12829 } 12830 } 12831 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12832 secpolicy_net_rawaccess(cr) != 0) { 12833 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12834 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12835 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 12836 if (ire != NULL) 12837 ire_refrele(ire); 12838 else 12839 return (ENXIO); 12840 } 12841 return (0); 12842 } 12843 12844 /* 12845 * Get the size of the IP options (including the IP headers size) 12846 * without including the AH header's size. If till_ah is B_FALSE, 12847 * and if AH header is present, dest options beyond AH header will 12848 * also be included in the returned size. 12849 */ 12850 int 12851 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12852 { 12853 ip6_t *ip6h; 12854 uint8_t nexthdr; 12855 uint8_t *whereptr; 12856 ip6_hbh_t *hbhhdr; 12857 ip6_dest_t *dsthdr; 12858 ip6_rthdr_t *rthdr; 12859 int ehdrlen; 12860 int size; 12861 ah_t *ah; 12862 12863 ip6h = (ip6_t *)mp->b_rptr; 12864 size = IPV6_HDR_LEN; 12865 nexthdr = ip6h->ip6_nxt; 12866 whereptr = (uint8_t *)&ip6h[1]; 12867 for (;;) { 12868 /* Assume IP has already stripped it */ 12869 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12870 switch (nexthdr) { 12871 case IPPROTO_HOPOPTS: 12872 hbhhdr = (ip6_hbh_t *)whereptr; 12873 nexthdr = hbhhdr->ip6h_nxt; 12874 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12875 break; 12876 case IPPROTO_DSTOPTS: 12877 dsthdr = (ip6_dest_t *)whereptr; 12878 nexthdr = dsthdr->ip6d_nxt; 12879 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12880 break; 12881 case IPPROTO_ROUTING: 12882 rthdr = (ip6_rthdr_t *)whereptr; 12883 nexthdr = rthdr->ip6r_nxt; 12884 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12885 break; 12886 default : 12887 if (till_ah) { 12888 ASSERT(nexthdr == IPPROTO_AH); 12889 return (size); 12890 } 12891 /* 12892 * If we don't have a AH 
header to traverse, 12893 * return now. This happens normally for 12894 * outbound datagrams where we have not inserted 12895 * the AH header. 12896 */ 12897 if (nexthdr != IPPROTO_AH) { 12898 return (size); 12899 } 12900 12901 /* 12902 * We don't include the AH header's size 12903 * to be symmetrical with other cases where 12904 * we either don't have a AH header (outbound) 12905 * or peek into the AH header yet (inbound and 12906 * not pulled up yet). 12907 */ 12908 ah = (ah_t *)whereptr; 12909 nexthdr = ah->ah_nexthdr; 12910 ehdrlen = (ah->ah_length << 2) + 8; 12911 12912 if (nexthdr == IPPROTO_DSTOPTS) { 12913 if (whereptr + ehdrlen >= mp->b_wptr) { 12914 /* 12915 * The destination options header 12916 * is not part of the first mblk. 12917 */ 12918 whereptr = mp->b_cont->b_rptr; 12919 } else { 12920 whereptr += ehdrlen; 12921 } 12922 12923 dsthdr = (ip6_dest_t *)whereptr; 12924 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12925 size += ehdrlen; 12926 } 12927 return (size); 12928 } 12929 whereptr += ehdrlen; 12930 size += ehdrlen; 12931 } 12932 } 12933