1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/zone.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/iphada.h> 56 #include <sys/policy.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/udp_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 #include <rpc/pmap_prot.h> 104 105 extern squeue_func_t ip_input_proc; 106 107 /* 108 * IP statistics. 
109 */ 110 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 111 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 112 113 typedef struct ip6_stat { 114 kstat_named_t ip6_udp_fast_path; 115 kstat_named_t ip6_udp_slow_path; 116 kstat_named_t ip6_udp_fannorm; 117 kstat_named_t ip6_udp_fanmb; 118 kstat_named_t ip6_out_sw_cksum; 119 kstat_named_t ip6_in_sw_cksum; 120 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 121 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 122 kstat_named_t ip6_tcp_in_sw_cksum_err; 123 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 124 kstat_named_t ip6_udp_in_full_hw_cksum_err; 125 kstat_named_t ip6_udp_in_part_hw_cksum_err; 126 kstat_named_t ip6_udp_in_sw_cksum_err; 127 kstat_named_t ip6_udp_out_sw_cksum_bytes; 128 kstat_named_t ip6_frag_mdt_pkt_out; 129 kstat_named_t ip6_frag_mdt_discarded; 130 kstat_named_t ip6_frag_mdt_allocfail; 131 kstat_named_t ip6_frag_mdt_addpdescfail; 132 kstat_named_t ip6_frag_mdt_allocd; 133 } ip6_stat_t; 134 135 static ip6_stat_t ip6_statistics = { 136 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 137 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 138 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 139 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 140 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 141 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 142 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 143 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 144 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 145 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 146 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 147 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 148 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 149 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 150 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 151 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 152 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 153 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 154 { 
"ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 155 }; 156 157 static kstat_t *ip6_kstat; 158 159 /* 160 * Naming conventions: 161 * These rules should be judiciously applied 162 * if there is a need to identify something as IPv6 versus IPv4 163 * IPv6 funcions will end with _v6 in the ip module. 164 * IPv6 funcions will end with _ipv6 in the transport modules. 165 * IPv6 macros: 166 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 167 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 168 * And then there are ..V4_PART_OF_V6. 169 * The intent is that macros in the ip module end with _V6. 170 * IPv6 global variables will start with ipv6_ 171 * IPv6 structures will start with ipv6 172 * IPv6 defined constants should start with IPV6_ 173 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 174 */ 175 176 /* 177 * IPv6 mibs when the interface (ill) is not known. 178 * When the ill is known the per-interface mib in the ill is used. 179 */ 180 mib2_ipv6IfStatsEntry_t ip6_mib; 181 mib2_ipv6IfIcmpEntry_t icmp6_mib; 182 183 /* 184 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 185 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 186 * from IANA. This mechanism will remain in effect until an official 187 * number is obtained. 
188 */ 189 uchar_t ip6opt_ls; 190 191 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 192 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 193 194 const in6_addr_t ipv6_all_ones = 195 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 196 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 197 198 #ifdef _BIG_ENDIAN 199 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 200 #else /* _BIG_ENDIAN */ 201 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 202 #endif /* _BIG_ENDIAN */ 203 204 #ifdef _BIG_ENDIAN 205 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 206 #else /* _BIG_ENDIAN */ 207 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 208 #endif /* _BIG_ENDIAN */ 209 210 #ifdef _BIG_ENDIAN 211 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 212 #else /* _BIG_ENDIAN */ 213 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 214 #endif /* _BIG_ENDIAN */ 215 216 #ifdef _BIG_ENDIAN 217 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 218 #else /* _BIG_ENDIAN */ 219 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 220 #endif /* _BIG_ENDIAN */ 221 222 #ifdef _BIG_ENDIAN 223 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 224 #else /* _BIG_ENDIAN */ 225 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 226 #endif /* _BIG_ENDIAN */ 227 228 #ifdef _BIG_ENDIAN 229 const in6_addr_t ipv6_solicited_node_mcast = 230 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 231 #else /* _BIG_ENDIAN */ 232 const in6_addr_t ipv6_solicited_node_mcast = 233 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 234 #endif /* _BIG_ENDIAN */ 235 236 /* 237 * Used by icmp_send_redirect_v6 for picking random src. 
238 */ 239 uint_t icmp_redirect_v6_src_index; 240 241 /* Leave room for ip_newroute to tack on the src and target addresses */ 242 #define OK_RESOLVER_MP_V6(mp) \ 243 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 244 245 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 246 boolean_t, zoneid_t); 247 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 248 const in6_addr_t *, boolean_t); 249 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 250 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 251 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 252 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 253 boolean_t, boolean_t, boolean_t, boolean_t); 254 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 255 iulp_t *); 256 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 257 uint16_t, boolean_t, boolean_t, boolean_t); 258 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 259 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 260 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 261 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 262 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 263 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 264 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 265 uint8_t *, uint_t, uint8_t); 266 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 267 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 268 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 269 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 270 conn_t *, int, int, int); 271 static boolean_t ip_ulp_cando_pkt2big(int); 272 273 static void ip_rput_v6(queue_t *, mblk_t *); 274 static void ip_wput_v6(queue_t *, mblk_t *); 275 276 /* 277 * A template for an IPv6 AR_ENTRY_QUERY 278 */ 279 static areq_t ipv6_areq_template = { 280 
AR_ENTRY_QUERY, /* cmd */ 281 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 282 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 283 IP6_DL_SAP, /* protocol, from arps perspective */ 284 sizeof (areq_t), /* target addr offset */ 285 IPV6_ADDR_LEN, /* target addr_length */ 286 0, /* flags */ 287 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 288 IPV6_ADDR_LEN, /* sender addr length */ 289 6, /* xmit_count */ 290 1000, /* (re)xmit_interval in milliseconds */ 291 4 /* max # of requests to buffer */ 292 /* anything else filled in by the code */ 293 }; 294 295 struct qinit rinit_ipv6 = { 296 (pfi_t)ip_rput_v6, 297 NULL, 298 ip_open, 299 ip_close, 300 NULL, 301 &ip_mod_info 302 }; 303 304 struct qinit winit_ipv6 = { 305 (pfi_t)ip_wput_v6, 306 (pfi_t)ip_wsrv, 307 ip_open, 308 ip_close, 309 NULL, 310 &ip_mod_info 311 }; 312 313 /* 314 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 315 * The message has already been checksummed and if needed, 316 * a copy has been made to be sent any interested ICMP client (conn) 317 * Note that this is different than icmp_inbound() which does the fanout 318 * to conn's as well as local processing of the ICMP packets. 319 * 320 * All error messages are passed to the matching transport stream. 321 * 322 * Zones notes: 323 * The packet is only processed in the context of the specified zone: typically 324 * only this zone will reply to an echo request. This means that the caller must 325 * call icmp_inbound_v6() for each relevant zone. 
326 */ 327 static void 328 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 329 boolean_t mctl_present, uint_t flags, zoneid_t zoneid) 330 { 331 icmp6_t *icmp6; 332 ip6_t *ip6h; 333 boolean_t interested; 334 ip6i_t *ip6i; 335 in6_addr_t origsrc; 336 ire_t *ire; 337 mblk_t *first_mp; 338 ipsec_in_t *ii; 339 340 ASSERT(ill != NULL); 341 first_mp = mp; 342 if (mctl_present) { 343 mp = first_mp->b_cont; 344 ASSERT(mp != NULL); 345 346 ii = (ipsec_in_t *)first_mp->b_rptr; 347 ASSERT(ii->ipsec_in_type == IPSEC_IN); 348 } 349 350 ip6h = (ip6_t *)mp->b_rptr; 351 352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 353 354 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 355 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 356 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 358 freemsg(first_mp); 359 return; 360 } 361 ip6h = (ip6_t *)mp->b_rptr; 362 } 363 if (icmp_accept_clear_messages == 0) { 364 first_mp = ipsec_check_global_policy(first_mp, NULL, 365 NULL, ip6h, mctl_present); 366 if (first_mp == NULL) 367 return; 368 } 369 370 /* 371 * On a labeled system, we have to check whether the zone itself is 372 * permitted to receive raw traffic. 373 */ 374 if (is_system_labeled()) { 375 if (zoneid == ALL_ZONES) 376 zoneid = tsol_packet_to_zoneid(mp); 377 if (!tsol_can_accept_raw(mp, B_FALSE)) { 378 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 379 zoneid)); 380 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 381 freemsg(first_mp); 382 return; 383 } 384 } 385 386 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 387 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 388 icmp6->icmp6_code)); 389 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 390 391 /* Initiate IPPF processing here */ 392 if (IP6_IN_IPP(flags)) { 393 394 /* 395 * If the ifindex changes due to SIOCSLIFINDEX 396 * packet may return to IP on the wrong ill. 
397 */ 398 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 399 if (mp == NULL) { 400 if (mctl_present) { 401 freeb(first_mp); 402 } 403 return; 404 } 405 } 406 407 switch (icmp6->icmp6_type) { 408 case ICMP6_DST_UNREACH: 409 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 410 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 412 break; 413 414 case ICMP6_TIME_EXCEEDED: 415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 416 break; 417 418 case ICMP6_PARAM_PROB: 419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 420 break; 421 422 case ICMP6_PACKET_TOO_BIG: 423 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 424 zoneid); 425 return; 426 case ICMP6_ECHO_REQUEST: 427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 428 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 429 !ipv6_resp_echo_mcast) 430 break; 431 432 /* 433 * We must have exclusive use of the mblk to convert it to 434 * a response. 435 * If not, we copy it. 436 */ 437 if (mp->b_datap->db_ref > 1) { 438 mblk_t *mp1; 439 440 mp1 = copymsg(mp); 441 freemsg(mp); 442 if (mp1 == NULL) { 443 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 444 if (mctl_present) 445 freeb(first_mp); 446 return; 447 } 448 mp = mp1; 449 ip6h = (ip6_t *)mp->b_rptr; 450 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 451 if (mctl_present) 452 first_mp->b_cont = mp; 453 else 454 first_mp = mp; 455 } 456 457 /* 458 * Turn the echo into an echo reply. 459 * Remove any extension headers (do not reverse a source route) 460 * and clear the flow id (keep traffic class for now). 
461 */ 462 if (hdr_length != IPV6_HDR_LEN) { 463 int i; 464 465 for (i = 0; i < IPV6_HDR_LEN; i++) 466 mp->b_rptr[hdr_length - i - 1] = 467 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 468 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 469 ip6h = (ip6_t *)mp->b_rptr; 470 ip6h->ip6_nxt = IPPROTO_ICMPV6; 471 hdr_length = IPV6_HDR_LEN; 472 } 473 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 474 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 475 476 ip6h->ip6_plen = 477 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 478 origsrc = ip6h->ip6_src; 479 /* 480 * Reverse the source and destination addresses. 481 * If the return address is a multicast, zero out the source 482 * (ip_wput_v6 will set an address). 483 */ 484 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 485 ip6h->ip6_src = ipv6_all_zeros; 486 ip6h->ip6_dst = origsrc; 487 } else { 488 ip6h->ip6_src = ip6h->ip6_dst; 489 ip6h->ip6_dst = origsrc; 490 } 491 492 /* set the hop limit */ 493 ip6h->ip6_hops = ipv6_def_hops; 494 495 /* 496 * Prepare for checksum by putting icmp length in the icmp 497 * checksum field. The checksum is calculated in ip_wput_v6. 498 */ 499 icmp6->icmp6_cksum = ip6h->ip6_plen; 500 /* 501 * ICMP echo replies should go out on the same interface 502 * the request came on as probes used by in.mpathd for 503 * detecting NIC failures are ECHO packets. We turn-off load 504 * spreading by allocating a ip6i and setting ip6i_attach_if 505 * to B_TRUE which is handled both by ip_wput_v6 and 506 * ip_newroute_v6. If we don't turnoff load spreading, 507 * the packets might get dropped if there are no 508 * non-FAILED/INACTIVE interfaces for it to go out on and 509 * in.mpathd would wrongly detect a failure or mis-detect 510 * a NIC failure as a link failure. As load spreading can 511 * happen only if ill_group is not NULL, we do only for 512 * that case and this does not affect the normal case. 513 * 514 * We force this only on echo packets that came from on-link 515 * hosts. 
We restrict this to link-local addresses which 516 * is used by in.mpathd for probing. In the IPv6 case, 517 * default routes typically have an ire_ipif pointer and 518 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 519 * might work. As a default route out of this interface 520 * may not be present, enforcing this packet to go out in 521 * this case may not work. 522 */ 523 if (ill->ill_group != NULL && 524 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 525 /* 526 * If we are sending replies to ourselves, don't 527 * set ATTACH_IF as we may not be able to find 528 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 529 * causes ip_wput_v6 to look for an IRE_LOCAL on 530 * "ill" which it may not find and will try to 531 * create an IRE_CACHE for our local address. Once 532 * we do this, we will try to forward all packets 533 * meant to our LOCAL address. 534 */ 535 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 536 NULL); 537 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 538 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 539 if (mp == NULL) { 540 BUMP_MIB(ill->ill_icmp6_mib, 541 ipv6IfIcmpInErrors); 542 if (ire != NULL) 543 ire_refrele(ire); 544 if (mctl_present) 545 freeb(first_mp); 546 return; 547 } else if (mctl_present) { 548 first_mp->b_cont = mp; 549 } else { 550 first_mp = mp; 551 } 552 ip6i = (ip6i_t *)mp->b_rptr; 553 ip6i->ip6i_flags = IP6I_ATTACH_IF; 554 ip6i->ip6i_ifindex = 555 ill->ill_phyint->phyint_ifindex; 556 } 557 if (ire != NULL) 558 ire_refrele(ire); 559 } 560 561 if (!mctl_present) { 562 /* 563 * This packet should go out the same way as it 564 * came in i.e in clear. To make sure that global 565 * policy will not be applied to this in ip_wput, 566 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
567 */ 568 ASSERT(first_mp == mp); 569 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 570 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 571 freemsg(mp); 572 return; 573 } 574 ii = (ipsec_in_t *)first_mp->b_rptr; 575 576 /* This is not a secure packet */ 577 ii->ipsec_in_secure = B_FALSE; 578 first_mp->b_cont = mp; 579 } 580 ii->ipsec_in_zoneid = zoneid; 581 ASSERT(zoneid != ALL_ZONES); 582 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 583 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 584 return; 585 } 586 put(WR(q), first_mp); 587 return; 588 589 case ICMP6_ECHO_REPLY: 590 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 591 break; 592 593 case ND_ROUTER_SOLICIT: 594 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 595 break; 596 597 case ND_ROUTER_ADVERT: 598 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 599 break; 600 601 case ND_NEIGHBOR_SOLICIT: 602 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 603 if (mctl_present) 604 freeb(first_mp); 605 /* XXX may wish to pass first_mp up to ndp_input someday. */ 606 ndp_input(ill, mp); 607 return; 608 609 case ND_NEIGHBOR_ADVERT: 610 BUMP_MIB(ill->ill_icmp6_mib, 611 ipv6IfIcmpInNeighborAdvertisements); 612 if (mctl_present) 613 freeb(first_mp); 614 /* XXX may wish to pass first_mp up to ndp_input someday. */ 615 ndp_input(ill, mp); 616 return; 617 618 case ND_REDIRECT: { 619 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 620 621 if (ipv6_ignore_redirect) 622 break; 623 624 /* 625 * As there is no upper client to deliver, we don't 626 * need the first_mp any more. 627 */ 628 if (mctl_present) 629 freeb(first_mp); 630 if (!pullupmsg(mp, -1) || 631 !icmp_redirect_ok_v6(ill, mp)) { 632 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 633 break; 634 } 635 icmp_redirect_v6(q, mp, ill); 636 return; 637 } 638 639 /* 640 * The next three icmp messages will be handled by MLD. 641 * Pass all valid MLD packets up to any process(es) 642 * listening on a raw ICMP socket. 
MLD messages are 643 * freed by mld_input function. 644 */ 645 case MLD_LISTENER_QUERY: 646 case MLD_LISTENER_REPORT: 647 case MLD_LISTENER_REDUCTION: 648 if (mctl_present) 649 freeb(first_mp); 650 mld_input(q, mp, ill); 651 return; 652 default: 653 break; 654 } 655 if (interested) { 656 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 657 mctl_present, zoneid); 658 } else { 659 freemsg(first_mp); 660 } 661 } 662 663 /* 664 * Process received IPv6 ICMP Packet too big. 665 * After updating any IRE it does the fanout to any matching transport streams. 666 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 667 */ 668 /* ARGSUSED */ 669 static void 670 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 671 boolean_t mctl_present, zoneid_t zoneid) 672 { 673 ip6_t *ip6h; 674 ip6_t *inner_ip6h; 675 icmp6_t *icmp6; 676 uint16_t hdr_length; 677 uint32_t mtu; 678 ire_t *ire, *first_ire; 679 mblk_t *first_mp; 680 681 first_mp = mp; 682 if (mctl_present) 683 mp = first_mp->b_cont; 684 /* 685 * We must have exclusive use of the mblk to update the MTU 686 * in the packet. 687 * If not, we copy it. 688 * 689 * If there's an M_CTL present, we know that allocated first_mp 690 * earlier in this function, so we know first_mp has refcnt of one. 
691 */ 692 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 693 if (mp->b_datap->db_ref > 1) { 694 mblk_t *mp1; 695 696 mp1 = copymsg(mp); 697 freemsg(mp); 698 if (mp1 == NULL) { 699 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 700 if (mctl_present) 701 freeb(first_mp); 702 return; 703 } 704 mp = mp1; 705 if (mctl_present) 706 first_mp->b_cont = mp; 707 else 708 first_mp = mp; 709 } 710 ip6h = (ip6_t *)mp->b_rptr; 711 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 712 hdr_length = ip_hdr_length_v6(mp, ip6h); 713 else 714 hdr_length = IPV6_HDR_LEN; 715 716 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 717 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 718 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 719 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 720 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 721 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 722 freemsg(first_mp); 723 return; 724 } 725 ip6h = (ip6_t *)mp->b_rptr; 726 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 727 inner_ip6h = (ip6_t *)&icmp6[1]; 728 } 729 730 /* 731 * For link local destinations matching simply on IRE type is not 732 * sufficient. Same link local addresses for different ILL's is 733 * possible. 
734 */ 735 736 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 737 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 738 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 739 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 740 741 if (first_ire == NULL) { 742 if (ip_debug > 2) { 743 /* ip1dbg */ 744 pr_addr_dbg("icmp_inbound_too_big_v6:" 745 "no ire for dst %s\n", AF_INET6, 746 &inner_ip6h->ip6_dst); 747 } 748 freemsg(first_mp); 749 return; 750 } 751 752 mtu = ntohl(icmp6->icmp6_mtu); 753 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 754 for (ire = first_ire; ire != NULL && 755 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 756 ire = ire->ire_next) { 757 mutex_enter(&ire->ire_lock); 758 if (mtu < IPV6_MIN_MTU) { 759 ip1dbg(("Received mtu less than IPv6 " 760 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 761 mtu = IPV6_MIN_MTU; 762 /* 763 * If an mtu less than IPv6 min mtu is received, 764 * we must include a fragment header in 765 * subsequent packets. 766 */ 767 ire->ire_frag_flag |= IPH_FRAG_HDR; 768 } 769 ip1dbg(("Received mtu from router: %d\n", mtu)); 770 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 771 /* Record the new max frag size for the ULP. */ 772 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 773 /* 774 * If we need a fragment header in every packet 775 * (above case or multirouting), make sure the 776 * ULP takes it into account when computing the 777 * payload size. 
778 */ 779 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 783 } 784 mutex_exit(&ire->ire_lock); 785 } 786 rw_exit(&first_ire->ire_bucket->irb_lock); 787 ire_refrele(first_ire); 788 } else { 789 irb_t *irb = NULL; 790 /* 791 * for non-link local destinations we match only on the IRE type 792 */ 793 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 794 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 795 if (ire == NULL) { 796 if (ip_debug > 2) { 797 /* ip1dbg */ 798 pr_addr_dbg("icmp_inbound_too_big_v6:" 799 "no ire for dst %s\n", 800 AF_INET6, &inner_ip6h->ip6_dst); 801 } 802 freemsg(first_mp); 803 return; 804 } 805 irb = ire->ire_bucket; 806 ire_refrele(ire); 807 rw_enter(&irb->irb_lock, RW_READER); 808 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 809 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 810 &inner_ip6h->ip6_dst)) { 811 mtu = ntohl(icmp6->icmp6_mtu); 812 mutex_enter(&ire->ire_lock); 813 if (mtu < IPV6_MIN_MTU) { 814 ip1dbg(("Received mtu less than IPv6" 815 "min mtu %d: %d\n", 816 IPV6_MIN_MTU, mtu)); 817 mtu = IPV6_MIN_MTU; 818 /* 819 * If an mtu less than IPv6 min mtu is 820 * received, we must include a fragment 821 * header in subsequent packets. 822 */ 823 ire->ire_frag_flag |= IPH_FRAG_HDR; 824 } 825 826 ip1dbg(("Received mtu from router: %d\n", mtu)); 827 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 828 /* Record the new max frag size for the ULP. */ 829 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 830 /* 831 * If we need a fragment header in 832 * every packet (above case or 833 * multirouting), make sure the ULP 834 * takes it into account when computing 835 * the payload size. 
836 */ 837 icmp6->icmp6_mtu = 838 htonl(ire->ire_max_frag - 839 sizeof (ip6_frag_t)); 840 } else { 841 icmp6->icmp6_mtu = 842 htonl(ire->ire_max_frag); 843 } 844 mutex_exit(&ire->ire_lock); 845 } 846 } 847 rw_exit(&irb->irb_lock); 848 } 849 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 850 mctl_present, zoneid); 851 } 852 853 static void 854 pkt_too_big(conn_t *connp, void *arg) 855 { 856 mblk_t *mp; 857 858 if (!connp->conn_ipv6_recvpathmtu) 859 return; 860 861 /* create message and drop it on this connections read queue */ 862 if ((mp = dupb((mblk_t *)arg)) == NULL) { 863 return; 864 } 865 mp->b_datap->db_type = M_CTL; 866 867 putnext(connp->conn_rq, mp); 868 } 869 870 /* 871 * Fanout received ICMPv6 error packets to the transports. 872 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 873 */ 874 void 875 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 876 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 877 { 878 uint16_t *up; /* Pointer to ports in ULP header */ 879 uint32_t ports; /* reversed ports for fanout */ 880 ip6_t rip6h; /* With reversed addresses */ 881 uint16_t hdr_length; 882 uint8_t *nexthdrp; 883 uint8_t nexthdr; 884 mblk_t *first_mp; 885 ipsec_in_t *ii; 886 tcpha_t *tcpha; 887 conn_t *connp; 888 889 first_mp = mp; 890 if (mctl_present) { 891 mp = first_mp->b_cont; 892 ASSERT(mp != NULL); 893 894 ii = (ipsec_in_t *)first_mp->b_rptr; 895 ASSERT(ii->ipsec_in_type == IPSEC_IN); 896 } else { 897 ii = NULL; 898 } 899 900 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 901 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 902 903 /* 904 * Need to pullup everything in order to use 905 * ip_hdr_length_nexthdr_v6() 906 */ 907 if (mp->b_cont != NULL) { 908 if (!pullupmsg(mp, -1)) { 909 ip1dbg(("icmp_inbound_error_fanout_v6: " 910 "pullupmsg failed\n")); 911 goto drop_pkt; 912 } 913 ip6h = (ip6_t *)mp->b_rptr; 914 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 915 } 916 917 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 918 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 919 goto drop_pkt; 920 921 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 922 goto drop_pkt; 923 nexthdr = *nexthdrp; 924 925 /* Set message type, must be done after pullups */ 926 mp->b_datap->db_type = M_CTL; 927 928 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 929 /* 930 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 931 * sockets. 932 * 933 * Note I don't like walking every connection to deliver 934 * this information to a set of listeners. A separate 935 * list could be kept to keep the cost of this down. 936 */ 937 ipcl_walk(pkt_too_big, (void *)mp); 938 } 939 940 /* Try to pass the ICMP message to clients who need it */ 941 switch (nexthdr) { 942 case IPPROTO_UDP: { 943 /* 944 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 945 * UDP header to get the port information. 946 */ 947 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 948 mp->b_wptr) { 949 break; 950 } 951 /* 952 * Attempt to find a client stream based on port. 953 * Note that we do a reverse lookup since the header is 954 * in the form we sent it out. 955 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 956 * and we only set the src and dst addresses and nexthdr. 957 */ 958 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 959 rip6h.ip6_src = ip6h->ip6_dst; 960 rip6h.ip6_dst = ip6h->ip6_src; 961 rip6h.ip6_nxt = nexthdr; 962 ((uint16_t *)&ports)[0] = up[1]; 963 ((uint16_t *)&ports)[1] = up[0]; 964 965 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 966 IP6_NO_IPPOLICY, mctl_present, zoneid); 967 return; 968 } 969 case IPPROTO_TCP: { 970 /* 971 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 972 * the TCP header to get the port information. 
973 */ 974 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 975 mp->b_wptr) { 976 break; 977 } 978 979 /* 980 * Attempt to find a client stream based on port. 981 * Note that we do a reverse lookup since the header is 982 * in the form we sent it out. 983 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 984 * we only set the src and dst addresses and nexthdr. 985 */ 986 987 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 988 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 989 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 990 if (connp == NULL) { 991 goto drop_pkt; 992 } 993 994 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 995 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 996 return; 997 998 } 999 case IPPROTO_SCTP: 1000 /* 1001 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1002 * the SCTP header to get the port information. 1003 */ 1004 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1005 mp->b_wptr) { 1006 break; 1007 } 1008 1009 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1010 ((uint16_t *)&ports)[0] = up[1]; 1011 ((uint16_t *)&ports)[1] = up[0]; 1012 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1013 IP6_NO_IPPOLICY, 0, zoneid); 1014 return; 1015 case IPPROTO_ESP: 1016 case IPPROTO_AH: { 1017 int ipsec_rc; 1018 1019 /* 1020 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1021 * We will re-use the IPSEC_IN if it is already present as 1022 * AH/ESP will not affect any fields in the IPSEC_IN for 1023 * ICMP errors. If there is no IPSEC_IN, allocate a new 1024 * one and attach it in the front. 1025 */ 1026 if (ii != NULL) { 1027 /* 1028 * ip_fanout_proto_again converts the ICMP errors 1029 * that come back from AH/ESP to M_DATA so that 1030 * if it is non-AH/ESP and we do a pullupmsg in 1031 * this function, it would work. Convert it back 1032 * to M_CTL before we send up as this is a ICMP 1033 * error. This could have been generated locally or 1034 * by some router. 
Validate the inner IPSEC 1035 * headers. 1036 * 1037 * NOTE : ill_index is used by ip_fanout_proto_again 1038 * to locate the ill. 1039 */ 1040 ASSERT(ill != NULL); 1041 ii->ipsec_in_ill_index = 1042 ill->ill_phyint->phyint_ifindex; 1043 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1044 first_mp->b_cont->b_datap->db_type = M_CTL; 1045 } else { 1046 /* 1047 * IPSEC_IN is not present. We attach a ipsec_in 1048 * message and send up to IPSEC for validating 1049 * and removing the IPSEC headers. Clear 1050 * ipsec_in_secure so that when we return 1051 * from IPSEC, we don't mistakenly think that this 1052 * is a secure packet came from the network. 1053 * 1054 * NOTE : ill_index is used by ip_fanout_proto_again 1055 * to locate the ill. 1056 */ 1057 ASSERT(first_mp == mp); 1058 first_mp = ipsec_in_alloc(B_FALSE); 1059 if (first_mp == NULL) { 1060 freemsg(mp); 1061 BUMP_MIB(&ip_mib, ipInDiscards); 1062 return; 1063 } 1064 ii = (ipsec_in_t *)first_mp->b_rptr; 1065 1066 /* This is not a secure packet */ 1067 ii->ipsec_in_secure = B_FALSE; 1068 first_mp->b_cont = mp; 1069 mp->b_datap->db_type = M_CTL; 1070 ASSERT(ill != NULL); 1071 ii->ipsec_in_ill_index = 1072 ill->ill_phyint->phyint_ifindex; 1073 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1074 } 1075 1076 if (!ipsec_loaded()) { 1077 ip_proto_not_sup(q, first_mp, 0, zoneid); 1078 return; 1079 } 1080 1081 if (nexthdr == IPPROTO_ESP) 1082 ipsec_rc = ipsecesp_icmp_error(first_mp); 1083 else 1084 ipsec_rc = ipsecah_icmp_error(first_mp); 1085 if (ipsec_rc == IPSEC_STATUS_FAILED) 1086 return; 1087 1088 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1089 return; 1090 } 1091 case IPPROTO_ENCAP: 1092 case IPPROTO_IPV6: 1093 if ((uint8_t *)ip6h + hdr_length + 1094 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1095 sizeof (ip6_t)) > mp->b_wptr) 1096 goto drop_pkt; 1097 1098 if (nexthdr == IPPROTO_ENCAP || 1099 !IN6_ARE_ADDR_EQUAL( 1100 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1101 &ip6h->ip6_src) || 1102 !IN6_ARE_ADDR_EQUAL( 1103 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1104 &ip6h->ip6_dst)) { 1105 /* 1106 * For tunnels that have used IPsec protection, 1107 * we need to adjust the MTU to take into account 1108 * the IPsec overhead. 1109 */ 1110 if (ii != NULL) 1111 icmp6->icmp6_mtu = htons( 1112 ntohs(icmp6->icmp6_mtu) - 1113 ipsec_in_extra_length(first_mp)); 1114 } else { 1115 /* 1116 * Self-encapsulated case. As in the ipv4 case, 1117 * we need to strip the 2nd IP header. Since mp 1118 * is already pulled-up, we can simply bcopy 1119 * the 3rd header + data over the 2nd header. 1120 */ 1121 uint16_t unused_len; 1122 ip6_t *inner_ip6h = (ip6_t *) 1123 ((uchar_t *)ip6h + hdr_length); 1124 1125 /* 1126 * Make sure we don't do recursion more than once. 1127 */ 1128 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1129 &unused_len, &nexthdrp) || 1130 *nexthdrp == IPPROTO_IPV6) { 1131 goto drop_pkt; 1132 } 1133 1134 /* 1135 * We are about to modify the packet. Make a copy if 1136 * someone else has a reference to it. 1137 */ 1138 if (DB_REF(mp) > 1) { 1139 mblk_t *mp1; 1140 uint16_t icmp6_offset; 1141 1142 mp1 = copymsg(mp); 1143 if (mp1 == NULL) { 1144 goto drop_pkt; 1145 } 1146 icmp6_offset = (uint16_t) 1147 ((uchar_t *)icmp6 - mp->b_rptr); 1148 freemsg(mp); 1149 mp = mp1; 1150 1151 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1152 ip6h = (ip6_t *)&icmp6[1]; 1153 inner_ip6h = (ip6_t *) 1154 ((uchar_t *)ip6h + hdr_length); 1155 1156 if (mctl_present) 1157 first_mp->b_cont = mp; 1158 else 1159 first_mp = mp; 1160 } 1161 1162 /* 1163 * Need to set db_type back to M_DATA before 1164 * refeeding mp into this function. 
1165 */ 1166 DB_TYPE(mp) = M_DATA; 1167 1168 /* 1169 * Copy the 3rd header + remaining data on top 1170 * of the 2nd header. 1171 */ 1172 bcopy(inner_ip6h, ip6h, 1173 mp->b_wptr - (uchar_t *)inner_ip6h); 1174 1175 /* 1176 * Subtract length of the 2nd header. 1177 */ 1178 mp->b_wptr -= hdr_length; 1179 1180 /* 1181 * Now recurse, and see what I _really_ should be 1182 * doing here. 1183 */ 1184 icmp_inbound_error_fanout_v6(q, first_mp, 1185 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1186 zoneid); 1187 return; 1188 } 1189 /* FALLTHRU */ 1190 default: 1191 /* 1192 * The rip6h header is only used for the lookup and we 1193 * only set the src and dst addresses and nexthdr. 1194 */ 1195 rip6h.ip6_src = ip6h->ip6_dst; 1196 rip6h.ip6_dst = ip6h->ip6_src; 1197 rip6h.ip6_nxt = nexthdr; 1198 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1199 IP6_NO_IPPOLICY, mctl_present, zoneid); 1200 return; 1201 } 1202 /* NOTREACHED */ 1203 drop_pkt: 1204 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1205 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1206 freemsg(first_mp); 1207 } 1208 1209 /* 1210 * Validate the incoming redirect message, if valid redirect 1211 * processing is done later. This is separated from the actual 1212 * redirect processing to avoid becoming single threaded when not 1213 * necessary. (i.e invalid packet) 1214 * Assumes that any AH or ESP headers have already been removed. 1215 * The mp has already been pulled up. 
1216 */ 1217 boolean_t 1218 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1219 { 1220 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1221 nd_redirect_t *rd; 1222 ire_t *ire; 1223 uint16_t len; 1224 uint16_t hdr_length; 1225 1226 ASSERT(mp->b_cont == NULL); 1227 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1228 hdr_length = ip_hdr_length_v6(mp, ip6h); 1229 else 1230 hdr_length = IPV6_HDR_LEN; 1231 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1232 len = mp->b_wptr - mp->b_rptr - hdr_length; 1233 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1234 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1235 (rd->nd_rd_code != 0) || 1236 (len < sizeof (nd_redirect_t)) || 1237 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1238 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1239 return (B_FALSE); 1240 } 1241 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1242 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1243 return (B_FALSE); 1244 } 1245 1246 /* 1247 * Verify that the IP source address of the redirect is 1248 * the same as the current first-hop router for the specified 1249 * ICMP destination address. Just to be cautious, this test 1250 * will be done again before we add the redirect, in case 1251 * router goes away between now and then. 1252 */ 1253 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1254 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1255 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1256 if (ire == NULL) 1257 return (B_FALSE); 1258 ire_refrele(ire); 1259 if (len > sizeof (nd_redirect_t)) { 1260 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1261 len - sizeof (nd_redirect_t))) 1262 return (B_FALSE); 1263 } 1264 return (B_TRUE); 1265 } 1266 1267 /* 1268 * Process received IPv6 ICMP Redirect messages. 1269 * Assumes that the icmp packet has already been verfied to be 1270 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1271 */ 1272 /* ARGSUSED */ 1273 static void 1274 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1275 { 1276 ip6_t *ip6h; 1277 uint16_t hdr_length; 1278 nd_redirect_t *rd; 1279 ire_t *ire; 1280 ire_t *prev_ire; 1281 ire_t *redir_ire; 1282 in6_addr_t *src, *dst, *gateway; 1283 nd_opt_hdr_t *opt; 1284 nce_t *nce; 1285 int nce_flags = 0; 1286 int err = 0; 1287 boolean_t redirect_to_router = B_FALSE; 1288 int len; 1289 iulp_t ulp_info = { 0 }; 1290 ill_t *prev_ire_ill; 1291 ipif_t *ipif; 1292 1293 ip6h = (ip6_t *)mp->b_rptr; 1294 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1295 hdr_length = ip_hdr_length_v6(mp, ip6h); 1296 else 1297 hdr_length = IPV6_HDR_LEN; 1298 1299 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1300 src = &ip6h->ip6_src; 1301 dst = &rd->nd_rd_dst; 1302 gateway = &rd->nd_rd_target; 1303 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1304 redirect_to_router = B_TRUE; 1305 nce_flags |= NCE_F_ISROUTER; 1306 } 1307 /* 1308 * Make sure we had a route for the dest in question and that 1309 * route was pointing to the old gateway (the source of the 1310 * redirect packet.) 1311 */ 1312 ipif = ipif_get_next_ipif(NULL, ill); 1313 if (ipif == NULL) { 1314 freemsg(mp); 1315 return; 1316 } 1317 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1318 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1319 ipif_refrele(ipif); 1320 /* 1321 * Check that 1322 * the redirect was not from ourselves 1323 * old gateway is still directly reachable 1324 */ 1325 if (prev_ire == NULL || 1326 prev_ire->ire_type == IRE_LOCAL) { 1327 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1328 goto fail_redirect; 1329 } 1330 prev_ire_ill = ire_to_ill(prev_ire); 1331 ASSERT(prev_ire_ill != NULL); 1332 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1333 nce_flags |= NCE_F_NONUD; 1334 1335 /* 1336 * Should we use the old ULP info to create the new gateway? From 1337 * a user's perspective, we should inherit the info so that it 1338 * is a "smooth" transition. 
If we do not do that, then new 1339 * connections going thru the new gateway will have no route metrics, 1340 * which is counter-intuitive to user. From a network point of 1341 * view, this may or may not make sense even though the new gateway 1342 * is still directly connected to us so the route metrics should not 1343 * change much. 1344 * 1345 * But if the old ire_uinfo is not initialized, we do another 1346 * recursive lookup on the dest using the new gateway. There may 1347 * be a route to that. If so, use it to initialize the redirect 1348 * route. 1349 */ 1350 if (prev_ire->ire_uinfo.iulp_set) { 1351 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1352 } else if (redirect_to_router) { 1353 /* 1354 * Only do the following if the redirection is really to 1355 * a router. 1356 */ 1357 ire_t *tmp_ire; 1358 ire_t *sire; 1359 1360 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1361 ALL_ZONES, 0, NULL, 1362 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1363 if (sire != NULL) { 1364 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1365 ASSERT(tmp_ire != NULL); 1366 ire_refrele(tmp_ire); 1367 ire_refrele(sire); 1368 } else if (tmp_ire != NULL) { 1369 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1370 sizeof (iulp_t)); 1371 ire_refrele(tmp_ire); 1372 } 1373 } 1374 1375 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1376 opt = (nd_opt_hdr_t *)&rd[1]; 1377 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1378 if (opt != NULL) { 1379 err = ndp_lookup_then_add(ill, 1380 (uchar_t *)&opt[1], /* Link layer address */ 1381 gateway, 1382 &ipv6_all_ones, /* prefix mask */ 1383 &ipv6_all_zeros, /* Mapping mask */ 1384 0, 1385 nce_flags, 1386 ND_STALE, 1387 &nce); 1388 switch (err) { 1389 case 0: 1390 NCE_REFRELE(nce); 1391 break; 1392 case EEXIST: 1393 /* 1394 * Check to see if link layer address has changed and 1395 * process the nce_state accordingly. 
1396 */ 1397 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1398 NCE_REFRELE(nce); 1399 break; 1400 default: 1401 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1402 err)); 1403 goto fail_redirect; 1404 } 1405 } 1406 if (redirect_to_router) { 1407 /* icmp_redirect_ok_v6() must have already verified this */ 1408 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1409 1410 /* 1411 * Create a Route Association. This will allow us to remember 1412 * a router told us to use the particular gateway. 1413 */ 1414 ire = ire_create_v6( 1415 dst, 1416 &ipv6_all_ones, /* mask */ 1417 &prev_ire->ire_src_addr_v6, /* source addr */ 1418 gateway, /* gateway addr */ 1419 &prev_ire->ire_max_frag, /* max frag */ 1420 NULL, /* Fast Path header */ 1421 NULL, /* no rfq */ 1422 NULL, /* no stq */ 1423 IRE_HOST_REDIRECT, 1424 NULL, 1425 prev_ire->ire_ipif, 1426 NULL, 1427 0, 1428 0, 1429 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1430 &ulp_info, 1431 NULL, 1432 NULL); 1433 } else { 1434 /* 1435 * Just create an on link entry, may or may not be a router 1436 * If there is no link layer address option ire_add() won't 1437 * add this. 1438 */ 1439 ire = ire_create_v6( 1440 dst, /* gateway == dst */ 1441 &ipv6_all_ones, /* mask */ 1442 &prev_ire->ire_src_addr_v6, /* source addr */ 1443 &ipv6_all_zeros, /* gateway addr */ 1444 &prev_ire->ire_max_frag, /* max frag */ 1445 NULL, /* Fast Path header */ 1446 prev_ire->ire_rfq, /* ire rfq */ 1447 prev_ire->ire_stq, /* ire stq */ 1448 IRE_CACHE, 1449 NULL, 1450 prev_ire->ire_ipif, 1451 &ipv6_all_ones, 1452 0, 1453 0, 1454 0, 1455 &ulp_info, 1456 NULL, 1457 NULL); 1458 } 1459 if (ire == NULL) 1460 goto fail_redirect; 1461 1462 /* 1463 * XXX If there is no nce i.e there is no target link layer address 1464 * option with the redirect message, ire_add will fail. In that 1465 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1466 * to fix this. 
1467 */ 1468 if (ire_add(&ire, NULL, NULL, NULL) == 0) { 1469 1470 /* tell routing sockets that we received a redirect */ 1471 ip_rts_change_v6(RTM_REDIRECT, 1472 &rd->nd_rd_dst, 1473 &rd->nd_rd_target, 1474 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1475 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1476 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1477 1478 /* 1479 * Delete any existing IRE_HOST_REDIRECT for this destination. 1480 * This together with the added IRE has the effect of 1481 * modifying an existing redirect. 1482 */ 1483 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1484 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1485 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1486 1487 ire_refrele(ire); /* Held in ire_add_v6 */ 1488 1489 if (redir_ire != NULL) { 1490 ire_delete(redir_ire); 1491 ire_refrele(redir_ire); 1492 } 1493 } 1494 1495 if (prev_ire->ire_type == IRE_CACHE) 1496 ire_delete(prev_ire); 1497 ire_refrele(prev_ire); 1498 prev_ire = NULL; 1499 1500 fail_redirect: 1501 if (prev_ire != NULL) 1502 ire_refrele(prev_ire); 1503 freemsg(mp); 1504 } 1505 1506 static ill_t * 1507 ip_queue_to_ill_v6(queue_t *q) 1508 { 1509 ill_t *ill; 1510 1511 ASSERT(WR(q) == q); 1512 1513 if (q->q_next != NULL) { 1514 ill = (ill_t *)q->q_ptr; 1515 if (ILL_CAN_LOOKUP(ill)) 1516 ill_refhold(ill); 1517 else 1518 ill = NULL; 1519 } else { 1520 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1521 NULL, NULL, NULL, NULL, NULL); 1522 } 1523 if (ill == NULL) 1524 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1525 return (ill); 1526 } 1527 1528 /* 1529 * Assigns an appropriate source address to the packet. 1530 * If origdst is one of our IP addresses that use it as the source. 1531 * If the queue is an ill queue then select a source from that ill. 1532 * Otherwise pick a source based on a route lookup back to the origsrc. 1533 * 1534 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1535 */ 1536 static in6_addr_t * 1537 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1538 in6_addr_t *src) 1539 { 1540 ill_t *ill; 1541 ire_t *ire; 1542 ipif_t *ipif; 1543 zoneid_t zoneid; 1544 1545 ASSERT(!(wq->q_flag & QREADR)); 1546 if (wq->q_next != NULL) { 1547 ill = (ill_t *)wq->q_ptr; 1548 zoneid = GLOBAL_ZONEID; 1549 } else { 1550 ill = NULL; 1551 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1552 } 1553 1554 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1555 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1556 if (ire != NULL) { 1557 /* Destined to one of our addresses */ 1558 *src = *origdst; 1559 ire_refrele(ire); 1560 return (src); 1561 } 1562 if (ire != NULL) { 1563 ire_refrele(ire); 1564 ire = NULL; 1565 } 1566 if (ill == NULL) { 1567 /* What is the route back to the original source? */ 1568 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1569 NULL, NULL, zoneid, NULL, 1570 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1571 if (ire == NULL) { 1572 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1573 return (NULL); 1574 } 1575 /* 1576 * Does not matter whether we use ire_stq or ire_ipif here. 1577 * Just pick an ill for ICMP replies. 1578 */ 1579 ASSERT(ire->ire_ipif != NULL); 1580 ill = ire->ire_ipif->ipif_ill; 1581 ire_refrele(ire); 1582 } 1583 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1584 IPV6_PREFER_SRC_DEFAULT, zoneid); 1585 if (ipif != NULL) { 1586 *src = ipif->ipif_v6src_addr; 1587 ipif_refrele(ipif); 1588 return (src); 1589 } 1590 /* 1591 * Unusual case - can't find a usable source address to reach the 1592 * original source. Use what in the route to the source. 
1593 */ 1594 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1595 NULL, NULL, zoneid, NULL, 1596 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1597 if (ire == NULL) { 1598 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1599 return (NULL); 1600 } 1601 ASSERT(ire != NULL); 1602 *src = ire->ire_src_addr_v6; 1603 ire_refrele(ire); 1604 return (src); 1605 } 1606 1607 /* 1608 * Build and ship an IPv6 ICMP message using the packet data in mp, 1609 * and the ICMP header pointed to by "stuff". (May be called as 1610 * writer.) 1611 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1612 * verify that an icmp error packet can be sent. 1613 * 1614 * If q is an ill write side queue (which is the case when packets 1615 * arrive from ip_rput) then ip_wput code will ensure that packets to 1616 * link-local destinations are sent out that ill. 1617 * 1618 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1619 * source address (see above function). 1620 */ 1621 static void 1622 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1623 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1624 { 1625 ip6_t *ip6h; 1626 in6_addr_t v6dst; 1627 size_t len_needed; 1628 size_t msg_len; 1629 mblk_t *mp1; 1630 icmp6_t *icmp6; 1631 ill_t *ill; 1632 in6_addr_t v6src; 1633 mblk_t *ipsec_mp; 1634 ipsec_out_t *io; 1635 1636 ill = ip_queue_to_ill_v6(q); 1637 if (ill == NULL) { 1638 freemsg(mp); 1639 return; 1640 } 1641 1642 if (mctl_present) { 1643 /* 1644 * If it is : 1645 * 1646 * 1) a IPSEC_OUT, then this is caused by outbound 1647 * datagram originating on this host. IPSEC processing 1648 * may or may not have been done. Refer to comments above 1649 * icmp_inbound_error_fanout for details. 1650 * 1651 * 2) a IPSEC_IN if we are generating a icmp_message 1652 * for an incoming datagram destined for us i.e called 1653 * from ip_fanout_send_icmp. 
1654 */ 1655 ipsec_info_t *in; 1656 1657 ipsec_mp = mp; 1658 mp = ipsec_mp->b_cont; 1659 1660 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1661 ip6h = (ip6_t *)mp->b_rptr; 1662 1663 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1664 in->ipsec_info_type == IPSEC_IN); 1665 1666 if (in->ipsec_info_type == IPSEC_IN) { 1667 /* 1668 * Convert the IPSEC_IN to IPSEC_OUT. 1669 */ 1670 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1671 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1672 ill_refrele(ill); 1673 return; 1674 } 1675 } else { 1676 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1677 io = (ipsec_out_t *)in; 1678 /* 1679 * Clear out ipsec_out_proc_begin, so we do a fresh 1680 * ire lookup. 1681 */ 1682 io->ipsec_out_proc_begin = B_FALSE; 1683 } 1684 } else { 1685 /* 1686 * This is in clear. The icmp message we are building 1687 * here should go out in clear. 1688 */ 1689 ipsec_in_t *ii; 1690 ASSERT(mp->b_datap->db_type == M_DATA); 1691 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1692 freemsg(mp); 1693 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1694 ill_refrele(ill); 1695 return; 1696 } 1697 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1698 1699 /* This is not a secure packet */ 1700 ii->ipsec_in_secure = B_FALSE; 1701 ipsec_mp->b_cont = mp; 1702 ip6h = (ip6_t *)mp->b_rptr; 1703 /* 1704 * Convert the IPSEC_IN to IPSEC_OUT. 
1705 */ 1706 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1707 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1708 ill_refrele(ill); 1709 return; 1710 } 1711 } 1712 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1713 1714 if (v6src_ptr != NULL) { 1715 v6src = *v6src_ptr; 1716 } else { 1717 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1718 &v6src) == NULL) { 1719 freemsg(ipsec_mp); 1720 ill_refrele(ill); 1721 return; 1722 } 1723 } 1724 v6dst = ip6h->ip6_src; 1725 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1726 msg_len = msgdsize(mp); 1727 if (msg_len > len_needed) { 1728 if (!adjmsg(mp, len_needed - msg_len)) { 1729 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1730 freemsg(ipsec_mp); 1731 ill_refrele(ill); 1732 return; 1733 } 1734 msg_len = len_needed; 1735 } 1736 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1737 if (mp1 == NULL) { 1738 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1739 freemsg(ipsec_mp); 1740 ill_refrele(ill); 1741 return; 1742 } 1743 ill_refrele(ill); 1744 mp1->b_cont = mp; 1745 mp = mp1; 1746 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1747 io->ipsec_out_type == IPSEC_OUT); 1748 ipsec_mp->b_cont = mp; 1749 1750 /* 1751 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1752 * node generates be accepted in peace by all on-host destinations. 1753 * If we do NOT assume that all on-host destinations trust 1754 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1755 * (Look for ipsec_out_icmp_loopback). 
1756 */ 1757 io->ipsec_out_icmp_loopback = B_TRUE; 1758 1759 ip6h = (ip6_t *)mp->b_rptr; 1760 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1761 1762 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1763 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1764 ip6h->ip6_hops = ipv6_def_hops; 1765 ip6h->ip6_dst = v6dst; 1766 ip6h->ip6_src = v6src; 1767 msg_len += IPV6_HDR_LEN + len; 1768 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1769 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1770 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1771 } 1772 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1773 icmp6 = (icmp6_t *)&ip6h[1]; 1774 bcopy(stuff, (char *)icmp6, len); 1775 /* 1776 * Prepare for checksum by putting icmp length in the icmp 1777 * checksum field. The checksum is calculated in ip_wput_v6. 1778 */ 1779 icmp6->icmp6_cksum = ip6h->ip6_plen; 1780 if (icmp6->icmp6_type == ND_REDIRECT) { 1781 ip6h->ip6_hops = IPV6_MAX_HOPS; 1782 } 1783 /* Send to V6 writeside put routine */ 1784 put(q, ipsec_mp); 1785 } 1786 1787 /* 1788 * Update the output mib when ICMPv6 packets are sent. 
1789 */ 1790 static void 1791 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1792 { 1793 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1794 1795 switch (icmp6->icmp6_type) { 1796 case ICMP6_DST_UNREACH: 1797 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1798 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1799 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1800 break; 1801 1802 case ICMP6_TIME_EXCEEDED: 1803 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1804 break; 1805 1806 case ICMP6_PARAM_PROB: 1807 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1808 break; 1809 1810 case ICMP6_PACKET_TOO_BIG: 1811 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1812 break; 1813 1814 case ICMP6_ECHO_REQUEST: 1815 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1816 break; 1817 1818 case ICMP6_ECHO_REPLY: 1819 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1820 break; 1821 1822 case ND_ROUTER_SOLICIT: 1823 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1824 break; 1825 1826 case ND_ROUTER_ADVERT: 1827 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1828 break; 1829 1830 case ND_NEIGHBOR_SOLICIT: 1831 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1832 break; 1833 1834 case ND_NEIGHBOR_ADVERT: 1835 BUMP_MIB(ill->ill_icmp6_mib, 1836 ipv6IfIcmpOutNeighborAdvertisements); 1837 break; 1838 1839 case ND_REDIRECT: 1840 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1841 break; 1842 1843 case MLD_LISTENER_QUERY: 1844 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1845 break; 1846 1847 case MLD_LISTENER_REPORT: 1848 case MLD_V2_LISTENER_REPORT: 1849 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1850 break; 1851 1852 case MLD_LISTENER_REDUCTION: 1853 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1854 break; 1855 } 1856 } 1857 1858 /* 1859 * Check if it is ok to send an ICMPv6 error packet in 1860 * response to the IP packet in 
mp. 1861 * Free the message and return null if no 1862 * ICMP error packet should be sent. 1863 */ 1864 static mblk_t * 1865 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1866 boolean_t llbcast, boolean_t mcast_ok) 1867 { 1868 ip6_t *ip6h; 1869 1870 if (!mp) 1871 return (NULL); 1872 1873 ip6h = (ip6_t *)mp->b_rptr; 1874 1875 /* Check if source address uniquely identifies the host */ 1876 1877 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1878 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1879 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1880 freemsg(mp); 1881 return (NULL); 1882 } 1883 1884 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1885 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1886 icmp6_t *icmp6; 1887 1888 if (mp->b_wptr - mp->b_rptr < len_needed) { 1889 if (!pullupmsg(mp, len_needed)) { 1890 ill_t *ill; 1891 1892 ill = ip_queue_to_ill_v6(q); 1893 if (ill == NULL) { 1894 BUMP_MIB(&icmp6_mib, 1895 ipv6IfIcmpInErrors); 1896 } else { 1897 BUMP_MIB(ill->ill_icmp6_mib, 1898 ipv6IfIcmpInErrors); 1899 ill_refrele(ill); 1900 } 1901 freemsg(mp); 1902 return (NULL); 1903 } 1904 ip6h = (ip6_t *)mp->b_rptr; 1905 } 1906 icmp6 = (icmp6_t *)&ip6h[1]; 1907 /* Explicitly do not generate errors in response to redirects */ 1908 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1909 icmp6->icmp6_type == ND_REDIRECT) { 1910 freemsg(mp); 1911 return (NULL); 1912 } 1913 } 1914 /* 1915 * Check that the destination is not multicast and that the packet 1916 * was not sent on link layer broadcast or multicast. (Exception 1917 * is Packet too big message as per the draft - when mcast_ok is set.) 1918 */ 1919 if (!mcast_ok && 1920 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1921 freemsg(mp); 1922 return (NULL); 1923 } 1924 if (icmp_err_rate_limit()) { 1925 /* 1926 * Only send ICMP error packets every so often. 1927 * This should be done on a per port/source basis, 1928 * but for now this will suffice. 
1929 */ 1930 freemsg(mp); 1931 return (NULL); 1932 } 1933 return (mp); 1934 } 1935 1936 /* 1937 * Generate an ICMPv6 redirect message. 1938 * Include target link layer address option if it exits. 1939 * Always include redirect header. 1940 */ 1941 static void 1942 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1943 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1944 { 1945 nd_redirect_t *rd; 1946 nd_opt_rd_hdr_t *rdh; 1947 uchar_t *buf; 1948 nce_t *nce = NULL; 1949 nd_opt_hdr_t *opt; 1950 int len; 1951 int ll_opt_len = 0; 1952 int max_redir_hdr_data_len; 1953 int pkt_len; 1954 in6_addr_t *srcp; 1955 1956 /* 1957 * We are called from ip_rput where we could 1958 * not have attached an IPSEC_IN. 1959 */ 1960 ASSERT(mp->b_datap->db_type == M_DATA); 1961 1962 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1963 if (mp == NULL) 1964 return; 1965 nce = ndp_lookup(ill, targetp, B_FALSE); 1966 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1967 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1968 ill->ill_phys_addr_length + 7)/8 * 8; 1969 } 1970 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1971 ASSERT(len % 4 == 0); 1972 buf = kmem_alloc(len, KM_NOSLEEP); 1973 if (buf == NULL) { 1974 if (nce != NULL) 1975 NCE_REFRELE(nce); 1976 freemsg(mp); 1977 return; 1978 } 1979 1980 rd = (nd_redirect_t *)buf; 1981 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1982 rd->nd_rd_code = 0; 1983 rd->nd_rd_reserved = 0; 1984 rd->nd_rd_target = *targetp; 1985 rd->nd_rd_dst = *dest; 1986 1987 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1988 if (nce != NULL && ll_opt_len != 0) { 1989 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1990 opt->nd_opt_len = ll_opt_len/8; 1991 bcopy((char *)nce->nce_res_mp->b_rptr + 1992 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1993 ill->ill_phys_addr_length); 1994 } 1995 if (nce != NULL) 1996 NCE_REFRELE(nce); 1997 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1998 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1999 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2000 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2001 pkt_len = msgdsize(mp); 2002 /* Make sure mp is 8 byte aligned */ 2003 if (pkt_len > max_redir_hdr_data_len) { 2004 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2005 sizeof (nd_opt_rd_hdr_t))/8; 2006 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2007 } else { 2008 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2009 (void) adjmsg(mp, -(pkt_len % 8)); 2010 } 2011 rdh->nd_opt_rh_reserved1 = 0; 2012 rdh->nd_opt_rh_reserved2 = 0; 2013 /* ipif_v6src_addr contains the link-local source address */ 2014 rw_enter(&ill_g_lock, RW_READER); 2015 if (ill->ill_group != NULL) { 2016 /* 2017 * The receiver of the redirect will verify whether it 2018 * had a route through us (srcp that we will use in 2019 * the redirect) or not. As we load spread even link-locals, 2020 * we don't know which source address the receiver of 2021 * redirect has in its route for communicating with us. 2022 * Thus we randomly choose a source here and finally we 2023 * should get to the right one and it will eventually 2024 * accept the redirect from us. We can't call 2025 * ip_lookup_scope_v6 because we don't have the right 2026 * link-local address here. Thus we randomly choose one. 2027 */ 2028 int cnt = ill->ill_group->illgrp_ill_count; 2029 2030 ill = ill->ill_group->illgrp_ill; 2031 cnt = ++icmp_redirect_v6_src_index % cnt; 2032 while (cnt--) 2033 ill = ill->ill_group_next; 2034 srcp = &ill->ill_ipif->ipif_v6src_addr; 2035 } else { 2036 srcp = &ill->ill_ipif->ipif_v6src_addr; 2037 } 2038 rw_exit(&ill_g_lock); 2039 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2040 kmem_free(buf, len); 2041 } 2042 2043 2044 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2045 void 2046 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2047 boolean_t llbcast, boolean_t mcast_ok) 2048 { 2049 icmp6_t icmp6; 2050 boolean_t mctl_present; 2051 mblk_t *first_mp; 2052 2053 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2054 2055 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2056 if (mp == NULL) { 2057 if (mctl_present) 2058 freeb(first_mp); 2059 return; 2060 } 2061 bzero(&icmp6, sizeof (icmp6_t)); 2062 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2063 icmp6.icmp6_code = code; 2064 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2065 } 2066 2067 /* 2068 * Generate an ICMP unreachable message. 2069 */ 2070 void 2071 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2072 boolean_t llbcast, boolean_t mcast_ok) 2073 { 2074 icmp6_t icmp6; 2075 boolean_t mctl_present; 2076 mblk_t *first_mp; 2077 2078 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2079 2080 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2081 if (mp == NULL) { 2082 if (mctl_present) 2083 freeb(first_mp); 2084 return; 2085 } 2086 bzero(&icmp6, sizeof (icmp6_t)); 2087 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2088 icmp6.icmp6_code = code; 2089 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2090 } 2091 2092 /* 2093 * Generate an ICMP pkt too big message. 
2094 */ 2095 static void 2096 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2097 boolean_t llbcast, boolean_t mcast_ok) 2098 { 2099 icmp6_t icmp6; 2100 mblk_t *first_mp; 2101 boolean_t mctl_present; 2102 2103 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2104 2105 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2106 if (mp == NULL) { 2107 if (mctl_present) 2108 freeb(first_mp); 2109 return; 2110 } 2111 bzero(&icmp6, sizeof (icmp6_t)); 2112 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2113 icmp6.icmp6_code = 0; 2114 icmp6.icmp6_mtu = htonl(mtu); 2115 2116 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2117 } 2118 2119 /* 2120 * Generate an ICMP parameter problem message. (May be called as writer.) 2121 * 'offset' is the offset from the beginning of the packet in error. 2122 */ 2123 static void 2124 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2125 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2126 { 2127 icmp6_t icmp6; 2128 boolean_t mctl_present; 2129 mblk_t *first_mp; 2130 2131 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2132 2133 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2134 if (mp == NULL) { 2135 if (mctl_present) 2136 freeb(first_mp); 2137 return; 2138 } 2139 bzero((char *)&icmp6, sizeof (icmp6_t)); 2140 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2141 icmp6.icmp6_code = code; 2142 icmp6.icmp6_pptr = htonl(offset); 2143 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2144 } 2145 2146 /* 2147 * This code will need to take into account the possibility of binding 2148 * to a link local address on a multi-homed host, in which case the 2149 * outgoing interface (from the conn) will need to be used when getting 2150 * an ire for the dst. 
Going through proper outgoing interface and 2151 * choosing the source address corresponding to the outgoing interface 2152 * is necessary when the destination address is a link-local address and 2153 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2154 * This can happen when active connection is setup; thus ipp pointer 2155 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2156 * pointer is passed as ipp pointer. 2157 */ 2158 mblk_t * 2159 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2160 { 2161 ssize_t len; 2162 int protocol; 2163 struct T_bind_req *tbr; 2164 sin6_t *sin6; 2165 ipa6_conn_t *ac6; 2166 in6_addr_t *v6srcp; 2167 in6_addr_t *v6dstp; 2168 uint16_t lport; 2169 uint16_t fport; 2170 uchar_t *ucp; 2171 mblk_t *mp1; 2172 boolean_t ire_requested; 2173 boolean_t ipsec_policy_set; 2174 int error = 0; 2175 boolean_t local_bind; 2176 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2177 ipa6_conn_x_t *acx6; 2178 boolean_t verify_dst; 2179 2180 ASSERT(connp->conn_af_isv6); 2181 len = mp->b_wptr - mp->b_rptr; 2182 if (len < (sizeof (*tbr) + 1)) { 2183 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2184 "ip_bind_v6: bogus msg, len %ld", len); 2185 goto bad_addr; 2186 } 2187 /* Back up and extract the protocol identifier. */ 2188 mp->b_wptr--; 2189 tbr = (struct T_bind_req *)mp->b_rptr; 2190 /* Reset the message type in preparation for shipping it back. */ 2191 mp->b_datap->db_type = M_PCPROTO; 2192 2193 protocol = *mp->b_wptr & 0xFF; 2194 connp->conn_ulp = (uint8_t)protocol; 2195 2196 /* 2197 * Check for a zero length address. This is from a protocol that 2198 * wants to register to receive all packets of its type. 2199 */ 2200 if (tbr->ADDR_length == 0) { 2201 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2202 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2203 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2204 /* 2205 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2206 * Do not allow others to bind to these. 2207 */ 2208 goto bad_addr; 2209 } 2210 2211 /* 2212 * 2213 * The udp module never sends down a zero-length address, 2214 * and allowing this on a labeled system will break MLP 2215 * functionality. 2216 */ 2217 if (is_system_labeled() && protocol == IPPROTO_UDP) 2218 goto bad_addr; 2219 2220 /* Allow ipsec plumbing */ 2221 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2222 protocol != IPPROTO_ESP) 2223 goto bad_addr; 2224 2225 connp->conn_srcv6 = ipv6_all_zeros; 2226 ipcl_proto_insert_v6(connp, protocol); 2227 2228 tbr->PRIM_type = T_BIND_ACK; 2229 return (mp); 2230 } 2231 2232 /* Extract the address pointer from the message. */ 2233 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2234 tbr->ADDR_length); 2235 if (ucp == NULL) { 2236 ip1dbg(("ip_bind_v6: no address\n")); 2237 goto bad_addr; 2238 } 2239 if (!OK_32PTR(ucp)) { 2240 ip1dbg(("ip_bind_v6: unaligned address\n")); 2241 goto bad_addr; 2242 } 2243 mp1 = mp->b_cont; /* trailing mp if any */ 2244 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2245 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2246 2247 switch (tbr->ADDR_length) { 2248 default: 2249 ip1dbg(("ip_bind_v6: bad address length %d\n", 2250 (int)tbr->ADDR_length)); 2251 goto bad_addr; 2252 2253 case IPV6_ADDR_LEN: 2254 /* Verification of local address only */ 2255 v6srcp = (in6_addr_t *)ucp; 2256 lport = 0; 2257 local_bind = B_TRUE; 2258 break; 2259 2260 case sizeof (sin6_t): 2261 sin6 = (sin6_t *)ucp; 2262 v6srcp = &sin6->sin6_addr; 2263 lport = sin6->sin6_port; 2264 local_bind = B_TRUE; 2265 break; 2266 2267 case sizeof (ipa6_conn_t): 2268 /* 2269 * Verify that both the source and destination addresses 2270 * are valid. 2271 * Note that we allow connect to broadcast and multicast 2272 * addresses when ire_requested is set. Thus the ULP 2273 * has to check for IRE_BROADCAST and multicast. 
2274 */ 2275 ac6 = (ipa6_conn_t *)ucp; 2276 v6srcp = &ac6->ac6_laddr; 2277 v6dstp = &ac6->ac6_faddr; 2278 fport = ac6->ac6_fport; 2279 /* For raw socket, the local port is not set. */ 2280 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2281 connp->conn_lport; 2282 local_bind = B_FALSE; 2283 /* Always verify destination reachability. */ 2284 verify_dst = B_TRUE; 2285 break; 2286 2287 case sizeof (ipa6_conn_x_t): 2288 /* 2289 * Verify that the source address is valid. 2290 * Note that we allow connect to broadcast and multicast 2291 * addresses when ire_requested is set. Thus the ULP 2292 * has to check for IRE_BROADCAST and multicast. 2293 */ 2294 acx6 = (ipa6_conn_x_t *)ucp; 2295 ac6 = &acx6->ac6x_conn; 2296 v6srcp = &ac6->ac6_laddr; 2297 v6dstp = &ac6->ac6_faddr; 2298 fport = ac6->ac6_fport; 2299 lport = ac6->ac6_lport; 2300 local_bind = B_FALSE; 2301 /* 2302 * Client that passed ipa6_conn_x_t to us specifies whether to 2303 * verify destination reachability. 2304 */ 2305 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2306 break; 2307 } 2308 if (local_bind) { 2309 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2310 /* Bind to IPv4 address */ 2311 ipaddr_t v4src; 2312 2313 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2314 2315 error = ip_bind_laddr(connp, mp, v4src, lport, 2316 ire_requested, ipsec_policy_set, 2317 tbr->ADDR_length != IPV6_ADDR_LEN); 2318 if (error != 0) 2319 goto bad_addr; 2320 connp->conn_pkt_isv6 = B_FALSE; 2321 } else { 2322 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2323 error = 0; 2324 goto bad_addr; 2325 } 2326 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2327 ire_requested, ipsec_policy_set, 2328 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2329 if (error != 0) 2330 goto bad_addr; 2331 connp->conn_pkt_isv6 = B_TRUE; 2332 } 2333 if (protocol == IPPROTO_TCP) 2334 connp->conn_recv = tcp_conn_request; 2335 } else { 2336 /* 2337 * Bind to local and remote address. 
Local might be 2338 * unspecified in which case it will be extracted from 2339 * ire_src_addr_v6 2340 */ 2341 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2342 /* Connect to IPv4 address */ 2343 ipaddr_t v4src; 2344 ipaddr_t v4dst; 2345 2346 /* Is the source unspecified or mapped? */ 2347 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2348 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2349 ip1dbg(("ip_bind_v6: " 2350 "dst is mapped, but not the src\n")); 2351 goto bad_addr; 2352 } 2353 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2354 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2355 2356 /* 2357 * XXX Fix needed. Need to pass ipsec_policy_set 2358 * instead of B_FALSE. 2359 */ 2360 2361 /* Always verify destination reachability. */ 2362 error = ip_bind_connected(connp, mp, &v4src, lport, 2363 v4dst, fport, ire_requested, ipsec_policy_set, 2364 B_TRUE, B_TRUE); 2365 if (error != 0) 2366 goto bad_addr; 2367 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2368 connp->conn_pkt_isv6 = B_FALSE; 2369 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2370 ip1dbg(("ip_bind_v6: " 2371 "src is mapped, but not the dst\n")); 2372 goto bad_addr; 2373 } else { 2374 error = ip_bind_connected_v6(connp, mp, v6srcp, 2375 lport, v6dstp, ipp, fport, ire_requested, 2376 ipsec_policy_set, B_TRUE, verify_dst); 2377 if (error != 0) 2378 goto bad_addr; 2379 connp->conn_pkt_isv6 = B_TRUE; 2380 } 2381 if (protocol == IPPROTO_TCP) 2382 connp->conn_recv = tcp_input; 2383 } 2384 /* Update qinfo if v4/v6 changed */ 2385 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2386 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2387 if (connp->conn_pkt_isv6) 2388 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2389 else 2390 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2391 } 2392 2393 /* 2394 * Pass the IPSEC headers size in ire_ipsec_overhead. 2395 * We can't do this in ip_bind_insert_ire because the policy 2396 * may not have been inherited at that point in time and hence 2397 * conn_out_enforce_policy may not be set. 
2398 */ 2399 mp1 = mp->b_cont; 2400 if (ire_requested && connp->conn_out_enforce_policy && 2401 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2402 ire_t *ire = (ire_t *)mp1->b_rptr; 2403 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2404 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2405 } 2406 2407 /* Send it home. */ 2408 mp->b_datap->db_type = M_PCPROTO; 2409 tbr->PRIM_type = T_BIND_ACK; 2410 return (mp); 2411 2412 bad_addr: 2413 if (error == EINPROGRESS) 2414 return (NULL); 2415 if (error > 0) 2416 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2417 else 2418 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2419 return (mp); 2420 } 2421 2422 /* 2423 * Here address is verified to be a valid local address. 2424 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2425 * address is also considered a valid local address. 2426 * In the case of a multicast address, however, the 2427 * upper protocol is expected to reset the src address 2428 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2429 * no packets are emitted with multicast address as 2430 * source address. 2431 * The addresses valid for bind are: 2432 * (1) - in6addr_any 2433 * (2) - IP address of an UP interface 2434 * (3) - IP address of a DOWN interface 2435 * (4) - a multicast address. In this case 2436 * the conn will only receive packets destined to 2437 * the specified multicast address. Note: the 2438 * application still has to issue an 2439 * IPV6_JOIN_GROUP socket option. 2440 * 2441 * In all the above cases, the bound address must be valid in the current zone. 2442 * When the address is loopback or multicast, there might be many matching IREs 2443 * so bind has to look up based on the zone. 
2444 */ 2445 static int 2446 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2447 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2448 boolean_t fanout_insert) 2449 { 2450 int error = 0; 2451 ire_t *src_ire = NULL; 2452 ipif_t *ipif = NULL; 2453 mblk_t *policy_mp; 2454 zoneid_t zoneid; 2455 2456 if (ipsec_policy_set) 2457 policy_mp = mp->b_cont; 2458 2459 /* 2460 * If it was previously connected, conn_fully_bound would have 2461 * been set. 2462 */ 2463 connp->conn_fully_bound = B_FALSE; 2464 2465 zoneid = connp->conn_zoneid; 2466 2467 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2468 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2469 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2470 /* 2471 * If an address other than in6addr_any is requested, 2472 * we verify that it is a valid address for bind 2473 * Note: Following code is in if-else-if form for 2474 * readability compared to a condition check. 2475 */ 2476 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2477 if (IRE_IS_LOCAL(src_ire)) { 2478 /* 2479 * (2) Bind to address of local UP interface 2480 */ 2481 ipif = src_ire->ire_ipif; 2482 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2483 ipif_t *multi_ipif = NULL; 2484 ire_t *save_ire; 2485 /* 2486 * (4) bind to multicast address. 2487 * Fake out the IRE returned to upper 2488 * layer to be a broadcast IRE in 2489 * ip_bind_insert_ire_v6(). 2490 * Pass other information that matches 2491 * the ipif (e.g. the source address). 
2492 * conn_multicast_ill is only used for 2493 * IPv6 packets 2494 */ 2495 mutex_enter(&connp->conn_lock); 2496 if (connp->conn_multicast_ill != NULL) { 2497 (void) ipif_lookup_zoneid( 2498 connp->conn_multicast_ill, zoneid, 0, 2499 &multi_ipif); 2500 } else { 2501 /* 2502 * Look for default like 2503 * ip_wput_v6 2504 */ 2505 multi_ipif = ipif_lookup_group_v6( 2506 &ipv6_unspecified_group, zoneid); 2507 } 2508 mutex_exit(&connp->conn_lock); 2509 save_ire = src_ire; 2510 src_ire = NULL; 2511 if (multi_ipif == NULL || !ire_requested || 2512 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2513 src_ire = save_ire; 2514 error = EADDRNOTAVAIL; 2515 } else { 2516 ASSERT(src_ire != NULL); 2517 if (save_ire != NULL) 2518 ire_refrele(save_ire); 2519 } 2520 if (multi_ipif != NULL) 2521 ipif_refrele(multi_ipif); 2522 } else { 2523 *mp->b_wptr++ = (char)connp->conn_ulp; 2524 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2525 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2526 if (ipif == NULL) { 2527 if (error == EINPROGRESS) { 2528 if (src_ire != NULL) 2529 ire_refrele(src_ire); 2530 return (error); 2531 } 2532 /* 2533 * Not a valid address for bind 2534 */ 2535 error = EADDRNOTAVAIL; 2536 } else { 2537 ipif_refrele(ipif); 2538 } 2539 /* 2540 * Just to keep it consistent with the processing in 2541 * ip_bind_v6(). 2542 */ 2543 mp->b_wptr--; 2544 } 2545 2546 if (error != 0) { 2547 /* Red Alert! Attempting to be a bogon! */ 2548 if (ip_debug > 2) { 2549 /* ip1dbg */ 2550 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2551 " address %s\n", AF_INET6, v6src); 2552 } 2553 goto bad_addr; 2554 } 2555 } 2556 2557 /* 2558 * Allow setting new policies. For example, disconnects come 2559 * down as ipa_t bind. As we would have set conn_policy_cached 2560 * to B_TRUE before, we should set it to B_FALSE, so that policy 2561 * can change after the disconnect. 
2562 */ 2563 connp->conn_policy_cached = B_FALSE; 2564 2565 /* If not fanout_insert this was just an address verification */ 2566 if (fanout_insert) { 2567 /* 2568 * The addresses have been verified. Time to insert in 2569 * the correct fanout list. 2570 */ 2571 connp->conn_srcv6 = *v6src; 2572 connp->conn_remv6 = ipv6_all_zeros; 2573 connp->conn_lport = lport; 2574 connp->conn_fport = 0; 2575 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2576 } 2577 if (error == 0) { 2578 if (ire_requested) { 2579 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2580 error = -1; 2581 goto bad_addr; 2582 } 2583 } else if (ipsec_policy_set) { 2584 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2585 error = -1; 2586 goto bad_addr; 2587 } 2588 } 2589 } 2590 bad_addr: 2591 if (error != 0) { 2592 if (connp->conn_anon_port) { 2593 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2594 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2595 B_FALSE); 2596 } 2597 connp->conn_mlp_type = mlptSingle; 2598 } 2599 2600 if (src_ire != NULL) 2601 ire_refrele(src_ire); 2602 2603 if (ipsec_policy_set) { 2604 ASSERT(policy_mp != NULL); 2605 freeb(policy_mp); 2606 /* 2607 * As of now assume that nothing else accompanies 2608 * IPSEC_POLICY_SET. 
2609 */ 2610 mp->b_cont = NULL; 2611 } 2612 return (error); 2613 } 2614 2615 /* ARGSUSED */ 2616 static void 2617 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2618 void *dummy_arg) 2619 { 2620 conn_t *connp = NULL; 2621 t_scalar_t prim; 2622 2623 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2624 2625 if (CONN_Q(q)) 2626 connp = Q_TO_CONN(q); 2627 ASSERT(connp != NULL); 2628 2629 prim = ((union T_primitives *)mp->b_rptr)->type; 2630 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2631 2632 if (IPCL_IS_TCP(connp)) { 2633 /* Pass sticky_ipp for scope_id and pktinfo */ 2634 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2635 } else { 2636 /* For UDP and ICMP */ 2637 mp = ip_bind_v6(q, mp, connp, NULL); 2638 } 2639 if (mp != NULL) { 2640 if (IPCL_IS_TCP(connp)) { 2641 CONN_INC_REF(connp); 2642 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2643 connp, SQTAG_TCP_RPUTOTHER); 2644 } else if (IPCL_IS_UDP(connp)) { 2645 udp_resume_bind(connp, mp); 2646 } else { 2647 qreply(q, mp); 2648 CONN_OPER_PENDING_DONE(connp); 2649 } 2650 } 2651 } 2652 2653 /* 2654 * Verify that both the source and destination addresses 2655 * are valid. If verify_dst, then destination address must also be reachable, 2656 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2657 * It takes ip6_pkt_t * as one of the arguments to determine correct 2658 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2659 * destination address. Note that parameter ipp is only useful for TCP connect 2660 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2661 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2662 * 2663 */ 2664 static int 2665 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2666 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2667 boolean_t ire_requested, boolean_t ipsec_policy_set, 2668 boolean_t fanout_insert, boolean_t verify_dst) 2669 { 2670 ire_t *src_ire; 2671 ire_t *dst_ire; 2672 int error = 0; 2673 int protocol; 2674 mblk_t *policy_mp; 2675 ire_t *sire = NULL; 2676 ire_t *md_dst_ire = NULL; 2677 ill_t *md_ill = NULL; 2678 ill_t *dst_ill = NULL; 2679 ipif_t *src_ipif = NULL; 2680 zoneid_t zoneid; 2681 boolean_t ill_held = B_FALSE; 2682 2683 src_ire = dst_ire = NULL; 2684 /* 2685 * NOTE: The protocol is beyond the wptr because that's how 2686 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2687 */ 2688 protocol = *mp->b_wptr & 0xFF; 2689 2690 /* 2691 * If we never got a disconnect before, clear it now. 2692 */ 2693 connp->conn_fully_bound = B_FALSE; 2694 2695 if (ipsec_policy_set) { 2696 policy_mp = mp->b_cont; 2697 } 2698 2699 zoneid = connp->conn_zoneid; 2700 2701 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2702 ipif_t *ipif; 2703 2704 /* 2705 * Use an "emulated" IRE_BROADCAST to tell the transport it 2706 * is a multicast. 2707 * Pass other information that matches 2708 * the ipif (e.g. the source address). 
2709 * 2710 * conn_multicast_ill is only used for IPv6 packets 2711 */ 2712 mutex_enter(&connp->conn_lock); 2713 if (connp->conn_multicast_ill != NULL) { 2714 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2715 zoneid, 0, &ipif); 2716 } else { 2717 /* Look for default like ip_wput_v6 */ 2718 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2719 } 2720 mutex_exit(&connp->conn_lock); 2721 if (ipif == NULL || !ire_requested || 2722 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2723 if (ipif != NULL) 2724 ipif_refrele(ipif); 2725 if (ip_debug > 2) { 2726 /* ip1dbg */ 2727 pr_addr_dbg("ip_bind_connected_v6: bad " 2728 "connected multicast %s\n", AF_INET6, 2729 v6dst); 2730 } 2731 error = ENETUNREACH; 2732 goto bad_addr; 2733 } 2734 if (ipif != NULL) 2735 ipif_refrele(ipif); 2736 } else { 2737 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2738 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2739 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2740 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2741 /* 2742 * We also prevent ire's with src address INADDR_ANY to 2743 * be used, which are created temporarily for 2744 * sending out packets from endpoints that have 2745 * conn_unspec_src set. 2746 */ 2747 if (dst_ire == NULL || 2748 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2749 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2750 /* 2751 * When verifying destination reachability, we always 2752 * complain. 2753 * 2754 * When not verifying destination reachability but we 2755 * found an IRE, i.e. the destination is reachable, 2756 * then the other tests still apply and we complain. 
2757 */ 2758 if (verify_dst || (dst_ire != NULL)) { 2759 if (ip_debug > 2) { 2760 /* ip1dbg */ 2761 pr_addr_dbg("ip_bind_connected_v6: bad" 2762 " connected dst %s\n", AF_INET6, 2763 v6dst); 2764 } 2765 if (dst_ire == NULL || 2766 !(dst_ire->ire_type & IRE_HOST)) { 2767 error = ENETUNREACH; 2768 } else { 2769 error = EHOSTUNREACH; 2770 } 2771 goto bad_addr; 2772 } 2773 } 2774 } 2775 2776 /* 2777 * We now know that routing will allow us to reach the destination. 2778 * Check whether Trusted Solaris policy allows communication with this 2779 * host, and pretend that the destination is unreachable if not. 2780 * 2781 * This is never a problem for TCP, since that transport is known to 2782 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2783 * handling. If the remote is unreachable, it will be detected at that 2784 * point, so there's no reason to check it here. 2785 * 2786 * Note that for sendto (and other datagram-oriented friends), this 2787 * check is done as part of the data path label computation instead. 2788 * The check here is just to make non-TCP connect() report the right 2789 * error. 2790 */ 2791 if (dst_ire != NULL && is_system_labeled() && 2792 !IPCL_IS_TCP(connp) && 2793 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2794 connp->conn_mac_exempt) != 0) { 2795 error = EHOSTUNREACH; 2796 if (ip_debug > 2) { 2797 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2798 AF_INET6, v6dst); 2799 } 2800 goto bad_addr; 2801 } 2802 2803 /* 2804 * If the app does a connect(), it means that it will most likely 2805 * send more than 1 packet to the destination. It makes sense 2806 * to clear the temporary flag. 
2807 */ 2808 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2809 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2810 irb_t *irb = dst_ire->ire_bucket; 2811 2812 rw_enter(&irb->irb_lock, RW_WRITER); 2813 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2814 irb->irb_tmp_ire_cnt--; 2815 rw_exit(&irb->irb_lock); 2816 } 2817 2818 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2819 2820 /* 2821 * See if we should notify ULP about MDT; we do this whether or not 2822 * ire_requested is TRUE, in order to handle active connects; MDT 2823 * eligibility tests for passive connects are handled separately 2824 * through tcp_adapt_ire(). We do this before the source address 2825 * selection, because dst_ire may change after a call to 2826 * ipif_select_source_v6(). This is a best-effort check, as the 2827 * packet for this connection may not actually go through 2828 * dst_ire->ire_stq, and the exact IRE can only be known after 2829 * calling ip_newroute_v6(). This is why we further check on the 2830 * IRE during Multidata packet transmission in tcp_multisend(). 
2831 */ 2832 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2833 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2834 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2835 ILL_MDT_CAPABLE(md_ill)) { 2836 md_dst_ire = dst_ire; 2837 IRE_REFHOLD(md_dst_ire); 2838 } 2839 2840 if (dst_ire != NULL && 2841 dst_ire->ire_type == IRE_LOCAL && 2842 dst_ire->ire_zoneid != zoneid && 2843 dst_ire->ire_zoneid != ALL_ZONES) { 2844 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2845 zoneid, 0, NULL, 2846 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2847 MATCH_IRE_RJ_BHOLE); 2848 if (src_ire == NULL) { 2849 error = EHOSTUNREACH; 2850 goto bad_addr; 2851 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2852 if (!(src_ire->ire_type & IRE_HOST)) 2853 error = ENETUNREACH; 2854 else 2855 error = EHOSTUNREACH; 2856 goto bad_addr; 2857 } 2858 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2859 src_ipif = src_ire->ire_ipif; 2860 ipif_refhold(src_ipif); 2861 *v6src = src_ipif->ipif_v6lcl_addr; 2862 } 2863 ire_refrele(src_ire); 2864 src_ire = NULL; 2865 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2866 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2867 *v6src = sire->ire_src_addr_v6; 2868 ire_refrele(dst_ire); 2869 dst_ire = sire; 2870 sire = NULL; 2871 } else if (dst_ire->ire_type == IRE_CACHE && 2872 (dst_ire->ire_flags & RTF_SETSRC)) { 2873 ASSERT(dst_ire->ire_zoneid == zoneid || 2874 dst_ire->ire_zoneid == ALL_ZONES); 2875 *v6src = dst_ire->ire_src_addr_v6; 2876 } else { 2877 /* 2878 * Pick a source address so that a proper inbound load 2879 * spreading would happen. Use dst_ill specified by the 2880 * app. when socket option or scopeid is set. 
2881 */ 2882 int err; 2883 2884 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2885 uint_t if_index; 2886 2887 /* 2888 * Scope id or IPV6_PKTINFO 2889 */ 2890 2891 if_index = ipp->ipp_ifindex; 2892 dst_ill = ill_lookup_on_ifindex( 2893 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2894 if (dst_ill == NULL) { 2895 ip1dbg(("ip_bind_connected_v6:" 2896 " bad ifindex %d\n", if_index)); 2897 error = EADDRNOTAVAIL; 2898 goto bad_addr; 2899 } 2900 ill_held = B_TRUE; 2901 } else if (connp->conn_outgoing_ill != NULL) { 2902 /* 2903 * For IPV6_BOUND_IF socket option, 2904 * conn_outgoing_ill should be set 2905 * already in TCP or UDP/ICMP. 2906 */ 2907 dst_ill = conn_get_held_ill(connp, 2908 &connp->conn_outgoing_ill, &err); 2909 if (err == ILL_LOOKUP_FAILED) { 2910 ip1dbg(("ip_bind_connected_v6:" 2911 "no ill for bound_if\n")); 2912 error = EADDRNOTAVAIL; 2913 goto bad_addr; 2914 } 2915 ill_held = B_TRUE; 2916 } else if (dst_ire->ire_stq != NULL) { 2917 /* No need to hold ill here */ 2918 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2919 } else { 2920 /* No need to hold ill here */ 2921 dst_ill = dst_ire->ire_ipif->ipif_ill; 2922 } 2923 if (!ip6_asp_can_lookup()) { 2924 *mp->b_wptr++ = (char)protocol; 2925 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2926 ip_bind_connected_resume_v6); 2927 error = EINPROGRESS; 2928 goto refrele_and_quit; 2929 } 2930 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2931 RESTRICT_TO_NONE, connp->conn_src_preferences, 2932 zoneid); 2933 ip6_asp_table_refrele(); 2934 if (src_ipif == NULL) { 2935 pr_addr_dbg("ip_bind_connected_v6: " 2936 "no usable source address for " 2937 "connection to %s\n", AF_INET6, v6dst); 2938 error = EADDRNOTAVAIL; 2939 goto bad_addr; 2940 } 2941 *v6src = src_ipif->ipif_v6lcl_addr; 2942 } 2943 } 2944 2945 /* 2946 * We do ire_route_lookup_v6() here (and not an interface lookup) 2947 * as we assert that v6src should only come from an 2948 * UP interface for hard binding. 
2949 */ 2950 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2951 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2952 2953 /* src_ire must be a local|loopback */ 2954 if (!IRE_IS_LOCAL(src_ire)) { 2955 if (ip_debug > 2) { 2956 /* ip1dbg */ 2957 pr_addr_dbg("ip_bind_connected_v6: bad " 2958 "connected src %s\n", AF_INET6, v6src); 2959 } 2960 error = EADDRNOTAVAIL; 2961 goto bad_addr; 2962 } 2963 2964 /* 2965 * If the source address is a loopback address, the 2966 * destination had best be local or multicast. 2967 * The transports that can't handle multicast will reject 2968 * those addresses. 2969 */ 2970 if (src_ire->ire_type == IRE_LOOPBACK && 2971 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2972 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2973 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2974 error = -1; 2975 goto bad_addr; 2976 } 2977 /* 2978 * Allow setting new policies. For example, disconnects come 2979 * down as ipa_t bind. As we would have set conn_policy_cached 2980 * to B_TRUE before, we should set it to B_FALSE, so that policy 2981 * can change after the disconnect. 2982 */ 2983 connp->conn_policy_cached = B_FALSE; 2984 2985 /* 2986 * The addresses have been verified. Initialize the conn 2987 * before calling the policy as they expect the conns 2988 * initialized. 2989 */ 2990 connp->conn_srcv6 = *v6src; 2991 connp->conn_remv6 = *v6dst; 2992 connp->conn_lport = lport; 2993 connp->conn_fport = fport; 2994 2995 ASSERT(!(ipsec_policy_set && ire_requested)); 2996 if (ire_requested) { 2997 iulp_t *ulp_info = NULL; 2998 2999 /* 3000 * Note that sire will not be NULL if this is an off-link 3001 * connection and there is not cache for that dest yet. 3002 * 3003 * XXX Because of an existing bug, if there are multiple 3004 * default routes, the IRE returned now may not be the actual 3005 * default route used (default routes are chosen in a 3006 * round robin fashion). 
So if the metrics for different 3007 * default routes are different, we may return the wrong 3008 * metrics. This will not be a problem if the existing 3009 * bug is fixed. 3010 */ 3011 if (sire != NULL) 3012 ulp_info = &(sire->ire_uinfo); 3013 3014 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3015 error = -1; 3016 goto bad_addr; 3017 } 3018 } else if (ipsec_policy_set) { 3019 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3020 error = -1; 3021 goto bad_addr; 3022 } 3023 } 3024 3025 /* 3026 * Cache IPsec policy in this conn. If we have per-socket policy, 3027 * we'll cache that. If we don't, we'll inherit global policy. 3028 * 3029 * We can't insert until the conn reflects the policy. Note that 3030 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3031 * connections where we don't have a policy. This is to prevent 3032 * global policy lookups in the inbound path. 3033 * 3034 * If we insert before we set conn_policy_cached, 3035 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3036 * because global policy cound be non-empty. We normally call 3037 * ipsec_check_policy() for conn_policy_cached connections only if 3038 * conn_in_enforce_policy is set. But in this case, 3039 * conn_policy_cached can get set anytime since we made the 3040 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3041 * is called, which will make the above assumption false. Thus, we 3042 * need to insert after we set conn_policy_cached. 3043 */ 3044 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3045 goto bad_addr; 3046 3047 /* If not fanout_insert this was just an address verification */ 3048 if (fanout_insert) { 3049 /* 3050 * The addresses have been verified. Time to insert in 3051 * the correct fanout list. 3052 */ 3053 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3054 connp->conn_ports, 3055 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3056 } 3057 if (error == 0) { 3058 connp->conn_fully_bound = B_TRUE; 3059 /* 3060 * Our initial checks for MDT have passed; the IRE is not 3061 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3062 * be supporting MDT. Pass the IRE, IPC and ILL into 3063 * ip_mdinfo_return(), which performs further checks 3064 * against them and upon success, returns the MDT info 3065 * mblk which we will attach to the bind acknowledgment. 3066 */ 3067 if (md_dst_ire != NULL) { 3068 mblk_t *mdinfo_mp; 3069 3070 ASSERT(md_ill != NULL); 3071 ASSERT(md_ill->ill_mdt_capab != NULL); 3072 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3073 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3074 linkb(mp, mdinfo_mp); 3075 } 3076 } 3077 bad_addr: 3078 if (ipsec_policy_set) { 3079 ASSERT(policy_mp != NULL); 3080 freeb(policy_mp); 3081 /* 3082 * As of now assume that nothing else accompanies 3083 * IPSEC_POLICY_SET. 3084 */ 3085 mp->b_cont = NULL; 3086 } 3087 refrele_and_quit: 3088 if (src_ire != NULL) 3089 IRE_REFRELE(src_ire); 3090 if (dst_ire != NULL) 3091 IRE_REFRELE(dst_ire); 3092 if (sire != NULL) 3093 IRE_REFRELE(sire); 3094 if (src_ipif != NULL) 3095 ipif_refrele(src_ipif); 3096 if (md_dst_ire != NULL) 3097 IRE_REFRELE(md_dst_ire); 3098 if (ill_held && dst_ill != NULL) 3099 ill_refrele(dst_ill); 3100 return (error); 3101 } 3102 3103 /* 3104 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3105 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3106 */ 3107 static boolean_t 3108 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3109 iulp_t *ulp_info) 3110 { 3111 mblk_t *mp1; 3112 ire_t *ret_ire; 3113 3114 mp1 = mp->b_cont; 3115 ASSERT(mp1 != NULL); 3116 3117 if (ire != NULL) { 3118 /* 3119 * mp1 initialized above to IRE_DB_REQ_TYPE 3120 * appended mblk. Its <upper protocol>'s 3121 * job to make sure there is room. 
3122 */ 3123 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3124 return (B_FALSE); 3125 3126 mp1->b_datap->db_type = IRE_DB_TYPE; 3127 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3128 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3129 ret_ire = (ire_t *)mp1->b_rptr; 3130 if (IN6_IS_ADDR_MULTICAST(dst) || 3131 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3132 ret_ire->ire_type = IRE_BROADCAST; 3133 ret_ire->ire_addr_v6 = *dst; 3134 } 3135 if (ulp_info != NULL) { 3136 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3137 sizeof (iulp_t)); 3138 } 3139 ret_ire->ire_mp = mp1; 3140 } else { 3141 /* 3142 * No IRE was found. Remove IRE mblk. 3143 */ 3144 mp->b_cont = mp1->b_cont; 3145 freeb(mp1); 3146 } 3147 return (B_TRUE); 3148 } 3149 3150 /* 3151 * Add an ip6i_t header to the front of the mblk. 3152 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3153 * Returns NULL if allocation fails (and frees original message). 3154 * Used in outgoing path when going through ip_newroute_*v6(). 3155 * Used in incoming path to pass ifindex to transports. 
3156 */ 3157 mblk_t * 3158 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3159 { 3160 mblk_t *mp1; 3161 ip6i_t *ip6i; 3162 ip6_t *ip6h; 3163 3164 ip6h = (ip6_t *)mp->b_rptr; 3165 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3166 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3167 mp->b_datap->db_ref > 1) { 3168 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3169 if (mp1 == NULL) { 3170 freemsg(mp); 3171 return (NULL); 3172 } 3173 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3174 mp1->b_cont = mp; 3175 mp = mp1; 3176 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3177 } 3178 mp->b_rptr = (uchar_t *)ip6i; 3179 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3180 ip6i->ip6i_nxt = IPPROTO_RAW; 3181 if (ill != NULL) { 3182 ip6i->ip6i_flags = IP6I_IFINDEX; 3183 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3184 } else { 3185 ip6i->ip6i_flags = 0; 3186 } 3187 ip6i->ip6i_nexthop = *dst; 3188 return (mp); 3189 } 3190 3191 /* 3192 * Handle protocols with which IP is less intimate. There 3193 * can be more than one stream bound to a particular 3194 * protocol. When this is the case, normally each one gets a copy 3195 * of any incoming packets. 3196 * However, if the packet was tunneled and not multicast we only send to it 3197 * the first match. 3198 * 3199 * Zones notes: 3200 * Packets will be distributed to streams in all zones. This is really only 3201 * useful for ICMPv6 as only applications in the global zone can create raw 3202 * sockets for other protocols. 
3203 */ 3204 static void 3205 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3206 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3207 boolean_t mctl_present, zoneid_t zoneid) 3208 { 3209 queue_t *rq; 3210 mblk_t *mp1, *first_mp1; 3211 in6_addr_t dst = ip6h->ip6_dst; 3212 in6_addr_t src = ip6h->ip6_src; 3213 boolean_t one_only; 3214 mblk_t *first_mp = mp; 3215 boolean_t secure, shared_addr; 3216 conn_t *connp, *first_connp, *next_connp; 3217 connf_t *connfp; 3218 3219 if (mctl_present) { 3220 mp = first_mp->b_cont; 3221 secure = ipsec_in_is_secure(first_mp); 3222 ASSERT(mp != NULL); 3223 } else { 3224 secure = B_FALSE; 3225 } 3226 3227 /* 3228 * If the packet was tunneled and not multicast we only send to it 3229 * the first match. 3230 */ 3231 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3232 !IN6_IS_ADDR_MULTICAST(&dst)); 3233 3234 shared_addr = (zoneid == ALL_ZONES); 3235 if (shared_addr) { 3236 /* 3237 * We don't allow multilevel ports for raw IP, so no need to 3238 * check for that here. 3239 */ 3240 zoneid = tsol_packet_to_zoneid(mp); 3241 } 3242 3243 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3244 mutex_enter(&connfp->connf_lock); 3245 connp = connfp->connf_head; 3246 for (connp = connfp->connf_head; connp != NULL; 3247 connp = connp->conn_next) { 3248 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3249 zoneid) && 3250 (!is_system_labeled() || 3251 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3252 connp))) 3253 break; 3254 } 3255 3256 if (connp == NULL || connp->conn_upq == NULL) { 3257 /* 3258 * No one bound to this port. Is 3259 * there a client that wants all 3260 * unclaimed datagrams? 
3261 */ 3262 mutex_exit(&connfp->connf_lock); 3263 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3264 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3265 nexthdr_offset, mctl_present, zoneid)) { 3266 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3267 } 3268 3269 return; 3270 } 3271 3272 CONN_INC_REF(connp); 3273 first_connp = connp; 3274 3275 /* 3276 * XXX: Fix the multiple protocol listeners case. We should not 3277 * be walking the conn->next list here. 3278 */ 3279 if (one_only) { 3280 /* 3281 * Only send message to one tunnel driver by immediately 3282 * terminating the loop. 3283 */ 3284 connp = NULL; 3285 } else { 3286 connp = connp->conn_next; 3287 3288 } 3289 for (;;) { 3290 while (connp != NULL) { 3291 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3292 flags, zoneid) && 3293 (!is_system_labeled() || 3294 tsol_receive_local(mp, &dst, IPV6_VERSION, 3295 shared_addr, connp))) 3296 break; 3297 connp = connp->conn_next; 3298 } 3299 3300 /* 3301 * Just copy the data part alone. The mctl part is 3302 * needed just for verifying policy and it is never 3303 * sent up. 3304 */ 3305 if (connp == NULL || connp->conn_upq == NULL || 3306 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3307 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3308 /* 3309 * No more intested clients or memory 3310 * allocation failed 3311 */ 3312 connp = first_connp; 3313 break; 3314 } 3315 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3316 CONN_INC_REF(connp); 3317 mutex_exit(&connfp->connf_lock); 3318 rq = connp->conn_rq; 3319 /* 3320 * For link-local always add ifindex so that transport can set 3321 * sin6_scope_id. Avoid it for ICMP error fanout. 
3322 */ 3323 if ((connp->conn_ipv6_recvpktinfo || 3324 IN6_IS_ADDR_LINKLOCAL(&src)) && 3325 (flags & IP_FF_IP6INFO)) { 3326 /* Add header */ 3327 mp1 = ip_add_info_v6(mp1, inill, &dst); 3328 } 3329 if (mp1 == NULL) { 3330 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3331 } else if (!canputnext(rq)) { 3332 if (flags & IP_FF_RAWIP) { 3333 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3334 } else { 3335 BUMP_MIB(ill->ill_icmp6_mib, 3336 ipv6IfIcmpInOverflows); 3337 } 3338 3339 freemsg(mp1); 3340 } else { 3341 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3342 first_mp1 = ipsec_check_inbound_policy 3343 (first_mp1, connp, NULL, ip6h, 3344 mctl_present); 3345 } 3346 if (first_mp1 != NULL) { 3347 if (mctl_present) 3348 freeb(first_mp1); 3349 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3350 putnext(rq, mp1); 3351 } 3352 } 3353 mutex_enter(&connfp->connf_lock); 3354 /* Follow the next pointer before releasing the conn. */ 3355 next_connp = connp->conn_next; 3356 CONN_DEC_REF(connp); 3357 connp = next_connp; 3358 } 3359 3360 /* Last one. Send it upstream. */ 3361 mutex_exit(&connfp->connf_lock); 3362 3363 /* Initiate IPPF processing */ 3364 if (IP6_IN_IPP(flags)) { 3365 uint_t ifindex; 3366 3367 mutex_enter(&ill->ill_lock); 3368 ifindex = ill->ill_phyint->phyint_ifindex; 3369 mutex_exit(&ill->ill_lock); 3370 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3371 if (mp == NULL) { 3372 CONN_DEC_REF(connp); 3373 if (mctl_present) 3374 freeb(first_mp); 3375 return; 3376 } 3377 } 3378 3379 /* 3380 * For link-local always add ifindex so that transport can set 3381 * sin6_scope_id. Avoid it for ICMP error fanout. 
3382 */ 3383 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3384 (flags & IP_FF_IP6INFO)) { 3385 /* Add header */ 3386 mp = ip_add_info_v6(mp, inill, &dst); 3387 if (mp == NULL) { 3388 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3389 CONN_DEC_REF(connp); 3390 if (mctl_present) 3391 freeb(first_mp); 3392 return; 3393 } else if (mctl_present) { 3394 first_mp->b_cont = mp; 3395 } else { 3396 first_mp = mp; 3397 } 3398 } 3399 3400 rq = connp->conn_rq; 3401 if (!canputnext(rq)) { 3402 if (flags & IP_FF_RAWIP) { 3403 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3404 } else { 3405 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3406 } 3407 3408 freemsg(first_mp); 3409 } else { 3410 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3411 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3412 NULL, ip6h, mctl_present); 3413 if (first_mp == NULL) { 3414 CONN_DEC_REF(connp); 3415 return; 3416 } 3417 } 3418 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3419 putnext(rq, mp); 3420 if (mctl_present) 3421 freeb(first_mp); 3422 } 3423 CONN_DEC_REF(connp); 3424 } 3425 3426 /* 3427 * Send an ICMP error after patching up the packet appropriately. Returns 3428 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3429 */ 3430 int 3431 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3432 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3433 boolean_t mctl_present, zoneid_t zoneid) 3434 { 3435 ip6_t *ip6h; 3436 mblk_t *first_mp; 3437 boolean_t secure; 3438 unsigned char db_type; 3439 3440 first_mp = mp; 3441 if (mctl_present) { 3442 mp = mp->b_cont; 3443 secure = ipsec_in_is_secure(first_mp); 3444 ASSERT(mp != NULL); 3445 } else { 3446 /* 3447 * If this is an ICMP error being reported - which goes 3448 * up as M_CTLs, we need to convert them to M_DATA till 3449 * we finish checking with global policy because 3450 * ipsec_check_global_policy() assumes M_DATA as clear 3451 * and M_CTL as secure. 
3452 */ 3453 db_type = mp->b_datap->db_type; 3454 mp->b_datap->db_type = M_DATA; 3455 secure = B_FALSE; 3456 } 3457 /* 3458 * We are generating an icmp error for some inbound packet. 3459 * Called from all ip_fanout_(udp, tcp, proto) functions. 3460 * Before we generate an error, check with global policy 3461 * to see whether this is allowed to enter the system. As 3462 * there is no "conn", we are checking with global policy. 3463 */ 3464 ip6h = (ip6_t *)mp->b_rptr; 3465 if (secure || ipsec_inbound_v6_policy_present) { 3466 first_mp = ipsec_check_global_policy(first_mp, NULL, 3467 NULL, ip6h, mctl_present); 3468 if (first_mp == NULL) 3469 return (0); 3470 } 3471 3472 if (!mctl_present) 3473 mp->b_datap->db_type = db_type; 3474 3475 if (flags & IP_FF_SEND_ICMP) { 3476 if (flags & IP_FF_HDR_COMPLETE) { 3477 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3478 freemsg(first_mp); 3479 return (1); 3480 } 3481 } 3482 switch (icmp_type) { 3483 case ICMP6_DST_UNREACH: 3484 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3485 B_FALSE, B_FALSE); 3486 break; 3487 case ICMP6_PARAM_PROB: 3488 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3489 nexthdr_offset, B_FALSE, B_FALSE); 3490 break; 3491 default: 3492 #ifdef DEBUG 3493 panic("ip_fanout_send_icmp_v6: wrong type"); 3494 /*NOTREACHED*/ 3495 #else 3496 freemsg(first_mp); 3497 break; 3498 #endif 3499 } 3500 } else { 3501 freemsg(first_mp); 3502 return (0); 3503 } 3504 3505 return (1); 3506 } 3507 3508 3509 /* 3510 * Fanout for TCP packets 3511 * The caller puts <fport, lport> in the ports parameter. 
3512 */ 3513 static void 3514 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3515 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3516 { 3517 mblk_t *first_mp; 3518 boolean_t secure; 3519 conn_t *connp; 3520 tcph_t *tcph; 3521 boolean_t syn_present = B_FALSE; 3522 3523 first_mp = mp; 3524 if (mctl_present) { 3525 mp = first_mp->b_cont; 3526 secure = ipsec_in_is_secure(first_mp); 3527 ASSERT(mp != NULL); 3528 } else { 3529 secure = B_FALSE; 3530 } 3531 3532 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3533 3534 if (connp == NULL || 3535 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3536 /* 3537 * No hard-bound match. Send Reset. 3538 */ 3539 dblk_t *dp = mp->b_datap; 3540 uint32_t ill_index; 3541 3542 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3543 3544 /* Initiate IPPf processing, if needed. */ 3545 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3546 ill_index = ill->ill_phyint->phyint_ifindex; 3547 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3548 if (first_mp == NULL) { 3549 if (connp != NULL) 3550 CONN_DEC_REF(connp); 3551 return; 3552 } 3553 } 3554 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3555 tcp_xmit_listeners_reset(first_mp, hdr_len); 3556 if (connp != NULL) 3557 CONN_DEC_REF(connp); 3558 return; 3559 } 3560 3561 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3562 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3563 if (connp->conn_flags & IPCL_TCP) { 3564 squeue_t *sqp; 3565 3566 /* 3567 * For fused tcp loopback, assign the eager's 3568 * squeue to be that of the active connect's. 
3569 */ 3570 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3571 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3572 !IP6_IN_IPP(flags)) { 3573 ASSERT(Q_TO_CONN(q) != NULL); 3574 sqp = Q_TO_CONN(q)->conn_sqp; 3575 } else { 3576 sqp = IP_SQUEUE_GET(lbolt); 3577 } 3578 3579 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3580 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3581 3582 /* 3583 * db_cksumstuff is unused in the incoming 3584 * path; Thus store the ifindex here. It will 3585 * be cleared in tcp_conn_create_v6(). 3586 */ 3587 DB_CKSUMSTUFF(mp) = 3588 (intptr_t)ill->ill_phyint->phyint_ifindex; 3589 syn_present = B_TRUE; 3590 } 3591 } 3592 3593 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3594 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3595 if ((flags & TH_RST) || (flags & TH_URG)) { 3596 CONN_DEC_REF(connp); 3597 freemsg(first_mp); 3598 return; 3599 } 3600 if (flags & TH_ACK) { 3601 tcp_xmit_listeners_reset(first_mp, hdr_len); 3602 CONN_DEC_REF(connp); 3603 return; 3604 } 3605 3606 CONN_DEC_REF(connp); 3607 freemsg(first_mp); 3608 return; 3609 } 3610 3611 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3612 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3613 NULL, ip6h, mctl_present); 3614 if (first_mp == NULL) { 3615 CONN_DEC_REF(connp); 3616 return; 3617 } 3618 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3619 ASSERT(syn_present); 3620 if (mctl_present) { 3621 ASSERT(first_mp != mp); 3622 first_mp->b_datap->db_struioflag |= 3623 STRUIO_POLICY; 3624 } else { 3625 ASSERT(first_mp == mp); 3626 mp->b_datap->db_struioflag &= 3627 ~STRUIO_EAGER; 3628 mp->b_datap->db_struioflag |= 3629 STRUIO_POLICY; 3630 } 3631 } else { 3632 /* 3633 * Discard first_mp early since we're dealing with a 3634 * fully-connected conn_t and tcp doesn't do policy in 3635 * this case. Also, if someone is bound to IPPROTO_TCP 3636 * over raw IP, they don't expect to see a M_CTL. 
3637 */ 3638 if (mctl_present) { 3639 freeb(first_mp); 3640 mctl_present = B_FALSE; 3641 } 3642 first_mp = mp; 3643 } 3644 } 3645 3646 /* Initiate IPPF processing */ 3647 if (IP6_IN_IPP(flags)) { 3648 uint_t ifindex; 3649 3650 mutex_enter(&ill->ill_lock); 3651 ifindex = ill->ill_phyint->phyint_ifindex; 3652 mutex_exit(&ill->ill_lock); 3653 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3654 if (mp == NULL) { 3655 CONN_DEC_REF(connp); 3656 if (mctl_present) { 3657 freeb(first_mp); 3658 } 3659 return; 3660 } else if (mctl_present) { 3661 /* 3662 * ip_add_info_v6 might return a new mp. 3663 */ 3664 ASSERT(first_mp != mp); 3665 first_mp->b_cont = mp; 3666 } else { 3667 first_mp = mp; 3668 } 3669 } 3670 3671 /* 3672 * For link-local always add ifindex so that TCP can bind to that 3673 * interface. Avoid it for ICMP error fanout. 3674 */ 3675 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3676 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3677 (flags & IP_FF_IP6INFO))) { 3678 /* Add header */ 3679 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3680 if (mp == NULL) { 3681 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3682 CONN_DEC_REF(connp); 3683 if (mctl_present) 3684 freeb(first_mp); 3685 return; 3686 } else if (mctl_present) { 3687 ASSERT(first_mp != mp); 3688 first_mp->b_cont = mp; 3689 } else { 3690 first_mp = mp; 3691 } 3692 } 3693 3694 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3695 if (IPCL_IS_TCP(connp)) { 3696 (*ip_input_proc)(connp->conn_sqp, first_mp, 3697 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3698 } else { 3699 putnext(connp->conn_rq, first_mp); 3700 CONN_DEC_REF(connp); 3701 } 3702 } 3703 3704 /* 3705 * Fanout for UDP packets. 3706 * The caller puts <fport, lport> in the ports parameter. 3707 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3708 * 3709 * If SO_REUSEADDR is set all multicast and broadcast packets 3710 * will be delivered to all streams bound to the same port. 
3711 * 3712 * Zones notes: 3713 * Multicast packets will be distributed to streams in all zones. 3714 */ 3715 static void 3716 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3717 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3718 zoneid_t zoneid) 3719 { 3720 uint32_t dstport, srcport; 3721 in6_addr_t dst; 3722 mblk_t *first_mp; 3723 boolean_t secure; 3724 conn_t *connp; 3725 connf_t *connfp; 3726 conn_t *first_conn; 3727 conn_t *next_conn; 3728 mblk_t *mp1, *first_mp1; 3729 in6_addr_t src; 3730 boolean_t shared_addr; 3731 3732 first_mp = mp; 3733 if (mctl_present) { 3734 mp = first_mp->b_cont; 3735 secure = ipsec_in_is_secure(first_mp); 3736 ASSERT(mp != NULL); 3737 } else { 3738 secure = B_FALSE; 3739 } 3740 3741 /* Extract ports in net byte order */ 3742 dstport = htons(ntohl(ports) & 0xFFFF); 3743 srcport = htons(ntohl(ports) >> 16); 3744 dst = ip6h->ip6_dst; 3745 src = ip6h->ip6_src; 3746 3747 shared_addr = (zoneid == ALL_ZONES); 3748 if (shared_addr) { 3749 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3750 /* 3751 * If no shared MLP is found, tsol_mlp_findzone returns 3752 * ALL_ZONES. In that case, we assume it's SLP, and 3753 * search for the zone based on the packet label. 3754 * That will also return ALL_ZONES on failure, but 3755 * we never allow conn_zoneid to be set to ALL_ZONES. 3756 */ 3757 if (zoneid == ALL_ZONES) 3758 zoneid = tsol_packet_to_zoneid(mp); 3759 } 3760 3761 /* Attempt to find a client stream based on destination port. */ 3762 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3763 mutex_enter(&connfp->connf_lock); 3764 connp = connfp->connf_head; 3765 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3766 /* 3767 * Not multicast. Send to the one (first) client we find. 
3768 */ 3769 while (connp != NULL) { 3770 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3771 src) && connp->conn_zoneid == zoneid && 3772 conn_wantpacket_v6(connp, ill, ip6h, 3773 flags, zoneid)) { 3774 break; 3775 } 3776 connp = connp->conn_next; 3777 } 3778 if (connp == NULL || connp->conn_upq == NULL) 3779 goto notfound; 3780 3781 if (is_system_labeled() && 3782 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3783 connp)) 3784 goto notfound; 3785 3786 /* Found a client */ 3787 CONN_INC_REF(connp); 3788 mutex_exit(&connfp->connf_lock); 3789 3790 if (CONN_UDP_FLOWCTLD(connp)) { 3791 freemsg(first_mp); 3792 CONN_DEC_REF(connp); 3793 return; 3794 } 3795 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3796 first_mp = ipsec_check_inbound_policy(first_mp, 3797 connp, NULL, ip6h, mctl_present); 3798 if (first_mp == NULL) { 3799 CONN_DEC_REF(connp); 3800 return; 3801 } 3802 } 3803 /* Initiate IPPF processing */ 3804 if (IP6_IN_IPP(flags)) { 3805 uint_t ifindex; 3806 3807 mutex_enter(&ill->ill_lock); 3808 ifindex = ill->ill_phyint->phyint_ifindex; 3809 mutex_exit(&ill->ill_lock); 3810 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3811 if (mp == NULL) { 3812 CONN_DEC_REF(connp); 3813 if (mctl_present) 3814 freeb(first_mp); 3815 return; 3816 } 3817 } 3818 /* 3819 * For link-local always add ifindex so that 3820 * transport can set sin6_scope_id. Avoid it for 3821 * ICMP error fanout. 
3822 */ 3823 if ((connp->conn_ipv6_recvpktinfo || 3824 IN6_IS_ADDR_LINKLOCAL(&src)) && 3825 (flags & IP_FF_IP6INFO)) { 3826 /* Add header */ 3827 mp = ip_add_info_v6(mp, inill, &dst); 3828 if (mp == NULL) { 3829 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3830 CONN_DEC_REF(connp); 3831 if (mctl_present) 3832 freeb(first_mp); 3833 return; 3834 } else if (mctl_present) { 3835 first_mp->b_cont = mp; 3836 } else { 3837 first_mp = mp; 3838 } 3839 } 3840 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3841 3842 /* Send it upstream */ 3843 CONN_UDP_RECV(connp, mp); 3844 3845 IP6_STAT(ip6_udp_fannorm); 3846 CONN_DEC_REF(connp); 3847 if (mctl_present) 3848 freeb(first_mp); 3849 return; 3850 } 3851 3852 while (connp != NULL) { 3853 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3854 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3855 (!is_system_labeled() || 3856 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3857 connp))) 3858 break; 3859 connp = connp->conn_next; 3860 } 3861 3862 if (connp == NULL || connp->conn_upq == NULL) 3863 goto notfound; 3864 3865 first_conn = connp; 3866 3867 CONN_INC_REF(connp); 3868 connp = connp->conn_next; 3869 for (;;) { 3870 while (connp != NULL) { 3871 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3872 src) && conn_wantpacket_v6(connp, ill, ip6h, 3873 flags, zoneid) && 3874 (!is_system_labeled() || 3875 tsol_receive_local(mp, &dst, IPV6_VERSION, 3876 shared_addr, connp))) 3877 break; 3878 connp = connp->conn_next; 3879 } 3880 /* 3881 * Just copy the data part alone. The mctl part is 3882 * needed just for verifying policy and it is never 3883 * sent up. 3884 */ 3885 if (connp == NULL || 3886 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3887 ((first_mp1 = ip_copymsg(first_mp)) 3888 == NULL))) { 3889 /* 3890 * No more interested clients or memory 3891 * allocation failed 3892 */ 3893 connp = first_conn; 3894 break; 3895 } 3896 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3897 CONN_INC_REF(connp); 3898 mutex_exit(&connfp->connf_lock); 3899 /* 3900 * For link-local always add ifindex so that transport 3901 * can set sin6_scope_id. Avoid it for ICMP error 3902 * fanout. 3903 */ 3904 if ((connp->conn_ipv6_recvpktinfo || 3905 IN6_IS_ADDR_LINKLOCAL(&src)) && 3906 (flags & IP_FF_IP6INFO)) { 3907 /* Add header */ 3908 mp1 = ip_add_info_v6(mp1, inill, &dst); 3909 } 3910 /* mp1 could have changed */ 3911 if (mctl_present) 3912 first_mp1->b_cont = mp1; 3913 else 3914 first_mp1 = mp1; 3915 if (mp1 == NULL) { 3916 if (mctl_present) 3917 freeb(first_mp1); 3918 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3919 goto next_one; 3920 } 3921 if (CONN_UDP_FLOWCTLD(connp)) { 3922 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3923 freemsg(first_mp1); 3924 goto next_one; 3925 } 3926 3927 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3928 secure) { 3929 first_mp1 = ipsec_check_inbound_policy 3930 (first_mp1, connp, NULL, ip6h, 3931 mctl_present); 3932 } 3933 if (first_mp1 != NULL) { 3934 if (mctl_present) 3935 freeb(first_mp1); 3936 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3937 3938 /* Send it upstream */ 3939 CONN_UDP_RECV(connp, mp1); 3940 } 3941 next_one: 3942 mutex_enter(&connfp->connf_lock); 3943 /* Follow the next pointer before releasing the conn. */ 3944 next_conn = connp->conn_next; 3945 IP6_STAT(ip6_udp_fanmb); 3946 CONN_DEC_REF(connp); 3947 connp = next_conn; 3948 } 3949 3950 /* Last one. Send it upstream. 
*/ 3951 mutex_exit(&connfp->connf_lock); 3952 3953 /* Initiate IPPF processing */ 3954 if (IP6_IN_IPP(flags)) { 3955 uint_t ifindex; 3956 3957 mutex_enter(&ill->ill_lock); 3958 ifindex = ill->ill_phyint->phyint_ifindex; 3959 mutex_exit(&ill->ill_lock); 3960 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3961 if (mp == NULL) { 3962 CONN_DEC_REF(connp); 3963 if (mctl_present) { 3964 freeb(first_mp); 3965 } 3966 return; 3967 } 3968 } 3969 3970 /* 3971 * For link-local always add ifindex so that transport can set 3972 * sin6_scope_id. Avoid it for ICMP error fanout. 3973 */ 3974 if ((connp->conn_ipv6_recvpktinfo || 3975 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3976 /* Add header */ 3977 mp = ip_add_info_v6(mp, inill, &dst); 3978 if (mp == NULL) { 3979 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3980 CONN_DEC_REF(connp); 3981 if (mctl_present) 3982 freeb(first_mp); 3983 return; 3984 } else if (mctl_present) { 3985 first_mp->b_cont = mp; 3986 } else { 3987 first_mp = mp; 3988 } 3989 } 3990 if (CONN_UDP_FLOWCTLD(connp)) { 3991 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3992 freemsg(mp); 3993 } else { 3994 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3995 first_mp = ipsec_check_inbound_policy(first_mp, 3996 connp, NULL, ip6h, mctl_present); 3997 if (first_mp == NULL) { 3998 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3999 CONN_DEC_REF(connp); 4000 return; 4001 } 4002 } 4003 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 4004 4005 /* Send it upstream */ 4006 CONN_UDP_RECV(connp, mp); 4007 } 4008 IP6_STAT(ip6_udp_fanmb); 4009 CONN_DEC_REF(connp); 4010 if (mctl_present) 4011 freeb(first_mp); 4012 return; 4013 4014 notfound: 4015 mutex_exit(&connfp->connf_lock); 4016 /* 4017 * No one bound to this port. Is 4018 * there a client that wants all 4019 * unclaimed datagrams? 
4020 */ 4021 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4022 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4023 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 4024 zoneid); 4025 } else { 4026 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4027 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4028 mctl_present, zoneid)) { 4029 BUMP_MIB(&ip_mib, udpNoPorts); 4030 } 4031 } 4032 } 4033 4034 /* 4035 * int ip_find_hdr_v6() 4036 * 4037 * This routine is used by the upper layer protocols and the IP tunnel 4038 * module to: 4039 * - Set extension header pointers to appropriate locations 4040 * - Determine IPv6 header length and return it 4041 * - Return a pointer to the last nexthdr value 4042 * 4043 * The caller must initialize ipp_fields. 4044 * 4045 * NOTE: If multiple extension headers of the same type are present, 4046 * ip_find_hdr_v6() will set the respective extension header pointers 4047 * to the first one that it encounters in the IPv6 header. It also 4048 * skips fragment headers. This routine deals with malformed packets 4049 * of various sorts in which case the returned length is up to the 4050 * malformed part. 4051 */ 4052 int 4053 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4054 { 4055 uint_t length, ehdrlen; 4056 uint8_t nexthdr; 4057 uint8_t *whereptr, *endptr; 4058 ip6_dest_t *tmpdstopts; 4059 ip6_rthdr_t *tmprthdr; 4060 ip6_hbh_t *tmphopopts; 4061 ip6_frag_t *tmpfraghdr; 4062 4063 length = IPV6_HDR_LEN; 4064 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4065 endptr = mp->b_wptr; 4066 4067 nexthdr = ip6h->ip6_nxt; 4068 while (whereptr < endptr) { 4069 /* Is there enough left for len + nexthdr? 
*/ 4070 if (whereptr + MIN_EHDR_LEN > endptr) 4071 goto done; 4072 4073 switch (nexthdr) { 4074 case IPPROTO_HOPOPTS: 4075 tmphopopts = (ip6_hbh_t *)whereptr; 4076 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4077 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4078 goto done; 4079 nexthdr = tmphopopts->ip6h_nxt; 4080 /* return only 1st hbh */ 4081 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4082 ipp->ipp_fields |= IPPF_HOPOPTS; 4083 ipp->ipp_hopopts = tmphopopts; 4084 ipp->ipp_hopoptslen = ehdrlen; 4085 } 4086 break; 4087 case IPPROTO_DSTOPTS: 4088 tmpdstopts = (ip6_dest_t *)whereptr; 4089 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4090 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4091 goto done; 4092 nexthdr = tmpdstopts->ip6d_nxt; 4093 /* 4094 * ipp_dstopts is set to the destination header after a 4095 * routing header. 4096 * Assume it is a post-rthdr destination header 4097 * and adjust when we find an rthdr. 4098 */ 4099 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4100 ipp->ipp_fields |= IPPF_DSTOPTS; 4101 ipp->ipp_dstopts = tmpdstopts; 4102 ipp->ipp_dstoptslen = ehdrlen; 4103 } 4104 break; 4105 case IPPROTO_ROUTING: 4106 tmprthdr = (ip6_rthdr_t *)whereptr; 4107 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4108 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4109 goto done; 4110 nexthdr = tmprthdr->ip6r_nxt; 4111 /* return only 1st rthdr */ 4112 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4113 ipp->ipp_fields |= IPPF_RTHDR; 4114 ipp->ipp_rthdr = tmprthdr; 4115 ipp->ipp_rthdrlen = ehdrlen; 4116 } 4117 /* 4118 * Make any destination header we've seen be a 4119 * pre-rthdr destination header. 4120 */ 4121 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4122 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4123 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4124 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4125 ipp->ipp_dstopts = NULL; 4126 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4127 ipp->ipp_dstoptslen = 0; 4128 } 4129 break; 4130 case IPPROTO_FRAGMENT: 4131 /* 4132 * Fragment headers are skipped. 
Currently, only 4133 * IP cares for their existence. If anyone other 4134 * than IP ever has the need to know about the 4135 * location of fragment headers, support can be 4136 * added to the ip6_pkt_t at that time. 4137 */ 4138 tmpfraghdr = (ip6_frag_t *)whereptr; 4139 ehdrlen = sizeof (ip6_frag_t); 4140 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4141 goto done; 4142 nexthdr = tmpfraghdr->ip6f_nxt; 4143 break; 4144 case IPPROTO_NONE: 4145 default: 4146 goto done; 4147 } 4148 length += ehdrlen; 4149 whereptr += ehdrlen; 4150 } 4151 done: 4152 if (nexthdrp != NULL) 4153 *nexthdrp = nexthdr; 4154 return (length); 4155 } 4156 4157 int 4158 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4159 { 4160 ire_t *ire; 4161 4162 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4163 ire = ire_lookup_local_v6(zoneid); 4164 if (ire == NULL) { 4165 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4166 return (1); 4167 } 4168 ip6h->ip6_src = ire->ire_addr_v6; 4169 ire_refrele(ire); 4170 } 4171 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4172 ip6h->ip6_hops = ipv6_def_hops; 4173 return (0); 4174 } 4175 4176 /* 4177 * Try to determine where and what are the IPv6 header length and 4178 * pointer to nexthdr value for the upper layer protocol (or an 4179 * unknown next hdr). 4180 * 4181 * Parameters returns a pointer to the nexthdr value; 4182 * Must handle malformed packets of various sorts. 4183 * Function returns failure for malformed cases. 
4184 */ 4185 boolean_t 4186 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4187 uint8_t **nexthdrpp) 4188 { 4189 uint16_t length; 4190 uint_t ehdrlen; 4191 uint8_t *nexthdrp; 4192 uint8_t *whereptr; 4193 uint8_t *endptr; 4194 ip6_dest_t *desthdr; 4195 ip6_rthdr_t *rthdr; 4196 ip6_frag_t *fraghdr; 4197 4198 length = IPV6_HDR_LEN; 4199 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4200 endptr = mp->b_wptr; 4201 4202 nexthdrp = &ip6h->ip6_nxt; 4203 while (whereptr < endptr) { 4204 /* Is there enough left for len + nexthdr? */ 4205 if (whereptr + MIN_EHDR_LEN > endptr) 4206 break; 4207 4208 switch (*nexthdrp) { 4209 case IPPROTO_HOPOPTS: 4210 case IPPROTO_DSTOPTS: 4211 /* Assumes the headers are identical for hbh and dst */ 4212 desthdr = (ip6_dest_t *)whereptr; 4213 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4214 if ((uchar_t *)desthdr + ehdrlen > endptr) 4215 return (B_FALSE); 4216 nexthdrp = &desthdr->ip6d_nxt; 4217 break; 4218 case IPPROTO_ROUTING: 4219 rthdr = (ip6_rthdr_t *)whereptr; 4220 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4221 if ((uchar_t *)rthdr + ehdrlen > endptr) 4222 return (B_FALSE); 4223 nexthdrp = &rthdr->ip6r_nxt; 4224 break; 4225 case IPPROTO_FRAGMENT: 4226 fraghdr = (ip6_frag_t *)whereptr; 4227 ehdrlen = sizeof (ip6_frag_t); 4228 if ((uchar_t *)&fraghdr[1] > endptr) 4229 return (B_FALSE); 4230 nexthdrp = &fraghdr->ip6f_nxt; 4231 break; 4232 case IPPROTO_NONE: 4233 /* No next header means we're finished */ 4234 default: 4235 *hdr_length_ptr = length; 4236 *nexthdrpp = nexthdrp; 4237 return (B_TRUE); 4238 } 4239 length += ehdrlen; 4240 whereptr += ehdrlen; 4241 *hdr_length_ptr = length; 4242 *nexthdrpp = nexthdrp; 4243 } 4244 switch (*nexthdrp) { 4245 case IPPROTO_HOPOPTS: 4246 case IPPROTO_DSTOPTS: 4247 case IPPROTO_ROUTING: 4248 case IPPROTO_FRAGMENT: 4249 /* 4250 * If any know extension headers are still to be processed, 4251 * the packet's malformed (or at least all the IP header(s) are 4252 * not in the same 
mblk - and that should never happen. 4253 */ 4254 return (B_FALSE); 4255 4256 default: 4257 /* 4258 * If we get here, we know that all of the IP headers were in 4259 * the same mblk, even if the ULP header is in the next mblk. 4260 */ 4261 *hdr_length_ptr = length; 4262 *nexthdrpp = nexthdrp; 4263 return (B_TRUE); 4264 } 4265 } 4266 4267 /* 4268 * Return the length of the IPv6 related headers (including extension headers) 4269 * Returns a length even if the packet is malformed. 4270 */ 4271 int 4272 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4273 { 4274 uint16_t hdr_len; 4275 uint8_t *nexthdrp; 4276 4277 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4278 return (hdr_len); 4279 } 4280 4281 /* 4282 * Select an ill for the packet by considering load spreading across 4283 * a different ill in the group if dst_ill is part of some group. 4284 */ 4285 static ill_t * 4286 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4287 { 4288 ill_t *ill; 4289 4290 /* 4291 * We schedule irrespective of whether the source address is 4292 * INADDR_UNSPECIED or not. 4293 */ 4294 ill = illgrp_scheduler(dst_ill); 4295 if (ill == NULL) 4296 return (NULL); 4297 4298 /* 4299 * For groups with names ip_sioctl_groupname ensures that all 4300 * ills are of same type. For groups without names, ifgrp_insert 4301 * ensures this. 4302 */ 4303 ASSERT(dst_ill->ill_type == ill->ill_type); 4304 4305 return (ill); 4306 } 4307 4308 /* 4309 * IPv6 - 4310 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4311 * to send out a packet to a destination address for which we do not have 4312 * specific routing information. 4313 * 4314 * Handle non-multicast packets. If ill is non-NULL the match is done 4315 * for that ill. 4316 * 4317 * When a specific ill is specified (using IPV6_PKTINFO, 4318 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4319 * on routing entries (ftable and ctable) that have a matching 4320 * ire->ire_ipif->ipif_ill. 
 * Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable this only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set (i.e. NS/NA/
 * Echo replies to link-local destinations, which have IP6I_ATTACH_IF
 * set).
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that the ire returned
 *	  by ire_create will not overwrite the original ire. We REFRELE
 *	  save_ire before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently it is only checked
 *	against the loopback address.
4354 */ 4355 /* ARGSUSED */ 4356 void 4357 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4358 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4359 { 4360 in6_addr_t v6gw; 4361 in6_addr_t dst; 4362 ire_t *ire = NULL; 4363 ipif_t *src_ipif = NULL; 4364 ill_t *dst_ill = NULL; 4365 ire_t *sire = NULL; 4366 ire_t *save_ire; 4367 mblk_t *dlureq_mp; 4368 ip6_t *ip6h; 4369 int err = 0; 4370 mblk_t *first_mp; 4371 ipsec_out_t *io; 4372 ill_t *attach_ill = NULL; 4373 ushort_t ire_marks = 0; 4374 int match_flags; 4375 boolean_t ip6i_present; 4376 ire_t *first_sire = NULL; 4377 mblk_t *copy_mp = NULL; 4378 mblk_t *xmit_mp = NULL; 4379 in6_addr_t save_dst; 4380 uint32_t multirt_flags = 4381 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4382 boolean_t multirt_is_resolvable; 4383 boolean_t multirt_resolve_next; 4384 boolean_t need_rele = B_FALSE; 4385 boolean_t do_attach_ill = B_FALSE; 4386 boolean_t ip6_asp_table_held = B_FALSE; 4387 tsol_ire_gw_secattr_t *attrp = NULL; 4388 tsol_gcgrp_t *gcgrp = NULL; 4389 tsol_gcgrp_addr_t ga; 4390 4391 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4392 4393 first_mp = mp; 4394 if (mp->b_datap->db_type == M_CTL) { 4395 mp = mp->b_cont; 4396 io = (ipsec_out_t *)first_mp->b_rptr; 4397 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4398 } else { 4399 io = NULL; 4400 } 4401 4402 /* 4403 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4404 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4405 * could be NULL. 4406 * 4407 * This information can appear either in an ip6i_t or an IPSEC_OUT 4408 * message. 4409 */ 4410 ip6h = (ip6_t *)mp->b_rptr; 4411 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4412 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4413 if (!ip6i_present || 4414 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4415 attach_ill = ip_grab_attach_ill(ill, first_mp, 4416 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4417 io->ipsec_out_ill_index), B_TRUE); 4418 /* Failure case frees things for us. */ 4419 if (attach_ill == NULL) 4420 return; 4421 4422 /* 4423 * Check if we need an ire that will not be 4424 * looked up by anybody else i.e. HIDDEN. 4425 */ 4426 if (ill_is_probeonly(attach_ill)) 4427 ire_marks = IRE_MARK_HIDDEN; 4428 } 4429 } 4430 4431 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4432 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4433 goto icmp_err_ret; 4434 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4435 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4436 goto icmp_err_ret; 4437 } 4438 4439 /* 4440 * If this IRE is created for forwarding or it is not for 4441 * TCP traffic, mark it as temporary. 4442 * 4443 * Is it sufficient just to check the next header?? 4444 */ 4445 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4446 ire_marks |= IRE_MARK_TEMPORARY; 4447 4448 /* 4449 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4450 * chain until it gets the most specific information available. 4451 * For example, we know that there is no IRE_CACHE for this dest, 4452 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4453 * ire_ftable_lookup_v6 will look up the gateway, etc. 4454 */ 4455 4456 if (ill == NULL) { 4457 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4458 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4459 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4460 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4461 match_flags); 4462 /* 4463 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4464 * in a NULL ill, but the packet could be a neighbor 4465 * solicitation/advertisment and could have a valid attach_ill. 4466 */ 4467 if (attach_ill != NULL) 4468 ill_refrele(attach_ill); 4469 } else { 4470 if (attach_ill != NULL) { 4471 /* 4472 * attach_ill is set only for communicating with 4473 * on-link hosts. So, don't look for DEFAULT. 
4474 * ip_wput_v6 passes the right ill in this case and 4475 * hence we can assert. 4476 */ 4477 ASSERT(ill == attach_ill); 4478 ill_refrele(attach_ill); 4479 do_attach_ill = B_TRUE; 4480 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4481 } else { 4482 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4483 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4484 } 4485 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4486 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4487 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4488 } 4489 4490 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4491 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4492 4493 if (zoneid == ALL_ZONES && ire != NULL) { 4494 /* 4495 * In the forwarding case, we can use a route from any zone 4496 * since we won't change the source address. We can easily 4497 * assert that the source address is already set when there's no 4498 * ip6_info header - otherwise we'd have to call pullupmsg(). 4499 */ 4500 ASSERT(ip6i_present || 4501 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4502 zoneid = ire->ire_zoneid; 4503 } 4504 4505 /* 4506 * We enter a loop that will be run only once in most cases. 4507 * The loop is re-entered in the case where the destination 4508 * can be reached through multiple RTF_MULTIRT-flagged routes. 4509 * The intention is to compute multiple routes to a single 4510 * destination in a single ip_newroute_v6 call. 4511 * The information is contained in sire->ire_flags. 
4512 */ 4513 do { 4514 multirt_resolve_next = B_FALSE; 4515 4516 if (dst_ill != NULL) { 4517 ill_refrele(dst_ill); 4518 dst_ill = NULL; 4519 } 4520 if (src_ipif != NULL) { 4521 ipif_refrele(src_ipif); 4522 src_ipif = NULL; 4523 } 4524 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4525 ip3dbg(("ip_newroute_v6: starting new resolution " 4526 "with first_mp %p, tag %d\n", 4527 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4528 4529 /* 4530 * We check if there are trailing unresolved routes for 4531 * the destination contained in sire. 4532 */ 4533 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4534 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4535 4536 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4537 "ire %p, sire %p\n", 4538 multirt_is_resolvable, (void *)ire, (void *)sire)); 4539 4540 if (!multirt_is_resolvable) { 4541 /* 4542 * No more multirt routes to resolve; give up 4543 * (all routes resolved or no more resolvable 4544 * routes). 4545 */ 4546 if (ire != NULL) { 4547 ire_refrele(ire); 4548 ire = NULL; 4549 } 4550 } else { 4551 ASSERT(sire != NULL); 4552 ASSERT(ire != NULL); 4553 /* 4554 * We simply use first_sire as a flag that 4555 * indicates if a resolvable multirt route has 4556 * already been found during the preceding 4557 * loops. If it is not the case, we may have 4558 * to send an ICMP error to report that the 4559 * destination is unreachable. We do not 4560 * IRE_REFHOLD first_sire. 4561 */ 4562 if (first_sire == NULL) { 4563 first_sire = sire; 4564 } 4565 } 4566 } 4567 if ((ire == NULL) || (ire == sire)) { 4568 /* 4569 * either ire == NULL (the destination cannot be 4570 * resolved) or ire == sire (the gateway cannot be 4571 * resolved). At this point, there are no more routes 4572 * to resolve for the destination, thus we exit. 
4573 */ 4574 if (ip_debug > 3) { 4575 /* ip2dbg */ 4576 pr_addr_dbg("ip_newroute_v6: " 4577 "can't resolve %s\n", AF_INET6, v6dstp); 4578 } 4579 ip3dbg(("ip_newroute_v6: " 4580 "ire %p, sire %p, first_sire %p\n", 4581 (void *)ire, (void *)sire, (void *)first_sire)); 4582 4583 if (sire != NULL) { 4584 ire_refrele(sire); 4585 sire = NULL; 4586 } 4587 4588 if (first_sire != NULL) { 4589 /* 4590 * At least one multirt route has been found 4591 * in the same ip_newroute() call; there is no 4592 * need to report an ICMP error. 4593 * first_sire was not IRE_REFHOLDed. 4594 */ 4595 MULTIRT_DEBUG_UNTAG(first_mp); 4596 freemsg(first_mp); 4597 return; 4598 } 4599 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4600 RTA_DST); 4601 goto icmp_err_ret; 4602 } 4603 4604 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4605 4606 /* 4607 * Verify that the returned IRE does not have either the 4608 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4609 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4610 */ 4611 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4612 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4613 goto icmp_err_ret; 4614 4615 /* 4616 * Increment the ire_ob_pkt_count field for ire if it is an 4617 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4618 * increment the same for the parent IRE, sire, if it is some 4619 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4620 * and HOST_REDIRECT). 4621 */ 4622 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4623 UPDATE_OB_PKT_COUNT(ire); 4624 ire->ire_last_used_time = lbolt; 4625 } 4626 4627 if (sire != NULL) { 4628 mutex_enter(&sire->ire_lock); 4629 v6gw = sire->ire_gateway_addr_v6; 4630 mutex_exit(&sire->ire_lock); 4631 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4632 IRE_INTERFACE)) == 0); 4633 UPDATE_OB_PKT_COUNT(sire); 4634 sire->ire_last_used_time = lbolt; 4635 } else { 4636 v6gw = ipv6_all_zeros; 4637 } 4638 4639 /* 4640 * We have a route to reach the destination. 
4641 * 4642 * 1) If the interface is part of ill group, try to get a new 4643 * ill taking load spreading into account. 4644 * 4645 * 2) After selecting the ill, get a source address that might 4646 * create good inbound load spreading and that matches the 4647 * right scope. ipif_select_source_v6 does this for us. 4648 * 4649 * If the application specified the ill (ifindex), we still 4650 * load spread. Only if the packets needs to go out specifically 4651 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4652 * IPV6_BOUND_PIF we don't try to use a different ill for load 4653 * spreading. 4654 */ 4655 if (!do_attach_ill) { 4656 /* 4657 * If the interface belongs to an interface group, 4658 * make sure the next possible interface in the group 4659 * is used. This encourages load spreading among 4660 * peers in an interface group. However, in the case 4661 * of multirouting, load spreading is not used, as we 4662 * actually want to replicate outgoing packets through 4663 * particular interfaces. 4664 * 4665 * Note: While we pick a dst_ill we are really only 4666 * interested in the ill for load spreading. 4667 * The source ipif is determined by source address 4668 * selection below. 4669 */ 4670 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4671 dst_ill = ire->ire_ipif->ipif_ill; 4672 /* For uniformity do a refhold */ 4673 ill_refhold(dst_ill); 4674 } else { 4675 /* 4676 * If we are here trying to create an IRE_CACHE 4677 * for an offlink destination and have the 4678 * IRE_CACHE for the next hop and the latter is 4679 * using virtual IP source address selection i.e 4680 * it's ire->ire_ipif is pointing to a virtual 4681 * network interface (vni) then 4682 * ip_newroute_get_dst_ll() will return the vni 4683 * interface as the dst_ill. 
Since the vni is 4684 * virtual i.e not associated with any physical 4685 * interface, it cannot be the dst_ill, hence 4686 * in such a case call ip_newroute_get_dst_ll() 4687 * with the stq_ill instead of the ire_ipif ILL. 4688 * The function returns a refheld ill. 4689 */ 4690 if ((ire->ire_type == IRE_CACHE) && 4691 IS_VNI(ire->ire_ipif->ipif_ill)) 4692 dst_ill = ip_newroute_get_dst_ill_v6( 4693 ire->ire_stq->q_ptr); 4694 else 4695 dst_ill = ip_newroute_get_dst_ill_v6( 4696 ire->ire_ipif->ipif_ill); 4697 } 4698 if (dst_ill == NULL) { 4699 if (ip_debug > 2) { 4700 pr_addr_dbg("ip_newroute_v6 : no dst " 4701 "ill for dst %s\n", 4702 AF_INET6, v6dstp); 4703 } 4704 goto icmp_err_ret; 4705 } else if (dst_ill->ill_group == NULL && ill != NULL && 4706 dst_ill != ill) { 4707 /* 4708 * If "ill" is not part of any group, we should 4709 * have found a route matching "ill" as we 4710 * called ire_ftable_lookup_v6 with 4711 * MATCH_IRE_ILL_GROUP. 4712 * Rather than asserting when there is a 4713 * mismatch, we just drop the packet. 4714 */ 4715 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4716 "dst_ill %s ill %s\n", 4717 dst_ill->ill_name, 4718 ill->ill_name)); 4719 goto icmp_err_ret; 4720 } 4721 } else { 4722 dst_ill = ire->ire_ipif->ipif_ill; 4723 /* For uniformity do refhold */ 4724 ill_refhold(dst_ill); 4725 /* 4726 * We should have found a route matching ill as we 4727 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4728 * Rather than asserting, while there is a mismatch, 4729 * we just drop the packet. 4730 */ 4731 if (dst_ill != ill) { 4732 ip0dbg(("ip_newroute_v6: Packet dropped as " 4733 "IP6I_ATTACH_IF ill is %s, " 4734 "ire->ire_ipif->ipif_ill is %s\n", 4735 ill->ill_name, 4736 dst_ill->ill_name)); 4737 goto icmp_err_ret; 4738 } 4739 } 4740 /* 4741 * Pick a source address which matches the scope of the 4742 * destination address. 4743 * For RTF_SETSRC routes, the source address is imposed by the 4744 * parent ire (sire). 
4745 */ 4746 ASSERT(src_ipif == NULL); 4747 if (ire->ire_type == IRE_IF_RESOLVER && 4748 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4749 ip6_asp_can_lookup()) { 4750 /* 4751 * The ire cache entry we're adding is for the 4752 * gateway itself. The source address in this case 4753 * is relative to the gateway's address. 4754 */ 4755 ip6_asp_table_held = B_TRUE; 4756 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4757 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4758 if (src_ipif != NULL) 4759 ire_marks |= IRE_MARK_USESRC_CHECK; 4760 } else { 4761 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4762 /* 4763 * Check that the ipif matching the requested 4764 * source address still exists. 4765 */ 4766 src_ipif = ipif_lookup_addr_v6( 4767 &sire->ire_src_addr_v6, NULL, zoneid, 4768 NULL, NULL, NULL, NULL); 4769 } 4770 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4771 uint_t restrict_ill = RESTRICT_TO_NONE; 4772 4773 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4774 & IP6I_ATTACH_IF) 4775 restrict_ill = RESTRICT_TO_ILL; 4776 ip6_asp_table_held = B_TRUE; 4777 src_ipif = ipif_select_source_v6(dst_ill, 4778 v6dstp, restrict_ill, 4779 IPV6_PREFER_SRC_DEFAULT, zoneid); 4780 if (src_ipif != NULL) 4781 ire_marks |= IRE_MARK_USESRC_CHECK; 4782 } 4783 } 4784 4785 if (src_ipif == NULL) { 4786 if (ip_debug > 2) { 4787 /* ip1dbg */ 4788 pr_addr_dbg("ip_newroute_v6: no src for " 4789 "dst %s\n, ", AF_INET6, v6dstp); 4790 printf("ip_newroute_v6: interface name %s\n", 4791 dst_ill->ill_name); 4792 } 4793 goto icmp_err_ret; 4794 } 4795 4796 if (ip_debug > 3) { 4797 /* ip2dbg */ 4798 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4799 AF_INET6, &v6gw); 4800 } 4801 ip2dbg(("\tire type %s (%d)\n", 4802 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4803 4804 /* 4805 * At this point in ip_newroute_v6(), ire is either the 4806 * IRE_CACHE of the next-hop gateway for an off-subnet 4807 * destination or an IRE_INTERFACE type that should be used 4808 * to resolve an 
on-subnet destination or an on-subnet 4809 * next-hop gateway. 4810 * 4811 * In the IRE_CACHE case, we have the following : 4812 * 4813 * 1) src_ipif - used for getting a source address. 4814 * 4815 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4816 * means packets using this IRE_CACHE will go out on dst_ill. 4817 * 4818 * 3) The IRE sire will point to the prefix that is the longest 4819 * matching route for the destination. These prefix types 4820 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4821 * IRE_HOST_REDIRECT. 4822 * 4823 * The newly created IRE_CACHE entry for the off-subnet 4824 * destination is tied to both the prefix route and the 4825 * interface route used to resolve the next-hop gateway 4826 * via the ire_phandle and ire_ihandle fields, respectively. 4827 * 4828 * In the IRE_INTERFACE case, we have the following : 4829 * 4830 * 1) src_ipif - used for getting a source address. 4831 * 4832 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4833 * means packets using the IRE_CACHE that we will build 4834 * here will go out on dst_ill. 4835 * 4836 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4837 * to be created will only be tied to the IRE_INTERFACE that 4838 * was derived from the ire_ihandle field. 4839 * 4840 * If sire is non-NULL, it means the destination is off-link 4841 * and we will first create the IRE_CACHE for the gateway. 4842 * Next time through ip_newroute_v6, we will create the 4843 * IRE_CACHE for the final destination as described above. 4844 */ 4845 save_ire = ire; 4846 switch (ire->ire_type) { 4847 case IRE_CACHE: { 4848 ire_t *ipif_ire; 4849 4850 ASSERT(sire != NULL); 4851 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4852 mutex_enter(&ire->ire_lock); 4853 v6gw = ire->ire_gateway_addr_v6; 4854 mutex_exit(&ire->ire_lock); 4855 } 4856 /* 4857 * We need 3 ire's to create a new cache ire for an 4858 * off-link destination from the cache ire of the 4859 * gateway. 4860 * 4861 * 1. 
The prefix ire 'sire' 4862 * 2. The cache ire of the gateway 'ire' 4863 * 3. The interface ire 'ipif_ire' 4864 * 4865 * We have (1) and (2). We lookup (3) below. 4866 * 4867 * If there is no interface route to the gateway, 4868 * it is a race condition, where we found the cache 4869 * but the inteface route has been deleted. 4870 */ 4871 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4872 if (ipif_ire == NULL) { 4873 ip1dbg(("ip_newroute_v6:" 4874 "ire_ihandle_lookup_offlink_v6 failed\n")); 4875 goto icmp_err_ret; 4876 } 4877 /* 4878 * Assume DL_UNITDATA_REQ is same for all physical 4879 * interfaces in the ifgrp. If it isn't, this code will 4880 * have to be seriously rewhacked to allow the 4881 * fastpath probing (such that I cache the link 4882 * header in the IRE_CACHE) to work over ifgrps. 4883 * We have what we need to build an IRE_CACHE. 4884 */ 4885 /* 4886 * Note: the new ire inherits RTF_SETSRC 4887 * and RTF_MULTIRT to propagate these flags from prefix 4888 * to cache. 4889 */ 4890 4891 /* 4892 * Check cached gateway IRE for any security 4893 * attributes; if found, associate the gateway 4894 * credentials group to the destination IRE. 
4895 */ 4896 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4897 mutex_enter(&attrp->igsa_lock); 4898 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4899 GCGRP_REFHOLD(gcgrp); 4900 mutex_exit(&attrp->igsa_lock); 4901 } 4902 4903 ire = ire_create_v6( 4904 v6dstp, /* dest address */ 4905 &ipv6_all_ones, /* mask */ 4906 &src_ipif->ipif_v6src_addr, /* source address */ 4907 &v6gw, /* gateway address */ 4908 &save_ire->ire_max_frag, 4909 NULL, /* Fast Path header */ 4910 dst_ill->ill_rq, /* recv-from queue */ 4911 dst_ill->ill_wq, /* send-to queue */ 4912 IRE_CACHE, 4913 NULL, 4914 src_ipif, 4915 &sire->ire_mask_v6, /* Parent mask */ 4916 sire->ire_phandle, /* Parent handle */ 4917 ipif_ire->ire_ihandle, /* Interface handle */ 4918 sire->ire_flags & /* flags if any */ 4919 (RTF_SETSRC | RTF_MULTIRT), 4920 &(sire->ire_uinfo), 4921 NULL, 4922 gcgrp); 4923 4924 if (ire == NULL) { 4925 if (gcgrp != NULL) { 4926 GCGRP_REFRELE(gcgrp); 4927 gcgrp = NULL; 4928 } 4929 ire_refrele(save_ire); 4930 ire_refrele(ipif_ire); 4931 break; 4932 } 4933 4934 /* reference now held by IRE */ 4935 gcgrp = NULL; 4936 4937 ire->ire_marks |= ire_marks; 4938 4939 /* 4940 * Prevent sire and ipif_ire from getting deleted. The 4941 * newly created ire is tied to both of them via the 4942 * phandle and ihandle respectively. 4943 */ 4944 IRB_REFHOLD(sire->ire_bucket); 4945 /* Has it been removed already ? */ 4946 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4947 IRB_REFRELE(sire->ire_bucket); 4948 ire_refrele(ipif_ire); 4949 ire_refrele(save_ire); 4950 break; 4951 } 4952 4953 IRB_REFHOLD(ipif_ire->ire_bucket); 4954 /* Has it been removed already ? 
*/ 4955 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4956 IRB_REFRELE(ipif_ire->ire_bucket); 4957 IRB_REFRELE(sire->ire_bucket); 4958 ire_refrele(ipif_ire); 4959 ire_refrele(save_ire); 4960 break; 4961 } 4962 4963 xmit_mp = first_mp; 4964 if (ire->ire_flags & RTF_MULTIRT) { 4965 copy_mp = copymsg(first_mp); 4966 if (copy_mp != NULL) { 4967 xmit_mp = copy_mp; 4968 MULTIRT_DEBUG_TAG(first_mp); 4969 } 4970 } 4971 ire_add_then_send(q, ire, xmit_mp); 4972 if (ip6_asp_table_held) { 4973 ip6_asp_table_refrele(); 4974 ip6_asp_table_held = B_FALSE; 4975 } 4976 ire_refrele(save_ire); 4977 4978 /* Assert that sire is not deleted yet. */ 4979 ASSERT(sire->ire_ptpn != NULL); 4980 IRB_REFRELE(sire->ire_bucket); 4981 4982 /* Assert that ipif_ire is not deleted yet. */ 4983 ASSERT(ipif_ire->ire_ptpn != NULL); 4984 IRB_REFRELE(ipif_ire->ire_bucket); 4985 ire_refrele(ipif_ire); 4986 4987 if (copy_mp != NULL) { 4988 /* 4989 * Search for the next unresolved 4990 * multirt route. 4991 */ 4992 copy_mp = NULL; 4993 ipif_ire = NULL; 4994 ire = NULL; 4995 /* re-enter the loop */ 4996 multirt_resolve_next = B_TRUE; 4997 continue; 4998 } 4999 ire_refrele(sire); 5000 ill_refrele(dst_ill); 5001 ipif_refrele(src_ipif); 5002 return; 5003 } 5004 case IRE_IF_NORESOLVER: 5005 /* 5006 * We have what we need to build an IRE_CACHE. 5007 * 5008 * Create a new dlureq_mp with the IPv6 gateway 5009 * address in destination address in the DLPI hdr 5010 * if the physical length is exactly 16 bytes. 
5011 */ 5012 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5013 const in6_addr_t *addr; 5014 5015 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5016 addr = &v6gw; 5017 else 5018 addr = v6dstp; 5019 5020 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5021 dst_ill->ill_phys_addr_length, 5022 dst_ill->ill_sap, 5023 dst_ill->ill_sap_length); 5024 } else { 5025 dlureq_mp = ire->ire_dlureq_mp; 5026 } 5027 if (dlureq_mp == NULL) 5028 break; 5029 /* 5030 * TSol note: We are creating the ire cache for the 5031 * destination 'dst'. If 'dst' is offlink, going 5032 * through the first hop 'gw', the security attributes 5033 * of 'dst' must be set to point to the gateway 5034 * credentials of gateway 'gw'. If 'dst' is onlink, it 5035 * is possible that 'dst' is a potential gateway that is 5036 * referenced by some route that has some security 5037 * attributes. Thus in the former case, we need to do a 5038 * gcgrp_lookup of 'gw' while in the latter case we 5039 * need to do gcgrp_lookup of 'dst' itself. 5040 */ 5041 ga.ga_af = AF_INET6; 5042 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5043 ga.ga_addr = v6gw; 5044 else 5045 ga.ga_addr = *v6dstp; 5046 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5047 5048 /* 5049 * Note: the new ire inherits sire flags RTF_SETSRC 5050 * and RTF_MULTIRT to propagate those rules from prefix 5051 * to cache. 5052 */ 5053 ire = ire_create_v6( 5054 v6dstp, /* dest address */ 5055 &ipv6_all_ones, /* mask */ 5056 &src_ipif->ipif_v6src_addr, /* source address */ 5057 &v6gw, /* gateway address */ 5058 &save_ire->ire_max_frag, 5059 NULL, /* Fast Path header */ 5060 dst_ill->ill_rq, /* recv-from queue */ 5061 dst_ill->ill_wq, /* send-to queue */ 5062 IRE_CACHE, 5063 dlureq_mp, 5064 src_ipif, 5065 &save_ire->ire_mask_v6, /* Parent mask */ 5066 (sire != NULL) ? /* Parent handle */ 5067 sire->ire_phandle : 0, 5068 save_ire->ire_ihandle, /* Interface handle */ 5069 (sire != NULL) ? 
/* flags if any */ 5070 sire->ire_flags & 5071 (RTF_SETSRC | RTF_MULTIRT) : 0, 5072 &(save_ire->ire_uinfo), 5073 NULL, 5074 gcgrp); 5075 5076 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5077 freeb(dlureq_mp); 5078 5079 if (ire == NULL) { 5080 if (gcgrp != NULL) { 5081 GCGRP_REFRELE(gcgrp); 5082 gcgrp = NULL; 5083 } 5084 ire_refrele(save_ire); 5085 break; 5086 } 5087 5088 /* reference now held by IRE */ 5089 gcgrp = NULL; 5090 5091 ire->ire_marks |= ire_marks; 5092 5093 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5094 dst = v6gw; 5095 else 5096 dst = *v6dstp; 5097 err = ndp_noresolver(dst_ill, &dst); 5098 if (err != 0) { 5099 ire_refrele(save_ire); 5100 break; 5101 } 5102 5103 /* Prevent save_ire from getting deleted */ 5104 IRB_REFHOLD(save_ire->ire_bucket); 5105 /* Has it been removed already ? */ 5106 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5107 IRB_REFRELE(save_ire->ire_bucket); 5108 ire_refrele(save_ire); 5109 break; 5110 } 5111 5112 xmit_mp = first_mp; 5113 /* 5114 * In case of MULTIRT, a copy of the current packet 5115 * to send is made to further re-enter the 5116 * loop and attempt another route resolution 5117 */ 5118 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5119 copy_mp = copymsg(first_mp); 5120 if (copy_mp != NULL) { 5121 xmit_mp = copy_mp; 5122 MULTIRT_DEBUG_TAG(first_mp); 5123 } 5124 } 5125 ire_add_then_send(q, ire, xmit_mp); 5126 if (ip6_asp_table_held) { 5127 ip6_asp_table_refrele(); 5128 ip6_asp_table_held = B_FALSE; 5129 } 5130 5131 /* Assert that it is not deleted yet. */ 5132 ASSERT(save_ire->ire_ptpn != NULL); 5133 IRB_REFRELE(save_ire->ire_bucket); 5134 ire_refrele(save_ire); 5135 5136 if (copy_mp != NULL) { 5137 /* 5138 * If we found a (no)resolver, we ignore any 5139 * trailing top priority IRE_CACHE in 5140 * further loops. This ensures that we do not 5141 * omit any (no)resolver despite the priority 5142 * in this call. 
5143 * IRE_CACHE, if any, will be processed 5144 * by another thread entering ip_newroute(), 5145 * (on resolver response, for example). 5146 * We use this to force multiple parallel 5147 * resolution as soon as a packet needs to be 5148 * sent. The result is, after one packet 5149 * emission all reachable routes are generally 5150 * resolved. 5151 * Otherwise, complete resolution of MULTIRT 5152 * routes would require several emissions as 5153 * side effect. 5154 */ 5155 multirt_flags &= ~MULTIRT_CACHEGW; 5156 5157 /* 5158 * Search for the next unresolved multirt 5159 * route. 5160 */ 5161 copy_mp = NULL; 5162 save_ire = NULL; 5163 ire = NULL; 5164 /* re-enter the loop */ 5165 multirt_resolve_next = B_TRUE; 5166 continue; 5167 } 5168 5169 /* Don't need sire anymore */ 5170 if (sire != NULL) 5171 ire_refrele(sire); 5172 ill_refrele(dst_ill); 5173 ipif_refrele(src_ipif); 5174 return; 5175 5176 case IRE_IF_RESOLVER: 5177 /* 5178 * We can't build an IRE_CACHE yet, but at least we 5179 * found a resolver that can help. 5180 */ 5181 dst = *v6dstp; 5182 5183 /* 5184 * To be at this point in the code with a non-zero gw 5185 * means that dst is reachable through a gateway that 5186 * we have never resolved. By changing dst to the gw 5187 * addr we resolve the gateway first. When 5188 * ire_add_then_send() tries to put the IP dg to dst, 5189 * it will reenter ip_newroute() at which time we will 5190 * find the IRE_CACHE for the gw and create another 5191 * IRE_CACHE above (for dst itself). 5192 */ 5193 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5194 save_dst = dst; 5195 dst = v6gw; 5196 v6gw = ipv6_all_zeros; 5197 } 5198 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5199 /* 5200 * Ask the external resolver to do its thing. 
5201 * Make an mblk chain in the following form: 5202 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5203 */ 5204 mblk_t *ire_mp; 5205 mblk_t *areq_mp; 5206 areq_t *areq; 5207 in6_addr_t *addrp; 5208 5209 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5210 if (ip6_asp_table_held) { 5211 ip6_asp_table_refrele(); 5212 ip6_asp_table_held = B_FALSE; 5213 } 5214 ire = ire_create_mp_v6( 5215 &dst, /* dest address */ 5216 &ipv6_all_ones, /* mask */ 5217 &src_ipif->ipif_v6src_addr, 5218 /* source address */ 5219 &v6gw, /* gateway address */ 5220 NULL, /* Fast Path header */ 5221 dst_ill->ill_rq, /* recv-from queue */ 5222 dst_ill->ill_wq, /* send-to queue */ 5223 IRE_CACHE, 5224 NULL, 5225 src_ipif, 5226 &save_ire->ire_mask_v6, 5227 /* Parent mask */ 5228 0, 5229 save_ire->ire_ihandle, 5230 /* Interface handle */ 5231 0, /* flags if any */ 5232 &(save_ire->ire_uinfo), 5233 NULL, 5234 NULL); 5235 5236 ire_refrele(save_ire); 5237 if (ire == NULL) { 5238 ip1dbg(("ip_newroute_v6:" 5239 "ire is NULL\n")); 5240 break; 5241 } 5242 5243 if ((sire != NULL) && 5244 (sire->ire_flags & RTF_MULTIRT)) { 5245 /* 5246 * processing a copy of the packet to 5247 * send for further resolution loops 5248 */ 5249 copy_mp = copymsg(first_mp); 5250 if (copy_mp != NULL) 5251 MULTIRT_DEBUG_TAG(copy_mp); 5252 } 5253 ire->ire_marks |= ire_marks; 5254 ire_mp = ire->ire_mp; 5255 /* 5256 * Now create or find an nce for this interface. 5257 * The hw addr will need to to be set from 5258 * the reply to the AR_ENTRY_QUERY that 5259 * we're about to send. This will be done in 5260 * ire_add_v6(). 5261 */ 5262 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5263 switch (err) { 5264 case 0: 5265 /* 5266 * New cache entry created. 5267 * Break, then ask the external 5268 * resolver. 5269 */ 5270 break; 5271 case EINPROGRESS: 5272 /* 5273 * Resolution in progress; 5274 * packet has been queued by 5275 * ndp_resolver(). 
5276 */ 5277 ire_delete(ire); 5278 ire = NULL; 5279 /* 5280 * Check if another multirt 5281 * route must be resolved. 5282 */ 5283 if (copy_mp != NULL) { 5284 /* 5285 * If we found a resolver, we 5286 * ignore any trailing top 5287 * priority IRE_CACHE in 5288 * further loops. The reason is 5289 * the same as for noresolver. 5290 */ 5291 multirt_flags &= 5292 ~MULTIRT_CACHEGW; 5293 /* 5294 * Search for the next 5295 * unresolved multirt route. 5296 */ 5297 first_mp = copy_mp; 5298 copy_mp = NULL; 5299 mp = first_mp; 5300 if (mp->b_datap->db_type == 5301 M_CTL) { 5302 mp = mp->b_cont; 5303 } 5304 ASSERT(sire != NULL); 5305 dst = save_dst; 5306 /* 5307 * re-enter the loop 5308 */ 5309 multirt_resolve_next = 5310 B_TRUE; 5311 continue; 5312 } 5313 5314 if (sire != NULL) 5315 ire_refrele(sire); 5316 ill_refrele(dst_ill); 5317 ipif_refrele(src_ipif); 5318 return; 5319 default: 5320 /* 5321 * Transient error; packet will be 5322 * freed. 5323 */ 5324 ire_delete(ire); 5325 ire = NULL; 5326 break; 5327 } 5328 if (err != 0) 5329 break; 5330 /* 5331 * Now set up the AR_ENTRY_QUERY and send it. 5332 */ 5333 areq_mp = ill_arp_alloc(dst_ill, 5334 (uchar_t *)&ipv6_areq_template, 5335 (caddr_t)&dst); 5336 if (areq_mp == NULL) { 5337 ip1dbg(("ip_newroute_v6:" 5338 "areq_mp is NULL\n")); 5339 freemsg(ire_mp); 5340 break; 5341 } 5342 areq = (areq_t *)areq_mp->b_rptr; 5343 addrp = (in6_addr_t *)((char *)areq + 5344 areq->areq_target_addr_offset); 5345 *addrp = dst; 5346 addrp = (in6_addr_t *)((char *)areq + 5347 areq->areq_sender_addr_offset); 5348 *addrp = src_ipif->ipif_v6src_addr; 5349 /* 5350 * link the chain, then send up to the resolver. 5351 */ 5352 linkb(areq_mp, ire_mp); 5353 linkb(areq_mp, mp); 5354 ip1dbg(("ip_newroute_v6:" 5355 "putnext to resolver\n")); 5356 putnext(dst_ill->ill_rq, areq_mp); 5357 /* 5358 * Check if another multirt route 5359 * must be resolved. 
5360 */ 5361 ire = NULL; 5362 if (copy_mp != NULL) { 5363 /* 5364 * If we find a resolver, we ignore any 5365 * trailing top priority IRE_CACHE in 5366 * further loops. The reason is the 5367 * same as for noresolver. 5368 */ 5369 multirt_flags &= ~MULTIRT_CACHEGW; 5370 /* 5371 * Search for the next unresolved 5372 * multirt route. 5373 */ 5374 first_mp = copy_mp; 5375 copy_mp = NULL; 5376 mp = first_mp; 5377 if (mp->b_datap->db_type == M_CTL) { 5378 mp = mp->b_cont; 5379 } 5380 ASSERT(sire != NULL); 5381 dst = save_dst; 5382 /* 5383 * re-enter the loop 5384 */ 5385 multirt_resolve_next = B_TRUE; 5386 continue; 5387 } 5388 5389 if (sire != NULL) 5390 ire_refrele(sire); 5391 ill_refrele(dst_ill); 5392 ipif_refrele(src_ipif); 5393 return; 5394 } 5395 /* 5396 * Non-external resolver case. 5397 * 5398 * TSol note: Please see the note above the 5399 * IRE_IF_NORESOLVER case. 5400 */ 5401 ga.ga_af = AF_INET6; 5402 ga.ga_addr = dst; 5403 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5404 5405 ire = ire_create_v6( 5406 &dst, /* dest address */ 5407 &ipv6_all_ones, /* mask */ 5408 &src_ipif->ipif_v6src_addr, /* source address */ 5409 &v6gw, /* gateway address */ 5410 &save_ire->ire_max_frag, 5411 NULL, /* Fast Path header */ 5412 dst_ill->ill_rq, /* recv-from queue */ 5413 dst_ill->ill_wq, /* send-to queue */ 5414 IRE_CACHE, 5415 NULL, 5416 src_ipif, 5417 &save_ire->ire_mask_v6, /* Parent mask */ 5418 0, 5419 save_ire->ire_ihandle, /* Interface handle */ 5420 0, /* flags if any */ 5421 &(save_ire->ire_uinfo), 5422 NULL, 5423 gcgrp); 5424 5425 if (ire == NULL) { 5426 if (gcgrp != NULL) { 5427 GCGRP_REFRELE(gcgrp); 5428 gcgrp = NULL; 5429 } 5430 ire_refrele(save_ire); 5431 break; 5432 } 5433 5434 /* reference now held by IRE */ 5435 gcgrp = NULL; 5436 5437 if ((sire != NULL) && 5438 (sire->ire_flags & RTF_MULTIRT)) { 5439 copy_mp = copymsg(first_mp); 5440 if (copy_mp != NULL) 5441 MULTIRT_DEBUG_TAG(copy_mp); 5442 } 5443 5444 ire->ire_marks |= ire_marks; 5445 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5446 switch (err) { 5447 case 0: 5448 /* Prevent save_ire from getting deleted */ 5449 IRB_REFHOLD(save_ire->ire_bucket); 5450 /* Has it been removed already ? */ 5451 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5452 IRB_REFRELE(save_ire->ire_bucket); 5453 ire_refrele(save_ire); 5454 break; 5455 } 5456 5457 /* 5458 * We have a resolved cache entry, 5459 * add in the IRE. 5460 */ 5461 ire_add_then_send(q, ire, first_mp); 5462 if (ip6_asp_table_held) { 5463 ip6_asp_table_refrele(); 5464 ip6_asp_table_held = B_FALSE; 5465 } 5466 5467 /* Assert that it is not deleted yet. */ 5468 ASSERT(save_ire->ire_ptpn != NULL); 5469 IRB_REFRELE(save_ire->ire_bucket); 5470 ire_refrele(save_ire); 5471 /* 5472 * Check if another multirt route 5473 * must be resolved. 5474 */ 5475 ire = NULL; 5476 if (copy_mp != NULL) { 5477 /* 5478 * If we find a resolver, we ignore any 5479 * trailing top priority IRE_CACHE in 5480 * further loops. The reason is the 5481 * same as for noresolver. 5482 */ 5483 multirt_flags &= ~MULTIRT_CACHEGW; 5484 /* 5485 * Search for the next unresolved 5486 * multirt route. 5487 */ 5488 first_mp = copy_mp; 5489 copy_mp = NULL; 5490 mp = first_mp; 5491 if (mp->b_datap->db_type == M_CTL) { 5492 mp = mp->b_cont; 5493 } 5494 ASSERT(sire != NULL); 5495 dst = save_dst; 5496 /* 5497 * re-enter the loop 5498 */ 5499 multirt_resolve_next = B_TRUE; 5500 continue; 5501 } 5502 5503 if (sire != NULL) 5504 ire_refrele(sire); 5505 ill_refrele(dst_ill); 5506 ipif_refrele(src_ipif); 5507 return; 5508 5509 case EINPROGRESS: 5510 /* 5511 * mp was consumed - presumably queued. 5512 * No need for ire, presumably resolution is 5513 * in progress, and ire will be added when the 5514 * address is resolved. 
5515 */ 5516 if (ip6_asp_table_held) { 5517 ip6_asp_table_refrele(); 5518 ip6_asp_table_held = B_FALSE; 5519 } 5520 ASSERT(ire->ire_nce == NULL); 5521 ire_delete(ire); 5522 ire_refrele(save_ire); 5523 /* 5524 * Check if another multirt route 5525 * must be resolved. 5526 */ 5527 ire = NULL; 5528 if (copy_mp != NULL) { 5529 /* 5530 * If we find a resolver, we ignore any 5531 * trailing top priority IRE_CACHE in 5532 * further loops. The reason is the 5533 * same as for noresolver. 5534 */ 5535 multirt_flags &= ~MULTIRT_CACHEGW; 5536 /* 5537 * Search for the next unresolved 5538 * multirt route. 5539 */ 5540 first_mp = copy_mp; 5541 copy_mp = NULL; 5542 mp = first_mp; 5543 if (mp->b_datap->db_type == M_CTL) { 5544 mp = mp->b_cont; 5545 } 5546 ASSERT(sire != NULL); 5547 dst = save_dst; 5548 /* 5549 * re-enter the loop 5550 */ 5551 multirt_resolve_next = B_TRUE; 5552 continue; 5553 } 5554 if (sire != NULL) 5555 ire_refrele(sire); 5556 ill_refrele(dst_ill); 5557 ipif_refrele(src_ipif); 5558 return; 5559 default: 5560 /* Some transient error */ 5561 ASSERT(ire->ire_nce == NULL); 5562 ire_refrele(save_ire); 5563 break; 5564 } 5565 break; 5566 default: 5567 break; 5568 } 5569 if (ip6_asp_table_held) { 5570 ip6_asp_table_refrele(); 5571 ip6_asp_table_held = B_FALSE; 5572 } 5573 } while (multirt_resolve_next); 5574 5575 err_ret: 5576 ip1dbg(("ip_newroute_v6: dropped\n")); 5577 if (src_ipif != NULL) 5578 ipif_refrele(src_ipif); 5579 if (dst_ill != NULL) { 5580 need_rele = B_TRUE; 5581 ill = dst_ill; 5582 } 5583 if (ill != NULL) { 5584 if (mp->b_prev != NULL) { 5585 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5586 } else { 5587 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5588 } 5589 5590 if (need_rele) 5591 ill_refrele(ill); 5592 } else { 5593 if (mp->b_prev != NULL) { 5594 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5595 } else { 5596 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5597 } 5598 } 5599 /* Did this packet originate externally? 
*/ 5600 if (mp->b_prev) { 5601 mp->b_next = NULL; 5602 mp->b_prev = NULL; 5603 } 5604 if (copy_mp != NULL) { 5605 MULTIRT_DEBUG_UNTAG(copy_mp); 5606 freemsg(copy_mp); 5607 } 5608 MULTIRT_DEBUG_UNTAG(first_mp); 5609 freemsg(first_mp); 5610 if (ire != NULL) 5611 ire_refrele(ire); 5612 if (sire != NULL) 5613 ire_refrele(sire); 5614 return; 5615 5616 icmp_err_ret: 5617 if (ip6_asp_table_held) 5618 ip6_asp_table_refrele(); 5619 if (src_ipif != NULL) 5620 ipif_refrele(src_ipif); 5621 if (dst_ill != NULL) { 5622 need_rele = B_TRUE; 5623 ill = dst_ill; 5624 } 5625 ip1dbg(("ip_newroute_v6: no route\n")); 5626 if (sire != NULL) 5627 ire_refrele(sire); 5628 /* 5629 * We need to set sire to NULL to avoid double freeing if we 5630 * ever goto err_ret from below. 5631 */ 5632 sire = NULL; 5633 ip6h = (ip6_t *)mp->b_rptr; 5634 /* Skip ip6i_t header if present */ 5635 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5636 /* Make sure the IPv6 header is present */ 5637 if ((mp->b_wptr - (uchar_t *)ip6h) < 5638 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5639 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5640 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5641 goto err_ret; 5642 } 5643 } 5644 mp->b_rptr += sizeof (ip6i_t); 5645 ip6h = (ip6_t *)mp->b_rptr; 5646 } 5647 /* Did this packet originate externally? 
*/ 5648 if (mp->b_prev) { 5649 if (ill != NULL) { 5650 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5651 } else { 5652 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5653 } 5654 mp->b_next = NULL; 5655 mp->b_prev = NULL; 5656 q = WR(q); 5657 } else { 5658 if (ill != NULL) { 5659 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5660 } else { 5661 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5662 } 5663 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5664 /* Failed */ 5665 if (copy_mp != NULL) { 5666 MULTIRT_DEBUG_UNTAG(copy_mp); 5667 freemsg(copy_mp); 5668 } 5669 MULTIRT_DEBUG_UNTAG(first_mp); 5670 freemsg(first_mp); 5671 if (ire != NULL) 5672 ire_refrele(ire); 5673 if (need_rele) 5674 ill_refrele(ill); 5675 return; 5676 } 5677 } 5678 5679 if (need_rele) 5680 ill_refrele(ill); 5681 5682 /* 5683 * At this point we will have ire only if RTF_BLACKHOLE 5684 * or RTF_REJECT flags are set on the IRE. It will not 5685 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5686 */ 5687 if (ire != NULL) { 5688 if (ire->ire_flags & RTF_BLACKHOLE) { 5689 ire_refrele(ire); 5690 if (copy_mp != NULL) { 5691 MULTIRT_DEBUG_UNTAG(copy_mp); 5692 freemsg(copy_mp); 5693 } 5694 MULTIRT_DEBUG_UNTAG(first_mp); 5695 freemsg(first_mp); 5696 return; 5697 } 5698 ire_refrele(ire); 5699 } 5700 if (ip_debug > 3) { 5701 /* ip2dbg */ 5702 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5703 AF_INET6, v6dstp); 5704 } 5705 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5706 B_FALSE, B_FALSE); 5707 } 5708 5709 /* 5710 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5711 * we need to send out a packet to a destination address for which we do not 5712 * have specific routing information. It is only used for multicast packets. 5713 * 5714 * If unspec_src we allow creating an IRE with source address zero. 5715 * ire_send_v6() will delete it after the packet is sent. 
5716 */ 5717 void 5718 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5719 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5720 { 5721 ire_t *ire = NULL; 5722 ipif_t *src_ipif = NULL; 5723 int err = 0; 5724 ill_t *dst_ill = NULL; 5725 ire_t *save_ire; 5726 ushort_t ire_marks = 0; 5727 ipsec_out_t *io; 5728 ill_t *attach_ill = NULL; 5729 ill_t *ill; 5730 ip6_t *ip6h; 5731 mblk_t *first_mp; 5732 boolean_t ip6i_present; 5733 ire_t *fire = NULL; 5734 mblk_t *copy_mp = NULL; 5735 boolean_t multirt_resolve_next; 5736 in6_addr_t *v6dstp = &v6dst; 5737 boolean_t ipif_held = B_FALSE; 5738 boolean_t ill_held = B_FALSE; 5739 boolean_t ip6_asp_table_held = B_FALSE; 5740 5741 /* 5742 * This loop is run only once in most cases. 5743 * We loop to resolve further routes only when the destination 5744 * can be reached through multiple RTF_MULTIRT-flagged ires. 5745 */ 5746 do { 5747 multirt_resolve_next = B_FALSE; 5748 if (dst_ill != NULL) { 5749 ill_refrele(dst_ill); 5750 dst_ill = NULL; 5751 } 5752 5753 if (src_ipif != NULL) { 5754 ipif_refrele(src_ipif); 5755 src_ipif = NULL; 5756 } 5757 ASSERT(ipif != NULL); 5758 ill = ipif->ipif_ill; 5759 5760 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5761 if (ip_debug > 2) { 5762 /* ip1dbg */ 5763 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5764 AF_INET6, v6dstp); 5765 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5766 ill->ill_name, ipif->ipif_isv6); 5767 } 5768 5769 first_mp = mp; 5770 if (mp->b_datap->db_type == M_CTL) { 5771 mp = mp->b_cont; 5772 io = (ipsec_out_t *)first_mp->b_rptr; 5773 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5774 } else { 5775 io = NULL; 5776 } 5777 5778 /* 5779 * If the interface is a pt-pt interface we look for an 5780 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5781 * local_address and the pt-pt destination address. 5782 * Otherwise we just match the local address. 
5783 */ 5784 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5785 goto err_ret; 5786 } 5787 /* 5788 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5789 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5790 * as it could be NULL. 5791 * 5792 * This information can appear either in an ip6i_t or an 5793 * IPSEC_OUT message. 5794 */ 5795 ip6h = (ip6_t *)mp->b_rptr; 5796 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5797 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5798 if (!ip6i_present || 5799 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5800 attach_ill = ip_grab_attach_ill(ill, first_mp, 5801 (ip6i_present ? 5802 ((ip6i_t *)ip6h)->ip6i_ifindex : 5803 io->ipsec_out_ill_index), B_TRUE); 5804 /* Failure case frees things for us. */ 5805 if (attach_ill == NULL) 5806 return; 5807 5808 /* 5809 * Check if we need an ire that will not be 5810 * looked up by anybody else i.e. HIDDEN. 5811 */ 5812 if (ill_is_probeonly(attach_ill)) 5813 ire_marks = IRE_MARK_HIDDEN; 5814 } 5815 } 5816 5817 /* 5818 * We check if an IRE_OFFSUBNET for the addr that goes through 5819 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5820 * RTF_MULTIRT flags must be honored. 5821 */ 5822 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5823 ip2dbg(("ip_newroute_ipif_v6: " 5824 "ipif_lookup_multi_ire_v6(" 5825 "ipif %p, dst %08x) = fire %p\n", 5826 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5827 (void *)fire)); 5828 5829 /* 5830 * If the application specified the ill (ifindex), we still 5831 * load spread. Only if the packets needs to go out specifically 5832 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5833 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5834 * multirouting, then we don't try to use a different ill for 5835 * load spreading. 
5836 */ 5837 if (attach_ill == NULL) { 5838 /* 5839 * If the interface belongs to an interface group, 5840 * make sure the next possible interface in the group 5841 * is used. This encourages load spreading among peers 5842 * in an interface group. 5843 * 5844 * Note: While we pick a dst_ill we are really only 5845 * interested in the ill for load spreading. The source 5846 * ipif is determined by source address selection below. 5847 */ 5848 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5849 dst_ill = ipif->ipif_ill; 5850 /* For uniformity do a refhold */ 5851 ill_refhold(dst_ill); 5852 } else { 5853 /* refheld by ip_newroute_get_dst_ill_v6 */ 5854 dst_ill = 5855 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5856 } 5857 if (dst_ill == NULL) { 5858 if (ip_debug > 2) { 5859 pr_addr_dbg("ip_newroute_ipif_v6: " 5860 "no dst ill for dst %s\n", 5861 AF_INET6, v6dstp); 5862 } 5863 goto err_ret; 5864 } 5865 } else { 5866 dst_ill = ipif->ipif_ill; 5867 /* 5868 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5869 * and IPV6_BOUND_PIF case. 5870 */ 5871 ASSERT(dst_ill == attach_ill); 5872 /* attach_ill is already refheld */ 5873 } 5874 /* 5875 * Pick a source address which matches the scope of the 5876 * destination address. 5877 * For RTF_SETSRC routes, the source address is imposed by the 5878 * parent ire (fire). 5879 */ 5880 ASSERT(src_ipif == NULL); 5881 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5882 /* 5883 * Check that the ipif matching the requested source 5884 * address still exists. 
5885 */ 5886 src_ipif = 5887 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5888 NULL, zoneid, NULL, NULL, NULL, NULL); 5889 } 5890 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5891 ip6_asp_table_held = B_TRUE; 5892 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5893 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5894 } 5895 5896 if (src_ipif == NULL) { 5897 if (!unspec_src) { 5898 if (ip_debug > 2) { 5899 /* ip1dbg */ 5900 pr_addr_dbg("ip_newroute_ipif_v6: " 5901 "no src for dst %s\n,", 5902 AF_INET6, v6dstp); 5903 printf(" through interface %s\n", 5904 dst_ill->ill_name); 5905 } 5906 goto err_ret; 5907 } 5908 /* Use any ipif for source */ 5909 for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; 5910 src_ipif = src_ipif->ipif_next) { 5911 if ((src_ipif->ipif_flags & IPIF_UP) && 5912 IN6_IS_ADDR_UNSPECIFIED( 5913 &src_ipif->ipif_v6src_addr)) 5914 break; 5915 } 5916 if (src_ipif == NULL) { 5917 if (ip_debug > 2) { 5918 /* ip1dbg */ 5919 pr_addr_dbg("ip_newroute_ipif_v6: " 5920 "no src for dst %s\n ", 5921 AF_INET6, v6dstp); 5922 printf("ip_newroute_ipif_v6: if %s" 5923 "(UNSPEC_SRC)\n", 5924 dst_ill->ill_name); 5925 } 5926 goto err_ret; 5927 } 5928 src_ipif = ipif; 5929 ipif_refhold(src_ipif); 5930 } 5931 ire = ipif_to_ire_v6(ipif); 5932 if (ire == NULL) { 5933 if (ip_debug > 2) { 5934 /* ip1dbg */ 5935 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5936 AF_INET6, &ipif->ipif_v6lcl_addr); 5937 printf("ip_newroute_ipif_v6: " 5938 "if %s\n", dst_ill->ill_name); 5939 } 5940 goto err_ret; 5941 } 5942 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5943 goto err_ret; 5944 5945 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5946 5947 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5948 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5949 if (ip_debug > 2) { 5950 /* ip1dbg */ 5951 pr_addr_dbg(" address %s\n", 5952 AF_INET6, &ire->ire_src_addr_v6); 5953 } 5954 save_ire = ire; 5955 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5956 (void *)ire, 
(void *)ipif)); 5957 5958 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5959 /* 5960 * an IRE_OFFSUBET was looked up 5961 * on that interface. 5962 * this ire has RTF_MULTIRT flag, 5963 * so the resolution loop 5964 * will be re-entered to resolve 5965 * additional routes on other 5966 * interfaces. For that purpose, 5967 * a copy of the packet is 5968 * made at this point. 5969 */ 5970 fire->ire_last_used_time = lbolt; 5971 copy_mp = copymsg(first_mp); 5972 if (copy_mp) { 5973 MULTIRT_DEBUG_TAG(copy_mp); 5974 } 5975 } 5976 5977 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5978 switch (ire->ire_type) { 5979 case IRE_IF_NORESOLVER: { 5980 /* We have what we need to build an IRE_CACHE. */ 5981 mblk_t *dlureq_mp; 5982 5983 /* 5984 * Create a new dlureq_mp with the 5985 * IPv6 gateway address in destination address in the 5986 * DLPI hdr if the physical length is exactly 16 bytes. 5987 */ 5988 ASSERT(dst_ill->ill_isv6); 5989 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5990 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5991 dst_ill->ill_phys_addr_length, 5992 dst_ill->ill_sap, 5993 dst_ill->ill_sap_length); 5994 } else { 5995 dlureq_mp = ire->ire_dlureq_mp; 5996 } 5997 5998 if (dlureq_mp == NULL) 5999 break; 6000 /* 6001 * The newly created ire will inherit the flags of the 6002 * parent ire, if any. 6003 */ 6004 ire = ire_create_v6( 6005 v6dstp, /* dest address */ 6006 &ipv6_all_ones, /* mask */ 6007 &src_ipif->ipif_v6src_addr, /* source address */ 6008 NULL, /* gateway address */ 6009 &save_ire->ire_max_frag, 6010 NULL, /* Fast Path header */ 6011 dst_ill->ill_rq, /* recv-from queue */ 6012 dst_ill->ill_wq, /* send-to queue */ 6013 IRE_CACHE, 6014 dlureq_mp, 6015 src_ipif, 6016 NULL, 6017 (fire != NULL) ? /* Parent handle */ 6018 fire->ire_phandle : 0, 6019 save_ire->ire_ihandle, /* Interface handle */ 6020 (fire != NULL) ? 
6021 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6022 0, 6023 &ire_uinfo_null, 6024 NULL, 6025 NULL); 6026 6027 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 6028 freeb(dlureq_mp); 6029 6030 if (ire == NULL) { 6031 ire_refrele(save_ire); 6032 break; 6033 } 6034 6035 ire->ire_marks |= ire_marks; 6036 6037 err = ndp_noresolver(dst_ill, v6dstp); 6038 if (err != 0) { 6039 ire_refrele(save_ire); 6040 break; 6041 } 6042 6043 /* Prevent save_ire from getting deleted */ 6044 IRB_REFHOLD(save_ire->ire_bucket); 6045 /* Has it been removed already ? */ 6046 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6047 IRB_REFRELE(save_ire->ire_bucket); 6048 ire_refrele(save_ire); 6049 break; 6050 } 6051 6052 ire_add_then_send(q, ire, first_mp); 6053 if (ip6_asp_table_held) { 6054 ip6_asp_table_refrele(); 6055 ip6_asp_table_held = B_FALSE; 6056 } 6057 6058 /* Assert that it is not deleted yet. */ 6059 ASSERT(save_ire->ire_ptpn != NULL); 6060 IRB_REFRELE(save_ire->ire_bucket); 6061 ire_refrele(save_ire); 6062 if (fire != NULL) { 6063 ire_refrele(fire); 6064 fire = NULL; 6065 } 6066 6067 /* 6068 * The resolution loop is re-entered if we 6069 * actually are in a multirouting case. 6070 */ 6071 if (copy_mp != NULL) { 6072 boolean_t need_resolve = 6073 ire_multirt_need_resolve_v6(v6dstp, 6074 MBLK_GETLABEL(copy_mp)); 6075 if (!need_resolve) { 6076 MULTIRT_DEBUG_UNTAG(copy_mp); 6077 freemsg(copy_mp); 6078 copy_mp = NULL; 6079 } else { 6080 /* 6081 * ipif_lookup_group_v6() calls 6082 * ire_lookup_multi_v6() that uses 6083 * ire_ftable_lookup_v6() to find 6084 * an IRE_INTERFACE for the group. 6085 * In the multirt case, 6086 * ire_lookup_multi_v6() then invokes 6087 * ire_multirt_lookup_v6() to find 6088 * the next resolvable ire. 6089 * As a result, we obtain a new 6090 * interface, derived from the 6091 * next ire. 
6092 */ 6093 if (ipif_held) { 6094 ipif_refrele(ipif); 6095 ipif_held = B_FALSE; 6096 } 6097 ipif = ipif_lookup_group_v6(v6dstp, 6098 zoneid); 6099 ip2dbg(("ip_newroute_ipif: " 6100 "multirt dst %08x, ipif %p\n", 6101 ntohl(V4_PART_OF_V6((*v6dstp))), 6102 (void *)ipif)); 6103 if (ipif != NULL) { 6104 ipif_held = B_TRUE; 6105 mp = copy_mp; 6106 copy_mp = NULL; 6107 multirt_resolve_next = 6108 B_TRUE; 6109 continue; 6110 } else { 6111 freemsg(copy_mp); 6112 } 6113 } 6114 } 6115 ill_refrele(dst_ill); 6116 if (ipif_held) { 6117 ipif_refrele(ipif); 6118 ipif_held = B_FALSE; 6119 } 6120 if (src_ipif != NULL) 6121 ipif_refrele(src_ipif); 6122 return; 6123 } 6124 case IRE_IF_RESOLVER: { 6125 6126 ASSERT(dst_ill->ill_isv6); 6127 6128 /* 6129 * We obtain a partial IRE_CACHE which we will pass 6130 * along with the resolver query. When the response 6131 * comes back it will be there ready for us to add. 6132 */ 6133 /* 6134 * the newly created ire will inherit the flags of the 6135 * parent ire, if any. 6136 */ 6137 ire = ire_create_v6( 6138 v6dstp, /* dest address */ 6139 &ipv6_all_ones, /* mask */ 6140 &src_ipif->ipif_v6src_addr, /* source address */ 6141 NULL, /* gateway address */ 6142 &save_ire->ire_max_frag, 6143 NULL, /* Fast Path header */ 6144 dst_ill->ill_rq, /* recv-from queue */ 6145 dst_ill->ill_wq, /* send-to queue */ 6146 IRE_CACHE, 6147 NULL, 6148 src_ipif, 6149 NULL, 6150 (fire != NULL) ? /* Parent handle */ 6151 fire->ire_phandle : 0, 6152 save_ire->ire_ihandle, /* Interface handle */ 6153 (fire != NULL) ? 
6154 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6155 0, 6156 &ire_uinfo_null, 6157 NULL, 6158 NULL); 6159 6160 if (ire == NULL) { 6161 ire_refrele(save_ire); 6162 break; 6163 } 6164 6165 ire->ire_marks |= ire_marks; 6166 6167 /* Resolve and add ire to the ctable */ 6168 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6169 switch (err) { 6170 case 0: 6171 /* Prevent save_ire from getting deleted */ 6172 IRB_REFHOLD(save_ire->ire_bucket); 6173 /* Has it been removed already ? */ 6174 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6175 IRB_REFRELE(save_ire->ire_bucket); 6176 ire_refrele(save_ire); 6177 break; 6178 } 6179 /* 6180 * We have a resolved cache entry, 6181 * add in the IRE. 6182 */ 6183 ire_add_then_send(q, ire, first_mp); 6184 if (ip6_asp_table_held) { 6185 ip6_asp_table_refrele(); 6186 ip6_asp_table_held = B_FALSE; 6187 } 6188 6189 /* Assert that it is not deleted yet. */ 6190 ASSERT(save_ire->ire_ptpn != NULL); 6191 IRB_REFRELE(save_ire->ire_bucket); 6192 ire_refrele(save_ire); 6193 if (fire != NULL) { 6194 ire_refrele(fire); 6195 fire = NULL; 6196 } 6197 6198 /* 6199 * The resolution loop is re-entered if we 6200 * actually are in a multirouting case. 6201 */ 6202 if (copy_mp != NULL) { 6203 boolean_t need_resolve = 6204 ire_multirt_need_resolve_v6(v6dstp, 6205 MBLK_GETLABEL(copy_mp)); 6206 if (!need_resolve) { 6207 MULTIRT_DEBUG_UNTAG(copy_mp); 6208 freemsg(copy_mp); 6209 copy_mp = NULL; 6210 } else { 6211 /* 6212 * ipif_lookup_group_v6() calls 6213 * ire_lookup_multi_v6() that 6214 * uses ire_ftable_lookup_v6() 6215 * to find an IRE_INTERFACE for 6216 * the group. In the multirt 6217 * case, ire_lookup_multi_v6() 6218 * then invokes 6219 * ire_multirt_lookup_v6() to 6220 * find the next resolvable ire. 6221 * As a result, we obtain a new 6222 * interface, derived from the 6223 * next ire. 
6224 */ 6225 if (ipif_held) { 6226 ipif_refrele(ipif); 6227 ipif_held = B_FALSE; 6228 } 6229 ipif = ipif_lookup_group_v6( 6230 v6dstp, zoneid); 6231 ip2dbg(("ip_newroute_ipif: " 6232 "multirt dst %08x, " 6233 "ipif %p\n", 6234 ntohl(V4_PART_OF_V6( 6235 (*v6dstp))), 6236 (void *)ipif)); 6237 if (ipif != NULL) { 6238 ipif_held = B_TRUE; 6239 mp = copy_mp; 6240 copy_mp = NULL; 6241 multirt_resolve_next = 6242 B_TRUE; 6243 continue; 6244 } else { 6245 freemsg(copy_mp); 6246 } 6247 } 6248 } 6249 ill_refrele(dst_ill); 6250 if (ipif_held) { 6251 ipif_refrele(ipif); 6252 ipif_held = B_FALSE; 6253 } 6254 if (src_ipif != NULL) 6255 ipif_refrele(src_ipif); 6256 return; 6257 6258 case EINPROGRESS: 6259 /* 6260 * mp was consumed - presumably queued. 6261 * No need for ire, presumably resolution is 6262 * in progress, and ire will be added when the 6263 * address is resolved. 6264 */ 6265 if (ip6_asp_table_held) { 6266 ip6_asp_table_refrele(); 6267 ip6_asp_table_held = B_FALSE; 6268 } 6269 ire_delete(ire); 6270 ire_refrele(save_ire); 6271 if (fire != NULL) { 6272 ire_refrele(fire); 6273 fire = NULL; 6274 } 6275 6276 /* 6277 * The resolution loop is re-entered if we 6278 * actually are in a multirouting case. 6279 */ 6280 if (copy_mp != NULL) { 6281 boolean_t need_resolve = 6282 ire_multirt_need_resolve_v6(v6dstp, 6283 MBLK_GETLABEL(copy_mp)); 6284 if (!need_resolve) { 6285 MULTIRT_DEBUG_UNTAG(copy_mp); 6286 freemsg(copy_mp); 6287 copy_mp = NULL; 6288 } else { 6289 /* 6290 * ipif_lookup_group_v6() calls 6291 * ire_lookup_multi_v6() that 6292 * uses ire_ftable_lookup_v6() 6293 * to find an IRE_INTERFACE for 6294 * the group. In the multirt 6295 * case, ire_lookup_multi_v6() 6296 * then invokes 6297 * ire_multirt_lookup_v6() to 6298 * find the next resolvable ire. 6299 * As a result, we obtain a new 6300 * interface, derived from the 6301 * next ire. 
6302 */ 6303 if (ipif_held) { 6304 ipif_refrele(ipif); 6305 ipif_held = B_FALSE; 6306 } 6307 ipif = ipif_lookup_group_v6( 6308 v6dstp, zoneid); 6309 ip2dbg(("ip_newroute_ipif: " 6310 "multirt dst %08x, " 6311 "ipif %p\n", 6312 ntohl(V4_PART_OF_V6( 6313 (*v6dstp))), 6314 (void *)ipif)); 6315 if (ipif != NULL) { 6316 ipif_held = B_TRUE; 6317 mp = copy_mp; 6318 copy_mp = NULL; 6319 multirt_resolve_next = 6320 B_TRUE; 6321 continue; 6322 } else { 6323 freemsg(copy_mp); 6324 } 6325 } 6326 } 6327 ill_refrele(dst_ill); 6328 if (ipif_held) { 6329 ipif_refrele(ipif); 6330 ipif_held = B_FALSE; 6331 } 6332 if (src_ipif != NULL) 6333 ipif_refrele(src_ipif); 6334 return; 6335 default: 6336 /* Some transient error */ 6337 ire_refrele(save_ire); 6338 break; 6339 } 6340 break; 6341 } 6342 default: 6343 break; 6344 } 6345 if (ip6_asp_table_held) { 6346 ip6_asp_table_refrele(); 6347 ip6_asp_table_held = B_FALSE; 6348 } 6349 } while (multirt_resolve_next); 6350 6351 err_ret: 6352 if (ip6_asp_table_held) 6353 ip6_asp_table_refrele(); 6354 if (ire != NULL) 6355 ire_refrele(ire); 6356 if (fire != NULL) 6357 ire_refrele(fire); 6358 if (ipif != NULL && ipif_held) 6359 ipif_refrele(ipif); 6360 if (src_ipif != NULL) 6361 ipif_refrele(src_ipif); 6362 /* Multicast - no point in trying to generate ICMP error */ 6363 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6364 if (dst_ill != NULL) { 6365 ill = dst_ill; 6366 ill_held = B_TRUE; 6367 } 6368 if (mp->b_prev || mp->b_next) { 6369 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6370 } else { 6371 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6372 } 6373 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6374 mp->b_next = NULL; 6375 mp->b_prev = NULL; 6376 freemsg(first_mp); 6377 if (ill_held) 6378 ill_refrele(ill); 6379 } 6380 6381 /* 6382 * Parse and process any hop-by-hop or destination options. 6383 * 6384 * Assumes that q is an ill read queue so that ICMP errors for link-local 6385 * destinations are sent out the correct interface. 
6386 * 6387 * Returns -1 if there was an error and mp has been consumed. 6388 * Returns 0 if no special action is needed. 6389 * Returns 1 if the packet contained a router alert option for this node 6390 * which is verified to be "interesting/known" for our implementation. 6391 * 6392 * XXX Note: In future as more hbh or dest options are defined, 6393 * it may be better to have different routines for hbh and dest 6394 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6395 * may have same value in different namespaces. Or is it same namespace ?? 6396 * Current code checks for each opt_type (other than pads) if it is in 6397 * the expected nexthdr (hbh or dest) 6398 */ 6399 static int 6400 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6401 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6402 { 6403 uint8_t opt_type; 6404 uint_t optused; 6405 int ret = 0; 6406 mblk_t *first_mp; 6407 const char *errtype; 6408 6409 first_mp = mp; 6410 if (mp->b_datap->db_type == M_CTL) { 6411 mp = mp->b_cont; 6412 } 6413 6414 while (optlen != 0) { 6415 opt_type = *optptr; 6416 if (opt_type == IP6OPT_PAD1) { 6417 optused = 1; 6418 } else { 6419 if (optlen < 2) 6420 goto bad_opt; 6421 errtype = "malformed"; 6422 if (opt_type == ip6opt_ls) { 6423 optused = 2 + optptr[1]; 6424 if (optused > optlen) 6425 goto bad_opt; 6426 } else switch (opt_type) { 6427 case IP6OPT_PADN: 6428 /* 6429 * Note:We don't verify that (N-2) pad octets 6430 * are zero as required by spec. Adhere to 6431 * "be liberal in what you accept..." part of 6432 * implementation philosophy (RFC791,RFC1122) 6433 */ 6434 optused = 2 + optptr[1]; 6435 if (optused > optlen) 6436 goto bad_opt; 6437 break; 6438 6439 case IP6OPT_JUMBO: 6440 if (hdr_type != IPPROTO_HOPOPTS) 6441 goto opt_error; 6442 goto opt_error; /* XXX Not implemented! 
*/ 6443 6444 case IP6OPT_ROUTER_ALERT: { 6445 struct ip6_opt_router *or; 6446 6447 if (hdr_type != IPPROTO_HOPOPTS) 6448 goto opt_error; 6449 optused = 2 + optptr[1]; 6450 if (optused > optlen) 6451 goto bad_opt; 6452 or = (struct ip6_opt_router *)optptr; 6453 /* Check total length and alignment */ 6454 if (optused != sizeof (*or) || 6455 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6456 goto opt_error; 6457 /* Check value */ 6458 switch (*((uint16_t *)or->ip6or_value)) { 6459 case IP6_ALERT_MLD: 6460 case IP6_ALERT_RSVP: 6461 ret = 1; 6462 } 6463 break; 6464 } 6465 case IP6OPT_HOME_ADDRESS: { 6466 /* 6467 * Minimal support for the home address option 6468 * (which is required by all IPv6 nodes). 6469 * Implement by just swapping the home address 6470 * and source address. 6471 * XXX Note: this has IPsec implications since 6472 * AH needs to take this into account. 6473 * Also, when IPsec is used we need to ensure 6474 * that this is only processed once 6475 * in the received packet (to avoid swapping 6476 * back and forth). 6477 * NOTE:This option processing is considered 6478 * to be unsafe and prone to a denial of 6479 * service attack. 6480 * The current processing is not safe even with 6481 * IPsec secured IP packets. Since the home 6482 * address option processing requirement still 6483 * is in the IETF draft and in the process of 6484 * being redefined for its usage, it has been 6485 * decided to turn off the option by default. 6486 * If this section of code needs to be executed, 6487 * ndd variable ip6_ignore_home_address_opt 6488 * should be set to 0 at the user's own risk. 6489 */ 6490 struct ip6_opt_home_address *oh; 6491 in6_addr_t tmp; 6492 6493 if (ipv6_ignore_home_address_opt) 6494 goto opt_error; 6495 6496 if (hdr_type != IPPROTO_DSTOPTS) 6497 goto opt_error; 6498 optused = 2 + optptr[1]; 6499 if (optused > optlen) 6500 goto bad_opt; 6501 6502 /* 6503 * We did this dest. opt the first time 6504 * around (i.e. before AH processing). 
6505 * If we've done AH... stop now. 6506 */ 6507 if (first_mp != mp) { 6508 ipsec_in_t *ii; 6509 6510 ii = (ipsec_in_t *)first_mp->b_rptr; 6511 if (ii->ipsec_in_ah_sa != NULL) 6512 break; 6513 } 6514 6515 oh = (struct ip6_opt_home_address *)optptr; 6516 /* Check total length and alignment */ 6517 if (optused < sizeof (*oh) || 6518 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6519 goto opt_error; 6520 /* Swap ip6_src and the home address */ 6521 tmp = ip6h->ip6_src; 6522 /* XXX Note: only 8 byte alignment option */ 6523 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6524 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6525 break; 6526 } 6527 6528 case IP6OPT_TUNNEL_LIMIT: 6529 if (hdr_type != IPPROTO_DSTOPTS) { 6530 goto opt_error; 6531 } 6532 optused = 2 + optptr[1]; 6533 if (optused > optlen) { 6534 goto bad_opt; 6535 } 6536 if (optused != 3) { 6537 goto opt_error; 6538 } 6539 break; 6540 6541 default: 6542 errtype = "unknown"; 6543 /* FALLTHROUGH */ 6544 opt_error: 6545 switch (IP6OPT_TYPE(opt_type)) { 6546 case IP6OPT_TYPE_SKIP: 6547 optused = 2 + optptr[1]; 6548 if (optused > optlen) 6549 goto bad_opt; 6550 ip1dbg(("ip_process_options_v6: %s " 6551 "opt 0x%x skipped\n", 6552 errtype, opt_type)); 6553 break; 6554 case IP6OPT_TYPE_DISCARD: 6555 ip1dbg(("ip_process_options_v6: %s " 6556 "opt 0x%x; packet dropped\n", 6557 errtype, opt_type)); 6558 freemsg(first_mp); 6559 return (-1); 6560 case IP6OPT_TYPE_ICMP: 6561 icmp_param_problem_v6(WR(q), first_mp, 6562 ICMP6_PARAMPROB_OPTION, 6563 (uint32_t)(optptr - 6564 (uint8_t *)ip6h), 6565 B_FALSE, B_FALSE); 6566 return (-1); 6567 case IP6OPT_TYPE_FORCEICMP: 6568 icmp_param_problem_v6(WR(q), first_mp, 6569 ICMP6_PARAMPROB_OPTION, 6570 (uint32_t)(optptr - 6571 (uint8_t *)ip6h), 6572 B_FALSE, B_TRUE); 6573 return (-1); 6574 default: 6575 ASSERT(0); 6576 } 6577 } 6578 } 6579 optlen -= optused; 6580 optptr += optused; 6581 } 6582 return (ret); 6583 6584 bad_opt: 6585 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6586 
(uint32_t)(optptr - (uint8_t *)ip6h), 6587 B_FALSE, B_FALSE); 6588 return (-1); 6589 } 6590 6591 /* 6592 * Process a routing header that is not yet empty. 6593 * Only handles type 0 routing headers. 6594 */ 6595 static void 6596 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6597 ill_t *ill, uint_t flags, mblk_t *hada_mp) 6598 { 6599 ip6_rthdr0_t *rthdr; 6600 uint_t ehdrlen; 6601 uint_t numaddr; 6602 in6_addr_t *addrptr; 6603 in6_addr_t tmp; 6604 6605 ASSERT(rth->ip6r_segleft != 0); 6606 6607 if (!ipv6_forward_src_routed) { 6608 /* XXX Check for source routed out same interface? */ 6609 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6610 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6611 freemsg(hada_mp); 6612 freemsg(mp); 6613 return; 6614 } 6615 6616 if (rth->ip6r_type != 0) { 6617 if (hada_mp != NULL) 6618 goto hada_drop; 6619 icmp_param_problem_v6(WR(q), mp, 6620 ICMP6_PARAMPROB_HEADER, 6621 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6622 B_FALSE, B_FALSE); 6623 return; 6624 } 6625 rthdr = (ip6_rthdr0_t *)rth; 6626 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6627 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6628 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6629 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6630 if (rthdr->ip6r0_len & 0x1) { 6631 /* An odd length is impossible */ 6632 if (hada_mp != NULL) 6633 goto hada_drop; 6634 icmp_param_problem_v6(WR(q), mp, 6635 ICMP6_PARAMPROB_HEADER, 6636 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6637 B_FALSE, B_FALSE); 6638 return; 6639 } 6640 numaddr = rthdr->ip6r0_len / 2; 6641 if (rthdr->ip6r0_segleft > numaddr) { 6642 /* segleft exceeds number of addresses in routing header */ 6643 if (hada_mp != NULL) 6644 goto hada_drop; 6645 icmp_param_problem_v6(WR(q), mp, 6646 ICMP6_PARAMPROB_HEADER, 6647 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6648 (uchar_t *)ip6h), 6649 B_FALSE, B_FALSE); 6650 return; 6651 } 6652 addrptr += (numaddr 
- rthdr->ip6r0_segleft); 6653 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6654 IN6_IS_ADDR_MULTICAST(addrptr)) { 6655 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6656 freemsg(hada_mp); 6657 freemsg(mp); 6658 return; 6659 } 6660 /* Swap */ 6661 tmp = *addrptr; 6662 *addrptr = ip6h->ip6_dst; 6663 ip6h->ip6_dst = tmp; 6664 rthdr->ip6r0_segleft--; 6665 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6666 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6667 if (hada_mp != NULL) 6668 goto hada_drop; 6669 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6670 B_FALSE, B_FALSE); 6671 return; 6672 } 6673 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6674 return; 6675 hada_drop: 6676 /* IPsec kstats: bean counter? */ 6677 freemsg(hada_mp); 6678 freemsg(mp); 6679 } 6680 6681 /* 6682 * Read side put procedure for IPv6 module. 6683 */ 6684 static void 6685 ip_rput_v6(queue_t *q, mblk_t *mp) 6686 { 6687 mblk_t *mp1, *first_mp, *hada_mp = NULL; 6688 ip6_t *ip6h; 6689 boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; 6690 ill_t *ill; 6691 struct iocblk *iocp; 6692 uint_t flags = 0; 6693 6694 ill = (ill_t *)q->q_ptr; 6695 if (ill->ill_state_flags & ILL_CONDEMNED) { 6696 union DL_primitives *dl; 6697 6698 dl = (union DL_primitives *)mp->b_rptr; 6699 /* 6700 * Things are opening or closing - only accept DLPI 6701 * ack messages. If the stream is closing and ip_wsrv 6702 * has completed, ip_close is out of the qwait, but has 6703 * not yet completed qprocsoff. Don't proceed any further 6704 * because the ill has been cleaned up and things hanging 6705 * off the ill have been freed. 
6706 */ 6707 if ((mp->b_datap->db_type != M_PCPROTO) || 6708 (dl->dl_primitive == DL_UNITDATA_IND)) { 6709 inet_freemsg(mp); 6710 return; 6711 } 6712 } 6713 6714 switch (mp->b_datap->db_type) { 6715 case M_DATA: 6716 break; 6717 6718 case M_PROTO: 6719 case M_PCPROTO: 6720 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6721 DL_UNITDATA_IND) { 6722 /* Go handle anything other than data elsewhere. */ 6723 ip_rput_dlpi(q, mp); 6724 return; 6725 } 6726 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6727 ll_multicast = dlur->dl_group_address; 6728 #undef dlur 6729 /* Ditch the DLPI header. */ 6730 mp1 = mp; 6731 mp = mp->b_cont; 6732 freeb(mp1); 6733 break; 6734 case M_BREAK: 6735 panic("ip_rput_v6: got an M_BREAK"); 6736 /*NOTREACHED*/ 6737 case M_IOCACK: 6738 iocp = (struct iocblk *)mp->b_rptr; 6739 switch (iocp->ioc_cmd) { 6740 case DL_IOC_HDR_INFO: 6741 ill = (ill_t *)q->q_ptr; 6742 ill_fastpath_ack(ill, mp); 6743 return; 6744 case SIOCSTUNPARAM: 6745 case SIOCGTUNPARAM: 6746 case OSIOCSTUNPARAM: 6747 case OSIOCGTUNPARAM: 6748 /* Go through qwriter */ 6749 break; 6750 default: 6751 putnext(q, mp); 6752 return; 6753 } 6754 /* FALLTHRU */ 6755 case M_ERROR: 6756 case M_HANGUP: 6757 mutex_enter(&ill->ill_lock); 6758 if (ill->ill_state_flags & ILL_CONDEMNED) { 6759 mutex_exit(&ill->ill_lock); 6760 freemsg(mp); 6761 return; 6762 } 6763 ill_refhold_locked(ill); 6764 mutex_exit(&ill->ill_lock); 6765 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6766 return; 6767 case M_CTL: { 6768 if ((MBLKL(mp) > sizeof (int)) && 6769 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6770 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6771 mctl_present = B_TRUE; 6772 break; 6773 } 6774 putnext(q, mp); 6775 return; 6776 } 6777 case M_IOCNAK: 6778 iocp = (struct iocblk *)mp->b_rptr; 6779 switch (iocp->ioc_cmd) { 6780 case DL_IOC_HDR_INFO: 6781 case SIOCSTUNPARAM: 6782 case SIOCGTUNPARAM: 6783 case OSIOCSTUNPARAM: 6784 case OSIOCGTUNPARAM: 6785 
mutex_enter(&ill->ill_lock); 6786 if (ill->ill_state_flags & ILL_CONDEMNED) { 6787 mutex_exit(&ill->ill_lock); 6788 freemsg(mp); 6789 return; 6790 } 6791 ill_refhold_locked(ill); 6792 mutex_exit(&ill->ill_lock); 6793 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6794 B_FALSE); 6795 return; 6796 default: 6797 break; 6798 } 6799 /* FALLTHRU */ 6800 default: 6801 putnext(q, mp); 6802 return; 6803 } 6804 6805 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6806 /* 6807 * if db_ref > 1 then copymsg and free original. Packet may be 6808 * changed and do not want other entity who has a reference to this 6809 * message to trip over the changes. This is a blind change because 6810 * trying to catch all places that might change packet is too 6811 * difficult (since it may be a module above this one). 6812 */ 6813 if (mp->b_datap->db_ref > 1) { 6814 mblk_t *mp1; 6815 6816 mp1 = copymsg(mp); 6817 freemsg(mp); 6818 if (mp1 == NULL) { 6819 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6820 return; 6821 } 6822 mp = mp1; 6823 } 6824 first_mp = mp; 6825 if (mctl_present) { 6826 hada_mp = first_mp; 6827 mp = first_mp->b_cont; 6828 } 6829 6830 ip6h = (ip6_t *)mp->b_rptr; 6831 6832 /* check for alignment and full IPv6 header */ 6833 if (!OK_32PTR((uchar_t *)ip6h) || 6834 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6835 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6836 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6837 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6838 freemsg(first_mp); 6839 return; 6840 } 6841 ip6h = (ip6_t *)mp->b_rptr; 6842 } 6843 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6844 IPV6_DEFAULT_VERS_AND_FLOW) { 6845 /* 6846 * It may be a bit too expensive to do this mapped address 6847 * check here, but in the interest of robustness, it seems 6848 * like the correct place. 6849 * TODO: Avoid this check for e.g. 
connected TCP sockets 6850 */ 6851 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6852 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6853 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6854 freemsg(first_mp); 6855 return; 6856 } 6857 6858 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6859 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6860 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6861 freemsg(first_mp); 6862 return; 6863 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6864 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6865 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6866 freemsg(first_mp); 6867 return; 6868 } 6869 6870 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6871 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); 6872 } else { 6873 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6874 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6875 freemsg(first_mp); 6876 } 6877 } 6878 6879 /* 6880 * Walk through the IPv6 packet in mp and see if there's an AH header 6881 * in it. See if the AH header needs to get done before other headers in 6882 * the packet. (Worker function for ipsec_early_ah_v6().) 6883 */ 6884 #define IPSEC_HDR_DONT_PROCESS 0 6885 #define IPSEC_HDR_PROCESS 1 6886 #define IPSEC_MEMORY_ERROR 2 6887 static int 6888 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6889 { 6890 uint_t length; 6891 uint_t ehdrlen; 6892 uint8_t *whereptr; 6893 uint8_t *endptr; 6894 uint8_t *nexthdrp; 6895 ip6_dest_t *desthdr; 6896 ip6_rthdr_t *rthdr; 6897 ip6_t *ip6h; 6898 6899 /* 6900 * For now just pullup everything. In general, the less pullups, 6901 * the better, but there's so much squirrelling through anyway, 6902 * it's just easier this way. 
6903 */ 6904 if (!pullupmsg(mp, -1)) { 6905 return (IPSEC_MEMORY_ERROR); 6906 } 6907 6908 ip6h = (ip6_t *)mp->b_rptr; 6909 length = IPV6_HDR_LEN; 6910 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6911 endptr = mp->b_wptr; 6912 6913 /* 6914 * We can't just use the argument nexthdr in the place 6915 * of nexthdrp becaue we don't dereference nexthdrp 6916 * till we confirm whether it is a valid address. 6917 */ 6918 nexthdrp = &ip6h->ip6_nxt; 6919 while (whereptr < endptr) { 6920 /* Is there enough left for len + nexthdr? */ 6921 if (whereptr + MIN_EHDR_LEN > endptr) 6922 return (IPSEC_MEMORY_ERROR); 6923 6924 switch (*nexthdrp) { 6925 case IPPROTO_HOPOPTS: 6926 case IPPROTO_DSTOPTS: 6927 /* Assumes the headers are identical for hbh and dst */ 6928 desthdr = (ip6_dest_t *)whereptr; 6929 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6930 if ((uchar_t *)desthdr + ehdrlen > endptr) 6931 return (IPSEC_MEMORY_ERROR); 6932 /* 6933 * Return DONT_PROCESS because of potential Mobile IPv6 6934 * cruft for destination options. 6935 */ 6936 if (*nexthdrp == IPPROTO_DSTOPTS) 6937 return (IPSEC_HDR_DONT_PROCESS); 6938 nexthdrp = &desthdr->ip6d_nxt; 6939 break; 6940 case IPPROTO_ROUTING: 6941 rthdr = (ip6_rthdr_t *)whereptr; 6942 6943 /* 6944 * If there's more hops left on the routing header, 6945 * return now with DON'T PROCESS. 
6946 */ 6947 if (rthdr->ip6r_segleft > 0) 6948 return (IPSEC_HDR_DONT_PROCESS); 6949 6950 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6951 if ((uchar_t *)rthdr + ehdrlen > endptr) 6952 return (IPSEC_MEMORY_ERROR); 6953 nexthdrp = &rthdr->ip6r_nxt; 6954 break; 6955 case IPPROTO_FRAGMENT: 6956 /* Wait for reassembly */ 6957 return (IPSEC_HDR_DONT_PROCESS); 6958 case IPPROTO_AH: 6959 *nexthdr = IPPROTO_AH; 6960 return (IPSEC_HDR_PROCESS); 6961 case IPPROTO_NONE: 6962 /* No next header means we're finished */ 6963 default: 6964 return (IPSEC_HDR_DONT_PROCESS); 6965 } 6966 length += ehdrlen; 6967 whereptr += ehdrlen; 6968 } 6969 panic("ipsec_needs_processing_v6"); 6970 /*NOTREACHED*/ 6971 } 6972 6973 /* 6974 * Path for AH if options are present. If this is the first time we are 6975 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6976 * Otherwise, just fanout. Return value answers the boolean question: 6977 * "Did I consume the mblk you sent me?" 6978 * 6979 * Sometimes AH needs to be done before other IPv6 headers for security 6980 * reasons. This function (and its ipsec_needs_processing_v6() above) 6981 * indicates if that is so, and fans out to the appropriate IPsec protocol 6982 * for the datagram passed in. 6983 */ 6984 static boolean_t 6985 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6986 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 6987 { 6988 mblk_t *mp; 6989 uint8_t nexthdr; 6990 ipsec_in_t *ii = NULL; 6991 ah_t *ah; 6992 ipsec_status_t ipsec_rc; 6993 6994 ASSERT((hada_mp == NULL) || (!mctl_present)); 6995 6996 switch (ipsec_needs_processing_v6( 6997 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6998 case IPSEC_MEMORY_ERROR: 6999 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7000 freemsg(hada_mp); 7001 freemsg(first_mp); 7002 return (B_TRUE); 7003 case IPSEC_HDR_DONT_PROCESS: 7004 return (B_FALSE); 7005 } 7006 7007 /* Default means send it to AH! 
*/ 7008 ASSERT(nexthdr == IPPROTO_AH); 7009 if (!mctl_present) { 7010 mp = first_mp; 7011 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7012 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7013 "allocation failure.\n")); 7014 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7015 freemsg(hada_mp); 7016 freemsg(mp); 7017 return (B_TRUE); 7018 } 7019 /* 7020 * Store the ill_index so that when we come back 7021 * from IPSEC we ride on the same queue. 7022 */ 7023 ii = (ipsec_in_t *)first_mp->b_rptr; 7024 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7025 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7026 first_mp->b_cont = mp; 7027 } 7028 /* 7029 * Cache hardware acceleration info. 7030 */ 7031 if (hada_mp != NULL) { 7032 ASSERT(ii != NULL); 7033 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7034 "caching data attr.\n")); 7035 ii->ipsec_in_accelerated = B_TRUE; 7036 ii->ipsec_in_da = hada_mp; 7037 } 7038 7039 if (!ipsec_loaded()) { 7040 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7041 return (B_TRUE); 7042 } 7043 7044 ah = ipsec_inbound_ah_sa(first_mp); 7045 if (ah == NULL) 7046 return (B_TRUE); 7047 ASSERT(ii->ipsec_in_ah_sa != NULL); 7048 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7049 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7050 7051 switch (ipsec_rc) { 7052 case IPSEC_STATUS_SUCCESS: 7053 /* we're done with IPsec processing, send it up */ 7054 ip_fanout_proto_again(first_mp, ill, ill, ire); 7055 break; 7056 case IPSEC_STATUS_FAILED: 7057 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7058 break; 7059 case IPSEC_STATUS_PENDING: 7060 /* no action needed */ 7061 break; 7062 } 7063 return (B_TRUE); 7064 } 7065 7066 /* 7067 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7068 * ip_rput_v6 has already verified alignment, the min length, the version, 7069 * and db_ref = 1. 7070 * 7071 * The ill passed in (the arg named inill) is the ill that the packet 7072 * actually arrived on. 
We need to remember this when saving the 7073 * input interface index into potential IPV6_PKTINFO data in 7074 * ip_add_info_v6(). 7075 */ 7076 void 7077 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7078 uint_t flags, mblk_t *hada_mp) 7079 { 7080 ire_t *ire = NULL; 7081 queue_t *rq; 7082 ill_t *ill = inill; 7083 ipif_t *ipif; 7084 uint8_t *whereptr; 7085 uint8_t nexthdr; 7086 uint16_t remlen; 7087 uint_t prev_nexthdr_offset; 7088 uint_t used; 7089 size_t pkt_len; 7090 uint16_t ip6_len; 7091 uint_t hdr_len; 7092 boolean_t mctl_present; 7093 mblk_t *first_mp; 7094 mblk_t *first_mp1; 7095 boolean_t no_forward; 7096 ip6_hbh_t *hbhhdr; 7097 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7098 conn_t *connp; 7099 ilm_t *ilm; 7100 uint32_t ports; 7101 uint_t ipif_id = 0; 7102 zoneid_t zoneid = GLOBAL_ZONEID; 7103 uint16_t hck_flags, reass_hck_flags; 7104 uint32_t reass_sum; 7105 boolean_t cksum_err; 7106 mblk_t *mp1; 7107 7108 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7109 7110 if (hada_mp != NULL) { 7111 /* 7112 * It's an IPsec accelerated packet. 7113 * Keep a pointer to the data attributes around until 7114 * we allocate the ipsecinfo structure. 7115 */ 7116 IPSECHW_DEBUG(IPSECHW_PKT, 7117 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7118 hada_mp->b_cont = NULL; 7119 /* 7120 * Since it is accelerated, it came directly from 7121 * the ill. 7122 */ 7123 ASSERT(mctl_present == B_FALSE); 7124 ASSERT(mp->b_datap->db_type != M_CTL); 7125 } 7126 7127 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7128 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7129 7130 if (mp->b_cont == NULL) 7131 pkt_len = mp->b_wptr - mp->b_rptr; 7132 else 7133 pkt_len = msgdsize(mp); 7134 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7135 7136 /* 7137 * Check for bogus (too short packet) and packet which 7138 * was padded by the link layer. 
7139 */ 7140 if (ip6_len != pkt_len) { 7141 ssize_t diff; 7142 7143 if (ip6_len > pkt_len) { 7144 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 7145 ip6_len, pkt_len)); 7146 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7147 freemsg(hada_mp); 7148 freemsg(first_mp); 7149 return; 7150 } 7151 diff = (ssize_t)(pkt_len - ip6_len); 7152 7153 if (!adjmsg(mp, -diff)) { 7154 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7155 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7156 freemsg(hada_mp); 7157 freemsg(first_mp); 7158 return; 7159 } 7160 pkt_len -= diff; 7161 } 7162 7163 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7164 hck_flags = DB_CKSUMFLAGS(mp); 7165 else 7166 hck_flags = 0; 7167 7168 /* Clear checksum flags in case we need to forward */ 7169 DB_CKSUMFLAGS(mp) = 0; 7170 reass_sum = reass_hck_flags = 0; 7171 7172 nexthdr = ip6h->ip6_nxt; 7173 7174 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7175 (uchar_t *)ip6h); 7176 whereptr = (uint8_t *)&ip6h[1]; 7177 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7178 7179 /* Process hop by hop header options */ 7180 if (nexthdr == IPPROTO_HOPOPTS) { 7181 uint_t ehdrlen; 7182 uint8_t *optptr; 7183 7184 if (remlen < MIN_EHDR_LEN) 7185 goto pkt_too_short; 7186 if (mp->b_cont != NULL && 7187 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7188 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7189 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7190 freemsg(hada_mp); 7191 freemsg(first_mp); 7192 return; 7193 } 7194 ip6h = (ip6_t *)mp->b_rptr; 7195 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7196 } 7197 hbhhdr = (ip6_hbh_t *)whereptr; 7198 nexthdr = hbhhdr->ip6h_nxt; 7199 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7200 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7201 7202 if (remlen < ehdrlen) 7203 goto pkt_too_short; 7204 if (mp->b_cont != NULL && 7205 whereptr + ehdrlen > mp->b_wptr) { 7206 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7207 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 7208 freemsg(hada_mp); 7209 freemsg(first_mp); 7210 return; 7211 } 7212 ip6h = (ip6_t *)mp->b_rptr; 7213 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7214 hbhhdr = (ip6_hbh_t *)whereptr; 7215 } 7216 7217 optptr = whereptr + 2; 7218 whereptr += ehdrlen; 7219 remlen -= ehdrlen; 7220 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7221 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7222 case -1: 7223 /* 7224 * Packet has been consumed and any 7225 * needed ICMP messages sent. 7226 */ 7227 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7228 freemsg(hada_mp); 7229 return; 7230 case 0: 7231 /* no action needed */ 7232 break; 7233 case 1: 7234 /* Known router alert */ 7235 goto ipv6forus; 7236 } 7237 } 7238 7239 /* 7240 * Attach any necessary label information to this packet. 7241 */ 7242 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7243 if (ip6opt_ls != 0) 7244 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7245 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7246 freemsg(hada_mp); 7247 freemsg(first_mp); 7248 return; 7249 } 7250 7251 /* 7252 * On incoming v6 multicast packets we will bypass the ire table, 7253 * and assume that the read queue corresponds to the targetted 7254 * interface. 7255 * 7256 * The effect of this is the same as the IPv4 original code, but is 7257 * much cleaner I think. See ip_rput for how that was done. 7258 */ 7259 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7260 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7261 /* 7262 * XXX TODO Give to mrouted to for multicast forwarding. 
7263 */ 7264 ILM_WALKER_HOLD(ill); 7265 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7266 ILM_WALKER_RELE(ill); 7267 if (ilm == NULL) { 7268 if (ip_debug > 3) { 7269 /* ip2dbg */ 7270 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7271 " which is not for us: %s\n", AF_INET6, 7272 &ip6h->ip6_dst); 7273 } 7274 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7275 freemsg(hada_mp); 7276 freemsg(first_mp); 7277 return; 7278 } 7279 if (ip_debug > 3) { 7280 /* ip2dbg */ 7281 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7282 AF_INET6, &ip6h->ip6_dst); 7283 } 7284 rq = ill->ill_rq; 7285 zoneid = GLOBAL_ZONEID; 7286 goto ipv6forus; 7287 } 7288 7289 ipif = ill->ill_ipif; 7290 7291 /* 7292 * If a packet was received on an interface that is a 6to4 tunnel, 7293 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7294 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7295 * the 6to4 prefix of the address configured on the receiving interface. 7296 * Otherwise, the packet was delivered to this interface in error and 7297 * the packet must be dropped. 7298 */ 7299 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7300 7301 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7302 &ip6h->ip6_dst)) { 7303 if (ip_debug > 2) { 7304 /* ip1dbg */ 7305 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7306 "addressed packet which is not for us: " 7307 "%s\n", AF_INET6, &ip6h->ip6_dst); 7308 } 7309 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7310 freemsg(first_mp); 7311 return; 7312 } 7313 } 7314 7315 /* 7316 * Find an ire that matches destination. For link-local addresses 7317 * we have to match the ill. 7318 * TBD for site local addresses. 
7319 */ 7320 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7321 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7322 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7323 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7324 } else { 7325 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7326 MBLK_GETLABEL(mp)); 7327 } 7328 if (ire == NULL) { 7329 /* 7330 * No matching IRE found. Mark this packet as having 7331 * originated externally. 7332 */ 7333 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7334 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7335 if (!(ill->ill_flags & ILLF_ROUTER)) 7336 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7337 freemsg(hada_mp); 7338 freemsg(first_mp); 7339 return; 7340 } 7341 if (ip6h->ip6_hops <= 1) { 7342 if (hada_mp != NULL) 7343 goto hada_drop; 7344 icmp_time_exceeded_v6(WR(q), first_mp, 7345 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7346 return; 7347 } 7348 /* 7349 * Per RFC 3513 section 2.5.2, we must not forward packets with 7350 * an unspecified source address. 7351 */ 7352 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7353 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7354 freemsg(hada_mp); 7355 freemsg(first_mp); 7356 return; 7357 } 7358 mp->b_prev = (mblk_t *)(uintptr_t) 7359 ill->ill_phyint->phyint_ifindex; 7360 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7361 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7362 ALL_ZONES); 7363 return; 7364 } 7365 ipif_id = ire->ire_ipif->ipif_seqid; 7366 /* we have a matching IRE */ 7367 if (ire->ire_stq != NULL) { 7368 ill_group_t *ill_group; 7369 ill_group_t *ire_group; 7370 7371 /* 7372 * To be quicker, we may wish not to chase pointers 7373 * (ire->ire_ipif->ipif_ill...) and instead store the 7374 * forwarding policy in the ire. An unfortunate side- 7375 * effect of this would be requiring an ire flush whenever 7376 * the ILLF_ROUTER flag changes. For now, chase pointers 7377 * once and store in the boolean no_forward. 
7378 * 7379 * This appears twice to keep it out of the non-forwarding, 7380 * yes-it's-for-us-on-the-right-interface case. 7381 */ 7382 no_forward = ((ill->ill_flags & 7383 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7384 7385 7386 ASSERT(first_mp == mp); 7387 /* 7388 * This ire has a send-to queue - forward the packet. 7389 */ 7390 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7391 freemsg(hada_mp); 7392 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7393 if (no_forward) 7394 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7395 freemsg(mp); 7396 ire_refrele(ire); 7397 return; 7398 } 7399 if (ip6h->ip6_hops <= 1) { 7400 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7401 icmp_time_exceeded_v6(WR(q), mp, 7402 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7403 ire_refrele(ire); 7404 return; 7405 } 7406 /* 7407 * Per RFC 3513 section 2.5.2, we must not forward packets with 7408 * an unspecified source address. 7409 */ 7410 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7411 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7412 freemsg(mp); 7413 ire_refrele(ire); 7414 return; 7415 } 7416 7417 if (is_system_labeled()) { 7418 mblk_t *mp1; 7419 7420 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7421 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7422 freemsg(mp); 7423 ire_refrele(ire); 7424 return; 7425 } 7426 /* Size may have changed */ 7427 mp = mp1; 7428 ip6h = (ip6_t *)mp->b_rptr; 7429 pkt_len = msgdsize(mp); 7430 } 7431 7432 if (pkt_len > ire->ire_max_frag) { 7433 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7434 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7435 ll_multicast, B_TRUE); 7436 ire_refrele(ire); 7437 return; 7438 } 7439 7440 /* 7441 * Check to see if we're forwarding the packet to a 7442 * different link from which it came. If so, check the 7443 * source and destination addresses since routers must not 7444 * forward any packets with link-local source or 7445 * destination addresses to other links. 
Otherwise (if 7446 * we're forwarding onto the same link), conditionally send 7447 * a redirect message. 7448 */ 7449 ill_group = ill->ill_group; 7450 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7451 if (ire->ire_rfq != q && (ill_group == NULL || 7452 ill_group != ire_group)) { 7453 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7454 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7455 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7456 freemsg(mp); 7457 ire_refrele(ire); 7458 return; 7459 } 7460 /* TBD add site-local check at site boundary? */ 7461 } else if (ipv6_send_redirects) { 7462 in6_addr_t *v6targ; 7463 in6_addr_t gw_addr_v6; 7464 ire_t *src_ire_v6 = NULL; 7465 7466 /* 7467 * Don't send a redirect when forwarding a source 7468 * routed packet. 7469 */ 7470 if (ip_source_routed_v6(ip6h, mp)) 7471 goto forward; 7472 7473 mutex_enter(&ire->ire_lock); 7474 gw_addr_v6 = ire->ire_gateway_addr_v6; 7475 mutex_exit(&ire->ire_lock); 7476 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7477 v6targ = &gw_addr_v6; 7478 /* 7479 * We won't send redirects to a router 7480 * that doesn't have a link local 7481 * address, but will forward. 7482 */ 7483 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7484 BUMP_MIB(ill->ill_ip6_mib, 7485 ipv6InAddrErrors); 7486 goto forward; 7487 } 7488 } else { 7489 v6targ = &ip6h->ip6_dst; 7490 } 7491 7492 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7493 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7494 ALL_ZONES, 0, NULL, 7495 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7496 7497 if (src_ire_v6 != NULL) { 7498 /* 7499 * The source is directly connected. 
7500 */ 7501 mp1 = copymsg(mp); 7502 if (mp1 != NULL) { 7503 icmp_send_redirect_v6(WR(q), 7504 mp1, v6targ, &ip6h->ip6_dst, 7505 ill, B_FALSE); 7506 } 7507 ire_refrele(src_ire_v6); 7508 } 7509 } 7510 7511 forward: 7512 /* Hoplimit verified above */ 7513 ip6h->ip6_hops--; 7514 UPDATE_IB_PKT_COUNT(ire); 7515 ire->ire_last_used_time = lbolt; 7516 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7517 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7518 IRE_REFRELE(ire); 7519 return; 7520 } 7521 rq = ire->ire_rfq; 7522 7523 /* 7524 * Need to put on correct queue for reassembly to find it. 7525 * No need to use put() since reassembly has its own locks. 7526 * Note: multicast packets and packets destined to addresses 7527 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7528 * the arriving ill. 7529 */ 7530 if (rq != q) { 7531 boolean_t check_multi = B_TRUE; 7532 ill_group_t *ill_group = NULL; 7533 ill_group_t *ire_group = NULL; 7534 ill_t *ire_ill = NULL; 7535 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7536 7537 /* 7538 * To be quicker, we may wish not to chase pointers 7539 * (ire->ire_ipif->ipif_ill...) and instead store the 7540 * forwarding policy in the ire. An unfortunate side- 7541 * effect of this would be requiring an ire flush whenever 7542 * the ILLF_ROUTER flag changes. For now, chase pointers 7543 * once and store in the boolean no_forward. 7544 */ 7545 no_forward = ((ill->ill_flags & 7546 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7547 7548 ill_group = ill->ill_group; 7549 if (rq != NULL) { 7550 ire_ill = (ill_t *)(rq->q_ptr); 7551 ire_group = ire_ill->ill_group; 7552 } 7553 7554 /* 7555 * If it's part of the same IPMP group, or if it's a legal 7556 * address on the 'usesrc' interface, then bypass strict 7557 * checks. 
7558 */ 7559 if (ill_group != NULL && ill_group == ire_group) { 7560 check_multi = B_FALSE; 7561 } else if (ill_ifindex != 0 && ire_ill != NULL && 7562 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7563 check_multi = B_FALSE; 7564 } 7565 7566 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7567 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7568 /* 7569 * This packet came in on an interface other than the 7570 * one associated with the destination address 7571 * and we are strict about matches. 7572 * 7573 * As long as the ills belong to the same group, 7574 * we don't consider them to arriving on the wrong 7575 * interface. Thus, when the switch is doing inbound 7576 * load spreading, we won't drop packets when we 7577 * are doing strict multihoming checks. 7578 */ 7579 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7580 freemsg(hada_mp); 7581 freemsg(first_mp); 7582 ire_refrele(ire); 7583 return; 7584 } 7585 7586 if (rq != NULL) 7587 q = rq; 7588 7589 ill = (ill_t *)q->q_ptr; 7590 ASSERT(ill); 7591 } 7592 7593 zoneid = ire->ire_zoneid; 7594 UPDATE_IB_PKT_COUNT(ire); 7595 ire->ire_last_used_time = lbolt; 7596 /* Don't use the ire after this point. */ 7597 ire_refrele(ire); 7598 ipv6forus: 7599 /* 7600 * Looks like this packet is for us one way or another. 7601 * This is where we'll process destination headers etc. 
7602 */ 7603 for (; ; ) { 7604 switch (nexthdr) { 7605 case IPPROTO_TCP: { 7606 uint16_t *up; 7607 uint32_t sum; 7608 int offset; 7609 7610 hdr_len = pkt_len - remlen; 7611 7612 if (hada_mp != NULL) { 7613 ip0dbg(("tcp hada drop\n")); 7614 goto hada_drop; 7615 } 7616 7617 7618 /* TCP needs all of the TCP header */ 7619 if (remlen < TCP_MIN_HEADER_LENGTH) 7620 goto pkt_too_short; 7621 if (mp->b_cont != NULL && 7622 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7623 if (!pullupmsg(mp, 7624 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7625 BUMP_MIB(ill->ill_ip6_mib, 7626 ipv6InDiscards); 7627 freemsg(first_mp); 7628 return; 7629 } 7630 hck_flags = 0; 7631 ip6h = (ip6_t *)mp->b_rptr; 7632 whereptr = (uint8_t *)ip6h + hdr_len; 7633 } 7634 /* 7635 * Extract the offset field from the TCP header. 7636 */ 7637 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7638 if (offset != 5) { 7639 if (offset < 5) { 7640 ip1dbg(("ip_rput_data_v6: short " 7641 "TCP data offset")); 7642 BUMP_MIB(ill->ill_ip6_mib, 7643 ipv6InDiscards); 7644 freemsg(first_mp); 7645 return; 7646 } 7647 /* 7648 * There must be TCP options. 7649 * Make sure we can grab them. 7650 */ 7651 offset <<= 2; 7652 if (remlen < offset) 7653 goto pkt_too_short; 7654 if (mp->b_cont != NULL && 7655 whereptr + offset > mp->b_wptr) { 7656 if (!pullupmsg(mp, 7657 hdr_len + offset)) { 7658 BUMP_MIB(ill->ill_ip6_mib, 7659 ipv6InDiscards); 7660 freemsg(first_mp); 7661 return; 7662 } 7663 hck_flags = 0; 7664 ip6h = (ip6_t *)mp->b_rptr; 7665 whereptr = (uint8_t *)ip6h + hdr_len; 7666 } 7667 } 7668 7669 up = (uint16_t *)&ip6h->ip6_src; 7670 /* 7671 * TCP checksum calculation. 
First sum up the 7672 * pseudo-header fields: 7673 * - Source IPv6 address 7674 * - Destination IPv6 address 7675 * - TCP payload length 7676 * - TCP protocol ID 7677 */ 7678 sum = htons(IPPROTO_TCP + remlen) + 7679 up[0] + up[1] + up[2] + up[3] + 7680 up[4] + up[5] + up[6] + up[7] + 7681 up[8] + up[9] + up[10] + up[11] + 7682 up[12] + up[13] + up[14] + up[15]; 7683 7684 /* Fold initial sum */ 7685 sum = (sum & 0xffff) + (sum >> 16); 7686 7687 mp1 = mp->b_cont; 7688 7689 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7690 IP6_STAT(ip6_in_sw_cksum); 7691 7692 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7693 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7694 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7695 mp, mp1, cksum_err); 7696 7697 if (cksum_err) { 7698 BUMP_MIB(&ip_mib, tcpInErrs); 7699 7700 if (hck_flags & HCK_FULLCKSUM) 7701 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7702 else if (hck_flags & HCK_PARTIALCKSUM) 7703 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7704 else 7705 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7706 7707 freemsg(first_mp); 7708 return; 7709 } 7710 tcp_fanout: 7711 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7712 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7713 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7714 return; 7715 } 7716 case IPPROTO_SCTP: 7717 { 7718 sctp_hdr_t *sctph; 7719 uint32_t calcsum, pktsum; 7720 uint_t hdr_len = pkt_len - remlen; 7721 7722 /* SCTP needs all of the SCTP header */ 7723 if (remlen < sizeof (*sctph)) { 7724 goto pkt_too_short; 7725 } 7726 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7727 ASSERT(mp->b_cont != NULL); 7728 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7729 BUMP_MIB(ill->ill_ip6_mib, 7730 ipv6InDiscards); 7731 freemsg(mp); 7732 return; 7733 } 7734 ip6h = (ip6_t *)mp->b_rptr; 7735 whereptr = (uint8_t *)ip6h + hdr_len; 7736 } 7737 7738 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7739 /* checksum */ 7740 pktsum = sctph->sh_chksum; 7741 sctph->sh_chksum = 0; 7742 calcsum = sctp_cksum(mp, 
hdr_len); 7743 if (calcsum != pktsum) { 7744 BUMP_MIB(&sctp_mib, sctpChecksumError); 7745 freemsg(mp); 7746 return; 7747 } 7748 sctph->sh_chksum = pktsum; 7749 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7750 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7751 ports, ipif_id, zoneid, mp)) == NULL) { 7752 ip_fanout_sctp_raw(first_mp, ill, 7753 (ipha_t *)ip6h, B_FALSE, ports, 7754 mctl_present, 7755 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7756 B_TRUE, ipif_id, zoneid); 7757 return; 7758 } 7759 BUMP_MIB(&ip_mib, ipInDelivers); 7760 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7761 B_FALSE, mctl_present); 7762 return; 7763 } 7764 case IPPROTO_UDP: { 7765 uint16_t *up; 7766 uint32_t sum; 7767 7768 hdr_len = pkt_len - remlen; 7769 7770 if (hada_mp != NULL) { 7771 ip0dbg(("udp hada drop\n")); 7772 goto hada_drop; 7773 } 7774 7775 /* Verify that at least the ports are present */ 7776 if (remlen < UDPH_SIZE) 7777 goto pkt_too_short; 7778 if (mp->b_cont != NULL && 7779 whereptr + UDPH_SIZE > mp->b_wptr) { 7780 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7781 BUMP_MIB(ill->ill_ip6_mib, 7782 ipv6InDiscards); 7783 freemsg(first_mp); 7784 return; 7785 } 7786 hck_flags = 0; 7787 ip6h = (ip6_t *)mp->b_rptr; 7788 whereptr = (uint8_t *)ip6h + hdr_len; 7789 } 7790 7791 /* 7792 * Before going through the regular checksum 7793 * calculation, make sure the received checksum 7794 * is non-zero. RFC 2460 says, a 0x0000 checksum 7795 * in a UDP packet (within IPv6 packet) is invalid 7796 * and should be replaced by 0xffff. This makes 7797 * sense as regular checksum calculation will 7798 * pass for both the cases i.e. 0x0000 and 0xffff. 7799 * Removing one of the case makes error detection 7800 * stronger. 
7801 */ 7802 7803 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7804 /* 0x0000 checksum is invalid */ 7805 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7806 "checksum value 0x0000\n")); 7807 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7808 freemsg(first_mp); 7809 return; 7810 } 7811 7812 up = (uint16_t *)&ip6h->ip6_src; 7813 7814 /* 7815 * UDP checksum calculation. First sum up the 7816 * pseudo-header fields: 7817 * - Source IPv6 address 7818 * - Destination IPv6 address 7819 * - UDP payload length 7820 * - UDP protocol ID 7821 */ 7822 7823 sum = htons(IPPROTO_UDP + remlen) + 7824 up[0] + up[1] + up[2] + up[3] + 7825 up[4] + up[5] + up[6] + up[7] + 7826 up[8] + up[9] + up[10] + up[11] + 7827 up[12] + up[13] + up[14] + up[15]; 7828 7829 /* Fold initial sum */ 7830 sum = (sum & 0xffff) + (sum >> 16); 7831 7832 if (reass_hck_flags != 0) { 7833 hck_flags = reass_hck_flags; 7834 7835 IP_CKSUM_RECV_REASS(hck_flags, 7836 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7837 sum, reass_sum, cksum_err); 7838 } else { 7839 mp1 = mp->b_cont; 7840 7841 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7842 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7843 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7844 mp, mp1, cksum_err); 7845 } 7846 7847 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7848 IP6_STAT(ip6_in_sw_cksum); 7849 7850 if (cksum_err) { 7851 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7852 7853 if (hck_flags & HCK_FULLCKSUM) 7854 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7855 else if (hck_flags & HCK_PARTIALCKSUM) 7856 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7857 else 7858 IP6_STAT(ip6_udp_in_sw_cksum_err); 7859 7860 freemsg(first_mp); 7861 return; 7862 } 7863 goto udp_fanout; 7864 } 7865 case IPPROTO_ICMPV6: { 7866 uint16_t *up; 7867 uint32_t sum; 7868 uint_t hdr_len = pkt_len - remlen; 7869 7870 if (hada_mp != NULL) { 7871 ip0dbg(("icmp hada drop\n")); 7872 goto hada_drop; 7873 } 7874 7875 up = (uint16_t *)&ip6h->ip6_src; 7876 sum = htons(IPPROTO_ICMPV6 + remlen) + 7877 
up[0] + up[1] + up[2] + up[3] + 7878 up[4] + up[5] + up[6] + up[7] + 7879 up[8] + up[9] + up[10] + up[11] + 7880 up[12] + up[13] + up[14] + up[15]; 7881 sum = (sum & 0xffff) + (sum >> 16); 7882 sum = IP_CSUM(mp, hdr_len, sum); 7883 if (sum != 0) { 7884 /* IPv6 ICMP checksum failed */ 7885 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7886 "failed %x\n", 7887 sum)); 7888 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7889 BUMP_MIB(ill->ill_icmp6_mib, 7890 ipv6IfIcmpInErrors); 7891 freemsg(first_mp); 7892 return; 7893 } 7894 7895 icmp_fanout: 7896 /* Check variable for testing applications */ 7897 if (ipv6_drop_inbound_icmpv6) { 7898 freemsg(first_mp); 7899 return; 7900 } 7901 /* 7902 * Assume that there is always at least one conn for 7903 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7904 * where there is no conn. 7905 */ 7906 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7907 ASSERT(!(ill->ill_phyint->phyint_flags & 7908 PHYI_LOOPBACK)); 7909 /* 7910 * In the multicast case, applications may have 7911 * joined the group from different zones, so we 7912 * need to deliver the packet to each of them. 7913 * Loop through the multicast memberships 7914 * structures (ilm) on the receive ill and send 7915 * a copy of the packet up each matching one. 
7916 */ 7917 ILM_WALKER_HOLD(ill); 7918 for (ilm = ill->ill_ilm; ilm != NULL; 7919 ilm = ilm->ilm_next) { 7920 if (ilm->ilm_flags & ILM_DELETED) 7921 continue; 7922 if (!IN6_ARE_ADDR_EQUAL( 7923 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7924 continue; 7925 if (!ipif_lookup_zoneid(ill, 7926 ilm->ilm_zoneid, IPIF_UP, NULL)) 7927 continue; 7928 7929 first_mp1 = ip_copymsg(first_mp); 7930 if (first_mp1 == NULL) 7931 continue; 7932 icmp_inbound_v6(q, first_mp1, ill, 7933 hdr_len, mctl_present, 0, 7934 ilm->ilm_zoneid); 7935 } 7936 ILM_WALKER_RELE(ill); 7937 } else { 7938 first_mp1 = ip_copymsg(first_mp); 7939 if (first_mp1 != NULL) 7940 icmp_inbound_v6(q, first_mp1, ill, 7941 hdr_len, mctl_present, 0, zoneid); 7942 } 7943 } 7944 /* FALLTHRU */ 7945 default: { 7946 /* 7947 * Handle protocols with which IPv6 is less intimate. 7948 */ 7949 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7950 7951 if (hada_mp != NULL) { 7952 ip0dbg(("default hada drop\n")); 7953 goto hada_drop; 7954 } 7955 7956 /* 7957 * Enable sending ICMP for "Unknown" nexthdr 7958 * case. i.e. where we did not FALLTHRU from 7959 * IPPROTO_ICMPV6 processing case above. 7960 * If we did FALLTHRU, then the packet has already been 7961 * processed for IPPF, don't process it again in 7962 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7963 * flags 7964 */ 7965 if (nexthdr != IPPROTO_ICMPV6) 7966 proto_flags |= IP_FF_SEND_ICMP; 7967 else 7968 proto_flags |= IP6_NO_IPPOLICY; 7969 7970 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7971 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7972 mctl_present, zoneid); 7973 return; 7974 } 7975 7976 case IPPROTO_DSTOPTS: { 7977 uint_t ehdrlen; 7978 uint8_t *optptr; 7979 ip6_dest_t *desthdr; 7980 7981 /* Check if AH is present. */ 7982 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7983 ire, hada_mp, zoneid)) { 7984 ip0dbg(("dst early hada drop\n")); 7985 return; 7986 } 7987 7988 /* 7989 * Reinitialize pointers, as ipsec_early_ah_v6() does 7990 * complete pullups. 
We don't have to do more pullups 7991 * as a result. 7992 */ 7993 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7994 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7995 ip6h = (ip6_t *)mp->b_rptr; 7996 7997 if (remlen < MIN_EHDR_LEN) 7998 goto pkt_too_short; 7999 8000 desthdr = (ip6_dest_t *)whereptr; 8001 nexthdr = desthdr->ip6d_nxt; 8002 prev_nexthdr_offset = (uint_t)(whereptr - 8003 (uint8_t *)ip6h); 8004 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8005 if (remlen < ehdrlen) 8006 goto pkt_too_short; 8007 optptr = whereptr + 2; 8008 /* 8009 * Note: XXX This code does not seem to make 8010 * distinction between Destination Options Header 8011 * being before/after Routing Header which can 8012 * happen if we are at the end of source route. 8013 * This may become significant in future. 8014 * (No real significant Destination Options are 8015 * defined/implemented yet ). 8016 */ 8017 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8018 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8019 case -1: 8020 /* 8021 * Packet has been consumed and any needed 8022 * ICMP errors sent. 
8023 */ 8024 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8025 freemsg(hada_mp); 8026 return; 8027 case 0: 8028 /* No action needed continue */ 8029 break; 8030 case 1: 8031 /* 8032 * Unnexpected return value 8033 * (Router alert is a Hop-by-Hop option) 8034 */ 8035 #ifdef DEBUG 8036 panic("ip_rput_data_v6: router " 8037 "alert hbh opt indication in dest opt"); 8038 /*NOTREACHED*/ 8039 #else 8040 freemsg(hada_mp); 8041 freemsg(first_mp); 8042 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8043 return; 8044 #endif 8045 } 8046 used = ehdrlen; 8047 break; 8048 } 8049 case IPPROTO_FRAGMENT: { 8050 ip6_frag_t *fraghdr; 8051 size_t no_frag_hdr_len; 8052 8053 if (hada_mp != NULL) { 8054 ip0dbg(("frag hada drop\n")); 8055 goto hada_drop; 8056 } 8057 8058 ASSERT(first_mp == mp); 8059 if (remlen < sizeof (ip6_frag_t)) 8060 goto pkt_too_short; 8061 8062 if (mp->b_cont != NULL && 8063 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8064 if (!pullupmsg(mp, 8065 pkt_len - remlen + sizeof (ip6_frag_t))) { 8066 BUMP_MIB(ill->ill_ip6_mib, 8067 ipv6InDiscards); 8068 freemsg(mp); 8069 return; 8070 } 8071 hck_flags = 0; 8072 ip6h = (ip6_t *)mp->b_rptr; 8073 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8074 } 8075 8076 fraghdr = (ip6_frag_t *)whereptr; 8077 used = (uint_t)sizeof (ip6_frag_t); 8078 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8079 8080 /* 8081 * Invoke the CGTP (multirouting) filtering module to 8082 * process the incoming packet. Packets identified as 8083 * duplicates must be discarded. Filtering is active 8084 * only if the the ip_cgtp_filter ndd variable is 8085 * non-zero. 
8086 */ 8087 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8088 int cgtp_flt_pkt = 8089 ip_cgtp_filter_ops->cfo_filter_v6( 8090 inill->ill_rq, ip6h, fraghdr); 8091 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8092 freemsg(mp); 8093 return; 8094 } 8095 } 8096 8097 /* Restore the flags */ 8098 DB_CKSUMFLAGS(mp) = hck_flags; 8099 8100 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8101 remlen - used, &prev_nexthdr_offset, 8102 &reass_sum, &reass_hck_flags); 8103 if (mp == NULL) { 8104 /* Reassembly is still pending */ 8105 return; 8106 } 8107 /* The first mblk are the headers before the frag hdr */ 8108 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8109 8110 first_mp = mp; /* mp has most likely changed! */ 8111 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8112 ip6h = (ip6_t *)mp->b_rptr; 8113 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8114 whereptr = mp->b_rptr + no_frag_hdr_len; 8115 remlen = ntohs(ip6h->ip6_plen) + 8116 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8117 pkt_len = msgdsize(mp); 8118 used = 0; 8119 break; 8120 } 8121 case IPPROTO_HOPOPTS: 8122 if (hada_mp != NULL) { 8123 ip0dbg(("hop hada drop\n")); 8124 goto hada_drop; 8125 } 8126 /* 8127 * Illegal header sequence. 8128 * (Hop-by-hop headers are processed above 8129 * and required to immediately follow IPv6 header) 8130 */ 8131 icmp_param_problem_v6(WR(q), first_mp, 8132 ICMP6_PARAMPROB_NEXTHEADER, 8133 prev_nexthdr_offset, 8134 B_FALSE, B_FALSE); 8135 return; 8136 8137 case IPPROTO_ROUTING: { 8138 uint_t ehdrlen; 8139 ip6_rthdr_t *rthdr; 8140 8141 /* Check if AH is present. */ 8142 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8143 ire, hada_mp, zoneid)) { 8144 ip0dbg(("routing hada drop\n")); 8145 return; 8146 } 8147 8148 /* 8149 * Reinitialize pointers, as ipsec_early_ah_v6() does 8150 * complete pullups. We don't have to do more pullups 8151 * as a result. 
8152 */ 8153 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8154 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8155 ip6h = (ip6_t *)mp->b_rptr; 8156 8157 if (remlen < MIN_EHDR_LEN) 8158 goto pkt_too_short; 8159 rthdr = (ip6_rthdr_t *)whereptr; 8160 nexthdr = rthdr->ip6r_nxt; 8161 prev_nexthdr_offset = (uint_t)(whereptr - 8162 (uint8_t *)ip6h); 8163 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8164 if (remlen < ehdrlen) 8165 goto pkt_too_short; 8166 if (rthdr->ip6r_segleft != 0) { 8167 /* Not end of source route */ 8168 if (ll_multicast) { 8169 BUMP_MIB(ill->ill_ip6_mib, 8170 ipv6ForwProhibits); 8171 freemsg(hada_mp); 8172 freemsg(mp); 8173 return; 8174 } 8175 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8176 flags, hada_mp); 8177 return; 8178 } 8179 used = ehdrlen; 8180 break; 8181 } 8182 case IPPROTO_AH: 8183 case IPPROTO_ESP: { 8184 /* 8185 * Fast path for AH/ESP. If this is the first time 8186 * we are sending a datagram to AH/ESP, allocate 8187 * a IPSEC_IN message and prepend it. Otherwise, 8188 * just fanout. 8189 */ 8190 8191 ipsec_in_t *ii; 8192 int ipsec_rc; 8193 8194 if (!mctl_present) { 8195 ASSERT(first_mp == mp); 8196 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8197 NULL) { 8198 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8199 "allocation failure.\n")); 8200 BUMP_MIB(ill->ill_ip6_mib, 8201 ipv6InDiscards); 8202 freemsg(mp); 8203 return; 8204 } 8205 /* 8206 * Store the ill_index so that when we come back 8207 * from IPSEC we ride on the same queue. 8208 */ 8209 ii = (ipsec_in_t *)first_mp->b_rptr; 8210 ii->ipsec_in_ill_index = 8211 ill->ill_phyint->phyint_ifindex; 8212 ii->ipsec_in_rill_index = 8213 ii->ipsec_in_ill_index; 8214 first_mp->b_cont = mp; 8215 /* 8216 * Cache hardware acceleration info. 
8217 */ 8218 if (hada_mp != NULL) { 8219 IPSECHW_DEBUG(IPSECHW_PKT, 8220 ("ip_rput_data_v6: " 8221 "caching data attr.\n")); 8222 ii->ipsec_in_accelerated = B_TRUE; 8223 ii->ipsec_in_da = hada_mp; 8224 hada_mp = NULL; 8225 } 8226 } else { 8227 ii = (ipsec_in_t *)first_mp->b_rptr; 8228 } 8229 8230 if (!ipsec_loaded()) { 8231 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8232 ire->ire_zoneid); 8233 return; 8234 } 8235 8236 /* select inbound SA and have IPsec process the pkt */ 8237 if (nexthdr == IPPROTO_ESP) { 8238 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8239 if (esph == NULL) 8240 return; 8241 ASSERT(ii->ipsec_in_esp_sa != NULL); 8242 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8243 NULL); 8244 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8245 first_mp, esph); 8246 } else { 8247 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8248 if (ah == NULL) 8249 return; 8250 ASSERT(ii->ipsec_in_ah_sa != NULL); 8251 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8252 NULL); 8253 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8254 first_mp, ah); 8255 } 8256 8257 switch (ipsec_rc) { 8258 case IPSEC_STATUS_SUCCESS: 8259 break; 8260 case IPSEC_STATUS_FAILED: 8261 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8262 /* FALLTHRU */ 8263 case IPSEC_STATUS_PENDING: 8264 return; 8265 } 8266 /* we're done with IPsec processing, send it up */ 8267 ip_fanout_proto_again(first_mp, ill, inill, ire); 8268 return; 8269 } 8270 case IPPROTO_NONE: 8271 /* All processing is done. Count as "delivered". 
*/ 8272 freemsg(hada_mp); 8273 freemsg(first_mp); 8274 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8275 return; 8276 } 8277 whereptr += used; 8278 ASSERT(remlen >= used); 8279 remlen -= used; 8280 } 8281 /* NOTREACHED */ 8282 8283 pkt_too_short: 8284 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8285 ip6_len, pkt_len, remlen)); 8286 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8287 freemsg(hada_mp); 8288 freemsg(first_mp); 8289 return; 8290 udp_fanout: 8291 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8292 connp = NULL; 8293 } else { 8294 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8295 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8296 CONN_DEC_REF(connp); 8297 connp = NULL; 8298 } 8299 } 8300 8301 if (connp == NULL) { 8302 uint32_t ports; 8303 8304 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8305 UDP_PORTS_OFFSET); 8306 IP6_STAT(ip6_udp_slow_path); 8307 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8308 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8309 zoneid); 8310 return; 8311 } 8312 8313 if (CONN_UDP_FLOWCTLD(connp)) { 8314 freemsg(first_mp); 8315 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8316 CONN_DEC_REF(connp); 8317 return; 8318 } 8319 8320 /* Initiate IPPF processing */ 8321 if (IP6_IN_IPP(flags)) { 8322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8323 if (mp == NULL) { 8324 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8325 CONN_DEC_REF(connp); 8326 return; 8327 } 8328 } 8329 8330 if (connp->conn_ipv6_recvpktinfo || 8331 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8332 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8333 if (mp == NULL) { 8334 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8335 CONN_DEC_REF(connp); 8336 return; 8337 } 8338 } 8339 8340 IP6_STAT(ip6_udp_fast_path); 8341 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8342 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8343 8344 /* Send it upstream */ 8345 CONN_UDP_RECV(connp, mp); 8346 8347 CONN_DEC_REF(connp); 
8348 freemsg(hada_mp); 8349 return; 8350 8351 hada_drop: 8352 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8353 /* IPsec kstats: bump counter here */ 8354 freemsg(hada_mp); 8355 freemsg(first_mp); 8356 } 8357 8358 /* 8359 * Reassemble fragment. 8360 * When it returns a completed message the first mblk will only contain 8361 * the headers prior to the fragment header. 8362 * 8363 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8364 * of the preceding header. This is needed to patch the previous header's 8365 * nexthdr field when reassembly completes. 8366 */ 8367 static mblk_t * 8368 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8369 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8370 uint32_t *cksum_val, uint16_t *cksum_flags) 8371 { 8372 ill_t *ill = (ill_t *)q->q_ptr; 8373 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8374 uint16_t offset; 8375 boolean_t more_frags; 8376 uint8_t nexthdr = fraghdr->ip6f_nxt; 8377 in6_addr_t *v6dst_ptr; 8378 in6_addr_t *v6src_ptr; 8379 uint_t end; 8380 uint_t hdr_length; 8381 size_t count; 8382 ipf_t *ipf; 8383 ipf_t **ipfp; 8384 ipfb_t *ipfb; 8385 mblk_t *mp1; 8386 uint8_t ecn_info = 0; 8387 size_t msg_len; 8388 mblk_t *tail_mp; 8389 mblk_t *t_mp; 8390 boolean_t pruned = B_FALSE; 8391 uint32_t sum_val; 8392 uint16_t sum_flags; 8393 8394 8395 if (cksum_val != NULL) 8396 *cksum_val = 0; 8397 if (cksum_flags != NULL) 8398 *cksum_flags = 0; 8399 8400 /* 8401 * We utilize hardware computed checksum info only for UDP since 8402 * IP fragmentation is a normal occurence for the protocol. In 8403 * addition, checksum offload support for IP fragments carrying 8404 * UDP payload is commonly implemented across network adapters. 
8405 */ 8406 ASSERT(ill != NULL); 8407 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8408 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8409 mblk_t *mp1 = mp->b_cont; 8410 int32_t len; 8411 8412 /* Record checksum information from the packet */ 8413 sum_val = (uint32_t)DB_CKSUM16(mp); 8414 sum_flags = DB_CKSUMFLAGS(mp); 8415 8416 /* fragmented payload offset from beginning of mblk */ 8417 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8418 8419 if ((sum_flags & HCK_PARTIALCKSUM) && 8420 (mp1 == NULL || mp1->b_cont == NULL) && 8421 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8422 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8423 uint32_t adj; 8424 /* 8425 * Partial checksum has been calculated by hardware 8426 * and attached to the packet; in addition, any 8427 * prepended extraneous data is even byte aligned. 8428 * If any such data exists, we adjust the checksum; 8429 * this would also handle any postpended data. 8430 */ 8431 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8432 mp, mp1, len, adj); 8433 8434 /* One's complement subtract extraneous checksum */ 8435 if (adj >= sum_val) 8436 sum_val = ~(adj - sum_val) & 0xFFFF; 8437 else 8438 sum_val -= adj; 8439 } 8440 } else { 8441 sum_val = 0; 8442 sum_flags = 0; 8443 } 8444 8445 /* Clear hardware checksumming flag */ 8446 DB_CKSUMFLAGS(mp) = 0; 8447 8448 /* 8449 * Note: Fragment offset in header is in 8-octet units. 8450 * Clearing least significant 3 bits not only extracts 8451 * it but also gets it in units of octets. 8452 */ 8453 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8454 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8455 8456 /* 8457 * Is the more frags flag on and the payload length not a multiple 8458 * of eight? 
8459 */ 8460 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8461 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8462 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8463 (uint32_t)((char *)&ip6h->ip6_plen - 8464 (char *)ip6h), B_FALSE, B_FALSE); 8465 return (NULL); 8466 } 8467 8468 v6src_ptr = &ip6h->ip6_src; 8469 v6dst_ptr = &ip6h->ip6_dst; 8470 end = remlen; 8471 8472 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8473 end += offset; 8474 8475 /* 8476 * Would fragment cause reassembled packet to have a payload length 8477 * greater than IP_MAXPACKET - the max payload size? 8478 */ 8479 if (end > IP_MAXPACKET) { 8480 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8481 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8482 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8483 (char *)ip6h), B_FALSE, B_FALSE); 8484 return (NULL); 8485 } 8486 8487 /* 8488 * This packet just has one fragment. Reassembly not 8489 * needed. 8490 */ 8491 if (!more_frags && offset == 0) { 8492 goto reass_done; 8493 } 8494 8495 /* 8496 * Drop the fragmented as early as possible, if 8497 * we don't have resource(s) to re-assemble. 8498 */ 8499 if (ip_reass_queue_bytes == 0) { 8500 freemsg(mp); 8501 return (NULL); 8502 } 8503 8504 /* Record the ECN field info. */ 8505 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8506 /* 8507 * If this is not the first fragment, dump the unfragmentable 8508 * portion of the packet. 8509 */ 8510 if (offset) 8511 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8512 8513 /* 8514 * Fragmentation reassembly. Each ILL has a hash table for 8515 * queueing packets undergoing reassembly for all IPIFs 8516 * associated with the ILL. The hash is based on the packet 8517 * IP ident field. The ILL frag hash table was allocated 8518 * as a timer block at the time the ILL was created. Whenever 8519 * there is anything on the reassembly queue, the timer will 8520 * be running. 
8521 */ 8522 msg_len = MBLKSIZE(mp); 8523 tail_mp = mp; 8524 while (tail_mp->b_cont != NULL) { 8525 tail_mp = tail_mp->b_cont; 8526 msg_len += MBLKSIZE(tail_mp); 8527 } 8528 /* 8529 * If the reassembly list for this ILL will get too big 8530 * prune it. 8531 */ 8532 8533 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8534 ip_reass_queue_bytes) { 8535 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8536 : (ip_reass_queue_bytes - msg_len)); 8537 pruned = B_TRUE; 8538 } 8539 8540 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8541 mutex_enter(&ipfb->ipfb_lock); 8542 8543 ipfp = &ipfb->ipfb_ipf; 8544 /* Try to find an existing fragment queue for this packet. */ 8545 for (;;) { 8546 ipf = ipfp[0]; 8547 if (ipf) { 8548 /* 8549 * It has to match on ident, source address, and 8550 * dest address. 8551 */ 8552 if (ipf->ipf_ident == ident && 8553 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8554 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8555 8556 /* 8557 * If we have received too many 8558 * duplicate fragments for this packet 8559 * free it. 8560 */ 8561 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8562 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8563 freemsg(mp); 8564 mutex_exit(&ipfb->ipfb_lock); 8565 return (NULL); 8566 } 8567 8568 break; 8569 } 8570 ipfp = &ipf->ipf_hash_next; 8571 continue; 8572 } 8573 8574 8575 /* 8576 * If we pruned the list, do we want to store this new 8577 * fragment?. We apply an optimization here based on the 8578 * fact that most fragments will be received in order. 8579 * So if the offset of this incoming fragment is zero, 8580 * it is the first fragment of a new packet. We will 8581 * keep it. Otherwise drop the fragment, as we have 8582 * probably pruned the packet already (since the 8583 * packet cannot be found). 8584 */ 8585 8586 if (pruned && offset != 0) { 8587 mutex_exit(&ipfb->ipfb_lock); 8588 freemsg(mp); 8589 return (NULL); 8590 } 8591 8592 /* New guy. Allocate a frag message. 
*/ 8593 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8594 if (!mp1) { 8595 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8596 freemsg(mp); 8597 partial_reass_done: 8598 mutex_exit(&ipfb->ipfb_lock); 8599 return (NULL); 8600 } 8601 8602 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8603 /* 8604 * Too many fragmented packets in this hash bucket. 8605 * Free the oldest. 8606 */ 8607 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8608 } 8609 8610 mp1->b_cont = mp; 8611 8612 /* Initialize the fragment header. */ 8613 ipf = (ipf_t *)mp1->b_rptr; 8614 ipf->ipf_mp = mp1; 8615 ipf->ipf_ptphn = ipfp; 8616 ipfp[0] = ipf; 8617 ipf->ipf_hash_next = NULL; 8618 ipf->ipf_ident = ident; 8619 ipf->ipf_v6src = *v6src_ptr; 8620 ipf->ipf_v6dst = *v6dst_ptr; 8621 /* Record reassembly start time. */ 8622 ipf->ipf_timestamp = gethrestime_sec(); 8623 /* Record ipf generation and account for frag header */ 8624 ipf->ipf_gen = ill->ill_ipf_gen++; 8625 ipf->ipf_count = MBLKSIZE(mp1); 8626 ipf->ipf_protocol = nexthdr; 8627 ipf->ipf_nf_hdr_len = 0; 8628 ipf->ipf_prev_nexthdr_offset = 0; 8629 ipf->ipf_last_frag_seen = B_FALSE; 8630 ipf->ipf_ecn = ecn_info; 8631 ipf->ipf_num_dups = 0; 8632 ipfb->ipfb_frag_pkts++; 8633 ipf->ipf_checksum = 0; 8634 ipf->ipf_checksum_flags = 0; 8635 8636 /* Store checksum value in fragment header */ 8637 if (sum_flags != 0) { 8638 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8639 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8640 ipf->ipf_checksum = sum_val; 8641 ipf->ipf_checksum_flags = sum_flags; 8642 } 8643 8644 /* 8645 * We handle reassembly two ways. In the easy case, 8646 * where all the fragments show up in order, we do 8647 * minimal bookkeeping, and just clip new pieces on 8648 * the end. If we ever see a hole, then we go off 8649 * to ip_reassemble which has to mark the pieces and 8650 * keep track of the number of holes, etc. Obviously, 8651 * the point of having both mechanisms is so we can 8652 * handle the easy case as efficiently as possible. 
8653 */ 8654 if (offset == 0) { 8655 /* Easy case, in-order reassembly so far. */ 8656 /* Update the byte count */ 8657 ipf->ipf_count += msg_len; 8658 ipf->ipf_tail_mp = tail_mp; 8659 /* 8660 * Keep track of next expected offset in 8661 * ipf_end. 8662 */ 8663 ipf->ipf_end = end; 8664 ipf->ipf_nf_hdr_len = hdr_length; 8665 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8666 } else { 8667 /* Hard case, hole at the beginning. */ 8668 ipf->ipf_tail_mp = NULL; 8669 /* 8670 * ipf_end == 0 means that we have given up 8671 * on easy reassembly. 8672 */ 8673 ipf->ipf_end = 0; 8674 8675 /* Forget checksum offload from now on */ 8676 ipf->ipf_checksum_flags = 0; 8677 8678 /* 8679 * ipf_hole_cnt is set by ip_reassemble. 8680 * ipf_count is updated by ip_reassemble. 8681 * No need to check for return value here 8682 * as we don't expect reassembly to complete or 8683 * fail for the first fragment itself. 8684 */ 8685 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8686 msg_len); 8687 } 8688 /* Update per ipfb and ill byte counts */ 8689 ipfb->ipfb_count += ipf->ipf_count; 8690 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8691 ill->ill_frag_count += ipf->ipf_count; 8692 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8693 /* If the frag timer wasn't already going, start it. */ 8694 mutex_enter(&ill->ill_lock); 8695 ill_frag_timer_start(ill); 8696 mutex_exit(&ill->ill_lock); 8697 goto partial_reass_done; 8698 } 8699 8700 /* 8701 * If the packet's flag has changed (it could be coming up 8702 * from an interface different than the previous, therefore 8703 * possibly different checksum capability), then forget about 8704 * any stored checksum states. Otherwise add the value to 8705 * the existing one stored in the fragment header. 
8706 */ 8707 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8708 sum_val += ipf->ipf_checksum; 8709 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8710 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8711 ipf->ipf_checksum = sum_val; 8712 } else if (ipf->ipf_checksum_flags != 0) { 8713 /* Forget checksum offload from now on */ 8714 ipf->ipf_checksum_flags = 0; 8715 } 8716 8717 /* 8718 * We have a new piece of a datagram which is already being 8719 * reassembled. Update the ECN info if all IP fragments 8720 * are ECN capable. If there is one which is not, clear 8721 * all the info. If there is at least one which has CE 8722 * code point, IP needs to report that up to transport. 8723 */ 8724 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8725 if (ecn_info == IPH_ECN_CE) 8726 ipf->ipf_ecn = IPH_ECN_CE; 8727 } else { 8728 ipf->ipf_ecn = IPH_ECN_NECT; 8729 } 8730 8731 if (offset && ipf->ipf_end == offset) { 8732 /* The new fragment fits at the end */ 8733 ipf->ipf_tail_mp->b_cont = mp; 8734 /* Update the byte count */ 8735 ipf->ipf_count += msg_len; 8736 /* Update per ipfb and ill byte counts */ 8737 ipfb->ipfb_count += msg_len; 8738 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8739 ill->ill_frag_count += msg_len; 8740 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8741 if (more_frags) { 8742 /* More to come. */ 8743 ipf->ipf_end = end; 8744 ipf->ipf_tail_mp = tail_mp; 8745 goto partial_reass_done; 8746 } 8747 } else { 8748 /* 8749 * Go do the hard cases. 8750 * Call ip_reassemble(). 
8751 */ 8752 int ret; 8753 8754 if (offset == 0) { 8755 if (ipf->ipf_prev_nexthdr_offset == 0) { 8756 ipf->ipf_nf_hdr_len = hdr_length; 8757 ipf->ipf_prev_nexthdr_offset = 8758 *prev_nexthdr_offset; 8759 } 8760 } 8761 /* Save current byte count */ 8762 count = ipf->ipf_count; 8763 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8764 8765 /* Count of bytes added and subtracted (freeb()ed) */ 8766 count = ipf->ipf_count - count; 8767 if (count) { 8768 /* Update per ipfb and ill byte counts */ 8769 ipfb->ipfb_count += count; 8770 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8771 ill->ill_frag_count += count; 8772 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8773 } 8774 if (ret == IP_REASS_PARTIAL) { 8775 goto partial_reass_done; 8776 } else if (ret == IP_REASS_FAILED) { 8777 /* Reassembly failed. Free up all resources */ 8778 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8779 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8780 IP_REASS_SET_START(t_mp, 0); 8781 IP_REASS_SET_END(t_mp, 0); 8782 } 8783 freemsg(mp); 8784 goto partial_reass_done; 8785 } 8786 8787 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8788 } 8789 /* 8790 * We have completed reassembly. Unhook the frag header from 8791 * the reassembly list. 8792 * 8793 * Grab the unfragmentable header length next header value out 8794 * of the first fragment 8795 */ 8796 ASSERT(ipf->ipf_nf_hdr_len != 0); 8797 hdr_length = ipf->ipf_nf_hdr_len; 8798 8799 /* 8800 * Before we free the frag header, record the ECN info 8801 * to report back to the transport. 
8802 */ 8803 ecn_info = ipf->ipf_ecn; 8804 8805 /* 8806 * Store the nextheader field in the header preceding the fragment 8807 * header 8808 */ 8809 nexthdr = ipf->ipf_protocol; 8810 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8811 ipfp = ipf->ipf_ptphn; 8812 8813 /* We need to supply these to caller */ 8814 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8815 sum_val = ipf->ipf_checksum; 8816 else 8817 sum_val = 0; 8818 8819 mp1 = ipf->ipf_mp; 8820 count = ipf->ipf_count; 8821 ipf = ipf->ipf_hash_next; 8822 if (ipf) 8823 ipf->ipf_ptphn = ipfp; 8824 ipfp[0] = ipf; 8825 ill->ill_frag_count -= count; 8826 ASSERT(ipfb->ipfb_count >= count); 8827 ipfb->ipfb_count -= count; 8828 ipfb->ipfb_frag_pkts--; 8829 mutex_exit(&ipfb->ipfb_lock); 8830 /* Ditch the frag header. */ 8831 mp = mp1->b_cont; 8832 freeb(mp1); 8833 8834 /* 8835 * Make sure the packet is good by doing some sanity 8836 * check. If bad we can silentely drop the packet. 8837 */ 8838 reass_done: 8839 if (hdr_length < sizeof (ip6_frag_t)) { 8840 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8841 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8842 freemsg(mp); 8843 return (NULL); 8844 } 8845 8846 /* 8847 * Remove the fragment header from the initial header by 8848 * splitting the mblk into the non-fragmentable header and 8849 * everthing after the fragment extension header. This has the 8850 * side effect of putting all the headers that need destination 8851 * processing into the b_cont block-- on return this fact is 8852 * used in order to avoid having to look at the extensions 8853 * already processed. 8854 * 8855 * Note that this code assumes that the unfragmentable portion 8856 * of the header is in the first mblk and increments 8857 * the read pointer past it. If this assumption is broken 8858 * this code fails badly. 
8859 */ 8860 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8861 mblk_t *nmp; 8862 8863 if (!(nmp = dupb(mp))) { 8864 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8865 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8866 freemsg(mp); 8867 return (NULL); 8868 } 8869 nmp->b_cont = mp->b_cont; 8870 mp->b_cont = nmp; 8871 nmp->b_rptr += hdr_length; 8872 } 8873 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8874 8875 ip6h = (ip6_t *)mp->b_rptr; 8876 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8877 8878 /* Restore original IP length in header. */ 8879 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8880 /* Record the ECN info. */ 8881 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8882 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8883 8884 /* Reassembly is successful; return checksum information if needed */ 8885 if (cksum_val != NULL) 8886 *cksum_val = sum_val; 8887 if (cksum_flags != NULL) 8888 *cksum_flags = sum_flags; 8889 8890 return (mp); 8891 } 8892 8893 /* 8894 * Walk through the options to see if there is a routing header. 8895 * If present get the destination which is the last address of 8896 * the option. 
8897 */ 8898 in6_addr_t 8899 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8900 { 8901 uint8_t nexthdr; 8902 uint8_t *whereptr; 8903 ip6_hbh_t *hbhhdr; 8904 ip6_dest_t *dsthdr; 8905 ip6_rthdr0_t *rthdr; 8906 ip6_frag_t *fraghdr; 8907 int ehdrlen; 8908 int left; 8909 in6_addr_t *ap, rv; 8910 8911 if (is_fragment != NULL) 8912 *is_fragment = B_FALSE; 8913 8914 rv = ip6h->ip6_dst; 8915 8916 nexthdr = ip6h->ip6_nxt; 8917 whereptr = (uint8_t *)&ip6h[1]; 8918 for (;;) { 8919 8920 ASSERT(nexthdr != IPPROTO_RAW); 8921 switch (nexthdr) { 8922 case IPPROTO_HOPOPTS: 8923 hbhhdr = (ip6_hbh_t *)whereptr; 8924 nexthdr = hbhhdr->ip6h_nxt; 8925 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8926 break; 8927 case IPPROTO_DSTOPTS: 8928 dsthdr = (ip6_dest_t *)whereptr; 8929 nexthdr = dsthdr->ip6d_nxt; 8930 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8931 break; 8932 case IPPROTO_ROUTING: 8933 rthdr = (ip6_rthdr0_t *)whereptr; 8934 nexthdr = rthdr->ip6r0_nxt; 8935 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8936 8937 left = rthdr->ip6r0_segleft; 8938 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8939 rv = *(ap + left - 1); 8940 /* 8941 * If the caller doesn't care whether the packet 8942 * is a fragment or not, we can stop here since 8943 * we have our destination. 8944 */ 8945 if (is_fragment == NULL) 8946 goto done; 8947 break; 8948 case IPPROTO_FRAGMENT: 8949 fraghdr = (ip6_frag_t *)whereptr; 8950 nexthdr = fraghdr->ip6f_nxt; 8951 ehdrlen = sizeof (ip6_frag_t); 8952 if (is_fragment != NULL) 8953 *is_fragment = B_TRUE; 8954 goto done; 8955 default : 8956 goto done; 8957 } 8958 whereptr += ehdrlen; 8959 } 8960 8961 done: 8962 return (rv); 8963 } 8964 8965 /* 8966 * ip_source_routed_v6: 8967 * This function is called by redirect code in ip_rput_data_v6 to 8968 * know whether this packet is source routed through this node i.e 8969 * whether this node (router) is part of the journey. 
This 8970 * function is called under two cases : 8971 * 8972 * case 1 : Routing header was processed by this node and 8973 * ip_process_rthdr replaced ip6_dst with the next hop 8974 * and we are forwarding the packet to the next hop. 8975 * 8976 * case 2 : Routing header was not processed by this node and we 8977 * are just forwarding the packet. 8978 * 8979 * For case (1) we don't want to send redirects. For case(2) we 8980 * want to send redirects. 8981 */ 8982 static boolean_t 8983 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 8984 { 8985 uint8_t nexthdr; 8986 in6_addr_t *addrptr; 8987 ip6_rthdr0_t *rthdr; 8988 uint8_t numaddr; 8989 ip6_hbh_t *hbhhdr; 8990 uint_t ehdrlen; 8991 uint8_t *byteptr; 8992 8993 ip2dbg(("ip_source_routed_v6\n")); 8994 nexthdr = ip6h->ip6_nxt; 8995 ehdrlen = IPV6_HDR_LEN; 8996 8997 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8998 while (nexthdr == IPPROTO_HOPOPTS || 8999 nexthdr == IPPROTO_DSTOPTS) { 9000 byteptr = (uint8_t *)ip6h + ehdrlen; 9001 /* 9002 * Check if we have already processed 9003 * packets or we are just a forwarding 9004 * router which only pulled up msgs up 9005 * to IPV6HDR and one HBH ext header 9006 */ 9007 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9008 ip2dbg(("ip_source_routed_v6: Extension" 9009 " headers not processed\n")); 9010 return (B_FALSE); 9011 } 9012 hbhhdr = (ip6_hbh_t *)byteptr; 9013 nexthdr = hbhhdr->ip6h_nxt; 9014 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9015 } 9016 switch (nexthdr) { 9017 case IPPROTO_ROUTING: 9018 byteptr = (uint8_t *)ip6h + ehdrlen; 9019 /* 9020 * If for some reason, we haven't pulled up 9021 * the routing hdr data mblk, then we must 9022 * not have processed it at all. So for sure 9023 * we are not part of the source routed journey. 
9024 */ 9025 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9026 ip2dbg(("ip_source_routed_v6: Routing" 9027 " header not processed\n")); 9028 return (B_FALSE); 9029 } 9030 rthdr = (ip6_rthdr0_t *)byteptr; 9031 /* 9032 * Either we are an intermediate router or the 9033 * last hop before destination and we have 9034 * already processed the routing header. 9035 * If segment_left is greater than or equal to zero, 9036 * then we must be the (numaddr - segleft) entry 9037 * of the routing header. Although ip6r0_segleft 9038 * is a unit8_t variable, we still check for zero 9039 * or greater value, if in case the data type 9040 * is changed someday in future. 9041 */ 9042 if (rthdr->ip6r0_segleft > 0 || 9043 rthdr->ip6r0_segleft == 0) { 9044 ire_t *ire = NULL; 9045 9046 numaddr = rthdr->ip6r0_len / 2; 9047 addrptr = (in6_addr_t *)((char *)rthdr + 9048 sizeof (*rthdr)); 9049 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9050 if (addrptr != NULL) { 9051 ire = ire_ctable_lookup_v6(addrptr, NULL, 9052 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9053 MATCH_IRE_TYPE); 9054 if (ire != NULL) { 9055 ire_refrele(ire); 9056 return (B_TRUE); 9057 } 9058 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9059 } 9060 } 9061 /* FALLTHRU */ 9062 default: 9063 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9064 return (B_FALSE); 9065 } 9066 } 9067 9068 /* 9069 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9070 * Assumes that the following set of headers appear in the first 9071 * mblk: 9072 * ip6i_t (if present) CAN also appear as a separate mblk. 9073 * ip6_t 9074 * Any extension headers 9075 * TCP/UDP/SCTP header (if present) 9076 * The routine can handle an ICMPv6 header that is not in the first mblk. 9077 * 9078 * The order to determine the outgoing interface is as follows: 9079 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9080 * 2. If conn_nofailover_ill is set then use that ill. 9081 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9082 * 4. If q is an ill queue and (link local or multicast destination) then 9083 * use that ill. 9084 * 5. If IPV6_BOUND_IF has been set use that ill. 9085 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9086 * look for the best IRE match for the unspecified group to determine 9087 * the ill. 9088 * 7. For unicast: Just do an IRE lookup for the best match. 9089 */ 9090 void 9091 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9092 { 9093 conn_t *connp = NULL; 9094 queue_t *q = (queue_t *)arg2; 9095 ire_t *ire = NULL; 9096 ire_t *sctp_ire = NULL; 9097 ip6_t *ip6h; 9098 in6_addr_t *v6dstp; 9099 ill_t *ill = NULL; 9100 ipif_t *ipif; 9101 ip6i_t *ip6i; 9102 int cksum_request; /* -1 => normal. */ 9103 /* 1 => Skip TCP/UDP/SCTP checksum */ 9104 /* Otherwise contains insert offset for checksum */ 9105 int unspec_src; 9106 boolean_t do_outrequests; /* Increment OutRequests? */ 9107 mib2_ipv6IfStatsEntry_t *mibptr; 9108 int match_flags = MATCH_IRE_ILL_GROUP; 9109 boolean_t attach_if = B_FALSE; 9110 mblk_t *first_mp; 9111 boolean_t mctl_present; 9112 ipsec_out_t *io; 9113 boolean_t drop_if_delayed = B_FALSE; 9114 boolean_t multirt_need_resolve = B_FALSE; 9115 mblk_t *copy_mp = NULL; 9116 int err; 9117 int ip6i_flags = 0; 9118 zoneid_t zoneid; 9119 ill_t *saved_ill = NULL; 9120 boolean_t conn_lock_held; 9121 boolean_t need_decref = B_FALSE; 9122 9123 /* 9124 * Highest bit in version field is Reachability Confirmation bit 9125 * used by NUD in ip_xmit_v6(). 9126 */ 9127 #ifdef _BIG_ENDIAN 9128 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9129 #else 9130 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9131 #endif 9132 9133 /* 9134 * M_CTL comes from 5 places 9135 * 9136 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9137 * both V4 and V6 datagrams. 9138 * 9139 * 2) AH/ESP sends down M_CTL after doing their job with both 9140 * V4 and V6 datagrams. 
9141 * 9142 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9143 * attached. 9144 * 9145 * 4) Notifications from an external resolver (for XRESOLV ifs) 9146 * 9147 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9148 * IPsec hardware acceleration support. 9149 * 9150 * We need to handle (1)'s IPv6 case and (3) here. For the 9151 * IPv4 case in (1), and (2), IPSEC processing has already 9152 * started. The code in ip_wput() already knows how to handle 9153 * continuing IPSEC processing (for IPv4 and IPv6). All other 9154 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9155 * for handling. 9156 */ 9157 first_mp = mp; 9158 mctl_present = B_FALSE; 9159 io = NULL; 9160 9161 /* Multidata transmit? */ 9162 if (DB_TYPE(mp) == M_MULTIDATA) { 9163 /* 9164 * We should never get here, since all Multidata messages 9165 * originating from tcp should have been directed over to 9166 * tcp_multisend() in the first place. 9167 */ 9168 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9169 freemsg(mp); 9170 return; 9171 } else if (DB_TYPE(mp) == M_CTL) { 9172 uint32_t mctltype = 0; 9173 uint32_t mlen = MBLKL(first_mp); 9174 9175 mp = mp->b_cont; 9176 mctl_present = B_TRUE; 9177 io = (ipsec_out_t *)first_mp->b_rptr; 9178 9179 /* 9180 * Validate this M_CTL message. The only three types of 9181 * M_CTL messages we expect to see in this code path are 9182 * ipsec_out_t or ipsec_in_t structures (allocated as 9183 * ipsec_info_t unions), or ipsec_ctl_t structures. 9184 * The ipsec_out_type and ipsec_in_type overlap in the two 9185 * data structures, and they are either set to IPSEC_OUT 9186 * or IPSEC_IN depending on which data structure it is. 9187 * ipsec_ctl_t is an IPSEC_CTL. 9188 * 9189 * All other M_CTL messages are sent to ip_wput_nondata() 9190 * for handling. 
9191 */ 9192 if (mlen >= sizeof (io->ipsec_out_type)) 9193 mctltype = io->ipsec_out_type; 9194 9195 if ((mlen == sizeof (ipsec_ctl_t)) && 9196 (mctltype == IPSEC_CTL)) { 9197 ip_output(Q_TO_CONN(q), first_mp, q, caller); 9198 return; 9199 } 9200 9201 if ((mlen < sizeof (ipsec_info_t)) || 9202 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9203 mp == NULL) { 9204 ip_wput_nondata(NULL, q, first_mp, NULL); 9205 return; 9206 } 9207 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9208 if (q->q_next == NULL) { 9209 ip6h = (ip6_t *)mp->b_rptr; 9210 /* 9211 * For a freshly-generated TCP dgram that needs IPV6 9212 * processing, don't call ip_wput immediately. We can 9213 * tell this by the ipsec_out_proc_begin. In-progress 9214 * IPSEC_OUT messages have proc_begin set to TRUE, 9215 * and we want to send all IPSEC_IN messages to 9216 * ip_wput() for IPsec processing or finishing. 9217 */ 9218 if (mctltype == IPSEC_IN || 9219 IPVER(ip6h) != IPV6_VERSION || 9220 io->ipsec_out_proc_begin) { 9221 mibptr = &ip6_mib; 9222 goto notv6; 9223 } 9224 } 9225 } else if (DB_TYPE(mp) != M_DATA) { 9226 ip_wput_nondata(NULL, q, mp, NULL); 9227 return; 9228 } 9229 9230 ip6h = (ip6_t *)mp->b_rptr; 9231 9232 if (IPVER(ip6h) != IPV6_VERSION) { 9233 mibptr = &ip6_mib; 9234 goto notv6; 9235 } 9236 9237 if (q->q_next != NULL) { 9238 ill = (ill_t *)q->q_ptr; 9239 /* 9240 * We don't know if this ill will be used for IPv6 9241 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9242 * ipif_set_values() sets the ill_isv6 flag to true if 9243 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9244 * just drop the packet. 
9245 */ 9246 if (!ill->ill_isv6) { 9247 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9248 "ILLF_IPV6 was set\n")); 9249 freemsg(first_mp); 9250 return; 9251 } 9252 /* For uniformity do a refhold */ 9253 mutex_enter(&ill->ill_lock); 9254 if (!ILL_CAN_LOOKUP(ill)) { 9255 mutex_exit(&ill->ill_lock); 9256 freemsg(first_mp); 9257 return; 9258 } 9259 ill_refhold_locked(ill); 9260 mutex_exit(&ill->ill_lock); 9261 mibptr = ill->ill_ip6_mib; 9262 /* 9263 * ill_ip6_mib is allocated by ipif_set_values() when 9264 * ill_isv6 is set. Thus if ill_isv6 is true, 9265 * ill_ip6_mib had better not be NULL. 9266 */ 9267 ASSERT(mibptr != NULL); 9268 unspec_src = 0; 9269 BUMP_MIB(mibptr, ipv6OutRequests); 9270 do_outrequests = B_FALSE; 9271 } else { 9272 connp = (conn_t *)arg; 9273 ASSERT(connp != NULL); 9274 9275 /* is queue flow controlled? */ 9276 if ((q->q_first || connp->conn_draining) && 9277 (caller == IP_WPUT)) { 9278 /* 9279 * 1) TCP sends down M_CTL for detached connections. 9280 * 2) AH/ESP sends down M_CTL. 9281 * 9282 * We don't flow control either of the above. Only 9283 * UDP and others are flow controlled for which we 9284 * can't have a M_CTL. 9285 */ 9286 ASSERT(first_mp == mp); 9287 (void) putq(q, mp); 9288 return; 9289 } 9290 mibptr = &ip6_mib; 9291 unspec_src = connp->conn_unspec_src; 9292 do_outrequests = B_TRUE; 9293 if (mp->b_flag & MSGHASREF) { 9294 mp->b_flag &= ~MSGHASREF; 9295 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9296 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9297 need_decref = B_TRUE; 9298 } 9299 9300 /* 9301 * If there is a policy, try to attach an ipsec_out in 9302 * the front. At the end, first_mp either points to a 9303 * M_DATA message or IPSEC_OUT message linked to a 9304 * M_DATA message. We have to do it now as we might 9305 * lose the "conn" if we go through ip_newroute. 
9306 */ 9307 if (!mctl_present && 9308 (connp->conn_out_enforce_policy || 9309 connp->conn_latch != NULL)) { 9310 ASSERT(first_mp == mp); 9311 /* XXX Any better way to get the protocol fast ? */ 9312 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9313 connp->conn_ulp)) == NULL)) { 9314 if (need_decref) 9315 CONN_DEC_REF(connp); 9316 return; 9317 } else { 9318 ASSERT(mp->b_datap->db_type == M_CTL); 9319 first_mp = mp; 9320 mp = mp->b_cont; 9321 mctl_present = B_TRUE; 9322 io = (ipsec_out_t *)first_mp->b_rptr; 9323 } 9324 } 9325 } 9326 9327 /* check for alignment and full IPv6 header */ 9328 if (!OK_32PTR((uchar_t *)ip6h) || 9329 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9330 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9331 if (do_outrequests) 9332 BUMP_MIB(mibptr, ipv6OutRequests); 9333 BUMP_MIB(mibptr, ipv6OutDiscards); 9334 freemsg(first_mp); 9335 if (ill != NULL) 9336 ill_refrele(ill); 9337 if (need_decref) 9338 CONN_DEC_REF(connp); 9339 return; 9340 } 9341 v6dstp = &ip6h->ip6_dst; 9342 cksum_request = -1; 9343 ip6i = NULL; 9344 9345 /* 9346 * Once neighbor discovery has completed, ndp_process() will provide 9347 * locally generated packets for which processing can be reattempted. 9348 * In these cases, connp is NULL and the original zone is part of a 9349 * prepended ipsec_out_t. 9350 */ 9351 if (io != NULL) { 9352 zoneid = io->ipsec_out_zoneid; 9353 ASSERT(zoneid != ALL_ZONES); 9354 } else { 9355 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9356 } 9357 9358 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9359 /* 9360 * This is an ip6i_t header followed by an ip6_hdr. 9361 * Check which fields are set. 9362 * 9363 * When the packet comes from a transport we should have 9364 * all needed headers in the first mblk. However, when 9365 * going through ip_newroute*_v6 the ip6i might be in 9366 * a separate mblk when we return here. 
In that case 9367 * we pullup everything to ensure that extension and transport 9368 * headers "stay" in the first mblk. 9369 */ 9370 ip6i = (ip6i_t *)ip6h; 9371 ip6i_flags = ip6i->ip6i_flags; 9372 9373 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9374 ((mp->b_wptr - (uchar_t *)ip6i) >= 9375 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9376 9377 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9378 if (!pullupmsg(mp, -1)) { 9379 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9380 if (do_outrequests) 9381 BUMP_MIB(mibptr, ipv6OutRequests); 9382 BUMP_MIB(mibptr, ipv6OutDiscards); 9383 freemsg(first_mp); 9384 if (ill != NULL) 9385 ill_refrele(ill); 9386 if (need_decref) 9387 CONN_DEC_REF(connp); 9388 return; 9389 } 9390 ip6h = (ip6_t *)mp->b_rptr; 9391 v6dstp = &ip6h->ip6_dst; 9392 ip6i = (ip6i_t *)ip6h; 9393 } 9394 ip6h = (ip6_t *)&ip6i[1]; 9395 9396 /* 9397 * Advance rptr past the ip6i_t to get ready for 9398 * transmitting the packet. However, if the packet gets 9399 * passed to ip_newroute*_v6 then rptr is moved back so 9400 * that the ip6i_t header can be inspected when the 9401 * packet comes back here after passing through 9402 * ire_add_then_send. 9403 */ 9404 mp->b_rptr = (uchar_t *)ip6h; 9405 9406 /* 9407 * IP6I_ATTACH_IF is set in this function when we had a 9408 * conn and it was either bound to the IPFF_NOFAILOVER address 9409 * or IPV6_BOUND_PIF was set. These options override other 9410 * options that set the ifindex. We come here with 9411 * IP6I_ATTACH_IF set when we can't find the ire and 9412 * ip_newroute_v6 is feeding the packet for second time. 
9413 */ 9414 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9415 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9416 ASSERT(ip6i->ip6i_ifindex != 0); 9417 if (ill != NULL) 9418 ill_refrele(ill); 9419 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9420 NULL, NULL, NULL, NULL); 9421 if (ill == NULL) { 9422 if (do_outrequests) 9423 BUMP_MIB(mibptr, ipv6OutRequests); 9424 BUMP_MIB(mibptr, ipv6OutDiscards); 9425 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9426 ip6i->ip6i_ifindex)); 9427 if (need_decref) 9428 CONN_DEC_REF(connp); 9429 freemsg(first_mp); 9430 return; 9431 } 9432 mibptr = ill->ill_ip6_mib; 9433 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9434 /* 9435 * Preserve the index so that when we return 9436 * from IPSEC processing, we know where to 9437 * send the packet. 9438 */ 9439 if (mctl_present) { 9440 ASSERT(io != NULL); 9441 io->ipsec_out_ill_index = 9442 ip6i->ip6i_ifindex; 9443 } 9444 } 9445 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9446 /* 9447 * This is a multipathing probe packet that has 9448 * been delayed in ND resolution. 
Drop the 9449 * packet for the reasons mentioned in 9450 * nce_queue_mp() 9451 */ 9452 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9453 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9454 freemsg(first_mp); 9455 ill_refrele(ill); 9456 if (need_decref) 9457 CONN_DEC_REF(connp); 9458 return; 9459 } 9460 } 9461 } 9462 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9463 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9464 9465 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9466 if (secpolicy_net_rawaccess(cr) != 0) { 9467 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9468 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9469 NULL, zoneid, NULL, 9470 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9471 if (ire == NULL) { 9472 if (do_outrequests) 9473 BUMP_MIB(mibptr, 9474 ipv6OutRequests); 9475 BUMP_MIB(mibptr, ipv6OutDiscards); 9476 ip1dbg(("ip_wput_v6: bad source " 9477 "addr\n")); 9478 freemsg(first_mp); 9479 if (ill != NULL) 9480 ill_refrele(ill); 9481 if (need_decref) 9482 CONN_DEC_REF(connp); 9483 return; 9484 } 9485 ire_refrele(ire); 9486 } 9487 /* No need to verify again when using ip_newroute */ 9488 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9489 } 9490 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9491 /* 9492 * Make sure they match since ip_newroute*_v6 etc might 9493 * (unknown to them) inspect ip6i_nexthop when 9494 * they think they access ip6_dst. 9495 */ 9496 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9497 } 9498 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9499 cksum_request = 1; 9500 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9501 cksum_request = ip6i->ip6i_checksum_off; 9502 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9503 unspec_src = 1; 9504 9505 if (do_outrequests && ill != NULL) { 9506 BUMP_MIB(mibptr, ipv6OutRequests); 9507 do_outrequests = B_FALSE; 9508 } 9509 /* 9510 * Store ip6i_t info that we need after we come back 9511 * from IPSEC processing. 
9512 */ 9513 if (mctl_present) { 9514 ASSERT(io != NULL); 9515 io->ipsec_out_unspec_src = unspec_src; 9516 } 9517 } 9518 if (connp != NULL && connp->conn_dontroute) 9519 ip6h->ip6_hops = 1; 9520 9521 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9522 goto ipv6multicast; 9523 9524 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9525 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9526 ill_t *conn_outgoing_pill; 9527 9528 conn_outgoing_pill = conn_get_held_ill(connp, 9529 &connp->conn_outgoing_pill, &err); 9530 if (err == ILL_LOOKUP_FAILED) { 9531 if (ill != NULL) 9532 ill_refrele(ill); 9533 if (need_decref) 9534 CONN_DEC_REF(connp); 9535 freemsg(first_mp); 9536 return; 9537 } 9538 if (conn_outgoing_pill != NULL) { 9539 if (ill != NULL) 9540 ill_refrele(ill); 9541 ill = conn_outgoing_pill; 9542 attach_if = B_TRUE; 9543 match_flags = MATCH_IRE_ILL; 9544 mibptr = ill->ill_ip6_mib; 9545 9546 /* 9547 * Check if we need an ire that will not be 9548 * looked up by anybody else i.e. HIDDEN. 9549 */ 9550 if (ill_is_probeonly(ill)) 9551 match_flags |= MATCH_IRE_MARK_HIDDEN; 9552 goto send_from_ill; 9553 } 9554 } 9555 9556 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9557 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9558 ill_t *conn_nofailover_ill; 9559 9560 conn_nofailover_ill = conn_get_held_ill(connp, 9561 &connp->conn_nofailover_ill, &err); 9562 if (err == ILL_LOOKUP_FAILED) { 9563 if (ill != NULL) 9564 ill_refrele(ill); 9565 if (need_decref) 9566 CONN_DEC_REF(connp); 9567 freemsg(first_mp); 9568 return; 9569 } 9570 if (conn_nofailover_ill != NULL) { 9571 if (ill != NULL) 9572 ill_refrele(ill); 9573 ill = conn_nofailover_ill; 9574 attach_if = B_TRUE; 9575 /* 9576 * Assumes that ipc_nofailover_ill is used only for 9577 * multipathing probe packets. These packets are better 9578 * dropped, if they are delayed in ND resolution, for 9579 * the reasons described in nce_queue_mp(). 
9580 * IP6I_DROP_IFDELAYED will be set later on in this 9581 * function for this packet. 9582 */ 9583 drop_if_delayed = B_TRUE; 9584 match_flags = MATCH_IRE_ILL; 9585 mibptr = ill->ill_ip6_mib; 9586 9587 /* 9588 * Check if we need an ire that will not be 9589 * looked up by anybody else i.e. HIDDEN. 9590 */ 9591 if (ill_is_probeonly(ill)) 9592 match_flags |= MATCH_IRE_MARK_HIDDEN; 9593 goto send_from_ill; 9594 } 9595 } 9596 9597 /* 9598 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9599 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9600 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9601 */ 9602 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9603 ASSERT(ip6i->ip6i_ifindex != 0); 9604 attach_if = B_TRUE; 9605 ASSERT(ill != NULL); 9606 match_flags = MATCH_IRE_ILL; 9607 9608 /* 9609 * Check if we need an ire that will not be 9610 * looked up by anybody else i.e. HIDDEN. 9611 */ 9612 if (ill_is_probeonly(ill)) 9613 match_flags |= MATCH_IRE_MARK_HIDDEN; 9614 goto send_from_ill; 9615 } 9616 9617 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9618 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9619 ASSERT(ill != NULL); 9620 goto send_from_ill; 9621 } 9622 9623 /* 9624 * 4. If q is an ill queue and (link local or multicast destination) 9625 * then use that ill. 9626 */ 9627 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9628 goto send_from_ill; 9629 } 9630 9631 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9632 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9633 ill_t *conn_outgoing_ill; 9634 9635 conn_outgoing_ill = conn_get_held_ill(connp, 9636 &connp->conn_outgoing_ill, &err); 9637 if (err == ILL_LOOKUP_FAILED) { 9638 if (ill != NULL) 9639 ill_refrele(ill); 9640 if (need_decref) 9641 CONN_DEC_REF(connp); 9642 freemsg(first_mp); 9643 return; 9644 } 9645 if (ill != NULL) 9646 ill_refrele(ill); 9647 ill = conn_outgoing_ill; 9648 mibptr = ill->ill_ip6_mib; 9649 goto send_from_ill; 9650 } 9651 9652 /* 9653 * 6. For unicast: Just do an IRE lookup for the best match. 9654 * If we get here for a link-local address it is rather random 9655 * what interface we pick on a multihomed host. 9656 * *If* there is an IRE_CACHE (and the link-local address 9657 * isn't duplicated on multi links) this will find the IRE_CACHE. 9658 * Otherwise it will use one of the matching IRE_INTERFACE routes 9659 * for the link-local prefix. Hence, applications 9660 * *should* be encouraged to specify an outgoing interface when sending 9661 * to a link local address. 9662 */ 9663 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9664 !connp->conn_fully_bound)) { 9665 /* 9666 * We cache IRE_CACHEs to avoid lookups. We don't do 9667 * this for the tcp global queue and listen end point 9668 * as it does not really have a real destination to 9669 * talk to. 9670 */ 9671 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9672 } else { 9673 /* 9674 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9675 * grab a lock here to check for CONDEMNED as it is okay 9676 * to send a packet or two with the IRE_CACHE that is going 9677 * away. 9678 */ 9679 mutex_enter(&connp->conn_lock); 9680 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9681 if (ire != NULL && 9682 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9683 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9684 9685 IRE_REFHOLD(ire); 9686 mutex_exit(&connp->conn_lock); 9687 9688 } else { 9689 boolean_t cached = B_FALSE; 9690 9691 connp->conn_ire_cache = NULL; 9692 mutex_exit(&connp->conn_lock); 9693 /* Release the old ire */ 9694 if (ire != NULL && sctp_ire == NULL) 9695 IRE_REFRELE_NOTR(ire); 9696 9697 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9698 MBLK_GETLABEL(mp)); 9699 if (ire != NULL) { 9700 IRE_REFHOLD_NOTR(ire); 9701 9702 mutex_enter(&connp->conn_lock); 9703 if (!(connp->conn_state_flags & CONN_CLOSING) && 9704 (connp->conn_ire_cache == NULL)) { 9705 rw_enter(&ire->ire_bucket->irb_lock, 9706 RW_READER); 9707 if (!(ire->ire_marks & 9708 IRE_MARK_CONDEMNED)) { 9709 connp->conn_ire_cache = ire; 9710 cached = B_TRUE; 9711 } 9712 rw_exit(&ire->ire_bucket->irb_lock); 9713 } 9714 mutex_exit(&connp->conn_lock); 9715 9716 /* 9717 * We can continue to use the ire but since it 9718 * was not cached, we should drop the extra 9719 * reference. 9720 */ 9721 if (!cached) 9722 IRE_REFRELE_NOTR(ire); 9723 } 9724 } 9725 } 9726 9727 if (ire != NULL) { 9728 if (do_outrequests) { 9729 /* Handle IRE_LOCAL's that might appear here */ 9730 if (ire->ire_type == IRE_CACHE) { 9731 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9732 ill_ip6_mib; 9733 } else { 9734 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9735 } 9736 BUMP_MIB(mibptr, ipv6OutRequests); 9737 } 9738 ASSERT(!attach_if); 9739 9740 /* 9741 * Check if the ire has the RTF_MULTIRT flag, inherited 9742 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9743 */ 9744 if (ire->ire_flags & RTF_MULTIRT) { 9745 /* 9746 * Force hop limit of multirouted packets if required. 9747 * The hop limit of such packets is bounded by the 9748 * ip_multirt_ttl ndd variable. 9749 * NDP packets must have a hop limit of 255; don't 9750 * change the hop limit in that case. 
9751 */ 9752 if ((ip_multirt_ttl > 0) && 9753 (ip6h->ip6_hops > ip_multirt_ttl) && 9754 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9755 if (ip_debug > 3) { 9756 ip2dbg(("ip_wput_v6: forcing multirt " 9757 "hop limit to %d (was %d) ", 9758 ip_multirt_ttl, ip6h->ip6_hops)); 9759 pr_addr_dbg("v6dst %s\n", AF_INET6, 9760 &ire->ire_addr_v6); 9761 } 9762 ip6h->ip6_hops = ip_multirt_ttl; 9763 } 9764 9765 /* 9766 * We look at this point if there are pending 9767 * unresolved routes. ire_multirt_need_resolve_v6() 9768 * checks in O(n) that all IRE_OFFSUBNET ire 9769 * entries for the packet's destination and 9770 * flagged RTF_MULTIRT are currently resolved. 9771 * If some remain unresolved, we do a copy 9772 * of the current message. It will be used 9773 * to initiate additional route resolutions. 9774 */ 9775 multirt_need_resolve = 9776 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9777 MBLK_GETLABEL(first_mp)); 9778 ip2dbg(("ip_wput_v6: ire %p, " 9779 "multirt_need_resolve %d, first_mp %p\n", 9780 (void *)ire, multirt_need_resolve, 9781 (void *)first_mp)); 9782 if (multirt_need_resolve) { 9783 copy_mp = copymsg(first_mp); 9784 if (copy_mp != NULL) { 9785 MULTIRT_DEBUG_TAG(copy_mp); 9786 } 9787 } 9788 } 9789 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9790 connp, caller, 0, ip6i_flags); 9791 if (need_decref) { 9792 CONN_DEC_REF(connp); 9793 connp = NULL; 9794 } 9795 IRE_REFRELE(ire); 9796 9797 /* 9798 * Try to resolve another multiroute if 9799 * ire_multirt_need_resolve_v6() deemed it necessary. 9800 * copy_mp will be consumed (sent or freed) by 9801 * ip_newroute_v6(). 9802 */ 9803 if (copy_mp != NULL) { 9804 if (mctl_present) { 9805 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9806 } else { 9807 ip6h = (ip6_t *)copy_mp->b_rptr; 9808 } 9809 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9810 &ip6h->ip6_src, NULL, zoneid); 9811 } 9812 if (ill != NULL) 9813 ill_refrele(ill); 9814 return; 9815 } 9816 9817 /* 9818 * No full IRE for this destination. 
Send it to 9819 * ip_newroute_v6 to see if anything else matches. 9820 * Mark this packet as having originated on this 9821 * machine. 9822 * Update rptr if there was an ip6i_t header. 9823 */ 9824 mp->b_prev = NULL; 9825 mp->b_next = NULL; 9826 if (ip6i != NULL) 9827 mp->b_rptr -= sizeof (ip6i_t); 9828 9829 if (unspec_src) { 9830 if (ip6i == NULL) { 9831 /* 9832 * Add ip6i_t header to carry unspec_src 9833 * until the packet comes back in ip_wput_v6. 9834 */ 9835 mp = ip_add_info_v6(mp, NULL, v6dstp); 9836 if (mp == NULL) { 9837 if (do_outrequests) 9838 BUMP_MIB(mibptr, ipv6OutRequests); 9839 BUMP_MIB(mibptr, ipv6OutDiscards); 9840 if (mctl_present) 9841 freeb(first_mp); 9842 if (ill != NULL) 9843 ill_refrele(ill); 9844 if (need_decref) 9845 CONN_DEC_REF(connp); 9846 return; 9847 } 9848 ip6i = (ip6i_t *)mp->b_rptr; 9849 9850 if (mctl_present) { 9851 ASSERT(first_mp != mp); 9852 first_mp->b_cont = mp; 9853 } else { 9854 first_mp = mp; 9855 } 9856 9857 if ((mp->b_wptr - (uchar_t *)ip6i) == 9858 sizeof (ip6i_t)) { 9859 /* 9860 * ndp_resolver called from ip_newroute_v6 9861 * expects pulled up message. 
9862 */ 9863 if (!pullupmsg(mp, -1)) { 9864 ip1dbg(("ip_wput_v6: pullupmsg" 9865 " failed\n")); 9866 if (do_outrequests) { 9867 BUMP_MIB(mibptr, 9868 ipv6OutRequests); 9869 } 9870 BUMP_MIB(mibptr, ipv6OutDiscards); 9871 freemsg(first_mp); 9872 if (ill != NULL) 9873 ill_refrele(ill); 9874 if (need_decref) 9875 CONN_DEC_REF(connp); 9876 return; 9877 } 9878 ip6i = (ip6i_t *)mp->b_rptr; 9879 } 9880 ip6h = (ip6_t *)&ip6i[1]; 9881 v6dstp = &ip6h->ip6_dst; 9882 } 9883 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9884 if (mctl_present) { 9885 ASSERT(io != NULL); 9886 io->ipsec_out_unspec_src = unspec_src; 9887 } 9888 } 9889 if (do_outrequests) 9890 BUMP_MIB(mibptr, ipv6OutRequests); 9891 if (need_decref) 9892 CONN_DEC_REF(connp); 9893 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9894 if (ill != NULL) 9895 ill_refrele(ill); 9896 return; 9897 9898 9899 /* 9900 * Handle multicast packets with or without an conn. 9901 * Assumes that the transports set ip6_hops taking 9902 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9903 * into account. 9904 */ 9905 ipv6multicast: 9906 ip2dbg(("ip_wput_v6: multicast\n")); 9907 9908 /* 9909 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9910 * 2. If conn_nofailover_ill is set then use that ill. 9911 * 9912 * Hold the conn_lock till we refhold the ill of interest that is 9913 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9914 * while holding any locks, postpone the refrele until after the 9915 * conn_lock is dropped. 
9916 */ 9917 if (connp != NULL) { 9918 mutex_enter(&connp->conn_lock); 9919 conn_lock_held = B_TRUE; 9920 } else { 9921 conn_lock_held = B_FALSE; 9922 } 9923 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9924 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9925 if (err == ILL_LOOKUP_FAILED) { 9926 ip1dbg(("ip_output_v6: multicast" 9927 " conn_outgoing_pill no ipif\n")); 9928 multicast_discard: 9929 ASSERT(saved_ill == NULL); 9930 if (conn_lock_held) 9931 mutex_exit(&connp->conn_lock); 9932 if (ill != NULL) 9933 ill_refrele(ill); 9934 freemsg(first_mp); 9935 if (do_outrequests) 9936 BUMP_MIB(mibptr, ipv6OutDiscards); 9937 if (need_decref) 9938 CONN_DEC_REF(connp); 9939 return; 9940 } 9941 saved_ill = ill; 9942 ill = connp->conn_outgoing_pill; 9943 attach_if = B_TRUE; 9944 match_flags = MATCH_IRE_ILL; 9945 mibptr = ill->ill_ip6_mib; 9946 9947 /* 9948 * Check if we need an ire that will not be 9949 * looked up by anybody else i.e. HIDDEN. 9950 */ 9951 if (ill_is_probeonly(ill)) 9952 match_flags |= MATCH_IRE_MARK_HIDDEN; 9953 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9954 err = ill_check_and_refhold(connp->conn_nofailover_ill); 9955 if (err == ILL_LOOKUP_FAILED) { 9956 ip1dbg(("ip_output_v6: multicast" 9957 " conn_nofailover_ill no ipif\n")); 9958 goto multicast_discard; 9959 } 9960 saved_ill = ill; 9961 ill = connp->conn_nofailover_ill; 9962 attach_if = B_TRUE; 9963 match_flags = MATCH_IRE_ILL; 9964 9965 /* 9966 * Check if we need an ire that will not be 9967 * looked up by anybody else i.e. HIDDEN. 9968 */ 9969 if (ill_is_probeonly(ill)) 9970 match_flags |= MATCH_IRE_MARK_HIDDEN; 9971 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9972 /* 9973 * Redo 1. If we did not find an IRE_CACHE the first time, 9974 * we should have an ip6i_t with IP6I_ATTACH_IF if 9975 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 9976 * used on this endpoint. 
9977 */ 9978 ASSERT(ip6i->ip6i_ifindex != 0); 9979 attach_if = B_TRUE; 9980 ASSERT(ill != NULL); 9981 match_flags = MATCH_IRE_ILL; 9982 9983 /* 9984 * Check if we need an ire that will not be 9985 * looked up by anybody else i.e. HIDDEN. 9986 */ 9987 if (ill_is_probeonly(ill)) 9988 match_flags |= MATCH_IRE_MARK_HIDDEN; 9989 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9990 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9991 9992 ASSERT(ill != NULL); 9993 } else if (ill != NULL) { 9994 /* 9995 * 4. If q is an ill queue and (link local or multicast 9996 * destination) then use that ill. 9997 * We don't need the ipif initialization here. 9998 * This useless assert below is just to prevent lint from 9999 * reporting a null body if statement. 10000 */ 10001 ASSERT(ill != NULL); 10002 } else if (connp != NULL) { 10003 /* 10004 * 5. If IPV6_BOUND_IF has been set use that ill. 10005 * 10006 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10007 * Otherwise look for the best IRE match for the unspecified 10008 * group to determine the ill. 10009 * 10010 * conn_multicast_ill is used for only IPv6 packets. 10011 * conn_multicast_ipif is used for only IPv4 packets. 10012 * Thus a PF_INET6 socket send both IPv4 and IPv6 10013 * multicast packets using different IP*_MULTICAST_IF 10014 * interfaces. 
10015 */ 10016 if (connp->conn_outgoing_ill != NULL) { 10017 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10018 if (err == ILL_LOOKUP_FAILED) { 10019 ip1dbg(("ip_output_v6: multicast" 10020 " conn_outgoing_ill no ipif\n")); 10021 goto multicast_discard; 10022 } 10023 ill = connp->conn_outgoing_ill; 10024 } else if (connp->conn_multicast_ill != NULL) { 10025 err = ill_check_and_refhold(connp->conn_multicast_ill); 10026 if (err == ILL_LOOKUP_FAILED) { 10027 ip1dbg(("ip_output_v6: multicast" 10028 " conn_multicast_ill no ipif\n")); 10029 goto multicast_discard; 10030 } 10031 ill = connp->conn_multicast_ill; 10032 } else { 10033 mutex_exit(&connp->conn_lock); 10034 conn_lock_held = B_FALSE; 10035 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10036 if (ipif == NULL) { 10037 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10038 goto multicast_discard; 10039 } 10040 /* 10041 * We have a ref to this ipif, so we can safely 10042 * access ipif_ill. 10043 */ 10044 ill = ipif->ipif_ill; 10045 mutex_enter(&ill->ill_lock); 10046 if (!ILL_CAN_LOOKUP(ill)) { 10047 mutex_exit(&ill->ill_lock); 10048 ipif_refrele(ipif); 10049 ill = NULL; 10050 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10051 goto multicast_discard; 10052 } 10053 ill_refhold_locked(ill); 10054 mutex_exit(&ill->ill_lock); 10055 ipif_refrele(ipif); 10056 /* 10057 * Save binding until IPV6_MULTICAST_IF 10058 * changes it 10059 */ 10060 mutex_enter(&connp->conn_lock); 10061 connp->conn_multicast_ill = ill; 10062 connp->conn_orig_multicast_ifindex = 10063 ill->ill_phyint->phyint_ifindex; 10064 mutex_exit(&connp->conn_lock); 10065 } 10066 } 10067 if (conn_lock_held) 10068 mutex_exit(&connp->conn_lock); 10069 10070 if (saved_ill != NULL) 10071 ill_refrele(saved_ill); 10072 10073 ASSERT(ill != NULL); 10074 /* 10075 * For multicast loopback interfaces replace the multicast address 10076 * with a unicast address for the ire lookup. 
10077 */ 10078 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10079 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10080 10081 mibptr = ill->ill_ip6_mib; 10082 if (do_outrequests) { 10083 BUMP_MIB(mibptr, ipv6OutRequests); 10084 do_outrequests = B_FALSE; 10085 } 10086 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10087 10088 /* 10089 * As we may lose the conn by the time we reach ip_wput_ire_v6 10090 * we copy conn_multicast_loop and conn_dontroute on to an 10091 * ipsec_out. In case if this datagram goes out secure, 10092 * we need the ill_index also. Copy that also into the 10093 * ipsec_out. 10094 */ 10095 if (mctl_present) { 10096 io = (ipsec_out_t *)first_mp->b_rptr; 10097 ASSERT(first_mp->b_datap->db_type == M_CTL); 10098 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10099 } else { 10100 ASSERT(mp == first_mp); 10101 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10102 BUMP_MIB(mibptr, ipv6OutDiscards); 10103 freemsg(mp); 10104 if (ill != NULL) 10105 ill_refrele(ill); 10106 if (need_decref) 10107 CONN_DEC_REF(connp); 10108 return; 10109 } 10110 io = (ipsec_out_t *)first_mp->b_rptr; 10111 /* This is not a secure packet */ 10112 io->ipsec_out_secure = B_FALSE; 10113 io->ipsec_out_use_global_policy = B_TRUE; 10114 io->ipsec_out_zoneid = 10115 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10116 first_mp->b_cont = mp; 10117 mctl_present = B_TRUE; 10118 } 10119 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10120 io->ipsec_out_unspec_src = unspec_src; 10121 if (connp != NULL) 10122 io->ipsec_out_dontroute = connp->conn_dontroute; 10123 10124 send_from_ill: 10125 ASSERT(ill != NULL); 10126 ASSERT(mibptr == ill->ill_ip6_mib); 10127 if (do_outrequests) { 10128 BUMP_MIB(mibptr, ipv6OutRequests); 10129 do_outrequests = B_FALSE; 10130 } 10131 10132 if (io != NULL) 10133 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10134 10135 /* 10136 * When a specific ill is specified (using IPV6_PKTINFO, 10137 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10138 * on routing entries (ftable and ctable) that have a matching 10139 * ire->ire_ipif->ipif_ill. Thus this can only be used 10140 * for destinations that are on-link for the specific ill 10141 * and that can appear on multiple links. Thus it is useful 10142 * for multicast destinations, link-local destinations, and 10143 * at some point perhaps for site-local destinations (if the 10144 * node sits at a site boundary). 10145 * We create the cache entries in the regular ctable since 10146 * it can not "confuse" things for other destinations. 10147 * table. 10148 * 10149 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10150 * It is used only when ire_cache_lookup is used above. 10151 */ 10152 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10153 zoneid, MBLK_GETLABEL(mp), match_flags); 10154 if (ire != NULL) { 10155 /* 10156 * Check if the ire has the RTF_MULTIRT flag, inherited 10157 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10158 */ 10159 if (ire->ire_flags & RTF_MULTIRT) { 10160 /* 10161 * Force hop limit of multirouted packets if required. 10162 * The hop limit of such packets is bounded by the 10163 * ip_multirt_ttl ndd variable. 
10164 * NDP packets must have a hop limit of 255; don't 10165 * change the hop limit in that case. 10166 */ 10167 if ((ip_multirt_ttl > 0) && 10168 (ip6h->ip6_hops > ip_multirt_ttl) && 10169 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10170 if (ip_debug > 3) { 10171 ip2dbg(("ip_wput_v6: forcing multirt " 10172 "hop limit to %d (was %d) ", 10173 ip_multirt_ttl, ip6h->ip6_hops)); 10174 pr_addr_dbg("v6dst %s\n", AF_INET6, 10175 &ire->ire_addr_v6); 10176 } 10177 ip6h->ip6_hops = ip_multirt_ttl; 10178 } 10179 10180 /* 10181 * We look at this point if there are pending 10182 * unresolved routes. ire_multirt_need_resolve_v6() 10183 * checks in O(n) that all IRE_OFFSUBNET ire 10184 * entries for the packet's destination and 10185 * flagged RTF_MULTIRT are currently resolved. 10186 * If some remain unresolved, we make a copy 10187 * of the current message. It will be used 10188 * to initiate additional route resolutions. 10189 */ 10190 multirt_need_resolve = 10191 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10192 MBLK_GETLABEL(first_mp)); 10193 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10194 "multirt_need_resolve %d, first_mp %p\n", 10195 (void *)ire, multirt_need_resolve, 10196 (void *)first_mp)); 10197 if (multirt_need_resolve) { 10198 copy_mp = copymsg(first_mp); 10199 if (copy_mp != NULL) { 10200 MULTIRT_DEBUG_TAG(copy_mp); 10201 } 10202 } 10203 } 10204 10205 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10206 ill->ill_name, (void *)ire, 10207 ill->ill_phyint->phyint_ifindex)); 10208 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10209 connp, caller, 10210 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10211 ip6i_flags); 10212 ire_refrele(ire); 10213 if (need_decref) { 10214 CONN_DEC_REF(connp); 10215 connp = NULL; 10216 } 10217 10218 /* 10219 * Try to resolve another multiroute if 10220 * ire_multirt_need_resolve_v6() deemed it necessary. 10221 * copy_mp will be consumed (sent or freed) by 10222 * ip_newroute_[ipif_]v6(). 
10223 */ 10224 if (copy_mp != NULL) { 10225 if (mctl_present) { 10226 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10227 } else { 10228 ip6h = (ip6_t *)copy_mp->b_rptr; 10229 } 10230 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10231 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10232 zoneid); 10233 if (ipif == NULL) { 10234 ip1dbg(("ip_wput_v6: No ipif for " 10235 "multicast\n")); 10236 MULTIRT_DEBUG_UNTAG(copy_mp); 10237 freemsg(copy_mp); 10238 return; 10239 } 10240 ip_newroute_ipif_v6(q, copy_mp, ipif, 10241 ip6h->ip6_dst, unspec_src, zoneid); 10242 ipif_refrele(ipif); 10243 } else { 10244 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10245 &ip6h->ip6_src, ill, zoneid); 10246 } 10247 } 10248 if (ill != NULL) 10249 ill_refrele(ill); 10250 return; 10251 } 10252 if (need_decref) { 10253 CONN_DEC_REF(connp); 10254 connp = NULL; 10255 } 10256 10257 /* Update rptr if there was an ip6i_t header. */ 10258 if (ip6i != NULL) 10259 mp->b_rptr -= sizeof (ip6i_t); 10260 if (unspec_src || attach_if) { 10261 if (ip6i == NULL) { 10262 /* 10263 * Add ip6i_t header to carry unspec_src 10264 * or attach_if until the packet comes back in 10265 * ip_wput_v6. 10266 */ 10267 if (mctl_present) { 10268 first_mp->b_cont = 10269 ip_add_info_v6(mp, NULL, v6dstp); 10270 mp = first_mp->b_cont; 10271 if (mp == NULL) 10272 freeb(first_mp); 10273 } else { 10274 first_mp = mp = ip_add_info_v6(mp, NULL, 10275 v6dstp); 10276 } 10277 if (mp == NULL) { 10278 BUMP_MIB(mibptr, ipv6OutDiscards); 10279 if (ill != NULL) 10280 ill_refrele(ill); 10281 return; 10282 } 10283 ip6i = (ip6i_t *)mp->b_rptr; 10284 if ((mp->b_wptr - (uchar_t *)ip6i) == 10285 sizeof (ip6i_t)) { 10286 /* 10287 * ndp_resolver called from ip_newroute_v6 10288 * expects a pulled up message. 
10289 */ 10290 if (!pullupmsg(mp, -1)) { 10291 ip1dbg(("ip_wput_v6: pullupmsg" 10292 " failed\n")); 10293 BUMP_MIB(mibptr, ipv6OutDiscards); 10294 freemsg(first_mp); 10295 return; 10296 } 10297 ip6i = (ip6i_t *)mp->b_rptr; 10298 } 10299 ip6h = (ip6_t *)&ip6i[1]; 10300 v6dstp = &ip6h->ip6_dst; 10301 } 10302 if (unspec_src) 10303 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10304 if (attach_if) { 10305 /* 10306 * Bind to nofailover/BOUND_PIF overrides ifindex. 10307 */ 10308 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10309 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10310 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10311 if (drop_if_delayed) { 10312 /* This is a multipathing probe packet */ 10313 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10314 } 10315 } 10316 if (mctl_present) { 10317 ASSERT(io != NULL); 10318 io->ipsec_out_unspec_src = unspec_src; 10319 } 10320 } 10321 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10322 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10323 unspec_src, zoneid); 10324 } else { 10325 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10326 zoneid); 10327 } 10328 if (ill != NULL) 10329 ill_refrele(ill); 10330 return; 10331 10332 notv6: 10333 /* 10334 * XXX implement a IPv4 and IPv6 packet counter per conn and 10335 * switch when ratio exceeds e.g. 
10:1 10336 */ 10337 if (q->q_next == NULL) { 10338 connp = Q_TO_CONN(q); 10339 10340 if (IPCL_IS_TCP(connp)) { 10341 /* change conn_send for the tcp_v4_connections */ 10342 connp->conn_send = ip_output; 10343 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10344 /* The 'q' is the default SCTP queue */ 10345 connp = (conn_t *)arg; 10346 } else { 10347 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10348 } 10349 } 10350 BUMP_MIB(mibptr, ipv6OutIPv4); 10351 (void) ip_output(connp, first_mp, q, caller); 10352 if (ill != NULL) 10353 ill_refrele(ill); 10354 } 10355 10356 static void 10357 ip_wput_v6(queue_t *q, mblk_t *mp) 10358 { 10359 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10360 } 10361 10362 static void 10363 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10364 { 10365 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10366 io->ipsec_out_attach_if = B_TRUE; 10367 io->ipsec_out_ill_index = attach_index; 10368 } 10369 10370 /* 10371 * NULL send-to queue - packet is to be delivered locally. 10372 */ 10373 void 10374 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10375 ire_t *ire, int fanout_flags) 10376 { 10377 uint32_t ports; 10378 mblk_t *mp = first_mp, *first_mp1; 10379 boolean_t mctl_present; 10380 uint8_t nexthdr; 10381 uint16_t hdr_length; 10382 ipsec_out_t *io; 10383 mib2_ipv6IfStatsEntry_t *mibptr; 10384 ilm_t *ilm; 10385 uint_t nexthdr_offset; 10386 10387 if (DB_TYPE(mp) == M_CTL) { 10388 io = (ipsec_out_t *)mp->b_rptr; 10389 if (!io->ipsec_out_secure) { 10390 mp = mp->b_cont; 10391 freeb(first_mp); 10392 first_mp = mp; 10393 mctl_present = B_FALSE; 10394 } else { 10395 mctl_present = B_TRUE; 10396 mp = first_mp->b_cont; 10397 ipsec_out_to_in(first_mp); 10398 } 10399 } else { 10400 mctl_present = B_FALSE; 10401 } 10402 10403 nexthdr = ip6h->ip6_nxt; 10404 mibptr = ill->ill_ip6_mib; 10405 10406 /* Fastpath */ 10407 switch (nexthdr) { 10408 case IPPROTO_TCP: 10409 case IPPROTO_UDP: 10410 case IPPROTO_ICMPV6: 10411 case IPPROTO_SCTP: 10412 
hdr_length = IPV6_HDR_LEN; 10413 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10414 (uchar_t *)ip6h); 10415 break; 10416 default: { 10417 uint8_t *nexthdrp; 10418 10419 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10420 &hdr_length, &nexthdrp)) { 10421 /* Malformed packet */ 10422 BUMP_MIB(mibptr, ipv6OutDiscards); 10423 freemsg(first_mp); 10424 return; 10425 } 10426 nexthdr = *nexthdrp; 10427 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10428 break; 10429 } 10430 } 10431 10432 10433 UPDATE_OB_PKT_COUNT(ire); 10434 ire->ire_last_used_time = lbolt; 10435 10436 /* 10437 * Remove reacability confirmation bit from version field 10438 * before looping back the packet. 10439 */ 10440 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10441 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10442 } 10443 10444 switch (nexthdr) { 10445 case IPPROTO_TCP: 10446 if (DB_TYPE(mp) == M_DATA) { 10447 /* 10448 * M_DATA mblk, so init mblk (chain) for 10449 * no struio(). 10450 */ 10451 mblk_t *mp1 = mp; 10452 10453 do { 10454 mp1->b_datap->db_struioflag = 0; 10455 } while ((mp1 = mp1->b_cont) != NULL); 10456 } 10457 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10458 TCP_PORTS_OFFSET); 10459 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10460 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10461 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10462 hdr_length, mctl_present, ire->ire_zoneid); 10463 return; 10464 10465 case IPPROTO_UDP: 10466 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10467 UDP_PORTS_OFFSET); 10468 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10469 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10470 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10471 return; 10472 10473 case IPPROTO_SCTP: 10474 { 10475 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10476 10477 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10478 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10479 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10480 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10481 
ire->ire_zoneid); 10482 return; 10483 } 10484 case IPPROTO_ICMPV6: { 10485 icmp6_t *icmp6; 10486 10487 /* check for full IPv6+ICMPv6 header */ 10488 if ((mp->b_wptr - mp->b_rptr) < 10489 (hdr_length + ICMP6_MINLEN)) { 10490 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10491 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10492 " failed\n")); 10493 BUMP_MIB(mibptr, ipv6OutDiscards); 10494 freemsg(first_mp); 10495 return; 10496 } 10497 ip6h = (ip6_t *)mp->b_rptr; 10498 } 10499 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10500 10501 /* Update output mib stats */ 10502 icmp_update_out_mib_v6(ill, icmp6); 10503 10504 /* Check variable for testing applications */ 10505 if (ipv6_drop_inbound_icmpv6) { 10506 freemsg(first_mp); 10507 return; 10508 } 10509 /* 10510 * Assume that there is always at least one conn for 10511 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10512 * where there is no conn. 10513 */ 10514 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10515 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10516 /* 10517 * In the multicast case, applications may have 10518 * joined the group from different zones, so we 10519 * need to deliver the packet to each of them. 10520 * Loop through the multicast memberships 10521 * structures (ilm) on the receive ill and send 10522 * a copy of the packet up each matching one. 10523 * However, we don't do this for multicasts sent 10524 * on the loopback interface (PHYI_LOOPBACK flag 10525 * set) as they must stay in the sender's zone. 
10526 */ 10527 ILM_WALKER_HOLD(ill); 10528 for (ilm = ill->ill_ilm; ilm != NULL; 10529 ilm = ilm->ilm_next) { 10530 if (ilm->ilm_flags & ILM_DELETED) 10531 continue; 10532 if (!IN6_ARE_ADDR_EQUAL( 10533 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10534 continue; 10535 if ((fanout_flags & 10536 IP_FF_NO_MCAST_LOOP) && 10537 ilm->ilm_zoneid == ire->ire_zoneid) 10538 continue; 10539 if (!ipif_lookup_zoneid(ill, 10540 ilm->ilm_zoneid, IPIF_UP, NULL)) 10541 continue; 10542 10543 first_mp1 = ip_copymsg(first_mp); 10544 if (first_mp1 == NULL) 10545 continue; 10546 icmp_inbound_v6(q, first_mp1, ill, 10547 hdr_length, mctl_present, 10548 IP6_NO_IPPOLICY, ilm->ilm_zoneid); 10549 } 10550 ILM_WALKER_RELE(ill); 10551 } else { 10552 first_mp1 = ip_copymsg(first_mp); 10553 if (first_mp1 != NULL) 10554 icmp_inbound_v6(q, first_mp1, ill, 10555 hdr_length, mctl_present, 10556 IP6_NO_IPPOLICY, ire->ire_zoneid); 10557 } 10558 } 10559 /* FALLTHRU */ 10560 default: { 10561 /* 10562 * Handle protocols with which IPv6 is less intimate. 10563 */ 10564 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10565 10566 /* 10567 * Enable sending ICMP for "Unknown" nexthdr 10568 * case. i.e. where we did not FALLTHRU from 10569 * IPPROTO_ICMPV6 processing case above. 10570 */ 10571 if (nexthdr != IPPROTO_ICMPV6) 10572 fanout_flags |= IP_FF_SEND_ICMP; 10573 /* 10574 * Note: There can be more than one stream bound 10575 * to a particular protocol. When this is the case, 10576 * each one gets a copy of any incoming packets. 10577 */ 10578 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10579 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10580 mctl_present, ire->ire_zoneid); 10581 return; 10582 } 10583 } 10584 } 10585 10586 /* 10587 * Send packet using IRE. 10588 * Checksumming is controlled by cksum_request: 10589 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10590 * 1 => Skip TCP/UDP/SCTP checksum 10591 * Otherwise => checksum_request contains insert offset for checksum 10592 * 10593 * Assumes that the following set of headers appear in the first 10594 * mblk: 10595 * ip6_t 10596 * Any extension headers 10597 * TCP/UDP/SCTP header (if present) 10598 * The routine can handle an ICMPv6 header that is not in the first mblk. 10599 * 10600 * NOTE : This function does not ire_refrele the ire passed in as the 10601 * argument unlike ip_wput_ire where the REFRELE is done. 10602 * Refer to ip_wput_ire for more on this. 10603 */ 10604 static void 10605 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10606 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10607 { 10608 ip6_t *ip6h; 10609 uint8_t nexthdr; 10610 uint16_t hdr_length; 10611 uint_t reachable = 0x0; 10612 ill_t *ill; 10613 mib2_ipv6IfStatsEntry_t *mibptr; 10614 mblk_t *first_mp; 10615 boolean_t mctl_present; 10616 ipsec_out_t *io; 10617 boolean_t conn_dontroute; /* conn value for multicast */ 10618 boolean_t conn_multicast_loop; /* conn value for multicast */ 10619 boolean_t multicast_forward; /* Should we forward ? */ 10620 int max_frag; 10621 zoneid_t zoneid; 10622 10623 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10624 ill = ire_to_ill(ire); 10625 first_mp = mp; 10626 multicast_forward = B_FALSE; 10627 10628 if (mp->b_datap->db_type != M_CTL) { 10629 ip6h = (ip6_t *)first_mp->b_rptr; 10630 } else { 10631 io = (ipsec_out_t *)first_mp->b_rptr; 10632 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10633 /* 10634 * Grab the zone id now because the M_CTL can be discarded by 10635 * ip_wput_ire_parse_ipsec_out() below. 10636 */ 10637 zoneid = io->ipsec_out_zoneid; 10638 ASSERT(zoneid != ALL_ZONES); 10639 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10640 /* 10641 * For the multicast case, ipsec_out carries conn_dontroute and 10642 * conn_multicast_loop as conn may not be available here. 
We 10643 * need this for multicast loopback and forwarding which is done 10644 * later in the code. 10645 */ 10646 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10647 conn_dontroute = io->ipsec_out_dontroute; 10648 conn_multicast_loop = io->ipsec_out_multicast_loop; 10649 /* 10650 * If conn_dontroute is not set or conn_multicast_loop 10651 * is set, we need to do forwarding/loopback. For 10652 * datagrams from ip_wput_multicast, conn_dontroute is 10653 * set to B_TRUE and conn_multicast_loop is set to 10654 * B_FALSE so that we neither do forwarding nor 10655 * loopback. 10656 */ 10657 if (!conn_dontroute || conn_multicast_loop) 10658 multicast_forward = B_TRUE; 10659 } 10660 } 10661 10662 /* 10663 * If the sender didn't supply the hop limit and there is a default 10664 * unicast hop limit associated with the output interface, we use 10665 * that if the packet is unicast. Interface specific unicast hop 10666 * limits as set via the SIOCSLIFLNKINFO ioctl. 10667 */ 10668 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10669 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10670 ip6h->ip6_hops = ill->ill_max_hops; 10671 } 10672 10673 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10674 ire->ire_zoneid != ALL_ZONES) { 10675 /* 10676 * When a zone sends a packet to another zone, we try to deliver 10677 * the packet under the same conditions as if the destination 10678 * was a real node on the network. To do so, we look for a 10679 * matching route in the forwarding table. 10680 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10681 * ip_newroute_v6() does. 
10682 */ 10683 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10684 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10685 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10686 if (src_ire != NULL && 10687 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10688 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10689 !unspec_src) { 10690 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10691 } 10692 ire_refrele(src_ire); 10693 } else { 10694 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10695 if (src_ire != NULL) { 10696 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10697 ire_refrele(src_ire); 10698 freemsg(first_mp); 10699 return; 10700 } 10701 ire_refrele(src_ire); 10702 } 10703 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10704 /* Failed */ 10705 freemsg(first_mp); 10706 return; 10707 } 10708 icmp_unreachable_v6(q, first_mp, 10709 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10710 return; 10711 } 10712 } 10713 10714 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10715 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10716 connp, unspec_src); 10717 if (mp == NULL) { 10718 return; 10719 } 10720 } 10721 10722 first_mp = mp; 10723 if (mp->b_datap->db_type == M_CTL) { 10724 io = (ipsec_out_t *)mp->b_rptr; 10725 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10726 mp = mp->b_cont; 10727 mctl_present = B_TRUE; 10728 } else { 10729 mctl_present = B_FALSE; 10730 } 10731 10732 ip6h = (ip6_t *)mp->b_rptr; 10733 nexthdr = ip6h->ip6_nxt; 10734 mibptr = ill->ill_ip6_mib; 10735 10736 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10737 ipif_t *ipif; 10738 10739 /* 10740 * Select the source address using ipif_select_source_v6. 
10741 */ 10742 if (attach_index != 0) { 10743 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10744 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10745 } else { 10746 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10747 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10748 } 10749 if (ipif == NULL) { 10750 if (ip_debug > 2) { 10751 /* ip1dbg */ 10752 pr_addr_dbg("ip_wput_ire_v6: no src for " 10753 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10754 printf("ip_wput_ire_v6: interface name %s\n", 10755 ill->ill_name); 10756 } 10757 freemsg(first_mp); 10758 return; 10759 } 10760 ip6h->ip6_src = ipif->ipif_v6src_addr; 10761 ipif_refrele(ipif); 10762 } 10763 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10764 if ((connp != NULL && connp->conn_multicast_loop) || 10765 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10766 ilm_t *ilm; 10767 10768 ILM_WALKER_HOLD(ill); 10769 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10770 ILM_WALKER_RELE(ill); 10771 if (ilm != NULL) { 10772 mblk_t *nmp; 10773 int fanout_flags = 0; 10774 10775 if (connp != NULL && 10776 !connp->conn_multicast_loop) { 10777 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10778 } 10779 ip1dbg(("ip_wput_ire_v6: " 10780 "Loopback multicast\n")); 10781 nmp = ip_copymsg(first_mp); 10782 if (nmp != NULL) { 10783 ip6_t *nip6h; 10784 10785 if (mctl_present) { 10786 nip6h = (ip6_t *) 10787 nmp->b_cont->b_rptr; 10788 } else { 10789 nip6h = (ip6_t *)nmp->b_rptr; 10790 } 10791 /* 10792 * Deliver locally and to every local 10793 * zone, except the sending zone when 10794 * IPV6_MULTICAST_LOOP is disabled. 
10795 */ 10796 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10797 ire, fanout_flags); 10798 } else { 10799 BUMP_MIB(mibptr, ipv6OutDiscards); 10800 ip1dbg(("ip_wput_ire_v6: " 10801 "copymsg failed\n")); 10802 } 10803 } 10804 } 10805 if (ip6h->ip6_hops == 0 || 10806 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10807 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10808 /* 10809 * Local multicast or just loopback on loopback 10810 * interface. 10811 */ 10812 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10813 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10814 freemsg(first_mp); 10815 return; 10816 } 10817 } 10818 10819 if (ire->ire_stq != NULL) { 10820 uint32_t sum; 10821 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10822 ill_phyint->phyint_ifindex; 10823 queue_t *dev_q = ire->ire_stq->q_next; 10824 10825 /* 10826 * non-NULL send-to queue - packet is to be sent 10827 * out an interface. 10828 */ 10829 10830 /* Driver is flow-controlling? */ 10831 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10832 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10833 /* 10834 * Queue packet if we have an conn to give back 10835 * pressure. We can't queue packets intended for 10836 * hardware acceleration since we've tossed that 10837 * state already. If the packet is being fed back 10838 * from ire_send_v6, we don't know the position in 10839 * the queue to enqueue the packet and we discard 10840 * the packet. 10841 */ 10842 if (ip_output_queue && connp != NULL && 10843 !mctl_present && caller != IRE_SEND) { 10844 if (caller == IP_WSRV) { 10845 connp->conn_did_putbq = 1; 10846 (void) putbq(connp->conn_wq, mp); 10847 conn_drain_insert(connp); 10848 /* 10849 * caller == IP_WSRV implies we are 10850 * the service thread, and the 10851 * queue is already noenabled. 10852 * The check for canput and 10853 * the putbq is not atomic. 10854 * So we need to check again. 
10855 */ 10856 if (canput(dev_q)) 10857 connp->conn_did_putbq = 0; 10858 } else { 10859 (void) putq(connp->conn_wq, mp); 10860 } 10861 return; 10862 } 10863 BUMP_MIB(mibptr, ipv6OutDiscards); 10864 freemsg(first_mp); 10865 return; 10866 } 10867 10868 /* 10869 * Look for reachability confirmations from the transport. 10870 */ 10871 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10872 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10873 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10874 if (mctl_present) 10875 io->ipsec_out_reachable = B_TRUE; 10876 } 10877 /* Fastpath */ 10878 switch (nexthdr) { 10879 case IPPROTO_TCP: 10880 case IPPROTO_UDP: 10881 case IPPROTO_ICMPV6: 10882 case IPPROTO_SCTP: 10883 hdr_length = IPV6_HDR_LEN; 10884 break; 10885 default: { 10886 uint8_t *nexthdrp; 10887 10888 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10889 &hdr_length, &nexthdrp)) { 10890 /* Malformed packet */ 10891 BUMP_MIB(mibptr, ipv6OutDiscards); 10892 freemsg(first_mp); 10893 return; 10894 } 10895 nexthdr = *nexthdrp; 10896 break; 10897 } 10898 } 10899 10900 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10901 uint16_t *up; 10902 uint16_t *insp; 10903 10904 /* 10905 * The packet header is processed once for all, even 10906 * in the multirouting case. We disable hardware 10907 * checksum if the packet is multirouted, as it will be 10908 * replicated via several interfaces, and not all of 10909 * them may have this capability. 10910 */ 10911 if (cksum_request == 1 && 10912 !(ire->ire_flags & RTF_MULTIRT)) { 10913 /* Skip the transport checksum */ 10914 goto cksum_done; 10915 } 10916 /* 10917 * Do user-configured raw checksum. 
10918 * Compute checksum and insert at offset "cksum_request" 10919 */ 10920 10921 /* check for enough headers for checksum */ 10922 cksum_request += hdr_length; /* offset from rptr */ 10923 if ((mp->b_wptr - mp->b_rptr) < 10924 (cksum_request + sizeof (int16_t))) { 10925 if (!pullupmsg(mp, 10926 cksum_request + sizeof (int16_t))) { 10927 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10928 " failed\n")); 10929 BUMP_MIB(mibptr, ipv6OutDiscards); 10930 freemsg(first_mp); 10931 return; 10932 } 10933 ip6h = (ip6_t *)mp->b_rptr; 10934 } 10935 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10936 ASSERT(((uintptr_t)insp & 0x1) == 0); 10937 up = (uint16_t *)&ip6h->ip6_src; 10938 /* 10939 * icmp has placed length and routing 10940 * header adjustment in *insp. 10941 */ 10942 sum = htons(nexthdr) + 10943 up[0] + up[1] + up[2] + up[3] + 10944 up[4] + up[5] + up[6] + up[7] + 10945 up[8] + up[9] + up[10] + up[11] + 10946 up[12] + up[13] + up[14] + up[15]; 10947 sum = (sum & 0xffff) + (sum >> 16); 10948 *insp = IP_CSUM(mp, hdr_length, sum); 10949 if (*insp == 0) 10950 *insp = 0xFFFF; 10951 } else if (nexthdr == IPPROTO_TCP) { 10952 uint16_t *up; 10953 10954 /* 10955 * Check for full IPv6 header + enough TCP header 10956 * to get at the checksum field. 10957 */ 10958 if ((mp->b_wptr - mp->b_rptr) < 10959 (hdr_length + TCP_CHECKSUM_OFFSET + 10960 TCP_CHECKSUM_SIZE)) { 10961 if (!pullupmsg(mp, hdr_length + 10962 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10963 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10964 " failed\n")); 10965 BUMP_MIB(mibptr, ipv6OutDiscards); 10966 freemsg(first_mp); 10967 return; 10968 } 10969 ip6h = (ip6_t *)mp->b_rptr; 10970 } 10971 10972 up = (uint16_t *)&ip6h->ip6_src; 10973 /* 10974 * Note: The TCP module has stored the length value 10975 * into the tcp checksum field, so we don't 10976 * need to explicitly sum it in here. 
10977 */ 10978 sum = up[0] + up[1] + up[2] + up[3] + 10979 up[4] + up[5] + up[6] + up[7] + 10980 up[8] + up[9] + up[10] + up[11] + 10981 up[12] + up[13] + up[14] + up[15]; 10982 10983 /* Fold the initial sum */ 10984 sum = (sum & 0xffff) + (sum >> 16); 10985 10986 up = (uint16_t *)(((uchar_t *)ip6h) + 10987 hdr_length + TCP_CHECKSUM_OFFSET); 10988 10989 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10990 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10991 ire->ire_max_frag, mctl_present, sum); 10992 10993 /* Software checksum? */ 10994 if (DB_CKSUMFLAGS(mp) == 0) { 10995 IP6_STAT(ip6_out_sw_cksum); 10996 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 10997 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10998 hdr_length); 10999 } 11000 } else if (nexthdr == IPPROTO_UDP) { 11001 uint16_t *up; 11002 11003 /* 11004 * check for full IPv6 header + enough UDP header 11005 * to get at the UDP checksum field 11006 */ 11007 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11008 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11009 if (!pullupmsg(mp, hdr_length + 11010 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11011 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11012 " failed\n")); 11013 BUMP_MIB(mibptr, ipv6OutDiscards); 11014 freemsg(first_mp); 11015 return; 11016 } 11017 ip6h = (ip6_t *)mp->b_rptr; 11018 } 11019 up = (uint16_t *)&ip6h->ip6_src; 11020 /* 11021 * Note: The UDP module has stored the length value 11022 * into the udp checksum field, so we don't 11023 * need to explicitly sum it in here. 
11024 */ 11025 sum = up[0] + up[1] + up[2] + up[3] + 11026 up[4] + up[5] + up[6] + up[7] + 11027 up[8] + up[9] + up[10] + up[11] + 11028 up[12] + up[13] + up[14] + up[15]; 11029 11030 /* Fold the initial sum */ 11031 sum = (sum & 0xffff) + (sum >> 16); 11032 11033 up = (uint16_t *)(((uchar_t *)ip6h) + 11034 hdr_length + UDP_CHECKSUM_OFFSET); 11035 11036 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11037 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11038 ire->ire_max_frag, mctl_present, sum); 11039 11040 /* Software checksum? */ 11041 if (DB_CKSUMFLAGS(mp) == 0) { 11042 IP6_STAT(ip6_out_sw_cksum); 11043 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11044 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11045 hdr_length); 11046 } 11047 } else if (nexthdr == IPPROTO_ICMPV6) { 11048 uint16_t *up; 11049 icmp6_t *icmp6; 11050 11051 /* check for full IPv6+ICMPv6 header */ 11052 if ((mp->b_wptr - mp->b_rptr) < 11053 (hdr_length + ICMP6_MINLEN)) { 11054 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11055 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11056 " failed\n")); 11057 BUMP_MIB(mibptr, ipv6OutDiscards); 11058 freemsg(first_mp); 11059 return; 11060 } 11061 ip6h = (ip6_t *)mp->b_rptr; 11062 } 11063 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11064 up = (uint16_t *)&ip6h->ip6_src; 11065 /* 11066 * icmp has placed length and routing 11067 * header adjustment in icmp6_cksum. 
11068 */ 11069 sum = htons(IPPROTO_ICMPV6) + 11070 up[0] + up[1] + up[2] + up[3] + 11071 up[4] + up[5] + up[6] + up[7] + 11072 up[8] + up[9] + up[10] + up[11] + 11073 up[12] + up[13] + up[14] + up[15]; 11074 sum = (sum & 0xffff) + (sum >> 16); 11075 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11076 if (icmp6->icmp6_cksum == 0) 11077 icmp6->icmp6_cksum = 0xFFFF; 11078 11079 /* Update output mib stats */ 11080 icmp_update_out_mib_v6(ill, icmp6); 11081 } else if (nexthdr == IPPROTO_SCTP) { 11082 sctp_hdr_t *sctph; 11083 11084 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11085 if (!pullupmsg(mp, hdr_length + 11086 sizeof (*sctph))) { 11087 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11088 " failed\n")); 11089 BUMP_MIB(ill->ill_ip6_mib, 11090 ipv6OutDiscards); 11091 freemsg(mp); 11092 return; 11093 } 11094 ip6h = (ip6_t *)mp->b_rptr; 11095 } 11096 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11097 sctph->sh_chksum = 0; 11098 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11099 } 11100 11101 cksum_done: 11102 /* 11103 * We force the insertion of a fragment header using the 11104 * IPH_FRAG_HDR flag in two cases: 11105 * - after reception of an ICMPv6 "packet too big" message 11106 * with a MTU < 1280 (cf. RFC 2460 section 5) 11107 * - for multirouted IPv6 packets, so that the receiver can 11108 * discard duplicates according to their fragment identifier 11109 * 11110 * Two flags modifed from the API can modify this behavior. 11111 * The first is IPV6_USE_MIN_MTU. With this API the user 11112 * can specify how to manage PMTUD for unicast and multicast. 11113 * 11114 * IPV6_DONTFRAG disallows fragmentation. 
11115 */ 11116 max_frag = ire->ire_max_frag; 11117 switch (IP6I_USE_MIN_MTU_API(flags)) { 11118 case IPV6_USE_MIN_MTU_DEFAULT: 11119 case IPV6_USE_MIN_MTU_UNICAST: 11120 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11121 max_frag = IPV6_MIN_MTU; 11122 } 11123 break; 11124 11125 case IPV6_USE_MIN_MTU_NEVER: 11126 max_frag = IPV6_MIN_MTU; 11127 break; 11128 } 11129 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11130 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11131 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11132 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11133 max_frag, B_FALSE, B_TRUE); 11134 return; 11135 } 11136 11137 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11138 (mp->b_cont ? msgdsize(mp) : 11139 mp->b_wptr - (uchar_t *)ip6h)) { 11140 ip0dbg(("Packet length mismatch: %d, %ld\n", 11141 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11142 msgdsize(mp))); 11143 freemsg(first_mp); 11144 return; 11145 } 11146 /* Do IPSEC processing first */ 11147 if (mctl_present) { 11148 if (attach_index != 0) 11149 ipsec_out_attach_if(io, attach_index); 11150 ipsec_out_process(q, first_mp, ire, ill_index); 11151 return; 11152 } 11153 ASSERT(mp->b_prev == NULL); 11154 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11155 ntohs(ip6h->ip6_plen) + 11156 IPV6_HDR_LEN, max_frag)); 11157 ASSERT(mp == first_mp); 11158 /* Initiate IPPF processing */ 11159 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11160 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11161 if (mp == NULL) { 11162 return; 11163 } 11164 } 11165 ip_wput_frag_v6(mp, ire, reachable, connp, 11166 caller, max_frag); 11167 return; 11168 } 11169 /* Do IPSEC processing first */ 11170 if (mctl_present) { 11171 int extra_len = ipsec_out_extra_length(first_mp); 11172 11173 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11174 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11175 /* 11176 * IPsec headers will push the packet over the 11177 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11178 * message for this packet if the upper-layer 11179 * that issued this packet will be able to 11180 * react to the icmp_pkt2big_v6() that we'll 11181 * generate. 11182 */ 11183 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11184 max_frag, B_FALSE, B_TRUE); 11185 return; 11186 } 11187 if (attach_index != 0) 11188 ipsec_out_attach_if(io, attach_index); 11189 ipsec_out_process(q, first_mp, ire, ill_index); 11190 return; 11191 } 11192 /* 11193 * XXX multicast: add ip_mforward_v6() here. 11194 * Check conn_dontroute 11195 */ 11196 #ifdef lint 11197 /* 11198 * XXX The only purpose of this statement is to avoid lint 11199 * errors. See the above "XXX multicast". When that gets 11200 * fixed, remove this whole #ifdef lint section. 11201 */ 11202 ip3dbg(("multicast forward is %s.\n", 11203 (multicast_forward ? "TRUE" : "FALSE"))); 11204 #endif 11205 11206 UPDATE_OB_PKT_COUNT(ire); 11207 ire->ire_last_used_time = lbolt; 11208 ASSERT(mp == first_mp); 11209 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11210 } else { 11211 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11212 } 11213 } 11214 11215 /* 11216 * Outbound IPv6 fragmentation routine using MDT. 11217 */ 11218 static void 11219 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11220 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11221 { 11222 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11223 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11224 mblk_t *hdr_mp, *md_mp = NULL; 11225 int i1; 11226 multidata_t *mmd; 11227 unsigned char *hdr_ptr, *pld_ptr; 11228 ip_pdescinfo_t pdi; 11229 uint32_t ident; 11230 size_t len; 11231 uint16_t offset; 11232 queue_t *stq = ire->ire_stq; 11233 ill_t *ill = (ill_t *)stq->q_ptr; 11234 11235 ASSERT(DB_TYPE(mp) == M_DATA); 11236 ASSERT(MBLKL(mp) > unfragmentable_len); 11237 11238 /* 11239 * Move read ptr past unfragmentable portion, we don't want this part 11240 * of the data in our fragments. 
11241 */ 11242 mp->b_rptr += unfragmentable_len; 11243 11244 /* Calculate how many packets we will send out */ 11245 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11246 pkts = (i1 + max_chunk - 1) / max_chunk; 11247 ASSERT(pkts > 1); 11248 11249 /* Allocate a message block which will hold all the IP Headers. */ 11250 wroff = ip_wroff_extra; 11251 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11252 11253 i1 = pkts * hdr_chunk_len; 11254 /* 11255 * Create the header buffer, Multidata and destination address 11256 * and SAP attribute that should be associated with it. 11257 */ 11258 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11259 ((hdr_mp->b_wptr += i1), 11260 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11261 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11262 freemsg(mp); 11263 if (md_mp == NULL) { 11264 freemsg(hdr_mp); 11265 } else { 11266 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11267 freemsg(md_mp); 11268 } 11269 IP6_STAT(ip6_frag_mdt_allocfail); 11270 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11271 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11272 return; 11273 } 11274 IP6_STAT(ip6_frag_mdt_allocd); 11275 11276 /* 11277 * Add a payload buffer to the Multidata; this operation must not 11278 * fail, or otherwise our logic in this routine is broken. There 11279 * is no memory allocation done by the routine, so any returned 11280 * failure simply tells us that we've done something wrong. 11281 * 11282 * A failure tells us that either we're adding the same payload 11283 * buffer more than once, or we're trying to add more buffers than 11284 * allowed. None of the above cases should happen, and we panic 11285 * because either there's horrible heap corruption, and/or 11286 * programming mistake. 
11287 */ 11288 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11289 goto pbuf_panic; 11290 } 11291 11292 hdr_ptr = hdr_mp->b_rptr; 11293 pld_ptr = mp->b_rptr; 11294 11295 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11296 11297 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11298 11299 /* 11300 * len is the total length of the fragmentable data in this 11301 * datagram. For each fragment sent, we will decrement len 11302 * by the amount of fragmentable data sent in that fragment 11303 * until len reaches zero. 11304 */ 11305 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11306 11307 offset = 0; 11308 prev_nexthdr_offset += wroff; 11309 11310 while (len != 0) { 11311 size_t mlen; 11312 ip6_t *fip6h; 11313 ip6_frag_t *fraghdr; 11314 int error; 11315 11316 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11317 mlen = MIN(len, max_chunk); 11318 len -= mlen; 11319 11320 fip6h = (ip6_t *)(hdr_ptr + wroff); 11321 ASSERT(OK_32PTR(fip6h)); 11322 bcopy(ip6h, fip6h, unfragmentable_len); 11323 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11324 11325 fip6h->ip6_plen = htons((uint16_t)(mlen + 11326 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11327 11328 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11329 unfragmentable_len); 11330 fraghdr->ip6f_nxt = nexthdr; 11331 fraghdr->ip6f_reserved = 0; 11332 fraghdr->ip6f_offlg = htons(offset) | 11333 ((len != 0) ? IP6F_MORE_FRAG : 0); 11334 fraghdr->ip6f_ident = ident; 11335 11336 /* 11337 * Record offset and size of header and data of the next packet 11338 * in the multidata message. 
11339 */ 11340 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11341 unfragmentable_len + sizeof (ip6_frag_t), 0); 11342 PDESC_PLD_INIT(&pdi); 11343 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11344 ASSERT(i1 > 0); 11345 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11346 if (i1 == mlen) { 11347 pld_ptr += mlen; 11348 } else { 11349 i1 = mlen - i1; 11350 mp = mp->b_cont; 11351 ASSERT(mp != NULL); 11352 ASSERT(MBLKL(mp) >= i1); 11353 /* 11354 * Attach the next payload message block to the 11355 * multidata message. 11356 */ 11357 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11358 goto pbuf_panic; 11359 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11360 pld_ptr = mp->b_rptr + i1; 11361 } 11362 11363 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11364 KM_NOSLEEP)) == NULL) { 11365 /* 11366 * Any failure other than ENOMEM indicates that we 11367 * have passed in invalid pdesc info or parameters 11368 * to mmd_addpdesc, which must not happen. 11369 * 11370 * EINVAL is a result of failure on boundary checks 11371 * against the pdesc info contents. It should not 11372 * happen, and we panic because either there's 11373 * horrible heap corruption, and/or programming 11374 * mistake. 11375 */ 11376 if (error != ENOMEM) { 11377 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11378 "pdesc logic error detected for " 11379 "mmd %p pinfo %p (%d)\n", 11380 (void *)mmd, (void *)&pdi, error); 11381 /* NOTREACHED */ 11382 } 11383 IP6_STAT(ip6_frag_mdt_addpdescfail); 11384 /* Free unattached payload message blocks as well */ 11385 md_mp->b_cont = mp->b_cont; 11386 goto free_mmd; 11387 } 11388 11389 /* Advance fragment offset. */ 11390 offset += mlen; 11391 11392 /* Advance to location for next header in the buffer. */ 11393 hdr_ptr += hdr_chunk_len; 11394 11395 /* Did we reach the next payload message block? */ 11396 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11397 mp = mp->b_cont; 11398 /* 11399 * Attach the next message block with payload 11400 * data to the multidata message. 
11401 */ 11402 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11403 goto pbuf_panic; 11404 pld_ptr = mp->b_rptr; 11405 } 11406 } 11407 11408 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11409 ASSERT(mp->b_wptr == pld_ptr); 11410 11411 /* Update IP statistics */ 11412 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11413 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11414 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11415 11416 ire->ire_ob_pkt_count += pkts; 11417 if (ire->ire_ipif != NULL) 11418 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11419 11420 ire->ire_last_used_time = lbolt; 11421 /* Send it down */ 11422 putnext(stq, md_mp); 11423 return; 11424 11425 pbuf_panic: 11426 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11427 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11428 pbuf_idx); 11429 /* NOTREACHED */ 11430 } 11431 11432 /* 11433 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11434 * We have not optimized this in terms of number of mblks 11435 * allocated. For instance, for each fragment sent we always allocate a 11436 * mblk to hold the IPv6 header and fragment header. 11437 * 11438 * Assumes that all the extension headers are contained in the first mblk. 11439 * 11440 * The fragment header is inserted after an hop-by-hop options header 11441 * and after [an optional destinations header followed by] a routing header. 11442 * 11443 * NOTE : This function does not ire_refrele the ire passed in as 11444 * the argument. 
11445 */ 11446 void 11447 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11448 int caller, int max_frag) 11449 { 11450 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11451 ip6_t *fip6h; 11452 mblk_t *hmp; 11453 mblk_t *hmp0; 11454 mblk_t *dmp; 11455 ip6_frag_t *fraghdr; 11456 size_t unfragmentable_len; 11457 size_t len; 11458 size_t mlen; 11459 size_t max_chunk; 11460 uint32_t ident; 11461 uint16_t off_flags; 11462 uint16_t offset = 0; 11463 ill_t *ill; 11464 uint8_t nexthdr; 11465 uint_t prev_nexthdr_offset; 11466 uint8_t *ptr; 11467 11468 ASSERT(ire->ire_type == IRE_CACHE); 11469 ill = (ill_t *)ire->ire_stq->q_ptr; 11470 11471 /* 11472 * Determine the length of the unfragmentable portion of this 11473 * datagram. This consists of the IPv6 header, a potential 11474 * hop-by-hop options header, a potential pre-routing-header 11475 * destination options header, and a potential routing header. 11476 */ 11477 nexthdr = ip6h->ip6_nxt; 11478 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11479 ptr = (uint8_t *)&ip6h[1]; 11480 11481 if (nexthdr == IPPROTO_HOPOPTS) { 11482 ip6_hbh_t *hbh_hdr; 11483 uint_t hdr_len; 11484 11485 hbh_hdr = (ip6_hbh_t *)ptr; 11486 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11487 nexthdr = hbh_hdr->ip6h_nxt; 11488 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11489 - (uint8_t *)ip6h; 11490 ptr += hdr_len; 11491 } 11492 if (nexthdr == IPPROTO_DSTOPTS) { 11493 ip6_dest_t *dest_hdr; 11494 uint_t hdr_len; 11495 11496 dest_hdr = (ip6_dest_t *)ptr; 11497 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11498 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11499 nexthdr = dest_hdr->ip6d_nxt; 11500 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11501 - (uint8_t *)ip6h; 11502 ptr += hdr_len; 11503 } 11504 } 11505 if (nexthdr == IPPROTO_ROUTING) { 11506 ip6_rthdr_t *rthdr; 11507 uint_t hdr_len; 11508 11509 rthdr = (ip6_rthdr_t *)ptr; 11510 nexthdr = rthdr->ip6r_nxt; 11511 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11512 - (uint8_t *)ip6h; 11513 hdr_len = 8 * (rthdr->ip6r_len + 1); 11514 ptr += hdr_len; 11515 } 11516 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11517 11518 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11519 sizeof (ip6_frag_t)) & ~7; 11520 11521 /* Check if we can use MDT to send out the frags. */ 11522 ASSERT(!IRE_IS_LOCAL(ire)); 11523 if (ip_multidata_outbound && reachable == 0 && 11524 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11525 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11526 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11527 nexthdr, prev_nexthdr_offset); 11528 return; 11529 } 11530 11531 /* 11532 * Allocate an mblk with enough room for the link-layer 11533 * header, the unfragmentable part of the datagram, and the 11534 * fragment header. This (or a copy) will be used as the 11535 * first mblk for each fragment we send. 11536 */ 11537 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11538 BPRI_HI); 11539 if (hmp == NULL) { 11540 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11541 freemsg(mp); 11542 return; 11543 } 11544 hmp->b_rptr += ip_wroff_extra; 11545 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11546 11547 fip6h = (ip6_t *)hmp->b_rptr; 11548 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11549 11550 bcopy(ip6h, fip6h, unfragmentable_len); 11551 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11552 11553 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11554 11555 fraghdr->ip6f_nxt = nexthdr; 11556 fraghdr->ip6f_reserved = 0; 11557 fraghdr->ip6f_offlg = 0; 11558 fraghdr->ip6f_ident = htonl(ident); 11559 11560 /* 11561 * len is the total length of the fragmentable data in this 11562 * datagram. For each fragment sent, we will decrement len 11563 * by the amount of fragmentable data sent in that fragment 11564 * until len reaches zero. 
11565 */ 11566 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11567 11568 /* 11569 * Move read ptr past unfragmentable portion, we don't want this part 11570 * of the data in our fragments. 11571 */ 11572 mp->b_rptr += unfragmentable_len; 11573 11574 while (len != 0) { 11575 mlen = MIN(len, max_chunk); 11576 len -= mlen; 11577 if (len != 0) { 11578 /* Not last */ 11579 hmp0 = copyb(hmp); 11580 if (hmp0 == NULL) { 11581 freeb(hmp); 11582 freemsg(mp); 11583 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11584 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11585 return; 11586 } 11587 off_flags = IP6F_MORE_FRAG; 11588 } else { 11589 /* Last fragment */ 11590 hmp0 = hmp; 11591 hmp = NULL; 11592 off_flags = 0; 11593 } 11594 fip6h = (ip6_t *)(hmp0->b_rptr); 11595 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11596 11597 fip6h->ip6_plen = htons((uint16_t)(mlen + 11598 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11599 /* 11600 * Note: Optimization alert. 11601 * In IPv6 (and IPv4) protocol header, Fragment Offset 11602 * ("offset") is 13 bits wide and in 8-octet units. 11603 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11604 * it occupies the most significant 13 bits. 11605 * (least significant 13 bits in IPv4). 11606 * We do not do any shifts here. Not shifting is same effect 11607 * as taking offset value in octet units, dividing by 8 and 11608 * then shifting 3 bits left to line it up in place in proper 11609 * place protocol header. 
11610 */ 11611 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11612 11613 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11614 /* mp has already been freed by ip_carve_mp() */ 11615 if (hmp != NULL) 11616 freeb(hmp); 11617 freeb(hmp0); 11618 ip1dbg(("ip_carve_mp: failed\n")); 11619 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11620 return; 11621 } 11622 hmp0->b_cont = dmp; 11623 /* Get the priority marking, if any */ 11624 hmp0->b_band = dmp->b_band; 11625 UPDATE_OB_PKT_COUNT(ire); 11626 ire->ire_last_used_time = lbolt; 11627 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11628 caller, NULL); 11629 reachable = 0; /* No need to redo state machine in loop */ 11630 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11631 offset += mlen; 11632 } 11633 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11634 } 11635 11636 /* 11637 * Determine if the ill and multicast aspects of that packets 11638 * "matches" the conn. 11639 */ 11640 boolean_t 11641 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11642 zoneid_t zoneid) 11643 { 11644 ill_t *in_ill; 11645 boolean_t wantpacket = B_TRUE; 11646 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11647 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11648 11649 /* 11650 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11651 * unicast and multicast reception to conn_incoming_ill. 11652 * conn_wantpacket_v6 is called both for unicast and 11653 * multicast. 11654 * 11655 * 1) The unicast copy of the packet can come anywhere in 11656 * the ill group if it is part of the group. Thus, we 11657 * need to check to see whether the ill group matches 11658 * if in_ill is part of a group. 11659 * 11660 * 2) ip_rput does not suppress duplicate multicast packets. 
11661 * If there are two interfaces in a ill group and we have 11662 * 2 applications (conns) joined a multicast group G on 11663 * both the interfaces, ilm_lookup_ill filter in ip_rput 11664 * will give us two packets because we join G on both the 11665 * interfaces rather than nominating just one interface 11666 * for receiving multicast like broadcast above. So, 11667 * we have to call ilg_lookup_ill to filter out duplicate 11668 * copies, if ill is part of a group, to supress duplicates. 11669 */ 11670 in_ill = connp->conn_incoming_ill; 11671 if (in_ill != NULL) { 11672 mutex_enter(&connp->conn_lock); 11673 in_ill = connp->conn_incoming_ill; 11674 mutex_enter(&ill->ill_lock); 11675 /* 11676 * No IPMP, and the packet did not arrive on conn_incoming_ill 11677 * OR, IPMP in use and the packet arrived on an IPMP group 11678 * different from the conn_incoming_ill's IPMP group. 11679 * Reject the packet. 11680 */ 11681 if ((in_ill->ill_group == NULL && in_ill != ill) || 11682 (in_ill->ill_group != NULL && 11683 in_ill->ill_group != ill->ill_group)) { 11684 wantpacket = B_FALSE; 11685 } 11686 mutex_exit(&ill->ill_lock); 11687 mutex_exit(&connp->conn_lock); 11688 if (!wantpacket) 11689 return (B_FALSE); 11690 } 11691 11692 if (connp->conn_multi_router) 11693 return (B_TRUE); 11694 11695 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11696 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11697 /* 11698 * Unicast case: we match the conn only if it's in the specified 11699 * zone. 11700 */ 11701 return (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES); 11702 } 11703 11704 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11705 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11706 /* 11707 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11708 * disabled, therefore we don't dispatch the multicast packet to 11709 * the sending zone. 
11710 */ 11711 return (B_FALSE); 11712 } 11713 11714 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11715 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11716 /* 11717 * Multicast packet on the loopback interface: we only match 11718 * conns who joined the group in the specified zone. 11719 */ 11720 return (B_FALSE); 11721 } 11722 11723 mutex_enter(&connp->conn_lock); 11724 wantpacket = 11725 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11726 mutex_exit(&connp->conn_lock); 11727 11728 return (wantpacket); 11729 } 11730 11731 11732 /* 11733 * Transmit a packet and update any NUD state based on the flags 11734 * XXX need to "recover" any ip6i_t when doing putq! 11735 * 11736 * NOTE : This function does not ire_refrele the ire passed in as the 11737 * argument. 11738 */ 11739 void 11740 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11741 int caller, ipsec_out_t *io) 11742 { 11743 mblk_t *mp1; 11744 nce_t *nce = ire->ire_nce; 11745 ill_t *ill; 11746 uint64_t delta; 11747 ip6_t *ip6h; 11748 queue_t *stq = ire->ire_stq; 11749 ire_t *ire1 = NULL; 11750 ire_t *save_ire = ire; 11751 boolean_t multirt_send = B_FALSE; 11752 mblk_t *next_mp = NULL; 11753 11754 ip6h = (ip6_t *)mp->b_rptr; 11755 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11756 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11757 ASSERT(nce != NULL); 11758 ASSERT(mp->b_datap->db_type == M_DATA); 11759 ASSERT(stq != NULL); 11760 11761 ill = ire_to_ill(ire); 11762 if (!ill) { 11763 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11764 freemsg(mp); 11765 return; 11766 } 11767 11768 /* 11769 * If a packet is to be sent out an interface that is a 6to4 11770 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11771 * destination, must be checked to have a 6to4 prefix 11772 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11773 * address configured on the sending interface. 
Otherwise, 11774 * the packet was delivered to this interface in error and the 11775 * packet must be dropped. 11776 */ 11777 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11778 ipif_t *ipif = ill->ill_ipif; 11779 11780 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11781 &ip6h->ip6_dst)) { 11782 if (ip_debug > 2) { 11783 /* ip1dbg */ 11784 pr_addr_dbg("ip_xmit_v6: attempting to " 11785 "send 6to4 addressed IPv6 " 11786 "destination (%s) out the wrong " 11787 "interface.\n", AF_INET6, 11788 &ip6h->ip6_dst); 11789 } 11790 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11791 freemsg(mp); 11792 return; 11793 } 11794 } 11795 11796 /* Flow-control check has been done in ip_wput_ire_v6 */ 11797 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11798 caller == IP_WSRV || canput(stq->q_next)) { 11799 uint32_t ill_index; 11800 11801 /* 11802 * In most cases, the emission loop below is entered only 11803 * once. Only in the case where the ire holds the 11804 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11805 * flagged ires in the bucket, and send the packet 11806 * through all crossed RTF_MULTIRT routes. 11807 */ 11808 if (ire->ire_flags & RTF_MULTIRT) { 11809 /* 11810 * Multirouting case. The bucket where ire is stored 11811 * probably holds other RTF_MULTIRT flagged ires 11812 * to the destination. In this call to ip_xmit_v6, 11813 * we attempt to send the packet through all 11814 * those ires. Thus, we first ensure that ire is the 11815 * first RTF_MULTIRT ire in the bucket, 11816 * before walking the ire list. 11817 */ 11818 ire_t *first_ire; 11819 irb_t *irb = ire->ire_bucket; 11820 ASSERT(irb != NULL); 11821 multirt_send = B_TRUE; 11822 11823 /* Make sure we do not omit any multiroute ire. 
*/ 11824 IRB_REFHOLD(irb); 11825 for (first_ire = irb->irb_ire; 11826 first_ire != NULL; 11827 first_ire = first_ire->ire_next) { 11828 if ((first_ire->ire_flags & RTF_MULTIRT) && 11829 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11830 &ire->ire_addr_v6)) && 11831 !(first_ire->ire_marks & 11832 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11833 break; 11834 } 11835 11836 if ((first_ire != NULL) && (first_ire != ire)) { 11837 IRE_REFHOLD(first_ire); 11838 /* ire will be released by the caller */ 11839 ire = first_ire; 11840 nce = ire->ire_nce; 11841 stq = ire->ire_stq; 11842 ill = ire_to_ill(ire); 11843 } 11844 IRB_REFRELE(irb); 11845 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11846 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11847 ILL_MDT_USABLE(ill)) { 11848 /* 11849 * This tcp connection was marked as MDT-capable, but 11850 * it has been turned off due changes in the interface. 11851 * Now that the interface support is back, turn it on 11852 * by notifying tcp. We don't directly modify tcp_mdt, 11853 * since we leave all the details to the tcp code that 11854 * knows better. 11855 */ 11856 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11857 11858 if (mdimp == NULL) { 11859 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11860 "connp %p (ENOMEM)\n", (void *)connp)); 11861 } else { 11862 CONN_INC_REF(connp); 11863 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11864 connp, SQTAG_TCP_INPUT_MCTL); 11865 } 11866 } 11867 11868 do { 11869 boolean_t qos_done = B_FALSE; 11870 11871 if (multirt_send) { 11872 irb_t *irb; 11873 /* 11874 * We are in a multiple send case, need to get 11875 * the next ire and make a duplicate of the 11876 * packet. ire1 holds here the next ire to 11877 * process in the bucket. If multirouting is 11878 * expected, any non-RTF_MULTIRT ire that has 11879 * the right destination address is ignored. 
11880 */ 11881 irb = ire->ire_bucket; 11882 ASSERT(irb != NULL); 11883 11884 IRB_REFHOLD(irb); 11885 for (ire1 = ire->ire_next; 11886 ire1 != NULL; 11887 ire1 = ire1->ire_next) { 11888 if (!(ire1->ire_flags & RTF_MULTIRT)) 11889 continue; 11890 if (!IN6_ARE_ADDR_EQUAL( 11891 &ire1->ire_addr_v6, 11892 &ire->ire_addr_v6)) 11893 continue; 11894 if (ire1->ire_marks & 11895 (IRE_MARK_CONDEMNED| 11896 IRE_MARK_HIDDEN)) 11897 continue; 11898 11899 /* Got one */ 11900 if (ire1 != save_ire) { 11901 IRE_REFHOLD(ire1); 11902 } 11903 break; 11904 } 11905 IRB_REFRELE(irb); 11906 11907 if (ire1 != NULL) { 11908 next_mp = copyb(mp); 11909 if ((next_mp == NULL) || 11910 ((mp->b_cont != NULL) && 11911 ((next_mp->b_cont = 11912 dupmsg(mp->b_cont)) == 11913 NULL))) { 11914 freemsg(next_mp); 11915 next_mp = NULL; 11916 ire_refrele(ire1); 11917 ire1 = NULL; 11918 } 11919 } 11920 11921 /* Last multiroute ire; don't loop anymore. */ 11922 if (ire1 == NULL) { 11923 multirt_send = B_FALSE; 11924 } 11925 } 11926 11927 ill_index = 11928 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11929 11930 /* 11931 * Check for fastpath, we need to hold nce_lock to 11932 * prevent fastpath update from chaining nce_fp_mp. 11933 */ 11934 mutex_enter(&nce->nce_lock); 11935 if ((mp1 = nce->nce_fp_mp) != NULL) { 11936 uint32_t hlen; 11937 uchar_t *rptr; 11938 11939 /* Initiate IPPF processing */ 11940 if (IP6_OUT_IPP(flags)) { 11941 /* 11942 * We have to release the nce lock since 11943 * IPPF components use 11944 * ill_lookup_on_ifindex(), 11945 * which takes the ill_g_lock and the 11946 * ill_lock locks. 
11947 */ 11948 mutex_exit(&nce->nce_lock); 11949 ip_process(IPP_LOCAL_OUT, &mp, 11950 ill_index); 11951 if (mp == NULL) { 11952 BUMP_MIB( 11953 ill->ill_ip6_mib, 11954 ipv6OutDiscards); 11955 if (next_mp != NULL) 11956 freemsg(next_mp); 11957 if (ire != save_ire) { 11958 ire_refrele(ire); 11959 } 11960 return; 11961 } 11962 mutex_enter(&nce->nce_lock); 11963 if ((mp1 = nce->nce_fp_mp) == NULL) { 11964 /* 11965 * Probably disappeared during 11966 * IPQoS processing. 11967 */ 11968 qos_done = B_TRUE; 11969 goto prepend_unitdata; 11970 } 11971 } 11972 hlen = MBLKL(mp1); 11973 rptr = mp->b_rptr - hlen; 11974 /* 11975 * make sure there is room for the fastpath 11976 * datalink header 11977 */ 11978 if (rptr < mp->b_datap->db_base) { 11979 mp1 = copyb(mp1); 11980 if (mp1 == NULL) { 11981 mutex_exit(&nce->nce_lock); 11982 BUMP_MIB(ill->ill_ip6_mib, 11983 ipv6OutDiscards); 11984 freemsg(mp); 11985 if (next_mp != NULL) 11986 freemsg(next_mp); 11987 if (ire != save_ire) { 11988 ire_refrele(ire); 11989 } 11990 return; 11991 } 11992 mp1->b_cont = mp; 11993 11994 /* Get the priority marking, if any */ 11995 mp1->b_band = mp->b_band; 11996 mp = mp1; 11997 } else { 11998 mp->b_rptr = rptr; 11999 /* 12000 * fastpath - pre-pend datalink 12001 * header 12002 */ 12003 bcopy(mp1->b_rptr, rptr, hlen); 12004 } 12005 12006 mutex_exit(&nce->nce_lock); 12007 12008 } else { 12009 prepend_unitdata: 12010 mutex_exit(&nce->nce_lock); 12011 mp1 = nce->nce_res_mp; 12012 if (mp1 == NULL) { 12013 ip1dbg(("ip_xmit_v6: No resolution " 12014 "block ire = %p\n", (void *)ire)); 12015 freemsg(mp); 12016 if (next_mp != NULL) 12017 freemsg(next_mp); 12018 if (ire != save_ire) { 12019 ire_refrele(ire); 12020 } 12021 return; 12022 } 12023 /* 12024 * Prepend the DL_UNITDATA_REQ. 
12025 */ 12026 mp1 = copyb(mp1); 12027 if (mp1 == NULL) { 12028 BUMP_MIB(ill->ill_ip6_mib, 12029 ipv6OutDiscards); 12030 freemsg(mp); 12031 if (next_mp != NULL) 12032 freemsg(next_mp); 12033 if (ire != save_ire) { 12034 ire_refrele(ire); 12035 } 12036 return; 12037 } 12038 mp1->b_cont = mp; 12039 mp = mp1; 12040 /* 12041 * Initiate IPPF processing, if it is 12042 * already done, bypass. 12043 */ 12044 if (!qos_done && IP6_OUT_IPP(flags)) { 12045 ip_process(IPP_LOCAL_OUT, &mp, 12046 ill_index); 12047 if (mp == NULL) { 12048 BUMP_MIB(ill->ill_ip6_mib, 12049 ipv6OutDiscards); 12050 if (next_mp != NULL) 12051 freemsg(next_mp); 12052 if (ire != save_ire) { 12053 ire_refrele(ire); 12054 } 12055 return; 12056 } 12057 } 12058 } 12059 12060 /* 12061 * Update ire counters; for save_ire, this has been 12062 * done by the caller. 12063 */ 12064 if (ire != save_ire) { 12065 UPDATE_OB_PKT_COUNT(ire); 12066 ire->ire_last_used_time = lbolt; 12067 } 12068 12069 /* 12070 * Send it down. XXX Do we want to flow control AH/ESP 12071 * packets that carry TCP payloads? We don't flow 12072 * control TCP packets, but we should also not 12073 * flow-control TCP packets that have been protected. 12074 * We don't have an easy way to find out if an AH/ESP 12075 * packet was originally TCP or not currently. 12076 */ 12077 if (io == NULL) { 12078 putnext(stq, mp); 12079 } else { 12080 /* 12081 * Safety Pup says: make sure this is 12082 * going to the right interface! 12083 */ 12084 if (io->ipsec_out_capab_ill_index != 12085 ill_index) { 12086 /* IPsec kstats: bump lose counter */ 12087 freemsg(mp1); 12088 } else { 12089 ipsec_hw_putnext(stq, mp); 12090 } 12091 } 12092 12093 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12094 if (ire != save_ire) { 12095 ire_refrele(ire); 12096 } 12097 if (multirt_send) { 12098 ASSERT(ire1 != NULL); 12099 /* 12100 * Proceed with the next RTF_MULTIRT 12101 * ire, also set up the send-to queue 12102 * accordingly. 
12103 */ 12104 ire = ire1; 12105 ire1 = NULL; 12106 stq = ire->ire_stq; 12107 nce = ire->ire_nce; 12108 ill = ire_to_ill(ire); 12109 mp = next_mp; 12110 next_mp = NULL; 12111 continue; 12112 } 12113 ASSERT(next_mp == NULL); 12114 ASSERT(ire1 == NULL); 12115 return; 12116 } 12117 12118 ASSERT(nce->nce_state != ND_INCOMPLETE); 12119 12120 /* 12121 * Check for upper layer advice 12122 */ 12123 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12124 /* 12125 * It should be o.k. to check the state without 12126 * a lock here, at most we lose an advice. 12127 */ 12128 nce->nce_last = TICK_TO_MSEC(lbolt64); 12129 if (nce->nce_state != ND_REACHABLE) { 12130 12131 mutex_enter(&nce->nce_lock); 12132 nce->nce_state = ND_REACHABLE; 12133 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12134 mutex_exit(&nce->nce_lock); 12135 (void) untimeout(nce->nce_timeout_id); 12136 if (ip_debug > 2) { 12137 /* ip1dbg */ 12138 pr_addr_dbg("ip_xmit_v6: state" 12139 " for %s changed to" 12140 " REACHABLE\n", AF_INET6, 12141 &ire->ire_addr_v6); 12142 } 12143 } 12144 if (ire != save_ire) { 12145 ire_refrele(ire); 12146 } 12147 if (multirt_send) { 12148 ASSERT(ire1 != NULL); 12149 /* 12150 * Proceed with the next RTF_MULTIRT 12151 * ire, also set up the send-to queue 12152 * accordingly. 12153 */ 12154 ire = ire1; 12155 ire1 = NULL; 12156 stq = ire->ire_stq; 12157 nce = ire->ire_nce; 12158 ill = ire_to_ill(ire); 12159 mp = next_mp; 12160 next_mp = NULL; 12161 continue; 12162 } 12163 ASSERT(next_mp == NULL); 12164 ASSERT(ire1 == NULL); 12165 return; 12166 } 12167 12168 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12169 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12170 " ill_reachable_time = %d \n", delta, 12171 ill->ill_reachable_time)); 12172 if (delta > (uint64_t)ill->ill_reachable_time) { 12173 nce = ire->ire_nce; 12174 mutex_enter(&nce->nce_lock); 12175 switch (nce->nce_state) { 12176 case ND_REACHABLE: 12177 case ND_STALE: 12178 /* 12179 * ND_REACHABLE is identical to 12180 * ND_STALE in this specific case. 
If 12181 * reachable time has expired for this 12182 * neighbor (delta is greater than 12183 * reachable time), conceptually, the 12184 * neighbor cache is no longer in 12185 * REACHABLE state, but already in 12186 * STALE state. So the correct 12187 * transition here is to ND_DELAY. 12188 */ 12189 nce->nce_state = ND_DELAY; 12190 mutex_exit(&nce->nce_lock); 12191 NDP_RESTART_TIMER(nce, 12192 delay_first_probe_time); 12193 if (ip_debug > 3) { 12194 /* ip2dbg */ 12195 pr_addr_dbg("ip_xmit_v6: state" 12196 " for %s changed to" 12197 " DELAY\n", AF_INET6, 12198 &ire->ire_addr_v6); 12199 } 12200 break; 12201 case ND_DELAY: 12202 case ND_PROBE: 12203 mutex_exit(&nce->nce_lock); 12204 /* Timers have already started */ 12205 break; 12206 case ND_UNREACHABLE: 12207 /* 12208 * ndp timer has detected that this nce 12209 * is unreachable and initiated deleting 12210 * this nce and all its associated IREs. 12211 * This is a race where we found the 12212 * ire before it was deleted and have 12213 * just sent out a packet using this 12214 * unreachable nce. 12215 */ 12216 mutex_exit(&nce->nce_lock); 12217 break; 12218 default: 12219 ASSERT(0); 12220 } 12221 } 12222 12223 if (multirt_send) { 12224 ASSERT(ire1 != NULL); 12225 /* 12226 * Proceed with the next RTF_MULTIRT ire, 12227 * Also set up the send-to queue accordingly. 12228 */ 12229 if (ire != save_ire) { 12230 ire_refrele(ire); 12231 } 12232 ire = ire1; 12233 ire1 = NULL; 12234 stq = ire->ire_stq; 12235 nce = ire->ire_nce; 12236 ill = ire_to_ill(ire); 12237 mp = next_mp; 12238 next_mp = NULL; 12239 } 12240 } while (multirt_send); 12241 /* 12242 * In the multirouting case, release the last ire used for 12243 * emission. save_ire will be released by the caller. 12244 */ 12245 if (ire != save_ire) { 12246 ire_refrele(ire); 12247 } 12248 } else { 12249 /* 12250 * Queue packet if we have an conn to give back pressure. 
12251 * We can't queue packets intended for hardware acceleration 12252 * since we've tossed that state already. If the packet is 12253 * being fed back from ire_send_v6, we don't know the 12254 * position in the queue to enqueue the packet and we discard 12255 * the packet. 12256 */ 12257 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12258 (caller != IRE_SEND)) { 12259 if (caller == IP_WSRV) { 12260 connp->conn_did_putbq = 1; 12261 (void) putbq(connp->conn_wq, mp); 12262 conn_drain_insert(connp); 12263 /* 12264 * caller == IP_WSRV implies we are 12265 * the service thread, and the 12266 * queue is already noenabled. 12267 * The check for canput and 12268 * the putbq is not atomic. 12269 * So we need to check again. 12270 */ 12271 if (canput(stq->q_next)) 12272 connp->conn_did_putbq = 0; 12273 } else { 12274 (void) putq(connp->conn_wq, mp); 12275 } 12276 return; 12277 } 12278 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12279 freemsg(mp); 12280 return; 12281 } 12282 } 12283 12284 /* 12285 * pr_addr_dbg function provides the needed buffer space to call 12286 * inet_ntop() function's 3rd argument. This function should be 12287 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12288 * stack buffer space in it's own stack frame. This function uses 12289 * a buffer from it's own stack and prints the information. 12290 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12291 * 12292 * Note: This function can call inet_ntop() once. 12293 */ 12294 void 12295 pr_addr_dbg(char *fmt1, int af, const void *addr) 12296 { 12297 char buf[INET6_ADDRSTRLEN]; 12298 12299 if (fmt1 == NULL) { 12300 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12301 return; 12302 } 12303 12304 /* 12305 * This does not compare debug level and just prints 12306 * out. Thus it is the responsibility of the caller 12307 * to check the appropriate debug-level before calling 12308 * this function. 
12309 */ 12310 if (ip_debug > 0) { 12311 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12312 } 12313 12314 12315 } 12316 12317 12318 /* 12319 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12320 * if needed and extension headers) that will be needed based on the 12321 * ip6_pkt_t structure passed by the caller. 12322 * 12323 * The returned length does not include the length of the upper level 12324 * protocol (ULP) header. 12325 */ 12326 int 12327 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12328 { 12329 int len; 12330 12331 len = IPV6_HDR_LEN; 12332 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12333 len += sizeof (ip6i_t); 12334 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12335 ASSERT(ipp->ipp_hopoptslen != 0); 12336 len += ipp->ipp_hopoptslen; 12337 } 12338 if (ipp->ipp_fields & IPPF_RTHDR) { 12339 ASSERT(ipp->ipp_rthdrlen != 0); 12340 len += ipp->ipp_rthdrlen; 12341 } 12342 /* 12343 * En-route destination options 12344 * Only do them if there's a routing header as well 12345 */ 12346 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12347 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12348 ASSERT(ipp->ipp_rtdstoptslen != 0); 12349 len += ipp->ipp_rtdstoptslen; 12350 } 12351 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12352 ASSERT(ipp->ipp_dstoptslen != 0); 12353 len += ipp->ipp_dstoptslen; 12354 } 12355 return (len); 12356 } 12357 12358 /* 12359 * All-purpose routine to build a header chain of an IPv6 header 12360 * followed by any required extension headers and a proto header, 12361 * preceeded (where necessary) by an ip6i_t private header. 12362 * 12363 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12364 * will be filled in appropriately. 12365 * Thus the caller must fill in the rest of the IPv6 header, such as 12366 * traffic class/flowid, source address (if not set here), hoplimit (if not 12367 * set here) and destination address. 12368 * 12369 * The extension headers and ip6i_t header will all be fully filled in. 
12370 */ 12371 void 12372 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12373 ip6_pkt_t *ipp, uint8_t protocol) 12374 { 12375 uint8_t *nxthdr_ptr; 12376 uint8_t *cp; 12377 ip6i_t *ip6i; 12378 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12379 12380 /* 12381 * If sending private ip6i_t header down (checksum info, nexthop, 12382 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12383 * then fill it in. (The checksum info will be filled in by icmp). 12384 */ 12385 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12386 ip6i = (ip6i_t *)ip6h; 12387 ip6h = (ip6_t *)&ip6i[1]; 12388 12389 ip6i->ip6i_flags = 0; 12390 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12391 if (ipp->ipp_fields & IPPF_IFINDEX || 12392 ipp->ipp_fields & IPPF_SCOPE_ID) { 12393 ASSERT(ipp->ipp_ifindex != 0); 12394 ip6i->ip6i_flags |= IP6I_IFINDEX; 12395 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12396 } 12397 if (ipp->ipp_fields & IPPF_ADDR) { 12398 /* 12399 * Enable per-packet source address verification if 12400 * IPV6_PKTINFO specified the source address. 12401 * ip6_src is set in the transport's _wput function. 12402 */ 12403 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12404 &ipp->ipp_addr)); 12405 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12406 } 12407 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12408 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12409 /* 12410 * We need to set this flag so that IP doesn't 12411 * rewrite the IPv6 header's hoplimit with the 12412 * current default value. 
12413 */ 12414 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12415 } 12416 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12417 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12418 &ipp->ipp_nexthop)); 12419 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12420 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12421 } 12422 /* 12423 * tell IP this is an ip6i_t private header 12424 */ 12425 ip6i->ip6i_nxt = IPPROTO_RAW; 12426 } 12427 /* Initialize IPv6 header */ 12428 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12429 if (ipp->ipp_fields & IPPF_TCLASS) { 12430 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12431 (ipp->ipp_tclass << 20); 12432 } 12433 if (ipp->ipp_fields & IPPF_ADDR) 12434 ip6h->ip6_src = ipp->ipp_addr; 12435 12436 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12437 cp = (uint8_t *)&ip6h[1]; 12438 /* 12439 * Here's where we have to start stringing together 12440 * any extension headers in the right order: 12441 * Hop-by-hop, destination, routing, and final destination opts. 12442 */ 12443 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12444 /* Hop-by-hop options */ 12445 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12446 12447 *nxthdr_ptr = IPPROTO_HOPOPTS; 12448 nxthdr_ptr = &hbh->ip6h_nxt; 12449 12450 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12451 cp += ipp->ipp_hopoptslen; 12452 } 12453 /* 12454 * En-route destination options 12455 * Only do them if there's a routing header as well 12456 */ 12457 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12458 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12459 ip6_dest_t *dst = (ip6_dest_t *)cp; 12460 12461 *nxthdr_ptr = IPPROTO_DSTOPTS; 12462 nxthdr_ptr = &dst->ip6d_nxt; 12463 12464 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12465 cp += ipp->ipp_rtdstoptslen; 12466 } 12467 /* 12468 * Routing header next 12469 */ 12470 if (ipp->ipp_fields & IPPF_RTHDR) { 12471 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12472 12473 *nxthdr_ptr = IPPROTO_ROUTING; 12474 nxthdr_ptr = &rt->ip6r_nxt; 12475 12476 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12477 cp += ipp->ipp_rthdrlen; 12478 } 
12479 /* 12480 * Do ultimate destination options 12481 */ 12482 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12483 ip6_dest_t *dest = (ip6_dest_t *)cp; 12484 12485 *nxthdr_ptr = IPPROTO_DSTOPTS; 12486 nxthdr_ptr = &dest->ip6d_nxt; 12487 12488 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12489 cp += ipp->ipp_dstoptslen; 12490 } 12491 /* 12492 * Now set the last header pointer to the proto passed in 12493 */ 12494 *nxthdr_ptr = protocol; 12495 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12496 } 12497 12498 /* 12499 * Return a pointer to the routing header extension header 12500 * in the IPv6 header(s) chain passed in. 12501 * If none found, return NULL 12502 * Assumes that all extension headers are in same mblk as the v6 header 12503 */ 12504 ip6_rthdr_t * 12505 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12506 { 12507 ip6_dest_t *desthdr; 12508 ip6_frag_t *fraghdr; 12509 uint_t hdrlen; 12510 uint8_t nexthdr; 12511 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12512 12513 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12514 return ((ip6_rthdr_t *)ptr); 12515 12516 /* 12517 * The routing header will precede all extension headers 12518 * other than the hop-by-hop and destination options 12519 * extension headers, so if we see anything other than those, 12520 * we're done and didn't find it. 12521 * We could see a destination options header alone but no 12522 * routing header, in which case we'll return NULL as soon as 12523 * we see anything after that. 12524 * Hop-by-hop and destination option headers are identical, 12525 * so we can use either one we want as a template. 12526 */ 12527 nexthdr = ip6h->ip6_nxt; 12528 while (ptr < endptr) { 12529 /* Is there enough left for len + nexthdr? 
*/ 12530 if (ptr + MIN_EHDR_LEN > endptr) 12531 return (NULL); 12532 12533 switch (nexthdr) { 12534 case IPPROTO_HOPOPTS: 12535 case IPPROTO_DSTOPTS: 12536 /* Assumes the headers are identical for hbh and dst */ 12537 desthdr = (ip6_dest_t *)ptr; 12538 hdrlen = 8 * (desthdr->ip6d_len + 1); 12539 nexthdr = desthdr->ip6d_nxt; 12540 break; 12541 12542 case IPPROTO_ROUTING: 12543 return ((ip6_rthdr_t *)ptr); 12544 12545 case IPPROTO_FRAGMENT: 12546 fraghdr = (ip6_frag_t *)ptr; 12547 hdrlen = sizeof (ip6_frag_t); 12548 nexthdr = fraghdr->ip6f_nxt; 12549 break; 12550 12551 default: 12552 return (NULL); 12553 } 12554 ptr += hdrlen; 12555 } 12556 return (NULL); 12557 } 12558 12559 /* 12560 * Called for source-routed packets originating on this node. 12561 * Manipulates the original routing header by moving every entry up 12562 * one slot, placing the first entry in the v6 header's v6_dst field, 12563 * and placing the ultimate destination in the routing header's last 12564 * slot. 12565 * 12566 * Returns the checksum diference between the ultimate destination 12567 * (last hop in the routing header when the packet is sent) and 12568 * the first hop (ip6_dst when the packet is sent) 12569 */ 12570 uint32_t 12571 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12572 { 12573 uint_t numaddr; 12574 uint_t i; 12575 in6_addr_t *addrptr; 12576 in6_addr_t tmp; 12577 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12578 uint32_t cksm; 12579 uint32_t addrsum = 0; 12580 uint16_t *ptr; 12581 12582 /* 12583 * Perform any processing needed for source routing. 12584 * We know that all extension headers will be in the same mblk 12585 * as the IPv6 header. 12586 */ 12587 12588 /* 12589 * If no segments left in header, or the header length field is zero, 12590 * don't move hop addresses around; 12591 * Checksum difference is zero. 
12592 */ 12593 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12594 return (0); 12595 12596 ptr = (uint16_t *)&ip6h->ip6_dst; 12597 cksm = 0; 12598 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12599 cksm += ptr[i]; 12600 } 12601 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12602 12603 /* 12604 * Here's where the fun begins - we have to 12605 * move all addresses up one spot, take the 12606 * first hop and make it our first ip6_dst, 12607 * and place the ultimate destination in the 12608 * newly-opened last slot. 12609 */ 12610 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12611 numaddr = rthdr->ip6r0_len / 2; 12612 tmp = *addrptr; 12613 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12614 *addrptr = addrptr[1]; 12615 } 12616 *addrptr = ip6h->ip6_dst; 12617 ip6h->ip6_dst = tmp; 12618 12619 /* 12620 * From the checksummed ultimate destination subtract the checksummed 12621 * current ip6_dst (the first hop address). Return that number. 12622 * (In the v4 case, the second part of this is done in each routine 12623 * that calls ip_massage_options(). We do it all in this one place 12624 * for v6). 12625 */ 12626 ptr = (uint16_t *)&ip6h->ip6_dst; 12627 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12628 addrsum += ptr[i]; 12629 } 12630 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12631 if ((int)cksm < 0) 12632 cksm--; 12633 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12634 12635 return (cksm); 12636 } 12637 12638 /* 12639 * See if the upper-level protocol indicated by 'proto' will be able 12640 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12641 * ICMP6_PACKET_TOO_BIG (IPv6). 12642 */ 12643 static boolean_t 12644 ip_ulp_cando_pkt2big(int proto) 12645 { 12646 /* 12647 * For now, only TCP can handle this. 12648 * Tunnels may be able to also, but since tun isn't working over 12649 * IPv6 yet, don't worry about it for now. 
12650 */ 12651 return (proto == IPPROTO_TCP); 12652 } 12653 12654 12655 /* 12656 * Propagate a multicast group membership operation (join/leave) (*fn) on 12657 * all interfaces crossed by the related multirt routes. 12658 * The call is considered successful if the operation succeeds 12659 * on at least one interface. 12660 * The function is called if the destination address in the packet to send 12661 * is multirouted. 12662 */ 12663 int 12664 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12665 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12666 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12667 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12668 { 12669 ire_t *ire_gw; 12670 irb_t *irb; 12671 int index, error = 0; 12672 opt_restart_t *or; 12673 12674 irb = ire->ire_bucket; 12675 ASSERT(irb != NULL); 12676 12677 ASSERT(DB_TYPE(first_mp) == M_CTL); 12678 or = (opt_restart_t *)first_mp->b_rptr; 12679 12680 IRB_REFHOLD(irb); 12681 for (; ire != NULL; ire = ire->ire_next) { 12682 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12683 continue; 12684 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12685 continue; 12686 12687 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12688 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12689 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12690 /* No resolver exists for the gateway; skip this ire. */ 12691 if (ire_gw == NULL) 12692 continue; 12693 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12694 /* 12695 * A resolver exists: we can get the interface on which we have 12696 * to apply the operation. 
12697 */ 12698 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12699 first_mp); 12700 if (error == 0) 12701 or->or_private = CGTP_MCAST_SUCCESS; 12702 12703 if (ip_debug > 0) { 12704 ulong_t off; 12705 char *ksym; 12706 12707 ksym = kobj_getsymname((uintptr_t)fn, &off); 12708 ip2dbg(("ip_multirt_apply_membership_v6: " 12709 "called %s, multirt group 0x%08x via itf 0x%08x, " 12710 "error %d [success %u]\n", 12711 ksym ? ksym : "?", 12712 ntohl(V4_PART_OF_V6((*v6grp))), 12713 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12714 error, or->or_private)); 12715 } 12716 12717 ire_refrele(ire_gw); 12718 if (error == EINPROGRESS) { 12719 IRB_REFRELE(irb); 12720 return (error); 12721 } 12722 } 12723 IRB_REFRELE(irb); 12724 /* 12725 * Consider the call as successful if we succeeded on at least 12726 * one interface. Otherwise, return the last encountered error. 12727 */ 12728 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12729 } 12730 12731 void 12732 ip6_kstat_init(void) 12733 { 12734 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12735 "net", KSTAT_TYPE_NAMED, 12736 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12737 KSTAT_FLAG_VIRTUAL)) != NULL) { 12738 ip6_kstat->ks_data = &ip6_statistics; 12739 kstat_install(ip6_kstat); 12740 } 12741 } 12742 12743 /* 12744 * The following two functions set and get the value for the 12745 * IPV6_SRC_PREFERENCES socket option. 12746 */ 12747 int 12748 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12749 { 12750 /* 12751 * We only support preferences that are covered by 12752 * IPV6_PREFER_SRC_MASK. 12753 */ 12754 if (prefs & ~IPV6_PREFER_SRC_MASK) 12755 return (EINVAL); 12756 12757 /* 12758 * Look for conflicting preferences or default preferences. If 12759 * both bits of a related pair are clear, the application wants the 12760 * system's default value for that pair. Both bits in a pair can't 12761 * be set. 
12762 */ 12763 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12764 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12765 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12766 IPV6_PREFER_SRC_MIPMASK) { 12767 return (EINVAL); 12768 } 12769 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12770 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12771 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12772 IPV6_PREFER_SRC_TMPMASK) { 12773 return (EINVAL); 12774 } 12775 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12776 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12777 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12778 IPV6_PREFER_SRC_CGAMASK) { 12779 return (EINVAL); 12780 } 12781 12782 connp->conn_src_preferences = prefs; 12783 return (0); 12784 } 12785 12786 size_t 12787 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12788 { 12789 *val = connp->conn_src_preferences; 12790 return (sizeof (connp->conn_src_preferences)); 12791 } 12792 12793 int 12794 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12795 { 12796 ill_t *ill; 12797 ire_t *ire; 12798 int error; 12799 12800 /* 12801 * Verify the source address and ifindex. Privileged users can use 12802 * any source address. For ancillary data the source address is 12803 * checked in ip_wput_v6. 12804 */ 12805 if (pkti->ipi6_ifindex != 0) { 12806 ASSERT(connp != NULL); 12807 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12808 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12809 if (ill == NULL) { 12810 /* 12811 * We just want to know if the interface exists, we 12812 * don't really care about the ill pointer itself. 
12813 */ 12814 if (error != EINPROGRESS) 12815 return (error); 12816 error = 0; /* Ensure we don't use it below */ 12817 } else { 12818 ill_refrele(ill); 12819 } 12820 } 12821 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12822 secpolicy_net_rawaccess(cr) != 0) { 12823 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12824 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12825 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 12826 if (ire != NULL) 12827 ire_refrele(ire); 12828 else 12829 return (ENXIO); 12830 } 12831 return (0); 12832 } 12833 12834 /* 12835 * Get the size of the IP options (including the IP headers size) 12836 * without including the AH header's size. If till_ah is B_FALSE, 12837 * and if AH header is present, dest options beyond AH header will 12838 * also be included in the returned size. 12839 */ 12840 int 12841 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12842 { 12843 ip6_t *ip6h; 12844 uint8_t nexthdr; 12845 uint8_t *whereptr; 12846 ip6_hbh_t *hbhhdr; 12847 ip6_dest_t *dsthdr; 12848 ip6_rthdr_t *rthdr; 12849 int ehdrlen; 12850 int size; 12851 ah_t *ah; 12852 12853 ip6h = (ip6_t *)mp->b_rptr; 12854 size = IPV6_HDR_LEN; 12855 nexthdr = ip6h->ip6_nxt; 12856 whereptr = (uint8_t *)&ip6h[1]; 12857 for (;;) { 12858 /* Assume IP has already stripped it */ 12859 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12860 switch (nexthdr) { 12861 case IPPROTO_HOPOPTS: 12862 hbhhdr = (ip6_hbh_t *)whereptr; 12863 nexthdr = hbhhdr->ip6h_nxt; 12864 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12865 break; 12866 case IPPROTO_DSTOPTS: 12867 dsthdr = (ip6_dest_t *)whereptr; 12868 nexthdr = dsthdr->ip6d_nxt; 12869 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12870 break; 12871 case IPPROTO_ROUTING: 12872 rthdr = (ip6_rthdr_t *)whereptr; 12873 nexthdr = rthdr->ip6r_nxt; 12874 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12875 break; 12876 default : 12877 if (till_ah) { 12878 ASSERT(nexthdr == IPPROTO_AH); 12879 return (size); 12880 } 12881 /* 12882 * If we don't have a AH 
header to traverse, 12883 * return now. This happens normally for 12884 * outbound datagrams where we have not inserted 12885 * the AH header. 12886 */ 12887 if (nexthdr != IPPROTO_AH) { 12888 return (size); 12889 } 12890 12891 /* 12892 * We don't include the AH header's size 12893 * to be symmetrical with other cases where 12894 * we either don't have a AH header (outbound) 12895 * or peek into the AH header yet (inbound and 12896 * not pulled up yet). 12897 */ 12898 ah = (ah_t *)whereptr; 12899 nexthdr = ah->ah_nexthdr; 12900 ehdrlen = (ah->ah_length << 2) + 8; 12901 12902 if (nexthdr == IPPROTO_DSTOPTS) { 12903 if (whereptr + ehdrlen >= mp->b_wptr) { 12904 /* 12905 * The destination options header 12906 * is not part of the first mblk. 12907 */ 12908 whereptr = mp->b_cont->b_rptr; 12909 } else { 12910 whereptr += ehdrlen; 12911 } 12912 12913 dsthdr = (ip6_dest_t *)whereptr; 12914 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12915 size += ehdrlen; 12916 } 12917 return (size); 12918 } 12919 whereptr += ehdrlen; 12920 size += ehdrlen; 12921 } 12922 } 12923