1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>

extern squeue_func_t ip_input_proc;

/*
 * IP statistics.
 *
 * Both macros update the named 64-bit counter of ip6_statistics with a
 * plain read-modify-write expression.
 */
#define	IP6_STAT(x)		(ip6_statistics.x.value.ui64++)
#define	IP6_STAT_UPDATE(x, n)	(ip6_statistics.x.value.ui64 += (n))

/*
 * One named kstat per counter.  The member order here must match the
 * order of the positional initializers of ip6_statistics below.
 */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
	kstat_named_t	ip6_out_sw_cksum;
	kstat_named_t	ip6_in_sw_cksum;
	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_sw_cksum_err;
	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
	kstat_named_t	ip6_udp_in_sw_cksum_err;
	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
	kstat_named_t	ip6_frag_mdt_pkt_out;
	kstat_named_t	ip6_frag_mdt_discarded;
	kstat_named_t	ip6_frag_mdt_allocfail;
	kstat_named_t	ip6_frag_mdt_addpdescfail;
	kstat_named_t	ip6_frag_mdt_allocd;
} ip6_stat_t;

/* Counter names and data types; every counter is a 64-bit unsigned value. */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",			KSTAT_DATA_UINT64 },
	{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_tcp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_pkt_out",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_discarded",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_addpdescfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocd",		KSTAT_DATA_UINT64 },
};

static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipIfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known addresses.  Each initializer is a list of four 32-bit words;
 * the words are spelled differently under _BIG_ENDIAN so that the bytes in
 * memory are the same (network order) on either kind of machine.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/* Leave room for ip_newroute to tack on the src and target addresses */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations for functions private to this file. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);
static boolean_t ip_ulp_cando_pkt2big(int);

void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/* Read-side queue initialization: put procedure is ip_rput_v6. */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,
	NULL,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/* Write-side queue initialization: put is ip_wput_v6, service is ip_wsrv. */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,
	(pfi_t)ip_wsrv,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
 * The message has already been checksummed and if needed,
 * a copy has been made to be sent to any interested ICMP client (conn)
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
331 */ 332 static void 333 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 334 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 335 { 336 icmp6_t *icmp6; 337 ip6_t *ip6h; 338 boolean_t interested; 339 ip6i_t *ip6i; 340 in6_addr_t origsrc; 341 ire_t *ire; 342 mblk_t *first_mp; 343 ipsec_in_t *ii; 344 345 ASSERT(ill != NULL); 346 first_mp = mp; 347 if (mctl_present) { 348 mp = first_mp->b_cont; 349 ASSERT(mp != NULL); 350 351 ii = (ipsec_in_t *)first_mp->b_rptr; 352 ASSERT(ii->ipsec_in_type == IPSEC_IN); 353 } 354 355 ip6h = (ip6_t *)mp->b_rptr; 356 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 358 359 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 360 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 361 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 362 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 363 freemsg(first_mp); 364 return; 365 } 366 ip6h = (ip6_t *)mp->b_rptr; 367 } 368 if (icmp_accept_clear_messages == 0) { 369 first_mp = ipsec_check_global_policy(first_mp, NULL, 370 NULL, ip6h, mctl_present); 371 if (first_mp == NULL) 372 return; 373 } 374 375 /* 376 * On a labeled system, we have to check whether the zone itself is 377 * permitted to receive raw traffic. 378 */ 379 if (is_system_labeled()) { 380 if (zoneid == ALL_ZONES) 381 zoneid = tsol_packet_to_zoneid(mp); 382 if (!tsol_can_accept_raw(mp, B_FALSE)) { 383 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 384 zoneid)); 385 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 386 freemsg(first_mp); 387 return; 388 } 389 } 390 391 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 392 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 393 icmp6->icmp6_code)); 394 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 395 396 /* Initiate IPPF processing here */ 397 if (IP6_IN_IPP(flags)) { 398 399 /* 400 * If the ifindex changes due to SIOCSLIFINDEX 401 * packet may return to IP on the wrong ill. 
402 */ 403 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 404 if (mp == NULL) { 405 if (mctl_present) { 406 freeb(first_mp); 407 } 408 return; 409 } 410 } 411 412 switch (icmp6->icmp6_type) { 413 case ICMP6_DST_UNREACH: 414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 415 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 417 break; 418 419 case ICMP6_TIME_EXCEEDED: 420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 421 break; 422 423 case ICMP6_PARAM_PROB: 424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 425 break; 426 427 case ICMP6_PACKET_TOO_BIG: 428 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 429 zoneid); 430 return; 431 case ICMP6_ECHO_REQUEST: 432 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 433 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 434 !ipv6_resp_echo_mcast) 435 break; 436 437 /* 438 * We must have exclusive use of the mblk to convert it to 439 * a response. 440 * If not, we copy it. 441 */ 442 if (mp->b_datap->db_ref > 1) { 443 mblk_t *mp1; 444 445 mp1 = copymsg(mp); 446 freemsg(mp); 447 if (mp1 == NULL) { 448 BUMP_MIB(ill->ill_icmp6_mib, 449 ipv6IfIcmpInErrors); 450 if (mctl_present) 451 freeb(first_mp); 452 return; 453 } 454 mp = mp1; 455 ip6h = (ip6_t *)mp->b_rptr; 456 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 457 if (mctl_present) 458 first_mp->b_cont = mp; 459 else 460 first_mp = mp; 461 } 462 463 /* 464 * Turn the echo into an echo reply. 465 * Remove any extension headers (do not reverse a source route) 466 * and clear the flow id (keep traffic class for now). 
467 */ 468 if (hdr_length != IPV6_HDR_LEN) { 469 int i; 470 471 for (i = 0; i < IPV6_HDR_LEN; i++) 472 mp->b_rptr[hdr_length - i - 1] = 473 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 474 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 475 ip6h = (ip6_t *)mp->b_rptr; 476 ip6h->ip6_nxt = IPPROTO_ICMPV6; 477 hdr_length = IPV6_HDR_LEN; 478 } 479 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 480 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 481 482 ip6h->ip6_plen = 483 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 484 origsrc = ip6h->ip6_src; 485 /* 486 * Reverse the source and destination addresses. 487 * If the return address is a multicast, zero out the source 488 * (ip_wput_v6 will set an address). 489 */ 490 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 491 ip6h->ip6_src = ipv6_all_zeros; 492 ip6h->ip6_dst = origsrc; 493 } else { 494 ip6h->ip6_src = ip6h->ip6_dst; 495 ip6h->ip6_dst = origsrc; 496 } 497 498 /* set the hop limit */ 499 ip6h->ip6_hops = ipv6_def_hops; 500 501 /* 502 * Prepare for checksum by putting icmp length in the icmp 503 * checksum field. The checksum is calculated in ip_wput_v6. 504 */ 505 icmp6->icmp6_cksum = ip6h->ip6_plen; 506 /* 507 * ICMP echo replies should go out on the same interface 508 * the request came on as probes used by in.mpathd for 509 * detecting NIC failures are ECHO packets. We turn-off load 510 * spreading by allocating a ip6i and setting ip6i_attach_if 511 * to B_TRUE which is handled both by ip_wput_v6 and 512 * ip_newroute_v6. If we don't turnoff load spreading, 513 * the packets might get dropped if there are no 514 * non-FAILED/INACTIVE interfaces for it to go out on and 515 * in.mpathd would wrongly detect a failure or mis-detect 516 * a NIC failure as a link failure. As load spreading can 517 * happen only if ill_group is not NULL, we do only for 518 * that case and this does not affect the normal case. 519 * 520 * We force this only on echo packets that came from on-link 521 * hosts. 
We restrict this to link-local addresses which 522 * is used by in.mpathd for probing. In the IPv6 case, 523 * default routes typically have an ire_ipif pointer and 524 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 525 * might work. As a default route out of this interface 526 * may not be present, enforcing this packet to go out in 527 * this case may not work. 528 */ 529 if (ill->ill_group != NULL && 530 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 531 /* 532 * If we are sending replies to ourselves, don't 533 * set ATTACH_IF as we may not be able to find 534 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 535 * causes ip_wput_v6 to look for an IRE_LOCAL on 536 * "ill" which it may not find and will try to 537 * create an IRE_CACHE for our local address. Once 538 * we do this, we will try to forward all packets 539 * meant to our LOCAL address. 540 */ 541 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 542 NULL); 543 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 544 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 545 if (mp == NULL) { 546 BUMP_MIB(ill->ill_icmp6_mib, 547 ipv6IfIcmpInErrors); 548 if (ire != NULL) 549 ire_refrele(ire); 550 if (mctl_present) 551 freeb(first_mp); 552 return; 553 } else if (mctl_present) { 554 first_mp->b_cont = mp; 555 } else { 556 first_mp = mp; 557 } 558 ip6i = (ip6i_t *)mp->b_rptr; 559 ip6i->ip6i_flags = IP6I_ATTACH_IF; 560 ip6i->ip6i_ifindex = 561 ill->ill_phyint->phyint_ifindex; 562 } 563 if (ire != NULL) 564 ire_refrele(ire); 565 } 566 567 if (!mctl_present) { 568 /* 569 * This packet should go out the same way as it 570 * came in i.e in clear. To make sure that global 571 * policy will not be applied to this in ip_wput, 572 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
573 */ 574 ASSERT(first_mp == mp); 575 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 576 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 577 freemsg(mp); 578 return; 579 } 580 ii = (ipsec_in_t *)first_mp->b_rptr; 581 582 /* This is not a secure packet */ 583 ii->ipsec_in_secure = B_FALSE; 584 first_mp->b_cont = mp; 585 } 586 ii->ipsec_in_zoneid = zoneid; 587 ASSERT(zoneid != ALL_ZONES); 588 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 589 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 590 return; 591 } 592 put(WR(q), first_mp); 593 return; 594 595 case ICMP6_ECHO_REPLY: 596 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 597 break; 598 599 case ND_ROUTER_SOLICIT: 600 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 601 break; 602 603 case ND_ROUTER_ADVERT: 604 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 605 break; 606 607 case ND_NEIGHBOR_SOLICIT: 608 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 609 if (mctl_present) 610 freeb(first_mp); 611 /* XXX may wish to pass first_mp up to ndp_input someday. */ 612 ndp_input(ill, mp, dl_mp); 613 return; 614 615 case ND_NEIGHBOR_ADVERT: 616 BUMP_MIB(ill->ill_icmp6_mib, 617 ipv6IfIcmpInNeighborAdvertisements); 618 if (mctl_present) 619 freeb(first_mp); 620 /* XXX may wish to pass first_mp up to ndp_input someday. */ 621 ndp_input(ill, mp, dl_mp); 622 return; 623 624 case ND_REDIRECT: { 625 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 626 627 if (ipv6_ignore_redirect) 628 break; 629 630 /* 631 * As there is no upper client to deliver, we don't 632 * need the first_mp any more. 633 */ 634 if (mctl_present) 635 freeb(first_mp); 636 if (!pullupmsg(mp, -1)) { 637 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 638 break; 639 } 640 icmp_redirect_v6(q, mp, ill); 641 return; 642 } 643 644 /* 645 * The next three icmp messages will be handled by MLD. 646 * Pass all valid MLD packets up to any process(es) 647 * listening on a raw ICMP socket. 
MLD messages are 648 * freed by mld_input function. 649 */ 650 case MLD_LISTENER_QUERY: 651 case MLD_LISTENER_REPORT: 652 case MLD_LISTENER_REDUCTION: 653 if (mctl_present) 654 freeb(first_mp); 655 mld_input(q, mp, ill); 656 return; 657 default: 658 break; 659 } 660 if (interested) { 661 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 662 mctl_present, zoneid); 663 } else { 664 freemsg(first_mp); 665 } 666 } 667 668 /* 669 * Process received IPv6 ICMP Packet too big. 670 * After updating any IRE it does the fanout to any matching transport streams. 671 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 672 */ 673 /* ARGSUSED */ 674 static void 675 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 676 boolean_t mctl_present, zoneid_t zoneid) 677 { 678 ip6_t *ip6h; 679 ip6_t *inner_ip6h; 680 icmp6_t *icmp6; 681 uint16_t hdr_length; 682 uint32_t mtu; 683 ire_t *ire, *first_ire; 684 mblk_t *first_mp; 685 686 first_mp = mp; 687 if (mctl_present) 688 mp = first_mp->b_cont; 689 /* 690 * We must have exclusive use of the mblk to update the MTU 691 * in the packet. 692 * If not, we copy it. 693 * 694 * If there's an M_CTL present, we know that allocated first_mp 695 * earlier in this function, so we know first_mp has refcnt of one. 
696 */ 697 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 698 if (mp->b_datap->db_ref > 1) { 699 mblk_t *mp1; 700 701 mp1 = copymsg(mp); 702 freemsg(mp); 703 if (mp1 == NULL) { 704 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 705 if (mctl_present) 706 freeb(first_mp); 707 return; 708 } 709 mp = mp1; 710 if (mctl_present) 711 first_mp->b_cont = mp; 712 else 713 first_mp = mp; 714 } 715 ip6h = (ip6_t *)mp->b_rptr; 716 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 717 hdr_length = ip_hdr_length_v6(mp, ip6h); 718 else 719 hdr_length = IPV6_HDR_LEN; 720 721 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 722 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 723 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 724 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 725 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 726 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 727 freemsg(first_mp); 728 return; 729 } 730 ip6h = (ip6_t *)mp->b_rptr; 731 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 732 inner_ip6h = (ip6_t *)&icmp6[1]; 733 } 734 735 /* 736 * For link local destinations matching simply on IRE type is not 737 * sufficient. Same link local addresses for different ILL's is 738 * possible. 
739 */ 740 741 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 742 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 743 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 744 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 745 746 if (first_ire == NULL) { 747 if (ip_debug > 2) { 748 /* ip1dbg */ 749 pr_addr_dbg("icmp_inbound_too_big_v6:" 750 "no ire for dst %s\n", AF_INET6, 751 &inner_ip6h->ip6_dst); 752 } 753 freemsg(first_mp); 754 return; 755 } 756 757 mtu = ntohl(icmp6->icmp6_mtu); 758 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 759 for (ire = first_ire; ire != NULL && 760 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 761 ire = ire->ire_next) { 762 mutex_enter(&ire->ire_lock); 763 if (mtu < IPV6_MIN_MTU) { 764 ip1dbg(("Received mtu less than IPv6 " 765 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 766 mtu = IPV6_MIN_MTU; 767 /* 768 * If an mtu less than IPv6 min mtu is received, 769 * we must include a fragment header in 770 * subsequent packets. 771 */ 772 ire->ire_frag_flag |= IPH_FRAG_HDR; 773 } 774 ip1dbg(("Received mtu from router: %d\n", mtu)); 775 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 776 /* Record the new max frag size for the ULP. */ 777 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 778 /* 779 * If we need a fragment header in every packet 780 * (above case or multirouting), make sure the 781 * ULP takes it into account when computing the 782 * payload size. 
783 */ 784 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 785 sizeof (ip6_frag_t)); 786 } else { 787 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 788 } 789 mutex_exit(&ire->ire_lock); 790 } 791 rw_exit(&first_ire->ire_bucket->irb_lock); 792 ire_refrele(first_ire); 793 } else { 794 irb_t *irb = NULL; 795 /* 796 * for non-link local destinations we match only on the IRE type 797 */ 798 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 799 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 800 if (ire == NULL) { 801 if (ip_debug > 2) { 802 /* ip1dbg */ 803 pr_addr_dbg("icmp_inbound_too_big_v6:" 804 "no ire for dst %s\n", 805 AF_INET6, &inner_ip6h->ip6_dst); 806 } 807 freemsg(first_mp); 808 return; 809 } 810 irb = ire->ire_bucket; 811 ire_refrele(ire); 812 rw_enter(&irb->irb_lock, RW_READER); 813 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 814 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 815 &inner_ip6h->ip6_dst)) { 816 mtu = ntohl(icmp6->icmp6_mtu); 817 mutex_enter(&ire->ire_lock); 818 if (mtu < IPV6_MIN_MTU) { 819 ip1dbg(("Received mtu less than IPv6" 820 "min mtu %d: %d\n", 821 IPV6_MIN_MTU, mtu)); 822 mtu = IPV6_MIN_MTU; 823 /* 824 * If an mtu less than IPv6 min mtu is 825 * received, we must include a fragment 826 * header in subsequent packets. 827 */ 828 ire->ire_frag_flag |= IPH_FRAG_HDR; 829 } 830 831 ip1dbg(("Received mtu from router: %d\n", mtu)); 832 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 833 /* Record the new max frag size for the ULP. */ 834 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 835 /* 836 * If we need a fragment header in 837 * every packet (above case or 838 * multirouting), make sure the ULP 839 * takes it into account when computing 840 * the payload size. 
841 */ 842 icmp6->icmp6_mtu = 843 htonl(ire->ire_max_frag - 844 sizeof (ip6_frag_t)); 845 } else { 846 icmp6->icmp6_mtu = 847 htonl(ire->ire_max_frag); 848 } 849 mutex_exit(&ire->ire_lock); 850 } 851 } 852 rw_exit(&irb->irb_lock); 853 } 854 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 855 mctl_present, zoneid); 856 } 857 858 static void 859 pkt_too_big(conn_t *connp, void *arg) 860 { 861 mblk_t *mp; 862 863 if (!connp->conn_ipv6_recvpathmtu) 864 return; 865 866 /* create message and drop it on this connections read queue */ 867 if ((mp = dupb((mblk_t *)arg)) == NULL) { 868 return; 869 } 870 mp->b_datap->db_type = M_CTL; 871 872 putnext(connp->conn_rq, mp); 873 } 874 875 /* 876 * Fanout received ICMPv6 error packets to the transports. 877 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 878 */ 879 void 880 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 881 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 882 { 883 uint16_t *up; /* Pointer to ports in ULP header */ 884 uint32_t ports; /* reversed ports for fanout */ 885 ip6_t rip6h; /* With reversed addresses */ 886 uint16_t hdr_length; 887 uint8_t *nexthdrp; 888 uint8_t nexthdr; 889 mblk_t *first_mp; 890 ipsec_in_t *ii; 891 tcpha_t *tcpha; 892 conn_t *connp; 893 894 first_mp = mp; 895 if (mctl_present) { 896 mp = first_mp->b_cont; 897 ASSERT(mp != NULL); 898 899 ii = (ipsec_in_t *)first_mp->b_rptr; 900 ASSERT(ii->ipsec_in_type == IPSEC_IN); 901 } else { 902 ii = NULL; 903 } 904 905 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 906 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 907 908 /* 909 * Need to pullup everything in order to use 910 * ip_hdr_length_nexthdr_v6() 911 */ 912 if (mp->b_cont != NULL) { 913 if (!pullupmsg(mp, -1)) { 914 ip1dbg(("icmp_inbound_error_fanout_v6: " 915 "pullupmsg failed\n")); 916 goto drop_pkt; 917 } 918 ip6h = (ip6_t *)mp->b_rptr; 919 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 920 } 921 922 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 923 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 924 goto drop_pkt; 925 926 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 927 goto drop_pkt; 928 nexthdr = *nexthdrp; 929 930 /* Set message type, must be done after pullups */ 931 mp->b_datap->db_type = M_CTL; 932 933 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 934 /* 935 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 936 * sockets. 937 * 938 * Note I don't like walking every connection to deliver 939 * this information to a set of listeners. A separate 940 * list could be kept to keep the cost of this down. 941 */ 942 ipcl_walk(pkt_too_big, (void *)mp); 943 } 944 945 /* Try to pass the ICMP message to clients who need it */ 946 switch (nexthdr) { 947 case IPPROTO_UDP: { 948 /* 949 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 950 * UDP header to get the port information. 951 */ 952 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 953 mp->b_wptr) { 954 break; 955 } 956 /* 957 * Attempt to find a client stream based on port. 958 * Note that we do a reverse lookup since the header is 959 * in the form we sent it out. 960 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 961 * and we only set the src and dst addresses and nexthdr. 962 */ 963 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 964 rip6h.ip6_src = ip6h->ip6_dst; 965 rip6h.ip6_dst = ip6h->ip6_src; 966 rip6h.ip6_nxt = nexthdr; 967 ((uint16_t *)&ports)[0] = up[1]; 968 ((uint16_t *)&ports)[1] = up[0]; 969 970 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 971 IP6_NO_IPPOLICY, mctl_present, zoneid); 972 return; 973 } 974 case IPPROTO_TCP: { 975 /* 976 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 977 * the TCP header to get the port information. 
978 */ 979 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 980 mp->b_wptr) { 981 break; 982 } 983 984 /* 985 * Attempt to find a client stream based on port. 986 * Note that we do a reverse lookup since the header is 987 * in the form we sent it out. 988 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 989 * we only set the src and dst addresses and nexthdr. 990 */ 991 992 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 993 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 994 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 995 if (connp == NULL) { 996 goto drop_pkt; 997 } 998 999 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 1000 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 1001 return; 1002 1003 } 1004 case IPPROTO_SCTP: 1005 /* 1006 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1007 * the SCTP header to get the port information. 1008 */ 1009 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1010 mp->b_wptr) { 1011 break; 1012 } 1013 1014 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1015 ((uint16_t *)&ports)[0] = up[1]; 1016 ((uint16_t *)&ports)[1] = up[0]; 1017 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1018 IP6_NO_IPPOLICY, 0, zoneid); 1019 return; 1020 case IPPROTO_ESP: 1021 case IPPROTO_AH: { 1022 int ipsec_rc; 1023 1024 /* 1025 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1026 * We will re-use the IPSEC_IN if it is already present as 1027 * AH/ESP will not affect any fields in the IPSEC_IN for 1028 * ICMP errors. If there is no IPSEC_IN, allocate a new 1029 * one and attach it in the front. 1030 */ 1031 if (ii != NULL) { 1032 /* 1033 * ip_fanout_proto_again converts the ICMP errors 1034 * that come back from AH/ESP to M_DATA so that 1035 * if it is non-AH/ESP and we do a pullupmsg in 1036 * this function, it would work. Convert it back 1037 * to M_CTL before we send up as this is a ICMP 1038 * error. This could have been generated locally or 1039 * by some router. 
Validate the inner IPSEC 1040 * headers. 1041 * 1042 * NOTE : ill_index is used by ip_fanout_proto_again 1043 * to locate the ill. 1044 */ 1045 ASSERT(ill != NULL); 1046 ii->ipsec_in_ill_index = 1047 ill->ill_phyint->phyint_ifindex; 1048 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1049 first_mp->b_cont->b_datap->db_type = M_CTL; 1050 } else { 1051 /* 1052 * IPSEC_IN is not present. We attach a ipsec_in 1053 * message and send up to IPSEC for validating 1054 * and removing the IPSEC headers. Clear 1055 * ipsec_in_secure so that when we return 1056 * from IPSEC, we don't mistakenly think that this 1057 * is a secure packet came from the network. 1058 * 1059 * NOTE : ill_index is used by ip_fanout_proto_again 1060 * to locate the ill. 1061 */ 1062 ASSERT(first_mp == mp); 1063 first_mp = ipsec_in_alloc(B_FALSE); 1064 ASSERT(ill != NULL); 1065 if (first_mp == NULL) { 1066 freemsg(mp); 1067 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1068 return; 1069 } 1070 ii = (ipsec_in_t *)first_mp->b_rptr; 1071 1072 /* This is not a secure packet */ 1073 ii->ipsec_in_secure = B_FALSE; 1074 first_mp->b_cont = mp; 1075 mp->b_datap->db_type = M_CTL; 1076 ii->ipsec_in_ill_index = 1077 ill->ill_phyint->phyint_ifindex; 1078 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1079 } 1080 1081 if (!ipsec_loaded()) { 1082 ip_proto_not_sup(q, first_mp, 0, zoneid); 1083 return; 1084 } 1085 1086 if (nexthdr == IPPROTO_ESP) 1087 ipsec_rc = ipsecesp_icmp_error(first_mp); 1088 else 1089 ipsec_rc = ipsecah_icmp_error(first_mp); 1090 if (ipsec_rc == IPSEC_STATUS_FAILED) 1091 return; 1092 1093 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1094 return; 1095 } 1096 case IPPROTO_ENCAP: 1097 case IPPROTO_IPV6: 1098 if ((uint8_t *)ip6h + hdr_length + 1099 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1100 sizeof (ip6_t)) > mp->b_wptr) 1101 goto drop_pkt; 1102 1103 if (nexthdr == IPPROTO_ENCAP || 1104 !IN6_ARE_ADDR_EQUAL( 1105 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1106 &ip6h->ip6_src) || 1107 !IN6_ARE_ADDR_EQUAL( 1108 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1109 &ip6h->ip6_dst)) { 1110 /* 1111 * For tunnels that have used IPsec protection, 1112 * we need to adjust the MTU to take into account 1113 * the IPsec overhead. 1114 */ 1115 if (ii != NULL) 1116 icmp6->icmp6_mtu = htons( 1117 ntohs(icmp6->icmp6_mtu) - 1118 ipsec_in_extra_length(first_mp)); 1119 } else { 1120 /* 1121 * Self-encapsulated case. As in the ipv4 case, 1122 * we need to strip the 2nd IP header. Since mp 1123 * is already pulled-up, we can simply bcopy 1124 * the 3rd header + data over the 2nd header. 1125 */ 1126 uint16_t unused_len; 1127 ip6_t *inner_ip6h = (ip6_t *) 1128 ((uchar_t *)ip6h + hdr_length); 1129 1130 /* 1131 * Make sure we don't do recursion more than once. 1132 */ 1133 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1134 &unused_len, &nexthdrp) || 1135 *nexthdrp == IPPROTO_IPV6) { 1136 goto drop_pkt; 1137 } 1138 1139 /* 1140 * We are about to modify the packet. Make a copy if 1141 * someone else has a reference to it. 1142 */ 1143 if (DB_REF(mp) > 1) { 1144 mblk_t *mp1; 1145 uint16_t icmp6_offset; 1146 1147 mp1 = copymsg(mp); 1148 if (mp1 == NULL) { 1149 goto drop_pkt; 1150 } 1151 icmp6_offset = (uint16_t) 1152 ((uchar_t *)icmp6 - mp->b_rptr); 1153 freemsg(mp); 1154 mp = mp1; 1155 1156 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1157 ip6h = (ip6_t *)&icmp6[1]; 1158 inner_ip6h = (ip6_t *) 1159 ((uchar_t *)ip6h + hdr_length); 1160 1161 if (mctl_present) 1162 first_mp->b_cont = mp; 1163 else 1164 first_mp = mp; 1165 } 1166 1167 /* 1168 * Need to set db_type back to M_DATA before 1169 * refeeding mp into this function. 
1170 */ 1171 DB_TYPE(mp) = M_DATA; 1172 1173 /* 1174 * Copy the 3rd header + remaining data on top 1175 * of the 2nd header. 1176 */ 1177 bcopy(inner_ip6h, ip6h, 1178 mp->b_wptr - (uchar_t *)inner_ip6h); 1179 1180 /* 1181 * Subtract length of the 2nd header. 1182 */ 1183 mp->b_wptr -= hdr_length; 1184 1185 /* 1186 * Now recurse, and see what I _really_ should be 1187 * doing here. 1188 */ 1189 icmp_inbound_error_fanout_v6(q, first_mp, 1190 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1191 zoneid); 1192 return; 1193 } 1194 /* FALLTHRU */ 1195 default: 1196 /* 1197 * The rip6h header is only used for the lookup and we 1198 * only set the src and dst addresses and nexthdr. 1199 */ 1200 rip6h.ip6_src = ip6h->ip6_dst; 1201 rip6h.ip6_dst = ip6h->ip6_src; 1202 rip6h.ip6_nxt = nexthdr; 1203 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1204 IP6_NO_IPPOLICY, mctl_present, zoneid); 1205 return; 1206 } 1207 /* NOTREACHED */ 1208 drop_pkt: 1209 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1210 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1211 freemsg(first_mp); 1212 } 1213 1214 /* 1215 * Process received IPv6 ICMP Redirect messages. 
1216 */ 1217 /* ARGSUSED */ 1218 static void 1219 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1220 { 1221 ip6_t *ip6h; 1222 uint16_t hdr_length; 1223 nd_redirect_t *rd; 1224 ire_t *ire; 1225 ire_t *prev_ire; 1226 ire_t *redir_ire; 1227 in6_addr_t *src, *dst, *gateway; 1228 nd_opt_hdr_t *opt; 1229 nce_t *nce; 1230 int nce_flags = 0; 1231 int err = 0; 1232 boolean_t redirect_to_router = B_FALSE; 1233 int len; 1234 int optlen; 1235 iulp_t ulp_info = { 0 }; 1236 ill_t *prev_ire_ill; 1237 ipif_t *ipif; 1238 1239 ip6h = (ip6_t *)mp->b_rptr; 1240 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1241 hdr_length = ip_hdr_length_v6(mp, ip6h); 1242 else 1243 hdr_length = IPV6_HDR_LEN; 1244 1245 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1246 len = mp->b_wptr - mp->b_rptr - hdr_length; 1247 src = &ip6h->ip6_src; 1248 dst = &rd->nd_rd_dst; 1249 gateway = &rd->nd_rd_target; 1250 1251 /* Verify if it is a valid redirect */ 1252 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1253 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1254 (rd->nd_rd_code != 0) || 1255 (len < sizeof (nd_redirect_t)) || 1256 (IN6_IS_ADDR_V4MAPPED(dst)) || 1257 (IN6_IS_ADDR_MULTICAST(dst))) { 1258 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1259 freemsg(mp); 1260 return; 1261 } 1262 1263 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1264 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1265 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1266 freemsg(mp); 1267 return; 1268 } 1269 1270 if (len > sizeof (nd_redirect_t)) { 1271 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1272 len - sizeof (nd_redirect_t))) { 1273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1274 freemsg(mp); 1275 return; 1276 } 1277 } 1278 1279 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1280 redirect_to_router = B_TRUE; 1281 nce_flags |= NCE_F_ISROUTER; 1282 } 1283 1284 /* ipif will be refreleased afterwards */ 1285 ipif = ipif_get_next_ipif(NULL, ill); 1286 if (ipif == NULL) { 1287 freemsg(mp); 1288 return; 1289 } 1290 1291 /* 1292 * Verify that the 
IP source address of the redirect is 1293 * the same as the current first-hop router for the specified 1294 * ICMP destination address. 1295 * Also, Make sure we had a route for the dest in question and 1296 * that route was pointing to the old gateway (the source of the 1297 * redirect packet.) 1298 */ 1299 1300 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1301 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1302 MATCH_IRE_DEFAULT); 1303 1304 /* 1305 * Check that 1306 * the redirect was not from ourselves 1307 * old gateway is still directly reachable 1308 */ 1309 if (prev_ire == NULL || 1310 prev_ire->ire_type == IRE_LOCAL) { 1311 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1312 ipif_refrele(ipif); 1313 goto fail_redirect; 1314 } 1315 prev_ire_ill = ire_to_ill(prev_ire); 1316 ASSERT(prev_ire_ill != NULL); 1317 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1318 nce_flags |= NCE_F_NONUD; 1319 1320 /* 1321 * Should we use the old ULP info to create the new gateway? From 1322 * a user's perspective, we should inherit the info so that it 1323 * is a "smooth" transition. If we do not do that, then new 1324 * connections going thru the new gateway will have no route metrics, 1325 * which is counter-intuitive to user. From a network point of 1326 * view, this may or may not make sense even though the new gateway 1327 * is still directly connected to us so the route metrics should not 1328 * change much. 1329 * 1330 * But if the old ire_uinfo is not initialized, we do another 1331 * recursive lookup on the dest using the new gateway. There may 1332 * be a route to that. If so, use it to initialize the redirect 1333 * route. 1334 */ 1335 if (prev_ire->ire_uinfo.iulp_set) { 1336 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1337 } else if (redirect_to_router) { 1338 /* 1339 * Only do the following if the redirection is really to 1340 * a router. 
1341 */ 1342 ire_t *tmp_ire; 1343 ire_t *sire; 1344 1345 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1346 ALL_ZONES, 0, NULL, 1347 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1348 if (sire != NULL) { 1349 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1350 ASSERT(tmp_ire != NULL); 1351 ire_refrele(tmp_ire); 1352 ire_refrele(sire); 1353 } else if (tmp_ire != NULL) { 1354 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1355 sizeof (iulp_t)); 1356 ire_refrele(tmp_ire); 1357 } 1358 } 1359 1360 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1361 opt = (nd_opt_hdr_t *)&rd[1]; 1362 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1363 if (opt != NULL) { 1364 err = ndp_lookup_then_add(ill, 1365 (uchar_t *)&opt[1], /* Link layer address */ 1366 gateway, 1367 &ipv6_all_ones, /* prefix mask */ 1368 &ipv6_all_zeros, /* Mapping mask */ 1369 0, 1370 nce_flags, 1371 ND_STALE, 1372 &nce, 1373 NULL, 1374 NULL); 1375 switch (err) { 1376 case 0: 1377 NCE_REFRELE(nce); 1378 break; 1379 case EEXIST: 1380 /* 1381 * Check to see if link layer address has changed and 1382 * process the nce_state accordingly. 1383 */ 1384 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1385 NCE_REFRELE(nce); 1386 break; 1387 default: 1388 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1389 err)); 1390 ipif_refrele(ipif); 1391 goto fail_redirect; 1392 } 1393 } 1394 if (redirect_to_router) { 1395 /* icmp_redirect_ok_v6() must have already verified this */ 1396 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1397 1398 /* 1399 * Create a Route Association. This will allow us to remember 1400 * a router told us to use the particular gateway. 
1401 */ 1402 ire = ire_create_v6( 1403 dst, 1404 &ipv6_all_ones, /* mask */ 1405 &prev_ire->ire_src_addr_v6, /* source addr */ 1406 gateway, /* gateway addr */ 1407 &prev_ire->ire_max_frag, /* max frag */ 1408 NULL, /* Fast Path header */ 1409 NULL, /* no rfq */ 1410 NULL, /* no stq */ 1411 IRE_HOST, 1412 NULL, 1413 prev_ire->ire_ipif, 1414 NULL, 1415 0, 1416 0, 1417 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1418 &ulp_info, 1419 NULL, 1420 NULL); 1421 } else { 1422 queue_t *stq; 1423 1424 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1425 ? ipif->ipif_rq : ipif->ipif_wq; 1426 1427 /* 1428 * Just create an on link entry, i.e. interface route. 1429 */ 1430 ire = ire_create_v6( 1431 dst, /* gateway == dst */ 1432 &ipv6_all_ones, /* mask */ 1433 &prev_ire->ire_src_addr_v6, /* source addr */ 1434 &ipv6_all_zeros, /* gateway addr */ 1435 &prev_ire->ire_max_frag, /* max frag */ 1436 NULL, /* Fast Path header */ 1437 NULL, /* ire rfq */ 1438 stq, /* ire stq */ 1439 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1440 NULL, 1441 prev_ire->ire_ipif, 1442 &ipv6_all_ones, 1443 0, 1444 0, 1445 (RTF_DYNAMIC | RTF_HOST), 1446 &ulp_info, 1447 NULL, 1448 NULL); 1449 } 1450 1451 /* Release reference from earlier ipif_get_next_ipif() */ 1452 ipif_refrele(ipif); 1453 1454 if (ire == NULL) 1455 goto fail_redirect; 1456 1457 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1458 1459 /* tell routing sockets that we received a redirect */ 1460 ip_rts_change_v6(RTM_REDIRECT, 1461 &rd->nd_rd_dst, 1462 &rd->nd_rd_target, 1463 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1464 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1465 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1466 1467 /* 1468 * Delete any existing IRE_HOST type ires for this destination. 1469 * This together with the added IRE has the effect of 1470 * modifying an existing redirect. 
1471 */ 1472 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1473 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1474 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1475 1476 ire_refrele(ire); /* Held in ire_add_v6 */ 1477 1478 if (redir_ire != NULL) { 1479 if (redir_ire->ire_flags & RTF_DYNAMIC) 1480 ire_delete(redir_ire); 1481 ire_refrele(redir_ire); 1482 } 1483 } 1484 1485 if (prev_ire->ire_type == IRE_CACHE) 1486 ire_delete(prev_ire); 1487 ire_refrele(prev_ire); 1488 prev_ire = NULL; 1489 1490 fail_redirect: 1491 if (prev_ire != NULL) 1492 ire_refrele(prev_ire); 1493 freemsg(mp); 1494 } 1495 1496 static ill_t * 1497 ip_queue_to_ill_v6(queue_t *q) 1498 { 1499 ill_t *ill; 1500 1501 ASSERT(WR(q) == q); 1502 1503 if (q->q_next != NULL) { 1504 ill = (ill_t *)q->q_ptr; 1505 if (ILL_CAN_LOOKUP(ill)) 1506 ill_refhold(ill); 1507 else 1508 ill = NULL; 1509 } else { 1510 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1511 NULL, NULL, NULL, NULL, NULL); 1512 } 1513 if (ill == NULL) 1514 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1515 return (ill); 1516 } 1517 1518 /* 1519 * Assigns an appropriate source address to the packet. 1520 * If origdst is one of our IP addresses that use it as the source. 1521 * If the queue is an ill queue then select a source from that ill. 1522 * Otherwise pick a source based on a route lookup back to the origsrc. 1523 * 1524 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1525 */ 1526 static in6_addr_t * 1527 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1528 in6_addr_t *src, zoneid_t zoneid) 1529 { 1530 ill_t *ill; 1531 ire_t *ire; 1532 ipif_t *ipif; 1533 1534 ASSERT(!(wq->q_flag & QREADR)); 1535 if (wq->q_next != NULL) { 1536 ill = (ill_t *)wq->q_ptr; 1537 } else { 1538 ill = NULL; 1539 } 1540 1541 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1542 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1543 if (ire != NULL) { 1544 /* Destined to one of our addresses */ 1545 *src = *origdst; 1546 ire_refrele(ire); 1547 return (src); 1548 } 1549 if (ire != NULL) { 1550 ire_refrele(ire); 1551 ire = NULL; 1552 } 1553 if (ill == NULL) { 1554 /* What is the route back to the original source? */ 1555 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1556 NULL, NULL, zoneid, NULL, 1557 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1558 if (ire == NULL) { 1559 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 1560 return (NULL); 1561 } 1562 /* 1563 * Does not matter whether we use ire_stq or ire_ipif here. 1564 * Just pick an ill for ICMP replies. 1565 */ 1566 ASSERT(ire->ire_ipif != NULL); 1567 ill = ire->ire_ipif->ipif_ill; 1568 ire_refrele(ire); 1569 } 1570 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1571 IPV6_PREFER_SRC_DEFAULT, zoneid); 1572 if (ipif != NULL) { 1573 *src = ipif->ipif_v6src_addr; 1574 ipif_refrele(ipif); 1575 return (src); 1576 } 1577 /* 1578 * Unusual case - can't find a usable source address to reach the 1579 * original source. Use what in the route to the source. 
1580 */ 1581 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1582 NULL, NULL, zoneid, NULL, 1583 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1584 if (ire == NULL) { 1585 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 1586 return (NULL); 1587 } 1588 ASSERT(ire != NULL); 1589 *src = ire->ire_src_addr_v6; 1590 ire_refrele(ire); 1591 return (src); 1592 } 1593 1594 /* 1595 * Build and ship an IPv6 ICMP message using the packet data in mp, 1596 * and the ICMP header pointed to by "stuff". (May be called as 1597 * writer.) 1598 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1599 * verify that an icmp error packet can be sent. 1600 * 1601 * If q is an ill write side queue (which is the case when packets 1602 * arrive from ip_rput) then ip_wput code will ensure that packets to 1603 * link-local destinations are sent out that ill. 1604 * 1605 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1606 * source address (see above function). 1607 */ 1608 static void 1609 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1610 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) 1611 { 1612 ip6_t *ip6h; 1613 in6_addr_t v6dst; 1614 size_t len_needed; 1615 size_t msg_len; 1616 mblk_t *mp1; 1617 icmp6_t *icmp6; 1618 ill_t *ill; 1619 in6_addr_t v6src; 1620 mblk_t *ipsec_mp; 1621 ipsec_out_t *io; 1622 1623 ill = ip_queue_to_ill_v6(q); 1624 if (ill == NULL) { 1625 freemsg(mp); 1626 return; 1627 } 1628 1629 if (mctl_present) { 1630 /* 1631 * If it is : 1632 * 1633 * 1) a IPSEC_OUT, then this is caused by outbound 1634 * datagram originating on this host. IPSEC processing 1635 * may or may not have been done. Refer to comments above 1636 * icmp_inbound_error_fanout for details. 1637 * 1638 * 2) a IPSEC_IN if we are generating a icmp_message 1639 * for an incoming datagram destined for us i.e called 1640 * from ip_fanout_send_icmp. 
1641 */ 1642 ipsec_info_t *in; 1643 1644 ipsec_mp = mp; 1645 mp = ipsec_mp->b_cont; 1646 1647 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1648 ip6h = (ip6_t *)mp->b_rptr; 1649 1650 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1651 in->ipsec_info_type == IPSEC_IN); 1652 1653 if (in->ipsec_info_type == IPSEC_IN) { 1654 /* 1655 * Convert the IPSEC_IN to IPSEC_OUT. 1656 */ 1657 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1658 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1659 ill_refrele(ill); 1660 return; 1661 } 1662 } else { 1663 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1664 io = (ipsec_out_t *)in; 1665 /* 1666 * Clear out ipsec_out_proc_begin, so we do a fresh 1667 * ire lookup. 1668 */ 1669 io->ipsec_out_proc_begin = B_FALSE; 1670 } 1671 } else { 1672 /* 1673 * This is in clear. The icmp message we are building 1674 * here should go out in clear. 1675 */ 1676 ipsec_in_t *ii; 1677 ASSERT(mp->b_datap->db_type == M_DATA); 1678 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1679 freemsg(mp); 1680 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1681 ill_refrele(ill); 1682 return; 1683 } 1684 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1685 1686 /* This is not a secure packet */ 1687 ii->ipsec_in_secure = B_FALSE; 1688 /* 1689 * For trusted extensions using a shared IP address we can 1690 * send using any zoneid. 1691 */ 1692 if (zoneid == ALL_ZONES) 1693 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1694 else 1695 ii->ipsec_in_zoneid = zoneid; 1696 ipsec_mp->b_cont = mp; 1697 ip6h = (ip6_t *)mp->b_rptr; 1698 /* 1699 * Convert the IPSEC_IN to IPSEC_OUT. 
1700 */ 1701 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1702 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1703 ill_refrele(ill); 1704 return; 1705 } 1706 } 1707 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1708 1709 if (v6src_ptr != NULL) { 1710 v6src = *v6src_ptr; 1711 } else { 1712 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1713 &v6src, zoneid) == NULL) { 1714 freemsg(ipsec_mp); 1715 ill_refrele(ill); 1716 return; 1717 } 1718 } 1719 v6dst = ip6h->ip6_src; 1720 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1721 msg_len = msgdsize(mp); 1722 if (msg_len > len_needed) { 1723 if (!adjmsg(mp, len_needed - msg_len)) { 1724 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1725 freemsg(ipsec_mp); 1726 ill_refrele(ill); 1727 return; 1728 } 1729 msg_len = len_needed; 1730 } 1731 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1732 if (mp1 == NULL) { 1733 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1734 freemsg(ipsec_mp); 1735 ill_refrele(ill); 1736 return; 1737 } 1738 ill_refrele(ill); 1739 mp1->b_cont = mp; 1740 mp = mp1; 1741 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1742 io->ipsec_out_type == IPSEC_OUT); 1743 ipsec_mp->b_cont = mp; 1744 1745 /* 1746 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1747 * node generates be accepted in peace by all on-host destinations. 1748 * If we do NOT assume that all on-host destinations trust 1749 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1750 * (Look for ipsec_out_icmp_loopback). 
1751 */ 1752 io->ipsec_out_icmp_loopback = B_TRUE; 1753 1754 ip6h = (ip6_t *)mp->b_rptr; 1755 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1756 1757 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1758 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1759 ip6h->ip6_hops = ipv6_def_hops; 1760 ip6h->ip6_dst = v6dst; 1761 ip6h->ip6_src = v6src; 1762 msg_len += IPV6_HDR_LEN + len; 1763 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1764 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1765 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1766 } 1767 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1768 icmp6 = (icmp6_t *)&ip6h[1]; 1769 bcopy(stuff, (char *)icmp6, len); 1770 /* 1771 * Prepare for checksum by putting icmp length in the icmp 1772 * checksum field. The checksum is calculated in ip_wput_v6. 1773 */ 1774 icmp6->icmp6_cksum = ip6h->ip6_plen; 1775 if (icmp6->icmp6_type == ND_REDIRECT) { 1776 ip6h->ip6_hops = IPV6_MAX_HOPS; 1777 } 1778 /* Send to V6 writeside put routine */ 1779 put(q, ipsec_mp); 1780 } 1781 1782 /* 1783 * Update the output mib when ICMPv6 packets are sent. 
1784 */ 1785 static void 1786 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1787 { 1788 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1789 1790 switch (icmp6->icmp6_type) { 1791 case ICMP6_DST_UNREACH: 1792 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1793 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1794 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1795 break; 1796 1797 case ICMP6_TIME_EXCEEDED: 1798 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1799 break; 1800 1801 case ICMP6_PARAM_PROB: 1802 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1803 break; 1804 1805 case ICMP6_PACKET_TOO_BIG: 1806 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1807 break; 1808 1809 case ICMP6_ECHO_REQUEST: 1810 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1811 break; 1812 1813 case ICMP6_ECHO_REPLY: 1814 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1815 break; 1816 1817 case ND_ROUTER_SOLICIT: 1818 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1819 break; 1820 1821 case ND_ROUTER_ADVERT: 1822 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1823 break; 1824 1825 case ND_NEIGHBOR_SOLICIT: 1826 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1827 break; 1828 1829 case ND_NEIGHBOR_ADVERT: 1830 BUMP_MIB(ill->ill_icmp6_mib, 1831 ipv6IfIcmpOutNeighborAdvertisements); 1832 break; 1833 1834 case ND_REDIRECT: 1835 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1836 break; 1837 1838 case MLD_LISTENER_QUERY: 1839 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1840 break; 1841 1842 case MLD_LISTENER_REPORT: 1843 case MLD_V2_LISTENER_REPORT: 1844 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1845 break; 1846 1847 case MLD_LISTENER_REDUCTION: 1848 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1849 break; 1850 } 1851 } 1852 1853 /* 1854 * Check if it is ok to send an ICMPv6 error packet in 1855 * response to the IP packet in 
mp. 1856 * Free the message and return null if no 1857 * ICMP error packet should be sent. 1858 */ 1859 static mblk_t * 1860 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1861 boolean_t llbcast, boolean_t mcast_ok) 1862 { 1863 ip6_t *ip6h; 1864 1865 if (!mp) 1866 return (NULL); 1867 1868 ip6h = (ip6_t *)mp->b_rptr; 1869 1870 /* Check if source address uniquely identifies the host */ 1871 1872 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1873 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1874 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1875 freemsg(mp); 1876 return (NULL); 1877 } 1878 1879 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1880 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1881 icmp6_t *icmp6; 1882 1883 if (mp->b_wptr - mp->b_rptr < len_needed) { 1884 if (!pullupmsg(mp, len_needed)) { 1885 ill_t *ill; 1886 1887 ill = ip_queue_to_ill_v6(q); 1888 if (ill == NULL) { 1889 BUMP_MIB(&icmp6_mib, 1890 ipv6IfIcmpInErrors); 1891 } else { 1892 BUMP_MIB(ill->ill_icmp6_mib, 1893 ipv6IfIcmpInErrors); 1894 ill_refrele(ill); 1895 } 1896 freemsg(mp); 1897 return (NULL); 1898 } 1899 ip6h = (ip6_t *)mp->b_rptr; 1900 } 1901 icmp6 = (icmp6_t *)&ip6h[1]; 1902 /* Explicitly do not generate errors in response to redirects */ 1903 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1904 icmp6->icmp6_type == ND_REDIRECT) { 1905 freemsg(mp); 1906 return (NULL); 1907 } 1908 } 1909 /* 1910 * Check that the destination is not multicast and that the packet 1911 * was not sent on link layer broadcast or multicast. (Exception 1912 * is Packet too big message as per the draft - when mcast_ok is set.) 1913 */ 1914 if (!mcast_ok && 1915 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1916 freemsg(mp); 1917 return (NULL); 1918 } 1919 if (icmp_err_rate_limit()) { 1920 /* 1921 * Only send ICMP error packets every so often. 1922 * This should be done on a per port/source basis, 1923 * but for now this will suffice. 
1924 */ 1925 freemsg(mp); 1926 return (NULL); 1927 } 1928 return (mp); 1929 } 1930 1931 /* 1932 * Generate an ICMPv6 redirect message. 1933 * Include target link layer address option if it exits. 1934 * Always include redirect header. 1935 */ 1936 static void 1937 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1938 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1939 { 1940 nd_redirect_t *rd; 1941 nd_opt_rd_hdr_t *rdh; 1942 uchar_t *buf; 1943 nce_t *nce = NULL; 1944 nd_opt_hdr_t *opt; 1945 int len; 1946 int ll_opt_len = 0; 1947 int max_redir_hdr_data_len; 1948 int pkt_len; 1949 in6_addr_t *srcp; 1950 1951 /* 1952 * We are called from ip_rput where we could 1953 * not have attached an IPSEC_IN. 1954 */ 1955 ASSERT(mp->b_datap->db_type == M_DATA); 1956 1957 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1958 if (mp == NULL) 1959 return; 1960 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1961 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1962 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1963 ill->ill_phys_addr_length + 7)/8 * 8; 1964 } 1965 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1966 ASSERT(len % 4 == 0); 1967 buf = kmem_alloc(len, KM_NOSLEEP); 1968 if (buf == NULL) { 1969 if (nce != NULL) 1970 NCE_REFRELE(nce); 1971 freemsg(mp); 1972 return; 1973 } 1974 1975 rd = (nd_redirect_t *)buf; 1976 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1977 rd->nd_rd_code = 0; 1978 rd->nd_rd_reserved = 0; 1979 rd->nd_rd_target = *targetp; 1980 rd->nd_rd_dst = *dest; 1981 1982 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1983 if (nce != NULL && ll_opt_len != 0) { 1984 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1985 opt->nd_opt_len = ll_opt_len/8; 1986 bcopy((char *)nce->nce_res_mp->b_rptr + 1987 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1988 ill->ill_phys_addr_length); 1989 } 1990 if (nce != NULL) 1991 NCE_REFRELE(nce); 1992 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1993 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1994 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1995 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1996 pkt_len = msgdsize(mp); 1997 /* Make sure mp is 8 byte aligned */ 1998 if (pkt_len > max_redir_hdr_data_len) { 1999 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2000 sizeof (nd_opt_rd_hdr_t))/8; 2001 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2002 } else { 2003 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2004 (void) adjmsg(mp, -(pkt_len % 8)); 2005 } 2006 rdh->nd_opt_rh_reserved1 = 0; 2007 rdh->nd_opt_rh_reserved2 = 0; 2008 /* ipif_v6src_addr contains the link-local source address */ 2009 rw_enter(&ill_g_lock, RW_READER); 2010 if (ill->ill_group != NULL) { 2011 /* 2012 * The receiver of the redirect will verify whether it 2013 * had a route through us (srcp that we will use in 2014 * the redirect) or not. As we load spread even link-locals, 2015 * we don't know which source address the receiver of 2016 * redirect has in its route for communicating with us. 2017 * Thus we randomly choose a source here and finally we 2018 * should get to the right one and it will eventually 2019 * accept the redirect from us. We can't call 2020 * ip_lookup_scope_v6 because we don't have the right 2021 * link-local address here. Thus we randomly choose one. 2022 */ 2023 int cnt = ill->ill_group->illgrp_ill_count; 2024 2025 ill = ill->ill_group->illgrp_ill; 2026 cnt = ++icmp_redirect_v6_src_index % cnt; 2027 while (cnt--) 2028 ill = ill->ill_group_next; 2029 srcp = &ill->ill_ipif->ipif_v6src_addr; 2030 } else { 2031 srcp = &ill->ill_ipif->ipif_v6src_addr; 2032 } 2033 rw_exit(&ill_g_lock); 2034 /* Redirects sent by router, and router is global zone */ 2035 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); 2036 kmem_free(buf, len); 2037 } 2038 2039 2040 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2041 void 2042 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2043 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2044 { 2045 icmp6_t icmp6; 2046 boolean_t mctl_present; 2047 mblk_t *first_mp; 2048 2049 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2050 2051 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2052 if (mp == NULL) { 2053 if (mctl_present) 2054 freeb(first_mp); 2055 return; 2056 } 2057 bzero(&icmp6, sizeof (icmp6_t)); 2058 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2059 icmp6.icmp6_code = code; 2060 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2061 zoneid); 2062 } 2063 2064 /* 2065 * Generate an ICMP unreachable message. 2066 */ 2067 void 2068 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2069 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2070 { 2071 icmp6_t icmp6; 2072 boolean_t mctl_present; 2073 mblk_t *first_mp; 2074 2075 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2076 2077 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2078 if (mp == NULL) { 2079 if (mctl_present) 2080 freeb(first_mp); 2081 return; 2082 } 2083 bzero(&icmp6, sizeof (icmp6_t)); 2084 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2085 icmp6.icmp6_code = code; 2086 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2087 zoneid); 2088 } 2089 2090 /* 2091 * Generate an ICMP pkt too big message. 
2092 */ 2093 static void 2094 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2095 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2096 { 2097 icmp6_t icmp6; 2098 mblk_t *first_mp; 2099 boolean_t mctl_present; 2100 2101 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2102 2103 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2104 if (mp == NULL) { 2105 if (mctl_present) 2106 freeb(first_mp); 2107 return; 2108 } 2109 bzero(&icmp6, sizeof (icmp6_t)); 2110 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2111 icmp6.icmp6_code = 0; 2112 icmp6.icmp6_mtu = htonl(mtu); 2113 2114 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2115 zoneid); 2116 } 2117 2118 /* 2119 * Generate an ICMP parameter problem message. (May be called as writer.) 2120 * 'offset' is the offset from the beginning of the packet in error. 2121 */ 2122 static void 2123 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2124 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2125 { 2126 icmp6_t icmp6; 2127 boolean_t mctl_present; 2128 mblk_t *first_mp; 2129 2130 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2131 2132 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2133 if (mp == NULL) { 2134 if (mctl_present) 2135 freeb(first_mp); 2136 return; 2137 } 2138 bzero((char *)&icmp6, sizeof (icmp6_t)); 2139 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2140 icmp6.icmp6_code = code; 2141 icmp6.icmp6_pptr = htonl(offset); 2142 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2143 zoneid); 2144 } 2145 2146 /* 2147 * This code will need to take into account the possibility of binding 2148 * to a link local address on a multi-homed host, in which case the 2149 * outgoing interface (from the conn) will need to be used when getting 2150 * an ire for the dst. 
 * Going through proper outgoing interface and
 * choosing the source address corresponding to the outgoing interface
 * is necessary when the destination address is a link-local address and
 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
 * This can happen when active connection is setup; thus ipp pointer
 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL
 * pointer is passed as ipp pointer.
 *
 * mp holds a struct T_bind_req followed by the address and one trailing
 * byte carrying the protocol number.  The address length selects the
 * operation:
 *	0			register for all packets of the protocol
 *	IPV6_ADDR_LEN		verify a local address only (no fanout insert)
 *	sizeof (sin6_t)		bind to a local address and port
 *	sizeof (ipa6_conn_t)	bind local and remote, destination verified
 *	sizeof (ipa6_conn_x_t)	as above, destination verified only when
 *				ACX_VERIFY_DST is set in ac6x_flags
 *
 * Returns mp rewritten as a T_BIND_ACK on success.  On failure returns
 * the result of mi_tpi_err_ack_alloc(): TSYSERR when a positive error was
 * reported by a helper, TBADADDR otherwise.  Returns NULL when a helper
 * reported EINPROGRESS.
 */
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
	ssize_t		len;
	int		protocol;
	struct T_bind_req	*tbr;
	sin6_t		*sin6;
	ipa6_conn_t	*ac6;
	in6_addr_t	*v6srcp;
	in6_addr_t	*v6dstp;
	uint16_t	lport;
	uint16_t	fport;
	uchar_t		*ucp;
	mblk_t		*mp1;
	boolean_t	ire_requested;
	boolean_t	ipsec_policy_set;
	int		error = 0;
	boolean_t	local_bind;
	boolean_t	orig_pkt_isv6 = connp->conn_pkt_isv6;
	ipa6_conn_x_t	*acx6;
	boolean_t	verify_dst;

	ASSERT(connp->conn_af_isv6);
	len = mp->b_wptr - mp->b_rptr;
	/* The message must hold the T_bind_req plus the protocol byte. */
	if (len < (sizeof (*tbr) + 1)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "ip_bind_v6: bogus msg, len %ld", len);
		goto bad_addr;
	}
	/* Back up and extract the protocol identifier. */
	mp->b_wptr--;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* Reset the message type in preparation for shipping it back. */
	mp->b_datap->db_type = M_PCPROTO;

	/* b_wptr now points at the protocol byte, just past the data. */
	protocol = *mp->b_wptr & 0xFF;
	connp->conn_ulp = (uint8_t)protocol;

	/*
	 * Check for a zero length address.  This is from a protocol that
	 * wants to register to receive all packets of its type.
	 */
	if (tbr->ADDR_length == 0) {
		if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
		    protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
		    ipcl_proto_fanout_v6[protocol].connf_head != NULL) {
			/*
			 * TCP, SCTP, AH, and ESP have single protocol fanouts.
			 * Do not allow others to bind to these.
			 */
			goto bad_addr;
		}

		/*
		 *
		 * The udp module never sends down a zero-length address,
		 * and allowing this on a labeled system will break MLP
		 * functionality.
		 */
		if (is_system_labeled() && protocol == IPPROTO_UDP)
			goto bad_addr;

		/* Allow ipsec plumbing */
		if (connp->conn_mac_exempt && protocol != IPPROTO_AH &&
		    protocol != IPPROTO_ESP)
			goto bad_addr;

		connp->conn_srcv6 = ipv6_all_zeros;
		ipcl_proto_insert_v6(connp, protocol);

		tbr->PRIM_type = T_BIND_ACK;
		return (mp);
	}

	/* Extract the address pointer from the message. */
	ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
	    tbr->ADDR_length);
	if (ucp == NULL) {
		ip1dbg(("ip_bind_v6: no address\n"));
		goto bad_addr;
	}
	if (!OK_32PTR(ucp)) {
		ip1dbg(("ip_bind_v6: unaligned address\n"));
		goto bad_addr;
	}
	mp1 = mp->b_cont;	/* trailing mp if any */
	/* The type of the trailing mblk tells what the caller wants back. */
	ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE);
	ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET);

	/*
	 * Decode the address by its length.  v6dstp, fport and verify_dst
	 * are only assigned (and only read) for the two connect forms,
	 * i.e. when local_bind is B_FALSE.
	 */
	switch (tbr->ADDR_length) {
	default:
		ip1dbg(("ip_bind_v6: bad address length %d\n",
		    (int)tbr->ADDR_length));
		goto bad_addr;

	case IPV6_ADDR_LEN:
		/* Verification of local address only */
		v6srcp = (in6_addr_t *)ucp;
		lport = 0;
		local_bind = B_TRUE;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)ucp;
		v6srcp = &sin6->sin6_addr;
		lport = sin6->sin6_port;
		local_bind = B_TRUE;
		break;

	case sizeof (ipa6_conn_t):
		/*
		 * Verify that both the source and destination addresses
		 * are valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		ac6 = (ipa6_conn_t *)ucp;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		/* For raw socket, the local port is not set. */
		lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
		    connp->conn_lport;
		local_bind = B_FALSE;
		/* Always verify destination reachability. */
		verify_dst = B_TRUE;
		break;

	case sizeof (ipa6_conn_x_t):
		/*
		 * Verify that the source address is valid.
		 * Note that we allow connect to broadcast and multicast
		 * addresses when ire_requested is set. Thus the ULP
		 * has to check for IRE_BROADCAST and multicast.
		 */
		acx6 = (ipa6_conn_x_t *)ucp;
		ac6 = &acx6->ac6x_conn;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		lport = ac6->ac6_lport;
		local_bind = B_FALSE;
		/*
		 * Client that passed ipa6_conn_x_t to us specifies whether to
		 * verify destination reachability.
		 */
		verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
		break;
	}
	if (local_bind) {
		if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
			/* Bind to IPv4 address */
			ipaddr_t v4src;

			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);

			/*
			 * The last argument is the fanout_insert flag: a bare
			 * IPV6_ADDR_LEN address is a verification only.
			 */
			error = ip_bind_laddr(connp, mp, v4src, lport,
			    ire_requested, ipsec_policy_set,
			    tbr->ADDR_length != IPV6_ADDR_LEN);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_FALSE;
		} else {
			/*
			 * A v4-mapped source on a v6only conn is rejected;
			 * error stays 0 so that bad_addr reports TBADADDR.
			 */
			if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
				error = 0;
				goto bad_addr;
			}
			error = ip_bind_laddr_v6(connp, mp, v6srcp, lport,
			    ire_requested, ipsec_policy_set,
			    (tbr->ADDR_length != IPV6_ADDR_LEN));
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
	} else {
		/*
		 * Bind to local and remote address. Local might be
		 * unspecified in which case it will be extracted from
		 * ire_src_addr_v6
		 */
		if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
			/* Connect to IPv4 address */
			ipaddr_t v4src;
			ipaddr_t v4dst;

			/* Is the source unspecified or mapped? */
			if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
			    !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
				ip1dbg(("ip_bind_v6: "
				    "dst is mapped, but not the src\n"));
				goto bad_addr;
			}
			IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
			IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);

			/*
			 * NOTE(review): an older XXX remark here asked for
			 * ipsec_policy_set to be passed instead of B_FALSE.
			 * The call below already passes ipsec_policy_set, so
			 * that remark appears to be stale.
			 */

			/*
			 * Always verify destination reachability.
			 * The verify_dst value decoded above is not used on
			 * this IPv4 path.
			 */
			error = ip_bind_connected(connp, mp, &v4src, lport,
			    v4dst, fport, ire_requested, ipsec_policy_set,
			    B_TRUE, B_TRUE);
			if (error != 0)
				goto bad_addr;
			/* Write the (possibly updated) source back as mapped. */
			IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
			connp->conn_pkt_isv6 = B_FALSE;
		} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
			ip1dbg(("ip_bind_v6: "
			    "src is mapped, but not the dst\n"));
			goto bad_addr;
		} else {
			error = ip_bind_connected_v6(connp, mp, v6srcp,
			    lport, v6dstp, ipp, fport, ire_requested,
			    ipsec_policy_set, B_TRUE, verify_dst);
			if (error != 0)
				goto bad_addr;
			connp->conn_pkt_isv6 = B_TRUE;
		}
	}
	/* Update qinfo if v4/v6 changed */
	if ((orig_pkt_isv6 != connp->conn_pkt_isv6) &&
	    !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) {
		if (connp->conn_pkt_isv6)
			ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE);
		else
			ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE);
	}

	/*
	 * Pass the IPSEC headers size in ire_ipsec_overhead.
	 * We can't do this in ip_bind_insert_ire because the policy
	 * may not have been inherited at that point in time and hence
	 * conn_out_enforce_policy may not be set.
	 */
	/* Re-read b_cont: the helpers above may have unlinked the mblk. */
	mp1 = mp->b_cont;
	if (ire_requested && connp->conn_out_enforce_policy &&
	    mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) {
		ire_t *ire = (ire_t *)mp1->b_rptr;
		ASSERT(MBLKL(mp1) >= sizeof (ire_t));
		ire->ire_ipsec_overhead = (conn_ipsec_length(connp));
	}

	/* Send it home. */
	mp->b_datap->db_type = M_PCPROTO;
	tbr->PRIM_type = T_BIND_ACK;
	return (mp);

bad_addr:
	/*
	 * EINPROGRESS: nothing to return now.  A positive error becomes a
	 * TSYSERR ack; zero or negative values become TBADADDR.
	 */
	if (error == EINPROGRESS)
		return (NULL);
	if (error > 0)
		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
	else
		mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
	return (mp);
}

/*
 * Here address is verified to be a valid local address.
 * If the IRE_DB_REQ_TYPE mp is present, a multicast
 * address is also considered a valid local address.
 * In the case of a multicast address, however, the
 * upper protocol is expected to reset the src address
 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
 * no packets are emitted with multicast address as
 * source address.
 * The addresses valid for bind are:
 *	(1) - in6addr_any
 *	(2) - IP address of an UP interface
 *	(3) - IP address of a DOWN interface
 *	(4) - a multicast address. In this case
 *	the conn will only receive packets destined to
 *	the specified multicast address. Note: the
 *	application still has to issue an
 *	IPV6_JOIN_GROUP socket option.
 *
 * In all the above cases, the bound address must be valid in the current zone.
 * When the address is loopback or multicast, there might be many matching IREs
 * so bind has to look up based on the zone.
 */
static int
ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src,
    uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set,
    boolean_t fanout_insert)
{
	int		error = 0;
	ire_t		*src_ire = NULL;
	ipif_t		*ipif = NULL;
	mblk_t		*policy_mp;
	zoneid_t	zoneid;

	/* policy_mp is only assigned, and only read, when this flag is set. */
	if (ipsec_policy_set)
		policy_mp = mp->b_cont;

	/*
	 * If it was previously connected, conn_fully_bound would have
	 * been set.
	 */
	connp->conn_fully_bound = B_FALSE;

	zoneid = connp->conn_zoneid;

	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		src_ire = ire_route_lookup_v6(v6src, 0, 0,
		    0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY);
		/*
		 * If an address other than in6addr_any is requested,
		 * we verify that it is a valid address for bind
		 * Note: Following code is in if-else-if form for
		 * readability compared to a condition check.
		 */
		ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
		if (IRE_IS_LOCAL(src_ire)) {
			/*
			 * (2) Bind to address of local UP interface
			 */
			ipif = src_ire->ire_ipif;
		} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
			ipif_t	*multi_ipif = NULL;
			ire_t	*save_ire;
			/*
			 * (4) bind to multicast address.
			 * Fake out the IRE returned to upper
			 * layer to be a broadcast IRE in
			 * ip_bind_insert_ire_v6().
			 * Pass other information that matches
			 * the ipif (e.g. the source address).
			 * conn_multicast_ill is only used for
			 * IPv6 packets
			 */
			mutex_enter(&connp->conn_lock);
			if (connp->conn_multicast_ill != NULL) {
				(void) ipif_lookup_zoneid(
				    connp->conn_multicast_ill, zoneid, 0,
				    &multi_ipif);
			} else {
				/*
				 * Look for default like
				 * ip_wput_v6
				 */
				multi_ipif = ipif_lookup_group_v6(
				    &ipv6_unspecified_group, zoneid);
			}
			mutex_exit(&connp->conn_lock);
			/*
			 * Swap the looked-up ire for the ipif's ire.  On
			 * failure the original is restored so that the common
			 * exit releases it; on success the original is
			 * released here.
			 */
			save_ire = src_ire;
			src_ire = NULL;
			if (multi_ipif == NULL || !ire_requested ||
			    (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) {
				src_ire = save_ire;
				error = EADDRNOTAVAIL;
			} else {
				ASSERT(src_ire != NULL);
				if (save_ire != NULL)
					ire_refrele(save_ire);
			}
			if (multi_ipif != NULL)
				ipif_refrele(multi_ipif);
		} else {
			/*
			 * Neither a local IRE nor multicast: look the address
			 * up as an interface address (presumably case (3), a
			 * DOWN interface).  The protocol byte is put back
			 * behind b_wptr first because mp is handed to the
			 * lookup together with ip_wput_nondata.
			 */
			*mp->b_wptr++ = (char)connp->conn_ulp;
			ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid,
			    CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error);
			if (ipif == NULL) {
				if (error == EINPROGRESS) {
					/*
					 * Return without touching mp or the
					 * policy mblk.
					 * NOTE(review): presumably the lookup
					 * now owns mp and will restart the
					 * bind - confirm.
					 */
					if (src_ire != NULL)
						ire_refrele(src_ire);
					return (error);
				}
				/*
				 * Not a valid address for bind
				 */
				error = EADDRNOTAVAIL;
			} else {
				ipif_refrele(ipif);
			}
			/*
			 * Just to keep it consistent with the processing in
			 * ip_bind_v6().
			 */
			mp->b_wptr--;
		}

		if (error != 0) {
			/* Red Alert!  Attempting to be a bogon! */
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ip_bind_laddr_v6: bad src"
				    " address %s\n", AF_INET6, v6src);
			}
			goto bad_addr;
		}
	}

	/*
	 * Allow setting new policies. For example, disconnects come
	 * down as ipa_t bind. As we would have set conn_policy_cached
	 * to B_TRUE before, we should set it to B_FALSE, so that policy
	 * can change after the disconnect.
	 */
	connp->conn_policy_cached = B_FALSE;

	/* If not fanout_insert this was just an address verification */
	if (fanout_insert) {
		/*
		 * The addresses have been verified. Time to insert in
		 * the correct fanout list.
		 */
		connp->conn_srcv6 = *v6src;
		connp->conn_remv6 = ipv6_all_zeros;
		connp->conn_lport = lport;
		connp->conn_fport = 0;

		/*
		 * We need to make sure that the conn_recv is set to a non-null
		 * value before we insert the conn_t into the classifier table.
		 * This is to avoid a race with an incoming packet which does
		 * an ipcl_classify().
		 */
		/* *mp->b_wptr is the protocol byte just past the data. */
		if (*mp->b_wptr == IPPROTO_TCP)
			connp->conn_recv = tcp_conn_request;
		error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport);
	}
	if (error == 0) {
		/* A failed insert below is reported as -1. */
		if (ire_requested) {
			if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) {
				error = -1;
				goto bad_addr;
			}
		} else if (ipsec_policy_set) {
			if (!ip_bind_ipsec_policy_set(connp, policy_mp)) {
				error = -1;
				goto bad_addr;
			}
		}
	} else if (connp->conn_ulp == IPPROTO_TCP) {
		/* The fanout insert failed: switch conn_recv to tcp_input. */
		connp->conn_recv = tcp_input;
	}
bad_addr:
	/* Common exit, also reached on success with error == 0. */
	if (error != 0) {
		if (connp->conn_anon_port) {
			(void) tsol_mlp_anon(crgetzone(connp->conn_cred),
			    connp->conn_mlp_type, connp->conn_ulp, ntohs(lport),
			    B_FALSE);
		}
		connp->conn_mlp_type = mlptSingle;
	}

	if (src_ire != NULL)
		ire_refrele(src_ire);

	/* The policy mblk is freed and unlinked on success and failure. */
	if (ipsec_policy_set) {
		ASSERT(policy_mp != NULL);
		freeb(policy_mp);
		/*
		 * As of now assume that nothing else accompanies
		 * IPSEC_POLICY_SET.
		 */
		mp->b_cont = NULL;
	}
	return (error);
}

/*
 * Restart a bind request that was queued through ip6_asp_pending_op()
 * by ip_bind_connected_v6().  The request in mp is run through
 * ip_bind_v6() again and any resulting reply is delivered according to
 * the type of conn: via squeue_fill() for TCP, udp_resume_bind() for
 * UDP, and qreply() otherwise.  A NULL result from ip_bind_v6() means
 * there is nothing to deliver yet.
 */
/* ARGSUSED */
static void
ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp,
    void *dummy_arg)
{
	conn_t	*connp = NULL;
	t_scalar_t prim;

	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);

	if (CONN_Q(q))
		connp = Q_TO_CONN(q);
	ASSERT(connp != NULL);

	/* prim is only examined by the ASSERT below. */
	prim = ((union T_primitives *)mp->b_rptr)->type;
	ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ);

	if (IPCL_IS_TCP(connp)) {
		/* Pass sticky_ipp for scope_id and pktinfo */
		mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp);
	} else {
		/* For UDP and ICMP */
		mp = ip_bind_v6(q, mp, connp, NULL);
	}
	if (mp != NULL) {
		if (IPCL_IS_TCP(connp)) {
			/* Take a conn reference for the queued callback. */
			CONN_INC_REF(connp);
			squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind,
			    connp, SQTAG_TCP_RPUTOTHER);
		} else if (IPCL_IS_UDP(connp)) {
			udp_resume_bind(connp, mp);
		} else {
			qreply(q, mp);
			CONN_OPER_PENDING_DONE(connp);
		}
	}
}

/*
 * Verify that both the source and destination addresses
 * are valid.  If verify_dst, then destination address must also be reachable,
 * i.e. have a route. Protocols like TCP want this. Tunnels do not.
 * It takes ip6_pkt_t * as one of the arguments to determine correct
 * source address when IPV6_PKTINFO or scope_id is set along with a link-local
 * destination address. Note that parameter ipp is only useful for TCP connect
 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
 * non-TCP cases, it is NULL and for all other tcp cases it is not useful.
2669 * 2670 */ 2671 static int 2672 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2673 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2674 boolean_t ire_requested, boolean_t ipsec_policy_set, 2675 boolean_t fanout_insert, boolean_t verify_dst) 2676 { 2677 ire_t *src_ire; 2678 ire_t *dst_ire; 2679 int error = 0; 2680 int protocol; 2681 mblk_t *policy_mp; 2682 ire_t *sire = NULL; 2683 ire_t *md_dst_ire = NULL; 2684 ill_t *md_ill = NULL; 2685 ill_t *dst_ill = NULL; 2686 ipif_t *src_ipif = NULL; 2687 zoneid_t zoneid; 2688 boolean_t ill_held = B_FALSE; 2689 2690 src_ire = dst_ire = NULL; 2691 /* 2692 * NOTE: The protocol is beyond the wptr because that's how 2693 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2694 */ 2695 protocol = *mp->b_wptr & 0xFF; 2696 2697 /* 2698 * If we never got a disconnect before, clear it now. 2699 */ 2700 connp->conn_fully_bound = B_FALSE; 2701 2702 if (ipsec_policy_set) { 2703 policy_mp = mp->b_cont; 2704 } 2705 2706 zoneid = connp->conn_zoneid; 2707 2708 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2709 ipif_t *ipif; 2710 2711 /* 2712 * Use an "emulated" IRE_BROADCAST to tell the transport it 2713 * is a multicast. 2714 * Pass other information that matches 2715 * the ipif (e.g. the source address). 
2716 * 2717 * conn_multicast_ill is only used for IPv6 packets 2718 */ 2719 mutex_enter(&connp->conn_lock); 2720 if (connp->conn_multicast_ill != NULL) { 2721 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2722 zoneid, 0, &ipif); 2723 } else { 2724 /* Look for default like ip_wput_v6 */ 2725 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2726 } 2727 mutex_exit(&connp->conn_lock); 2728 if (ipif == NULL || !ire_requested || 2729 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2730 if (ipif != NULL) 2731 ipif_refrele(ipif); 2732 if (ip_debug > 2) { 2733 /* ip1dbg */ 2734 pr_addr_dbg("ip_bind_connected_v6: bad " 2735 "connected multicast %s\n", AF_INET6, 2736 v6dst); 2737 } 2738 error = ENETUNREACH; 2739 goto bad_addr; 2740 } 2741 if (ipif != NULL) 2742 ipif_refrele(ipif); 2743 } else { 2744 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2745 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2746 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2747 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2748 /* 2749 * We also prevent ire's with src address INADDR_ANY to 2750 * be used, which are created temporarily for 2751 * sending out packets from endpoints that have 2752 * conn_unspec_src set. 2753 */ 2754 if (dst_ire == NULL || 2755 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2756 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2757 /* 2758 * When verifying destination reachability, we always 2759 * complain. 2760 * 2761 * When not verifying destination reachability but we 2762 * found an IRE, i.e. the destination is reachable, 2763 * then the other tests still apply and we complain. 
2764 */ 2765 if (verify_dst || (dst_ire != NULL)) { 2766 if (ip_debug > 2) { 2767 /* ip1dbg */ 2768 pr_addr_dbg("ip_bind_connected_v6: bad" 2769 " connected dst %s\n", AF_INET6, 2770 v6dst); 2771 } 2772 if (dst_ire == NULL || 2773 !(dst_ire->ire_type & IRE_HOST)) { 2774 error = ENETUNREACH; 2775 } else { 2776 error = EHOSTUNREACH; 2777 } 2778 goto bad_addr; 2779 } 2780 } 2781 } 2782 2783 /* 2784 * We now know that routing will allow us to reach the destination. 2785 * Check whether Trusted Solaris policy allows communication with this 2786 * host, and pretend that the destination is unreachable if not. 2787 * 2788 * This is never a problem for TCP, since that transport is known to 2789 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2790 * handling. If the remote is unreachable, it will be detected at that 2791 * point, so there's no reason to check it here. 2792 * 2793 * Note that for sendto (and other datagram-oriented friends), this 2794 * check is done as part of the data path label computation instead. 2795 * The check here is just to make non-TCP connect() report the right 2796 * error. 2797 */ 2798 if (dst_ire != NULL && is_system_labeled() && 2799 !IPCL_IS_TCP(connp) && 2800 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2801 connp->conn_mac_exempt) != 0) { 2802 error = EHOSTUNREACH; 2803 if (ip_debug > 2) { 2804 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2805 AF_INET6, v6dst); 2806 } 2807 goto bad_addr; 2808 } 2809 2810 /* 2811 * If the app does a connect(), it means that it will most likely 2812 * send more than 1 packet to the destination. It makes sense 2813 * to clear the temporary flag. 
2814 */ 2815 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2816 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2817 irb_t *irb = dst_ire->ire_bucket; 2818 2819 rw_enter(&irb->irb_lock, RW_WRITER); 2820 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2821 irb->irb_tmp_ire_cnt--; 2822 rw_exit(&irb->irb_lock); 2823 } 2824 2825 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2826 2827 /* 2828 * See if we should notify ULP about MDT; we do this whether or not 2829 * ire_requested is TRUE, in order to handle active connects; MDT 2830 * eligibility tests for passive connects are handled separately 2831 * through tcp_adapt_ire(). We do this before the source address 2832 * selection, because dst_ire may change after a call to 2833 * ipif_select_source_v6(). This is a best-effort check, as the 2834 * packet for this connection may not actually go through 2835 * dst_ire->ire_stq, and the exact IRE can only be known after 2836 * calling ip_newroute_v6(). This is why we further check on the 2837 * IRE during Multidata packet transmission in tcp_multisend(). 
2838 */ 2839 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2840 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2841 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2842 ILL_MDT_CAPABLE(md_ill)) { 2843 md_dst_ire = dst_ire; 2844 IRE_REFHOLD(md_dst_ire); 2845 } 2846 2847 if (dst_ire != NULL && 2848 dst_ire->ire_type == IRE_LOCAL && 2849 dst_ire->ire_zoneid != zoneid && 2850 dst_ire->ire_zoneid != ALL_ZONES) { 2851 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2852 zoneid, 0, NULL, 2853 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2854 MATCH_IRE_RJ_BHOLE); 2855 if (src_ire == NULL) { 2856 error = EHOSTUNREACH; 2857 goto bad_addr; 2858 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2859 if (!(src_ire->ire_type & IRE_HOST)) 2860 error = ENETUNREACH; 2861 else 2862 error = EHOSTUNREACH; 2863 goto bad_addr; 2864 } 2865 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2866 src_ipif = src_ire->ire_ipif; 2867 ipif_refhold(src_ipif); 2868 *v6src = src_ipif->ipif_v6lcl_addr; 2869 } 2870 ire_refrele(src_ire); 2871 src_ire = NULL; 2872 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2873 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2874 *v6src = sire->ire_src_addr_v6; 2875 ire_refrele(dst_ire); 2876 dst_ire = sire; 2877 sire = NULL; 2878 } else if (dst_ire->ire_type == IRE_CACHE && 2879 (dst_ire->ire_flags & RTF_SETSRC)) { 2880 ASSERT(dst_ire->ire_zoneid == zoneid || 2881 dst_ire->ire_zoneid == ALL_ZONES); 2882 *v6src = dst_ire->ire_src_addr_v6; 2883 } else { 2884 /* 2885 * Pick a source address so that a proper inbound load 2886 * spreading would happen. Use dst_ill specified by the 2887 * app. when socket option or scopeid is set. 
2888 */ 2889 int err; 2890 2891 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2892 uint_t if_index; 2893 2894 /* 2895 * Scope id or IPV6_PKTINFO 2896 */ 2897 2898 if_index = ipp->ipp_ifindex; 2899 dst_ill = ill_lookup_on_ifindex( 2900 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2901 if (dst_ill == NULL) { 2902 ip1dbg(("ip_bind_connected_v6:" 2903 " bad ifindex %d\n", if_index)); 2904 error = EADDRNOTAVAIL; 2905 goto bad_addr; 2906 } 2907 ill_held = B_TRUE; 2908 } else if (connp->conn_outgoing_ill != NULL) { 2909 /* 2910 * For IPV6_BOUND_IF socket option, 2911 * conn_outgoing_ill should be set 2912 * already in TCP or UDP/ICMP. 2913 */ 2914 dst_ill = conn_get_held_ill(connp, 2915 &connp->conn_outgoing_ill, &err); 2916 if (err == ILL_LOOKUP_FAILED) { 2917 ip1dbg(("ip_bind_connected_v6:" 2918 "no ill for bound_if\n")); 2919 error = EADDRNOTAVAIL; 2920 goto bad_addr; 2921 } 2922 ill_held = B_TRUE; 2923 } else if (dst_ire->ire_stq != NULL) { 2924 /* No need to hold ill here */ 2925 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2926 } else { 2927 /* No need to hold ill here */ 2928 dst_ill = dst_ire->ire_ipif->ipif_ill; 2929 } 2930 if (!ip6_asp_can_lookup()) { 2931 *mp->b_wptr++ = (char)protocol; 2932 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2933 ip_bind_connected_resume_v6); 2934 error = EINPROGRESS; 2935 goto refrele_and_quit; 2936 } 2937 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2938 RESTRICT_TO_NONE, connp->conn_src_preferences, 2939 zoneid); 2940 ip6_asp_table_refrele(); 2941 if (src_ipif == NULL) { 2942 pr_addr_dbg("ip_bind_connected_v6: " 2943 "no usable source address for " 2944 "connection to %s\n", AF_INET6, v6dst); 2945 error = EADDRNOTAVAIL; 2946 goto bad_addr; 2947 } 2948 *v6src = src_ipif->ipif_v6lcl_addr; 2949 } 2950 } 2951 2952 /* 2953 * We do ire_route_lookup_v6() here (and not an interface lookup) 2954 * as we assert that v6src should only come from an 2955 * UP interface for hard binding. 
2956 */ 2957 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2958 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2959 2960 /* src_ire must be a local|loopback */ 2961 if (!IRE_IS_LOCAL(src_ire)) { 2962 if (ip_debug > 2) { 2963 /* ip1dbg */ 2964 pr_addr_dbg("ip_bind_connected_v6: bad " 2965 "connected src %s\n", AF_INET6, v6src); 2966 } 2967 error = EADDRNOTAVAIL; 2968 goto bad_addr; 2969 } 2970 2971 /* 2972 * If the source address is a loopback address, the 2973 * destination had best be local or multicast. 2974 * The transports that can't handle multicast will reject 2975 * those addresses. 2976 */ 2977 if (src_ire->ire_type == IRE_LOOPBACK && 2978 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2979 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2980 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2981 error = -1; 2982 goto bad_addr; 2983 } 2984 /* 2985 * Allow setting new policies. For example, disconnects come 2986 * down as ipa_t bind. As we would have set conn_policy_cached 2987 * to B_TRUE before, we should set it to B_FALSE, so that policy 2988 * can change after the disconnect. 2989 */ 2990 connp->conn_policy_cached = B_FALSE; 2991 2992 /* 2993 * The addresses have been verified. Initialize the conn 2994 * before calling the policy as they expect the conns 2995 * initialized. 2996 */ 2997 connp->conn_srcv6 = *v6src; 2998 connp->conn_remv6 = *v6dst; 2999 connp->conn_lport = lport; 3000 connp->conn_fport = fport; 3001 3002 ASSERT(!(ipsec_policy_set && ire_requested)); 3003 if (ire_requested) { 3004 iulp_t *ulp_info = NULL; 3005 3006 /* 3007 * Note that sire will not be NULL if this is an off-link 3008 * connection and there is not cache for that dest yet. 3009 * 3010 * XXX Because of an existing bug, if there are multiple 3011 * default routes, the IRE returned now may not be the actual 3012 * default route used (default routes are chosen in a 3013 * round robin fashion). 
So if the metrics for different 3014 * default routes are different, we may return the wrong 3015 * metrics. This will not be a problem if the existing 3016 * bug is fixed. 3017 */ 3018 if (sire != NULL) 3019 ulp_info = &(sire->ire_uinfo); 3020 3021 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3022 error = -1; 3023 goto bad_addr; 3024 } 3025 } else if (ipsec_policy_set) { 3026 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3027 error = -1; 3028 goto bad_addr; 3029 } 3030 } 3031 3032 /* 3033 * Cache IPsec policy in this conn. If we have per-socket policy, 3034 * we'll cache that. If we don't, we'll inherit global policy. 3035 * 3036 * We can't insert until the conn reflects the policy. Note that 3037 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3038 * connections where we don't have a policy. This is to prevent 3039 * global policy lookups in the inbound path. 3040 * 3041 * If we insert before we set conn_policy_cached, 3042 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3043 * because global policy cound be non-empty. We normally call 3044 * ipsec_check_policy() for conn_policy_cached connections only if 3045 * conn_in_enforce_policy is set. But in this case, 3046 * conn_policy_cached can get set anytime since we made the 3047 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3048 * is called, which will make the above assumption false. Thus, we 3049 * need to insert after we set conn_policy_cached. 3050 */ 3051 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3052 goto bad_addr; 3053 3054 /* If not fanout_insert this was just an address verification */ 3055 if (fanout_insert) { 3056 /* 3057 * The addresses have been verified. Time to insert in 3058 * the correct fanout list. 3059 * We need to make sure that the conn_recv is set to a non-null 3060 * value before we insert the conn_t into the classifier table. 
3061 * This is to avoid a race with an incoming packet which does 3062 * an ipcl_classify(). 3063 */ 3064 if (protocol == IPPROTO_TCP) 3065 connp->conn_recv = tcp_input; 3066 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3067 connp->conn_ports, 3068 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 3069 } 3070 if (error == 0) { 3071 connp->conn_fully_bound = B_TRUE; 3072 /* 3073 * Our initial checks for MDT have passed; the IRE is not 3074 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3075 * be supporting MDT. Pass the IRE, IPC and ILL into 3076 * ip_mdinfo_return(), which performs further checks 3077 * against them and upon success, returns the MDT info 3078 * mblk which we will attach to the bind acknowledgment. 3079 */ 3080 if (md_dst_ire != NULL) { 3081 mblk_t *mdinfo_mp; 3082 3083 ASSERT(md_ill != NULL); 3084 ASSERT(md_ill->ill_mdt_capab != NULL); 3085 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3086 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3087 linkb(mp, mdinfo_mp); 3088 } 3089 } 3090 bad_addr: 3091 if (ipsec_policy_set) { 3092 ASSERT(policy_mp != NULL); 3093 freeb(policy_mp); 3094 /* 3095 * As of now assume that nothing else accompanies 3096 * IPSEC_POLICY_SET. 3097 */ 3098 mp->b_cont = NULL; 3099 } 3100 refrele_and_quit: 3101 if (src_ire != NULL) 3102 IRE_REFRELE(src_ire); 3103 if (dst_ire != NULL) 3104 IRE_REFRELE(dst_ire); 3105 if (sire != NULL) 3106 IRE_REFRELE(sire); 3107 if (src_ipif != NULL) 3108 ipif_refrele(src_ipif); 3109 if (md_dst_ire != NULL) 3110 IRE_REFRELE(md_dst_ire); 3111 if (ill_held && dst_ill != NULL) 3112 ill_refrele(dst_ill); 3113 return (error); 3114 } 3115 3116 /* 3117 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3118 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
3119 */ 3120 static boolean_t 3121 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3122 iulp_t *ulp_info) 3123 { 3124 mblk_t *mp1; 3125 ire_t *ret_ire; 3126 3127 mp1 = mp->b_cont; 3128 ASSERT(mp1 != NULL); 3129 3130 if (ire != NULL) { 3131 /* 3132 * mp1 initialized above to IRE_DB_REQ_TYPE 3133 * appended mblk. Its <upper protocol>'s 3134 * job to make sure there is room. 3135 */ 3136 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3137 return (B_FALSE); 3138 3139 mp1->b_datap->db_type = IRE_DB_TYPE; 3140 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3141 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3142 ret_ire = (ire_t *)mp1->b_rptr; 3143 if (IN6_IS_ADDR_MULTICAST(dst) || 3144 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3145 ret_ire->ire_type = IRE_BROADCAST; 3146 ret_ire->ire_addr_v6 = *dst; 3147 } 3148 if (ulp_info != NULL) { 3149 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3150 sizeof (iulp_t)); 3151 } 3152 ret_ire->ire_mp = mp1; 3153 } else { 3154 /* 3155 * No IRE was found. Remove IRE mblk. 3156 */ 3157 mp->b_cont = mp1->b_cont; 3158 freeb(mp1); 3159 } 3160 return (B_TRUE); 3161 } 3162 3163 /* 3164 * Add an ip6i_t header to the front of the mblk. 3165 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3166 * Returns NULL if allocation fails (and frees original message). 3167 * Used in outgoing path when going through ip_newroute_*v6(). 3168 * Used in incoming path to pass ifindex to transports. 
3169 */ 3170 mblk_t * 3171 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3172 { 3173 mblk_t *mp1; 3174 ip6i_t *ip6i; 3175 ip6_t *ip6h; 3176 3177 ip6h = (ip6_t *)mp->b_rptr; 3178 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3179 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3180 mp->b_datap->db_ref > 1) { 3181 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3182 if (mp1 == NULL) { 3183 freemsg(mp); 3184 return (NULL); 3185 } 3186 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3187 mp1->b_cont = mp; 3188 mp = mp1; 3189 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3190 } 3191 mp->b_rptr = (uchar_t *)ip6i; 3192 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3193 ip6i->ip6i_nxt = IPPROTO_RAW; 3194 if (ill != NULL) { 3195 ip6i->ip6i_flags = IP6I_IFINDEX; 3196 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3197 } else { 3198 ip6i->ip6i_flags = 0; 3199 } 3200 ip6i->ip6i_nexthop = *dst; 3201 return (mp); 3202 } 3203 3204 /* 3205 * Handle protocols with which IP is less intimate. There 3206 * can be more than one stream bound to a particular 3207 * protocol. When this is the case, normally each one gets a copy 3208 * of any incoming packets. 3209 * However, if the packet was tunneled and not multicast we only send to it 3210 * the first match. 3211 * 3212 * Zones notes: 3213 * Packets will be distributed to streams in all zones. This is really only 3214 * useful for ICMPv6 as only applications in the global zone can create raw 3215 * sockets for other protocols. 
3216 */ 3217 static void 3218 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3219 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3220 boolean_t mctl_present, zoneid_t zoneid) 3221 { 3222 queue_t *rq; 3223 mblk_t *mp1, *first_mp1; 3224 in6_addr_t dst = ip6h->ip6_dst; 3225 in6_addr_t src = ip6h->ip6_src; 3226 boolean_t one_only; 3227 mblk_t *first_mp = mp; 3228 boolean_t secure, shared_addr; 3229 conn_t *connp, *first_connp, *next_connp; 3230 connf_t *connfp; 3231 3232 if (mctl_present) { 3233 mp = first_mp->b_cont; 3234 secure = ipsec_in_is_secure(first_mp); 3235 ASSERT(mp != NULL); 3236 } else { 3237 secure = B_FALSE; 3238 } 3239 3240 /* 3241 * If the packet was tunneled and not multicast we only send to it 3242 * the first match. 3243 */ 3244 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3245 !IN6_IS_ADDR_MULTICAST(&dst)); 3246 3247 shared_addr = (zoneid == ALL_ZONES); 3248 if (shared_addr) { 3249 /* 3250 * We don't allow multilevel ports for raw IP, so no need to 3251 * check for that here. 3252 */ 3253 zoneid = tsol_packet_to_zoneid(mp); 3254 } 3255 3256 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3257 mutex_enter(&connfp->connf_lock); 3258 connp = connfp->connf_head; 3259 for (connp = connfp->connf_head; connp != NULL; 3260 connp = connp->conn_next) { 3261 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3262 zoneid) && 3263 (!is_system_labeled() || 3264 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3265 connp))) 3266 break; 3267 } 3268 3269 if (connp == NULL || connp->conn_upq == NULL) { 3270 /* 3271 * No one bound to this port. Is 3272 * there a client that wants all 3273 * unclaimed datagrams? 
3274 */ 3275 mutex_exit(&connfp->connf_lock); 3276 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3277 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3278 nexthdr_offset, mctl_present, zoneid)) { 3279 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3280 } 3281 3282 return; 3283 } 3284 3285 CONN_INC_REF(connp); 3286 first_connp = connp; 3287 3288 /* 3289 * XXX: Fix the multiple protocol listeners case. We should not 3290 * be walking the conn->next list here. 3291 */ 3292 if (one_only) { 3293 /* 3294 * Only send message to one tunnel driver by immediately 3295 * terminating the loop. 3296 */ 3297 connp = NULL; 3298 } else { 3299 connp = connp->conn_next; 3300 3301 } 3302 for (;;) { 3303 while (connp != NULL) { 3304 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3305 flags, zoneid) && 3306 (!is_system_labeled() || 3307 tsol_receive_local(mp, &dst, IPV6_VERSION, 3308 shared_addr, connp))) 3309 break; 3310 connp = connp->conn_next; 3311 } 3312 3313 /* 3314 * Just copy the data part alone. The mctl part is 3315 * needed just for verifying policy and it is never 3316 * sent up. 3317 */ 3318 if (connp == NULL || connp->conn_upq == NULL || 3319 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3320 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3321 /* 3322 * No more intested clients or memory 3323 * allocation failed 3324 */ 3325 connp = first_connp; 3326 break; 3327 } 3328 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3329 CONN_INC_REF(connp); 3330 mutex_exit(&connfp->connf_lock); 3331 rq = connp->conn_rq; 3332 /* 3333 * For link-local always add ifindex so that transport can set 3334 * sin6_scope_id. Avoid it for ICMP error fanout. 
3335 */ 3336 if ((connp->conn_ipv6_recvpktinfo || 3337 IN6_IS_ADDR_LINKLOCAL(&src)) && 3338 (flags & IP_FF_IP6INFO)) { 3339 /* Add header */ 3340 mp1 = ip_add_info_v6(mp1, inill, &dst); 3341 } 3342 if (mp1 == NULL) { 3343 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3344 } else if (!canputnext(rq)) { 3345 if (flags & IP_FF_RAWIP) { 3346 BUMP_MIB(ill->ill_ip_mib, 3347 rawipIfStatsInOverflows); 3348 } else { 3349 BUMP_MIB(ill->ill_icmp6_mib, 3350 ipv6IfIcmpInOverflows); 3351 } 3352 3353 freemsg(mp1); 3354 } else { 3355 /* 3356 * Don't enforce here if we're a tunnel - let "tun" do 3357 * it instead. 3358 */ 3359 if (!IPCL_IS_IPTUN(connp) && 3360 (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure)) { 3361 first_mp1 = ipsec_check_inbound_policy 3362 (first_mp1, connp, NULL, ip6h, 3363 mctl_present); 3364 } 3365 if (first_mp1 != NULL) { 3366 if (mctl_present) 3367 freeb(first_mp1); 3368 BUMP_MIB(ill->ill_ip_mib, 3369 ipIfStatsHCInDelivers); 3370 putnext(rq, mp1); 3371 } 3372 } 3373 mutex_enter(&connfp->connf_lock); 3374 /* Follow the next pointer before releasing the conn. */ 3375 next_connp = connp->conn_next; 3376 CONN_DEC_REF(connp); 3377 connp = next_connp; 3378 } 3379 3380 /* Last one. Send it upstream. */ 3381 mutex_exit(&connfp->connf_lock); 3382 3383 /* Initiate IPPF processing */ 3384 if (IP6_IN_IPP(flags)) { 3385 uint_t ifindex; 3386 3387 mutex_enter(&ill->ill_lock); 3388 ifindex = ill->ill_phyint->phyint_ifindex; 3389 mutex_exit(&ill->ill_lock); 3390 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3391 if (mp == NULL) { 3392 CONN_DEC_REF(connp); 3393 if (mctl_present) 3394 freeb(first_mp); 3395 return; 3396 } 3397 } 3398 3399 /* 3400 * For link-local always add ifindex so that transport can set 3401 * sin6_scope_id. Avoid it for ICMP error fanout. 
3402 */ 3403 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3404 (flags & IP_FF_IP6INFO)) { 3405 /* Add header */ 3406 mp = ip_add_info_v6(mp, inill, &dst); 3407 if (mp == NULL) { 3408 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3409 CONN_DEC_REF(connp); 3410 if (mctl_present) 3411 freeb(first_mp); 3412 return; 3413 } else if (mctl_present) { 3414 first_mp->b_cont = mp; 3415 } else { 3416 first_mp = mp; 3417 } 3418 } 3419 3420 rq = connp->conn_rq; 3421 if (!canputnext(rq)) { 3422 if (flags & IP_FF_RAWIP) { 3423 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3424 } else { 3425 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3426 } 3427 3428 freemsg(first_mp); 3429 } else { 3430 if (IPCL_IS_IPTUN(connp)) { 3431 /* 3432 * Tunneled packet. We enforce policy in the tunnel 3433 * module itself. 3434 * 3435 * Send the WHOLE packet up (incl. IPSEC_IN) without 3436 * a policy check. 3437 */ 3438 putnext(rq, first_mp); 3439 CONN_DEC_REF(connp); 3440 return; 3441 } 3442 /* 3443 * Don't enforce here if we're a tunnel - let "tun" do 3444 * it instead. 3445 */ 3446 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3447 (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { 3448 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3449 NULL, ip6h, mctl_present); 3450 if (first_mp == NULL) { 3451 CONN_DEC_REF(connp); 3452 return; 3453 } 3454 } 3455 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3456 putnext(rq, mp); 3457 if (mctl_present) 3458 freeb(first_mp); 3459 } 3460 CONN_DEC_REF(connp); 3461 } 3462 3463 /* 3464 * Send an ICMP error after patching up the packet appropriately. Returns 3465 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3466 */ 3467 int 3468 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3469 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3470 boolean_t mctl_present, zoneid_t zoneid) 3471 { 3472 ip6_t *ip6h; 3473 mblk_t *first_mp; 3474 boolean_t secure; 3475 unsigned char db_type; 3476 3477 first_mp = mp; 3478 if (mctl_present) { 3479 mp = mp->b_cont; 3480 secure = ipsec_in_is_secure(first_mp); 3481 ASSERT(mp != NULL); 3482 } else { 3483 /* 3484 * If this is an ICMP error being reported - which goes 3485 * up as M_CTLs, we need to convert them to M_DATA till 3486 * we finish checking with global policy because 3487 * ipsec_check_global_policy() assumes M_DATA as clear 3488 * and M_CTL as secure. 3489 */ 3490 db_type = mp->b_datap->db_type; 3491 mp->b_datap->db_type = M_DATA; 3492 secure = B_FALSE; 3493 } 3494 /* 3495 * We are generating an icmp error for some inbound packet. 3496 * Called from all ip_fanout_(udp, tcp, proto) functions. 3497 * Before we generate an error, check with global policy 3498 * to see whether this is allowed to enter the system. As 3499 * there is no "conn", we are checking with global policy. 
3500 */ 3501 ip6h = (ip6_t *)mp->b_rptr; 3502 if (secure || ipsec_inbound_v6_policy_present) { 3503 first_mp = ipsec_check_global_policy(first_mp, NULL, 3504 NULL, ip6h, mctl_present); 3505 if (first_mp == NULL) 3506 return (0); 3507 } 3508 3509 if (!mctl_present) 3510 mp->b_datap->db_type = db_type; 3511 3512 if (flags & IP_FF_SEND_ICMP) { 3513 if (flags & IP_FF_HDR_COMPLETE) { 3514 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3515 freemsg(first_mp); 3516 return (1); 3517 } 3518 } 3519 switch (icmp_type) { 3520 case ICMP6_DST_UNREACH: 3521 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3522 B_FALSE, B_FALSE, zoneid); 3523 break; 3524 case ICMP6_PARAM_PROB: 3525 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3526 nexthdr_offset, B_FALSE, B_FALSE, zoneid); 3527 break; 3528 default: 3529 #ifdef DEBUG 3530 panic("ip_fanout_send_icmp_v6: wrong type"); 3531 /*NOTREACHED*/ 3532 #else 3533 freemsg(first_mp); 3534 break; 3535 #endif 3536 } 3537 } else { 3538 freemsg(first_mp); 3539 return (0); 3540 } 3541 3542 return (1); 3543 } 3544 3545 3546 /* 3547 * Fanout for TCP packets 3548 * The caller puts <fport, lport> in the ports parameter. 3549 */ 3550 static void 3551 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3552 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3553 { 3554 mblk_t *first_mp; 3555 boolean_t secure; 3556 conn_t *connp; 3557 tcph_t *tcph; 3558 boolean_t syn_present = B_FALSE; 3559 3560 first_mp = mp; 3561 if (mctl_present) { 3562 mp = first_mp->b_cont; 3563 secure = ipsec_in_is_secure(first_mp); 3564 ASSERT(mp != NULL); 3565 } else { 3566 secure = B_FALSE; 3567 } 3568 3569 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3570 3571 if (connp == NULL || 3572 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3573 /* 3574 * No hard-bound match. Send Reset. 
3575 */ 3576 dblk_t *dp = mp->b_datap; 3577 uint32_t ill_index; 3578 3579 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3580 3581 /* Initiate IPPf processing, if needed. */ 3582 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3583 ill_index = ill->ill_phyint->phyint_ifindex; 3584 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3585 if (first_mp == NULL) { 3586 if (connp != NULL) 3587 CONN_DEC_REF(connp); 3588 return; 3589 } 3590 } 3591 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3592 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3593 if (connp != NULL) 3594 CONN_DEC_REF(connp); 3595 return; 3596 } 3597 3598 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3599 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3600 if (connp->conn_flags & IPCL_TCP) { 3601 squeue_t *sqp; 3602 3603 /* 3604 * For fused tcp loopback, assign the eager's 3605 * squeue to be that of the active connect's. 3606 */ 3607 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3608 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3609 !IP6_IN_IPP(flags)) { 3610 ASSERT(Q_TO_CONN(q) != NULL); 3611 sqp = Q_TO_CONN(q)->conn_sqp; 3612 } else { 3613 sqp = IP_SQUEUE_GET(lbolt); 3614 } 3615 3616 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3617 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3618 3619 /* 3620 * db_cksumstuff is unused in the incoming 3621 * path; Thus store the ifindex here. It will 3622 * be cleared in tcp_conn_create_v6(). 
3623 */ 3624 DB_CKSUMSTUFF(mp) = 3625 (intptr_t)ill->ill_phyint->phyint_ifindex; 3626 syn_present = B_TRUE; 3627 } 3628 } 3629 3630 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3631 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3632 if ((flags & TH_RST) || (flags & TH_URG)) { 3633 CONN_DEC_REF(connp); 3634 freemsg(first_mp); 3635 return; 3636 } 3637 if (flags & TH_ACK) { 3638 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3639 CONN_DEC_REF(connp); 3640 return; 3641 } 3642 3643 CONN_DEC_REF(connp); 3644 freemsg(first_mp); 3645 return; 3646 } 3647 3648 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3649 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3650 NULL, ip6h, mctl_present); 3651 if (first_mp == NULL) { 3652 CONN_DEC_REF(connp); 3653 return; 3654 } 3655 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3656 ASSERT(syn_present); 3657 if (mctl_present) { 3658 ASSERT(first_mp != mp); 3659 first_mp->b_datap->db_struioflag |= 3660 STRUIO_POLICY; 3661 } else { 3662 ASSERT(first_mp == mp); 3663 mp->b_datap->db_struioflag &= 3664 ~STRUIO_EAGER; 3665 mp->b_datap->db_struioflag |= 3666 STRUIO_POLICY; 3667 } 3668 } else { 3669 /* 3670 * Discard first_mp early since we're dealing with a 3671 * fully-connected conn_t and tcp doesn't do policy in 3672 * this case. Also, if someone is bound to IPPROTO_TCP 3673 * over raw IP, they don't expect to see a M_CTL. 
3674 */ 3675 if (mctl_present) { 3676 freeb(first_mp); 3677 mctl_present = B_FALSE; 3678 } 3679 first_mp = mp; 3680 } 3681 } 3682 3683 /* Initiate IPPF processing */ 3684 if (IP6_IN_IPP(flags)) { 3685 uint_t ifindex; 3686 3687 mutex_enter(&ill->ill_lock); 3688 ifindex = ill->ill_phyint->phyint_ifindex; 3689 mutex_exit(&ill->ill_lock); 3690 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3691 if (mp == NULL) { 3692 CONN_DEC_REF(connp); 3693 if (mctl_present) { 3694 freeb(first_mp); 3695 } 3696 return; 3697 } else if (mctl_present) { 3698 /* 3699 * ip_add_info_v6 might return a new mp. 3700 */ 3701 ASSERT(first_mp != mp); 3702 first_mp->b_cont = mp; 3703 } else { 3704 first_mp = mp; 3705 } 3706 } 3707 3708 /* 3709 * For link-local always add ifindex so that TCP can bind to that 3710 * interface. Avoid it for ICMP error fanout. 3711 */ 3712 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3713 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3714 (flags & IP_FF_IP6INFO))) { 3715 /* Add header */ 3716 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3717 if (mp == NULL) { 3718 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3719 CONN_DEC_REF(connp); 3720 if (mctl_present) 3721 freeb(first_mp); 3722 return; 3723 } else if (mctl_present) { 3724 ASSERT(first_mp != mp); 3725 first_mp->b_cont = mp; 3726 } else { 3727 first_mp = mp; 3728 } 3729 } 3730 3731 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3732 if (IPCL_IS_TCP(connp)) { 3733 (*ip_input_proc)(connp->conn_sqp, first_mp, 3734 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3735 } else { 3736 putnext(connp->conn_rq, first_mp); 3737 CONN_DEC_REF(connp); 3738 } 3739 } 3740 3741 /* 3742 * Fanout for UDP packets. 3743 * The caller puts <fport, lport> in the ports parameter. 3744 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3745 * 3746 * If SO_REUSEADDR is set all multicast and broadcast packets 3747 * will be delivered to all streams bound to the same port. 
*
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 *
 * Implementation notes:
 * - ports is unpacked with ntohl(): the low 16 bits are taken as the
 *   destination port and the high 16 bits as the source port.
 * - A non-multicast destination is delivered to the first matching conn
 *   only.  A multicast destination is delivered to every matching conn on
 *   the hash chain: the first match is remembered and served last with the
 *   original message, the others get duplicates.
 * - connf_lock is dropped around each delivery; a reference is held on the
 *   conn being served.
 * - When nothing matches, the packet is offered to raw sockets bound to
 *   IPPROTO_UDP (ip_fanout_proto_v6) if there are any, otherwise
 *   ip_fanout_send_icmp_v6() is asked for a port-unreachable error.
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t	dstport, srcport;
	in6_addr_t	dst;
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	connf_t		*connfp;
	conn_t		*first_conn;
	conn_t		*next_conn;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	src;
	boolean_t	shared_addr;

	/* first_mp is the head of the message, mp the data part. */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/* Extract ports in net byte order */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && IPCL_ZONE_MATCH(connp, zoneid) &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		/*
		 * The label check is applied to the first match only; a
		 * failure is treated as "not found" rather than continuing
		 * the search.
		 */
		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		/* Receiver is flow-controlled: drop the whole message. */
		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags)) {
			uint_t ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);

		IP6_STAT(ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data part went up; release the mctl. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/* Multicast: find the first conn that wants the packet. */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	/* Remember the first match; it is served last, with the original. */
	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone.  The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp))
		    == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			/* Data already freed by ip_add_info_v6(). */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) ||
		    secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

			/* Send it upstream */
			CONN_UDP_RECV(connp, mp1);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ipv6_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
		/* Data only; the mctl, if any, is released below. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);
	}
	IP6_STAT(ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 */
	if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid)) {
			BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 *	- Set extension header pointers to appropriate locations
 *	- Determine IPv6 header length and return it
 *	- Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  It also
 * skips fragment headers.  This routine deals with malformed packets
 * of various sorts in which case the returned length is up to the
 * malformed part.
 *
 * Only the bytes up to mp->b_wptr (the first mblk) are examined.
 * nexthdrp may be NULL if the caller does not want the last nexthdr value.
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr?  */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* Length field counts 8-byte units, minus the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			tmpfraghdr = (ip6_frag_t *)whereptr;
			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
				ipp->ipp_fields |= IPPF_FRAGHDR;
				ipp->ipp_fraghdr = tmpfraghdr;
				ipp->ipp_fraghdrlen = ehdrlen;
			}
			break;
		case IPPROTO_NONE:
		default:
			/* Not an extension header handled here: stop. */
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in the parts of an IPv6 header that may have been left blank.
 * If the source address is unspecified it is taken from the IRE returned
 * by ire_lookup_local_v6(zoneid).  ip6_vcf and ip6_hops are always
 * overwritten with IPV6_DEFAULT_VERS_AND_FLOW and ipv6_def_hops.
 *
 * Returns 0 on success, 1 if a source address was needed but no IRE
 * could be found (in which case the header is left untouched).
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid)
{
	ire_t	*ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4219 */ 4220 boolean_t 4221 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4222 uint8_t **nexthdrpp) 4223 { 4224 uint16_t length; 4225 uint_t ehdrlen; 4226 uint8_t *nexthdrp; 4227 uint8_t *whereptr; 4228 uint8_t *endptr; 4229 ip6_dest_t *desthdr; 4230 ip6_rthdr_t *rthdr; 4231 ip6_frag_t *fraghdr; 4232 4233 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 4234 length = IPV6_HDR_LEN; 4235 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4236 endptr = mp->b_wptr; 4237 4238 nexthdrp = &ip6h->ip6_nxt; 4239 while (whereptr < endptr) { 4240 /* Is there enough left for len + nexthdr? */ 4241 if (whereptr + MIN_EHDR_LEN > endptr) 4242 break; 4243 4244 switch (*nexthdrp) { 4245 case IPPROTO_HOPOPTS: 4246 case IPPROTO_DSTOPTS: 4247 /* Assumes the headers are identical for hbh and dst */ 4248 desthdr = (ip6_dest_t *)whereptr; 4249 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4250 if ((uchar_t *)desthdr + ehdrlen > endptr) 4251 return (B_FALSE); 4252 nexthdrp = &desthdr->ip6d_nxt; 4253 break; 4254 case IPPROTO_ROUTING: 4255 rthdr = (ip6_rthdr_t *)whereptr; 4256 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4257 if ((uchar_t *)rthdr + ehdrlen > endptr) 4258 return (B_FALSE); 4259 nexthdrp = &rthdr->ip6r_nxt; 4260 break; 4261 case IPPROTO_FRAGMENT: 4262 fraghdr = (ip6_frag_t *)whereptr; 4263 ehdrlen = sizeof (ip6_frag_t); 4264 if ((uchar_t *)&fraghdr[1] > endptr) 4265 return (B_FALSE); 4266 nexthdrp = &fraghdr->ip6f_nxt; 4267 break; 4268 case IPPROTO_NONE: 4269 /* No next header means we're finished */ 4270 default: 4271 *hdr_length_ptr = length; 4272 *nexthdrpp = nexthdrp; 4273 return (B_TRUE); 4274 } 4275 length += ehdrlen; 4276 whereptr += ehdrlen; 4277 *hdr_length_ptr = length; 4278 *nexthdrpp = nexthdrp; 4279 } 4280 switch (*nexthdrp) { 4281 case IPPROTO_HOPOPTS: 4282 case IPPROTO_DSTOPTS: 4283 case IPPROTO_ROUTING: 4284 case IPPROTO_FRAGMENT: 4285 /* 4286 * If any know extension headers are still to be processed, 4287 * the packet's malformed (or at 
least all the IP header(s) are 4288 * not in the same mblk - and that should never happen. 4289 */ 4290 return (B_FALSE); 4291 4292 default: 4293 /* 4294 * If we get here, we know that all of the IP headers were in 4295 * the same mblk, even if the ULP header is in the next mblk. 4296 */ 4297 *hdr_length_ptr = length; 4298 *nexthdrpp = nexthdrp; 4299 return (B_TRUE); 4300 } 4301 } 4302 4303 /* 4304 * Return the length of the IPv6 related headers (including extension headers) 4305 * Returns a length even if the packet is malformed. 4306 */ 4307 int 4308 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4309 { 4310 uint16_t hdr_len; 4311 uint8_t *nexthdrp; 4312 4313 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4314 return (hdr_len); 4315 } 4316 4317 /* 4318 * Select an ill for the packet by considering load spreading across 4319 * a different ill in the group if dst_ill is part of some group. 4320 */ 4321 static ill_t * 4322 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4323 { 4324 ill_t *ill; 4325 4326 /* 4327 * We schedule irrespective of whether the source address is 4328 * INADDR_UNSPECIED or not. 4329 */ 4330 ill = illgrp_scheduler(dst_ill); 4331 if (ill == NULL) 4332 return (NULL); 4333 4334 /* 4335 * For groups with names ip_sioctl_groupname ensures that all 4336 * ills are of same type. For groups without names, ifgrp_insert 4337 * ensures this. 4338 */ 4339 ASSERT(dst_ill->ill_type == ill->ill_type); 4340 4341 return (ill); 4342 } 4343 4344 /* 4345 * IPv6 - 4346 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4347 * to send out a packet to a destination address for which we do not have 4348 * specific routing information. 4349 * 4350 * Handle non-multicast packets. If ill is non-NULL the match is done 4351 * for that ill. 
4352 * 4353 * When a specific ill is specified (using IPV6_PKTINFO, 4354 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4355 * on routing entries (ftable and ctable) that have a matching 4356 * ire->ire_ipif->ipif_ill. Thus this can only be used 4357 * for destinations that are on-link for the specific ill 4358 * and that can appear on multiple links. Thus it is useful 4359 * for multicast destinations, link-local destinations, and 4360 * at some point perhaps for site-local destinations (if the 4361 * node sits at a site boundary). 4362 * We create the cache entries in the regular ctable since 4363 * it can not "confuse" things for other destinations. 4364 * table. 4365 * 4366 * When ill is part of an ill group, we subject the packets 4367 * to load spreading even if the ill is specified by the 4368 * means described above. We disable only for IPV6_BOUND_PIF 4369 * and for the cases where IP6I_ATTACH_IF is set i.e. NS/NA/ 4370 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4371 * set. 4372 * 4373 * NOTE : These are the scopes of some of the variables that point at IRE, 4374 * which need to be followed while making any future modifications 4375 * to avoid memory leaks. 4376 * 4377 * - ire and sire are the entries looked up initially by 4378 * ire_ftable_lookup_v6. 4379 * - ipif_ire is used to hold the interface ire associated with 4380 * the new cache ire. But its scope is limited, so we always REFRELE 4381 * it before branching out to error paths. 4382 * - save_ire is initialized before ire_create, so that ire returned 4383 * by ire_create will not over-write the ire. We REFRELE save_ire 4384 * before breaking out of the switch. 4385 * 4386 * Thus on failures, we have to REFRELE only ire and sire, if they 4387 * are not NULL. 4388 * 4389 * v6srcp may be used in the future. Currently unused. 
4390 */ 4391 /* ARGSUSED */ 4392 void 4393 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4394 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4395 { 4396 in6_addr_t v6gw; 4397 in6_addr_t dst; 4398 ire_t *ire = NULL; 4399 ipif_t *src_ipif = NULL; 4400 ill_t *dst_ill = NULL; 4401 ire_t *sire = NULL; 4402 ire_t *save_ire; 4403 mblk_t *dlureq_mp; 4404 ip6_t *ip6h; 4405 int err = 0; 4406 mblk_t *first_mp; 4407 ipsec_out_t *io; 4408 ill_t *attach_ill = NULL; 4409 ushort_t ire_marks = 0; 4410 int match_flags; 4411 boolean_t ip6i_present; 4412 ire_t *first_sire = NULL; 4413 mblk_t *copy_mp = NULL; 4414 mblk_t *xmit_mp = NULL; 4415 in6_addr_t save_dst; 4416 uint32_t multirt_flags = 4417 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4418 boolean_t multirt_is_resolvable; 4419 boolean_t multirt_resolve_next; 4420 boolean_t need_rele = B_FALSE; 4421 boolean_t do_attach_ill = B_FALSE; 4422 boolean_t ip6_asp_table_held = B_FALSE; 4423 tsol_ire_gw_secattr_t *attrp = NULL; 4424 tsol_gcgrp_t *gcgrp = NULL; 4425 tsol_gcgrp_addr_t ga; 4426 4427 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4428 4429 first_mp = mp; 4430 if (mp->b_datap->db_type == M_CTL) { 4431 mp = mp->b_cont; 4432 io = (ipsec_out_t *)first_mp->b_rptr; 4433 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4434 } else { 4435 io = NULL; 4436 } 4437 4438 /* 4439 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4440 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4441 * could be NULL. 4442 * 4443 * This information can appear either in an ip6i_t or an IPSEC_OUT 4444 * message. 4445 */ 4446 ip6h = (ip6_t *)mp->b_rptr; 4447 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4448 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4449 if (!ip6i_present || 4450 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4451 attach_ill = ip_grab_attach_ill(ill, first_mp, 4452 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4453 io->ipsec_out_ill_index), B_TRUE); 4454 /* Failure case frees things for us. */ 4455 if (attach_ill == NULL) 4456 return; 4457 4458 /* 4459 * Check if we need an ire that will not be 4460 * looked up by anybody else i.e. HIDDEN. 4461 */ 4462 if (ill_is_probeonly(attach_ill)) 4463 ire_marks = IRE_MARK_HIDDEN; 4464 } 4465 } 4466 4467 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4468 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4469 goto icmp_err_ret; 4470 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4471 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4472 goto icmp_err_ret; 4473 } 4474 4475 /* 4476 * If this IRE is created for forwarding or it is not for 4477 * TCP traffic, mark it as temporary. 4478 * 4479 * Is it sufficient just to check the next header?? 4480 */ 4481 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4482 ire_marks |= IRE_MARK_TEMPORARY; 4483 4484 /* 4485 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4486 * chain until it gets the most specific information available. 4487 * For example, we know that there is no IRE_CACHE for this dest, 4488 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4489 * ire_ftable_lookup_v6 will look up the gateway, etc. 4490 */ 4491 4492 if (ill == NULL) { 4493 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4494 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4495 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4496 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4497 match_flags); 4498 /* 4499 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4500 * in a NULL ill, but the packet could be a neighbor 4501 * solicitation/advertisment and could have a valid attach_ill. 4502 */ 4503 if (attach_ill != NULL) 4504 ill_refrele(attach_ill); 4505 } else { 4506 if (attach_ill != NULL) { 4507 /* 4508 * attach_ill is set only for communicating with 4509 * on-link hosts. So, don't look for DEFAULT. 
4510 * ip_wput_v6 passes the right ill in this case and 4511 * hence we can assert. 4512 */ 4513 ASSERT(ill == attach_ill); 4514 ill_refrele(attach_ill); 4515 do_attach_ill = B_TRUE; 4516 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4517 } else { 4518 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4519 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4520 } 4521 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4522 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4523 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4524 } 4525 4526 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4527 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4528 4529 if (zoneid == ALL_ZONES && ire != NULL) { 4530 /* 4531 * In the forwarding case, we can use a route from any zone 4532 * since we won't change the source address. We can easily 4533 * assert that the source address is already set when there's no 4534 * ip6_info header - otherwise we'd have to call pullupmsg(). 4535 */ 4536 ASSERT(ip6i_present || 4537 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4538 zoneid = ire->ire_zoneid; 4539 } 4540 4541 /* 4542 * We enter a loop that will be run only once in most cases. 4543 * The loop is re-entered in the case where the destination 4544 * can be reached through multiple RTF_MULTIRT-flagged routes. 4545 * The intention is to compute multiple routes to a single 4546 * destination in a single ip_newroute_v6 call. 4547 * The information is contained in sire->ire_flags. 
4548 */ 4549 do { 4550 multirt_resolve_next = B_FALSE; 4551 4552 if (dst_ill != NULL) { 4553 ill_refrele(dst_ill); 4554 dst_ill = NULL; 4555 } 4556 if (src_ipif != NULL) { 4557 ipif_refrele(src_ipif); 4558 src_ipif = NULL; 4559 } 4560 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4561 ip3dbg(("ip_newroute_v6: starting new resolution " 4562 "with first_mp %p, tag %d\n", 4563 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4564 4565 /* 4566 * We check if there are trailing unresolved routes for 4567 * the destination contained in sire. 4568 */ 4569 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4570 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4571 4572 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4573 "ire %p, sire %p\n", 4574 multirt_is_resolvable, (void *)ire, (void *)sire)); 4575 4576 if (!multirt_is_resolvable) { 4577 /* 4578 * No more multirt routes to resolve; give up 4579 * (all routes resolved or no more resolvable 4580 * routes). 4581 */ 4582 if (ire != NULL) { 4583 ire_refrele(ire); 4584 ire = NULL; 4585 } 4586 } else { 4587 ASSERT(sire != NULL); 4588 ASSERT(ire != NULL); 4589 /* 4590 * We simply use first_sire as a flag that 4591 * indicates if a resolvable multirt route has 4592 * already been found during the preceding 4593 * loops. If it is not the case, we may have 4594 * to send an ICMP error to report that the 4595 * destination is unreachable. We do not 4596 * IRE_REFHOLD first_sire. 4597 */ 4598 if (first_sire == NULL) { 4599 first_sire = sire; 4600 } 4601 } 4602 } 4603 if ((ire == NULL) || (ire == sire)) { 4604 /* 4605 * either ire == NULL (the destination cannot be 4606 * resolved) or ire == sire (the gateway cannot be 4607 * resolved). At this point, there are no more routes 4608 * to resolve for the destination, thus we exit. 
4609 */ 4610 if (ip_debug > 3) { 4611 /* ip2dbg */ 4612 pr_addr_dbg("ip_newroute_v6: " 4613 "can't resolve %s\n", AF_INET6, v6dstp); 4614 } 4615 ip3dbg(("ip_newroute_v6: " 4616 "ire %p, sire %p, first_sire %p\n", 4617 (void *)ire, (void *)sire, (void *)first_sire)); 4618 4619 if (sire != NULL) { 4620 ire_refrele(sire); 4621 sire = NULL; 4622 } 4623 4624 if (first_sire != NULL) { 4625 /* 4626 * At least one multirt route has been found 4627 * in the same ip_newroute() call; there is no 4628 * need to report an ICMP error. 4629 * first_sire was not IRE_REFHOLDed. 4630 */ 4631 MULTIRT_DEBUG_UNTAG(first_mp); 4632 freemsg(first_mp); 4633 return; 4634 } 4635 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4636 RTA_DST); 4637 goto icmp_err_ret; 4638 } 4639 4640 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4641 4642 /* 4643 * Verify that the returned IRE does not have either the 4644 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4645 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4646 */ 4647 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4648 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4649 goto icmp_err_ret; 4650 4651 /* 4652 * Increment the ire_ob_pkt_count field for ire if it is an 4653 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4654 * increment the same for the parent IRE, sire, if it is some 4655 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4656 * and HOST_REDIRECT). 4657 */ 4658 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4659 UPDATE_OB_PKT_COUNT(ire); 4660 ire->ire_last_used_time = lbolt; 4661 } 4662 4663 if (sire != NULL) { 4664 mutex_enter(&sire->ire_lock); 4665 v6gw = sire->ire_gateway_addr_v6; 4666 mutex_exit(&sire->ire_lock); 4667 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4668 IRE_INTERFACE)) == 0); 4669 UPDATE_OB_PKT_COUNT(sire); 4670 sire->ire_last_used_time = lbolt; 4671 } else { 4672 v6gw = ipv6_all_zeros; 4673 } 4674 4675 /* 4676 * We have a route to reach the destination. 
4677 * 4678 * 1) If the interface is part of ill group, try to get a new 4679 * ill taking load spreading into account. 4680 * 4681 * 2) After selecting the ill, get a source address that might 4682 * create good inbound load spreading and that matches the 4683 * right scope. ipif_select_source_v6 does this for us. 4684 * 4685 * If the application specified the ill (ifindex), we still 4686 * load spread. Only if the packets needs to go out specifically 4687 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4688 * IPV6_BOUND_PIF we don't try to use a different ill for load 4689 * spreading. 4690 */ 4691 if (!do_attach_ill) { 4692 /* 4693 * If the interface belongs to an interface group, 4694 * make sure the next possible interface in the group 4695 * is used. This encourages load spreading among 4696 * peers in an interface group. However, in the case 4697 * of multirouting, load spreading is not used, as we 4698 * actually want to replicate outgoing packets through 4699 * particular interfaces. 4700 * 4701 * Note: While we pick a dst_ill we are really only 4702 * interested in the ill for load spreading. 4703 * The source ipif is determined by source address 4704 * selection below. 4705 */ 4706 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4707 dst_ill = ire->ire_ipif->ipif_ill; 4708 /* For uniformity do a refhold */ 4709 ill_refhold(dst_ill); 4710 } else { 4711 /* 4712 * If we are here trying to create an IRE_CACHE 4713 * for an offlink destination and have the 4714 * IRE_CACHE for the next hop and the latter is 4715 * using virtual IP source address selection i.e 4716 * it's ire->ire_ipif is pointing to a virtual 4717 * network interface (vni) then 4718 * ip_newroute_get_dst_ll() will return the vni 4719 * interface as the dst_ill. 
Since the vni is 4720 * virtual i.e not associated with any physical 4721 * interface, it cannot be the dst_ill, hence 4722 * in such a case call ip_newroute_get_dst_ll() 4723 * with the stq_ill instead of the ire_ipif ILL. 4724 * The function returns a refheld ill. 4725 */ 4726 if ((ire->ire_type == IRE_CACHE) && 4727 IS_VNI(ire->ire_ipif->ipif_ill)) 4728 dst_ill = ip_newroute_get_dst_ill_v6( 4729 ire->ire_stq->q_ptr); 4730 else 4731 dst_ill = ip_newroute_get_dst_ill_v6( 4732 ire->ire_ipif->ipif_ill); 4733 } 4734 if (dst_ill == NULL) { 4735 if (ip_debug > 2) { 4736 pr_addr_dbg("ip_newroute_v6 : no dst " 4737 "ill for dst %s\n", 4738 AF_INET6, v6dstp); 4739 } 4740 goto icmp_err_ret; 4741 } else if (dst_ill->ill_group == NULL && ill != NULL && 4742 dst_ill != ill) { 4743 /* 4744 * If "ill" is not part of any group, we should 4745 * have found a route matching "ill" as we 4746 * called ire_ftable_lookup_v6 with 4747 * MATCH_IRE_ILL_GROUP. 4748 * Rather than asserting when there is a 4749 * mismatch, we just drop the packet. 4750 */ 4751 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4752 "dst_ill %s ill %s\n", 4753 dst_ill->ill_name, 4754 ill->ill_name)); 4755 goto icmp_err_ret; 4756 } 4757 } else { 4758 dst_ill = ire->ire_ipif->ipif_ill; 4759 /* For uniformity do refhold */ 4760 ill_refhold(dst_ill); 4761 /* 4762 * We should have found a route matching ill as we 4763 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4764 * Rather than asserting, while there is a mismatch, 4765 * we just drop the packet. 4766 */ 4767 if (dst_ill != ill) { 4768 ip0dbg(("ip_newroute_v6: Packet dropped as " 4769 "IP6I_ATTACH_IF ill is %s, " 4770 "ire->ire_ipif->ipif_ill is %s\n", 4771 ill->ill_name, 4772 dst_ill->ill_name)); 4773 goto icmp_err_ret; 4774 } 4775 } 4776 /* 4777 * Pick a source address which matches the scope of the 4778 * destination address. 4779 * For RTF_SETSRC routes, the source address is imposed by the 4780 * parent ire (sire). 
4781 */ 4782 ASSERT(src_ipif == NULL); 4783 if (ire->ire_type == IRE_IF_RESOLVER && 4784 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4785 ip6_asp_can_lookup()) { 4786 /* 4787 * The ire cache entry we're adding is for the 4788 * gateway itself. The source address in this case 4789 * is relative to the gateway's address. 4790 */ 4791 ip6_asp_table_held = B_TRUE; 4792 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4793 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4794 if (src_ipif != NULL) 4795 ire_marks |= IRE_MARK_USESRC_CHECK; 4796 } else { 4797 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4798 /* 4799 * Check that the ipif matching the requested 4800 * source address still exists. 4801 */ 4802 src_ipif = ipif_lookup_addr_v6( 4803 &sire->ire_src_addr_v6, NULL, zoneid, 4804 NULL, NULL, NULL, NULL); 4805 } 4806 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4807 uint_t restrict_ill = RESTRICT_TO_NONE; 4808 4809 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4810 & IP6I_ATTACH_IF) 4811 restrict_ill = RESTRICT_TO_ILL; 4812 ip6_asp_table_held = B_TRUE; 4813 src_ipif = ipif_select_source_v6(dst_ill, 4814 v6dstp, restrict_ill, 4815 IPV6_PREFER_SRC_DEFAULT, zoneid); 4816 if (src_ipif != NULL) 4817 ire_marks |= IRE_MARK_USESRC_CHECK; 4818 } 4819 } 4820 4821 if (src_ipif == NULL) { 4822 if (ip_debug > 2) { 4823 /* ip1dbg */ 4824 pr_addr_dbg("ip_newroute_v6: no src for " 4825 "dst %s\n, ", AF_INET6, v6dstp); 4826 printf("ip_newroute_v6: interface name %s\n", 4827 dst_ill->ill_name); 4828 } 4829 goto icmp_err_ret; 4830 } 4831 4832 if (ip_debug > 3) { 4833 /* ip2dbg */ 4834 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4835 AF_INET6, &v6gw); 4836 } 4837 ip2dbg(("\tire type %s (%d)\n", 4838 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4839 4840 /* 4841 * At this point in ip_newroute_v6(), ire is either the 4842 * IRE_CACHE of the next-hop gateway for an off-subnet 4843 * destination or an IRE_INTERFACE type that should be used 4844 * to resolve an 
on-subnet destination or an on-subnet 4845 * next-hop gateway. 4846 * 4847 * In the IRE_CACHE case, we have the following : 4848 * 4849 * 1) src_ipif - used for getting a source address. 4850 * 4851 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4852 * means packets using this IRE_CACHE will go out on dst_ill. 4853 * 4854 * 3) The IRE sire will point to the prefix that is the longest 4855 * matching route for the destination. These prefix types 4856 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4857 * 4858 * The newly created IRE_CACHE entry for the off-subnet 4859 * destination is tied to both the prefix route and the 4860 * interface route used to resolve the next-hop gateway 4861 * via the ire_phandle and ire_ihandle fields, respectively. 4862 * 4863 * In the IRE_INTERFACE case, we have the following : 4864 * 4865 * 1) src_ipif - used for getting a source address. 4866 * 4867 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4868 * means packets using the IRE_CACHE that we will build 4869 * here will go out on dst_ill. 4870 * 4871 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4872 * to be created will only be tied to the IRE_INTERFACE that 4873 * was derived from the ire_ihandle field. 4874 * 4875 * If sire is non-NULL, it means the destination is off-link 4876 * and we will first create the IRE_CACHE for the gateway. 4877 * Next time through ip_newroute_v6, we will create the 4878 * IRE_CACHE for the final destination as described above. 4879 */ 4880 save_ire = ire; 4881 switch (ire->ire_type) { 4882 case IRE_CACHE: { 4883 ire_t *ipif_ire; 4884 4885 ASSERT(sire != NULL); 4886 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4887 mutex_enter(&ire->ire_lock); 4888 v6gw = ire->ire_gateway_addr_v6; 4889 mutex_exit(&ire->ire_lock); 4890 } 4891 /* 4892 * We need 3 ire's to create a new cache ire for an 4893 * off-link destination from the cache ire of the 4894 * gateway. 4895 * 4896 * 1. The prefix ire 'sire' 4897 * 2. 
The cache ire of the gateway 'ire' 4898 * 3. The interface ire 'ipif_ire' 4899 * 4900 * We have (1) and (2). We lookup (3) below. 4901 * 4902 * If there is no interface route to the gateway, 4903 * it is a race condition, where we found the cache 4904 * but the inteface route has been deleted. 4905 */ 4906 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4907 if (ipif_ire == NULL) { 4908 ip1dbg(("ip_newroute_v6:" 4909 "ire_ihandle_lookup_offlink_v6 failed\n")); 4910 goto icmp_err_ret; 4911 } 4912 /* 4913 * Assume DL_UNITDATA_REQ is same for all physical 4914 * interfaces in the ifgrp. If it isn't, this code will 4915 * have to be seriously rewhacked to allow the 4916 * fastpath probing (such that I cache the link 4917 * header in the IRE_CACHE) to work over ifgrps. 4918 * We have what we need to build an IRE_CACHE. 4919 */ 4920 /* 4921 * Note: the new ire inherits RTF_SETSRC 4922 * and RTF_MULTIRT to propagate these flags from prefix 4923 * to cache. 4924 */ 4925 4926 /* 4927 * Check cached gateway IRE for any security 4928 * attributes; if found, associate the gateway 4929 * credentials group to the destination IRE. 
4930 */ 4931 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4932 mutex_enter(&attrp->igsa_lock); 4933 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4934 GCGRP_REFHOLD(gcgrp); 4935 mutex_exit(&attrp->igsa_lock); 4936 } 4937 4938 ire = ire_create_v6( 4939 v6dstp, /* dest address */ 4940 &ipv6_all_ones, /* mask */ 4941 &src_ipif->ipif_v6src_addr, /* source address */ 4942 &v6gw, /* gateway address */ 4943 &save_ire->ire_max_frag, 4944 NULL, /* Fast Path header */ 4945 dst_ill->ill_rq, /* recv-from queue */ 4946 dst_ill->ill_wq, /* send-to queue */ 4947 IRE_CACHE, 4948 NULL, 4949 src_ipif, 4950 &sire->ire_mask_v6, /* Parent mask */ 4951 sire->ire_phandle, /* Parent handle */ 4952 ipif_ire->ire_ihandle, /* Interface handle */ 4953 sire->ire_flags & /* flags if any */ 4954 (RTF_SETSRC | RTF_MULTIRT), 4955 &(sire->ire_uinfo), 4956 NULL, 4957 gcgrp); 4958 4959 if (ire == NULL) { 4960 if (gcgrp != NULL) { 4961 GCGRP_REFRELE(gcgrp); 4962 gcgrp = NULL; 4963 } 4964 ire_refrele(save_ire); 4965 ire_refrele(ipif_ire); 4966 break; 4967 } 4968 4969 /* reference now held by IRE */ 4970 gcgrp = NULL; 4971 4972 ire->ire_marks |= ire_marks; 4973 4974 /* 4975 * Prevent sire and ipif_ire from getting deleted. The 4976 * newly created ire is tied to both of them via the 4977 * phandle and ihandle respectively. 4978 */ 4979 IRB_REFHOLD(sire->ire_bucket); 4980 /* Has it been removed already ? */ 4981 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4982 IRB_REFRELE(sire->ire_bucket); 4983 ire_refrele(ipif_ire); 4984 ire_refrele(save_ire); 4985 break; 4986 } 4987 4988 IRB_REFHOLD(ipif_ire->ire_bucket); 4989 /* Has it been removed already ? 
*/ 4990 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4991 IRB_REFRELE(ipif_ire->ire_bucket); 4992 IRB_REFRELE(sire->ire_bucket); 4993 ire_refrele(ipif_ire); 4994 ire_refrele(save_ire); 4995 break; 4996 } 4997 4998 xmit_mp = first_mp; 4999 if (ire->ire_flags & RTF_MULTIRT) { 5000 copy_mp = copymsg(first_mp); 5001 if (copy_mp != NULL) { 5002 xmit_mp = copy_mp; 5003 MULTIRT_DEBUG_TAG(first_mp); 5004 } 5005 } 5006 ire_add_then_send(q, ire, xmit_mp); 5007 if (ip6_asp_table_held) { 5008 ip6_asp_table_refrele(); 5009 ip6_asp_table_held = B_FALSE; 5010 } 5011 ire_refrele(save_ire); 5012 5013 /* Assert that sire is not deleted yet. */ 5014 ASSERT(sire->ire_ptpn != NULL); 5015 IRB_REFRELE(sire->ire_bucket); 5016 5017 /* Assert that ipif_ire is not deleted yet. */ 5018 ASSERT(ipif_ire->ire_ptpn != NULL); 5019 IRB_REFRELE(ipif_ire->ire_bucket); 5020 ire_refrele(ipif_ire); 5021 5022 if (copy_mp != NULL) { 5023 /* 5024 * Search for the next unresolved 5025 * multirt route. 5026 */ 5027 copy_mp = NULL; 5028 ipif_ire = NULL; 5029 ire = NULL; 5030 /* re-enter the loop */ 5031 multirt_resolve_next = B_TRUE; 5032 continue; 5033 } 5034 ire_refrele(sire); 5035 ill_refrele(dst_ill); 5036 ipif_refrele(src_ipif); 5037 return; 5038 } 5039 case IRE_IF_NORESOLVER: 5040 /* 5041 * We have what we need to build an IRE_CACHE. 5042 * 5043 * Create a new dlureq_mp with the IPv6 gateway 5044 * address in destination address in the DLPI hdr 5045 * if the physical length is exactly 16 bytes. 5046 */ 5047 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5048 const in6_addr_t *addr; 5049 5050 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5051 addr = &v6gw; 5052 else 5053 addr = v6dstp; 5054 5055 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5056 dst_ill->ill_phys_addr_length, 5057 dst_ill->ill_sap, 5058 dst_ill->ill_sap_length); 5059 } else { 5060 /* 5061 * handle the Gated case, where we create 5062 * a NORESOLVER route for loopback. 
5063 */ 5064 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5065 break; 5066 dlureq_mp = ill_dlur_gen(NULL, 5067 dst_ill->ill_phys_addr_length, 5068 dst_ill->ill_sap, 5069 dst_ill->ill_sap_length); 5070 } 5071 if (dlureq_mp == NULL) 5072 break; 5073 /* 5074 * TSol note: We are creating the ire cache for the 5075 * destination 'dst'. If 'dst' is offlink, going 5076 * through the first hop 'gw', the security attributes 5077 * of 'dst' must be set to point to the gateway 5078 * credentials of gateway 'gw'. If 'dst' is onlink, it 5079 * is possible that 'dst' is a potential gateway that is 5080 * referenced by some route that has some security 5081 * attributes. Thus in the former case, we need to do a 5082 * gcgrp_lookup of 'gw' while in the latter case we 5083 * need to do gcgrp_lookup of 'dst' itself. 5084 */ 5085 ga.ga_af = AF_INET6; 5086 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5087 ga.ga_addr = v6gw; 5088 else 5089 ga.ga_addr = *v6dstp; 5090 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5091 5092 /* 5093 * Note: the new ire inherits sire flags RTF_SETSRC 5094 * and RTF_MULTIRT to propagate those rules from prefix 5095 * to cache. 5096 */ 5097 ire = ire_create_v6( 5098 v6dstp, /* dest address */ 5099 &ipv6_all_ones, /* mask */ 5100 &src_ipif->ipif_v6src_addr, /* source address */ 5101 &v6gw, /* gateway address */ 5102 &save_ire->ire_max_frag, 5103 NULL, /* Fast Path header */ 5104 dst_ill->ill_rq, /* recv-from queue */ 5105 dst_ill->ill_wq, /* send-to queue */ 5106 IRE_CACHE, 5107 dlureq_mp, 5108 src_ipif, 5109 &save_ire->ire_mask_v6, /* Parent mask */ 5110 (sire != NULL) ? /* Parent handle */ 5111 sire->ire_phandle : 0, 5112 save_ire->ire_ihandle, /* Interface handle */ 5113 (sire != NULL) ? 
/* flags if any */ 5114 sire->ire_flags & 5115 (RTF_SETSRC | RTF_MULTIRT) : 0, 5116 &(save_ire->ire_uinfo), 5117 NULL, 5118 gcgrp); 5119 5120 freeb(dlureq_mp); 5121 5122 if (ire == NULL) { 5123 if (gcgrp != NULL) { 5124 GCGRP_REFRELE(gcgrp); 5125 gcgrp = NULL; 5126 } 5127 ire_refrele(save_ire); 5128 break; 5129 } 5130 5131 /* reference now held by IRE */ 5132 gcgrp = NULL; 5133 5134 ire->ire_marks |= ire_marks; 5135 5136 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5137 dst = v6gw; 5138 else 5139 dst = *v6dstp; 5140 err = ndp_noresolver(dst_ill, &dst); 5141 if (err != 0) { 5142 ire_refrele(save_ire); 5143 break; 5144 } 5145 5146 /* Prevent save_ire from getting deleted */ 5147 IRB_REFHOLD(save_ire->ire_bucket); 5148 /* Has it been removed already ? */ 5149 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5150 IRB_REFRELE(save_ire->ire_bucket); 5151 ire_refrele(save_ire); 5152 break; 5153 } 5154 5155 xmit_mp = first_mp; 5156 /* 5157 * In case of MULTIRT, a copy of the current packet 5158 * to send is made to further re-enter the 5159 * loop and attempt another route resolution 5160 */ 5161 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5162 copy_mp = copymsg(first_mp); 5163 if (copy_mp != NULL) { 5164 xmit_mp = copy_mp; 5165 MULTIRT_DEBUG_TAG(first_mp); 5166 } 5167 } 5168 ire_add_then_send(q, ire, xmit_mp); 5169 if (ip6_asp_table_held) { 5170 ip6_asp_table_refrele(); 5171 ip6_asp_table_held = B_FALSE; 5172 } 5173 5174 /* Assert that it is not deleted yet. */ 5175 ASSERT(save_ire->ire_ptpn != NULL); 5176 IRB_REFRELE(save_ire->ire_bucket); 5177 ire_refrele(save_ire); 5178 5179 if (copy_mp != NULL) { 5180 /* 5181 * If we found a (no)resolver, we ignore any 5182 * trailing top priority IRE_CACHE in 5183 * further loops. This ensures that we do not 5184 * omit any (no)resolver despite the priority 5185 * in this call. 5186 * IRE_CACHE, if any, will be processed 5187 * by another thread entering ip_newroute(), 5188 * (on resolver response, for example). 
5189 * We use this to force multiple parallel 5190 * resolution as soon as a packet needs to be 5191 * sent. The result is, after one packet 5192 * emission all reachable routes are generally 5193 * resolved. 5194 * Otherwise, complete resolution of MULTIRT 5195 * routes would require several emissions as 5196 * side effect. 5197 */ 5198 multirt_flags &= ~MULTIRT_CACHEGW; 5199 5200 /* 5201 * Search for the next unresolved multirt 5202 * route. 5203 */ 5204 copy_mp = NULL; 5205 save_ire = NULL; 5206 ire = NULL; 5207 /* re-enter the loop */ 5208 multirt_resolve_next = B_TRUE; 5209 continue; 5210 } 5211 5212 /* Don't need sire anymore */ 5213 if (sire != NULL) 5214 ire_refrele(sire); 5215 ill_refrele(dst_ill); 5216 ipif_refrele(src_ipif); 5217 return; 5218 5219 case IRE_IF_RESOLVER: 5220 /* 5221 * We can't build an IRE_CACHE yet, but at least we 5222 * found a resolver that can help. 5223 */ 5224 dst = *v6dstp; 5225 5226 /* 5227 * To be at this point in the code with a non-zero gw 5228 * means that dst is reachable through a gateway that 5229 * we have never resolved. By changing dst to the gw 5230 * addr we resolve the gateway first. When 5231 * ire_add_then_send() tries to put the IP dg to dst, 5232 * it will reenter ip_newroute() at which time we will 5233 * find the IRE_CACHE for the gw and create another 5234 * IRE_CACHE above (for dst itself). 5235 */ 5236 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5237 save_dst = dst; 5238 dst = v6gw; 5239 v6gw = ipv6_all_zeros; 5240 } 5241 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5242 /* 5243 * Ask the external resolver to do its thing. 
5244 * Make an mblk chain in the following form: 5245 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5246 */ 5247 mblk_t *ire_mp; 5248 mblk_t *areq_mp; 5249 areq_t *areq; 5250 in6_addr_t *addrp; 5251 5252 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5253 if (ip6_asp_table_held) { 5254 ip6_asp_table_refrele(); 5255 ip6_asp_table_held = B_FALSE; 5256 } 5257 ire = ire_create_mp_v6( 5258 &dst, /* dest address */ 5259 &ipv6_all_ones, /* mask */ 5260 &src_ipif->ipif_v6src_addr, 5261 /* source address */ 5262 &v6gw, /* gateway address */ 5263 NULL, /* Fast Path header */ 5264 dst_ill->ill_rq, /* recv-from queue */ 5265 dst_ill->ill_wq, /* send-to queue */ 5266 IRE_CACHE, 5267 NULL, 5268 src_ipif, 5269 &save_ire->ire_mask_v6, 5270 /* Parent mask */ 5271 0, 5272 save_ire->ire_ihandle, 5273 /* Interface handle */ 5274 0, /* flags if any */ 5275 &(save_ire->ire_uinfo), 5276 NULL, 5277 NULL); 5278 5279 ire_refrele(save_ire); 5280 if (ire == NULL) { 5281 ip1dbg(("ip_newroute_v6:" 5282 "ire is NULL\n")); 5283 break; 5284 } 5285 5286 if ((sire != NULL) && 5287 (sire->ire_flags & RTF_MULTIRT)) { 5288 /* 5289 * processing a copy of the packet to 5290 * send for further resolution loops 5291 */ 5292 copy_mp = copymsg(first_mp); 5293 if (copy_mp != NULL) 5294 MULTIRT_DEBUG_TAG(copy_mp); 5295 } 5296 ire->ire_marks |= ire_marks; 5297 ire_mp = ire->ire_mp; 5298 /* 5299 * Now create or find an nce for this interface. 5300 * The hw addr will need to to be set from 5301 * the reply to the AR_ENTRY_QUERY that 5302 * we're about to send. This will be done in 5303 * ire_add_v6(). 5304 */ 5305 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5306 switch (err) { 5307 case 0: 5308 /* 5309 * New cache entry created. 5310 * Break, then ask the external 5311 * resolver. 5312 */ 5313 break; 5314 case EINPROGRESS: 5315 /* 5316 * Resolution in progress; 5317 * packet has been queued by 5318 * ndp_resolver(). 
5319 */ 5320 ire_delete(ire); 5321 ire = NULL; 5322 /* 5323 * Check if another multirt 5324 * route must be resolved. 5325 */ 5326 if (copy_mp != NULL) { 5327 /* 5328 * If we found a resolver, we 5329 * ignore any trailing top 5330 * priority IRE_CACHE in 5331 * further loops. The reason is 5332 * the same as for noresolver. 5333 */ 5334 multirt_flags &= 5335 ~MULTIRT_CACHEGW; 5336 /* 5337 * Search for the next 5338 * unresolved multirt route. 5339 */ 5340 first_mp = copy_mp; 5341 copy_mp = NULL; 5342 mp = first_mp; 5343 if (mp->b_datap->db_type == 5344 M_CTL) { 5345 mp = mp->b_cont; 5346 } 5347 ASSERT(sire != NULL); 5348 dst = save_dst; 5349 /* 5350 * re-enter the loop 5351 */ 5352 multirt_resolve_next = 5353 B_TRUE; 5354 continue; 5355 } 5356 5357 if (sire != NULL) 5358 ire_refrele(sire); 5359 ill_refrele(dst_ill); 5360 ipif_refrele(src_ipif); 5361 return; 5362 default: 5363 /* 5364 * Transient error; packet will be 5365 * freed. 5366 */ 5367 ire_delete(ire); 5368 ire = NULL; 5369 break; 5370 } 5371 if (err != 0) 5372 break; 5373 /* 5374 * Now set up the AR_ENTRY_QUERY and send it. 5375 */ 5376 areq_mp = ill_arp_alloc(dst_ill, 5377 (uchar_t *)&ipv6_areq_template, 5378 (caddr_t)&dst); 5379 if (areq_mp == NULL) { 5380 ip1dbg(("ip_newroute_v6:" 5381 "areq_mp is NULL\n")); 5382 freemsg(ire_mp); 5383 break; 5384 } 5385 areq = (areq_t *)areq_mp->b_rptr; 5386 addrp = (in6_addr_t *)((char *)areq + 5387 areq->areq_target_addr_offset); 5388 *addrp = dst; 5389 addrp = (in6_addr_t *)((char *)areq + 5390 areq->areq_sender_addr_offset); 5391 *addrp = src_ipif->ipif_v6src_addr; 5392 /* 5393 * link the chain, then send up to the resolver. 5394 */ 5395 linkb(areq_mp, ire_mp); 5396 linkb(areq_mp, mp); 5397 ip1dbg(("ip_newroute_v6:" 5398 "putnext to resolver\n")); 5399 putnext(dst_ill->ill_rq, areq_mp); 5400 /* 5401 * Check if another multirt route 5402 * must be resolved. 
5403 */ 5404 ire = NULL; 5405 if (copy_mp != NULL) { 5406 /* 5407 * If we find a resolver, we ignore any 5408 * trailing top priority IRE_CACHE in 5409 * further loops. The reason is the 5410 * same as for noresolver. 5411 */ 5412 multirt_flags &= ~MULTIRT_CACHEGW; 5413 /* 5414 * Search for the next unresolved 5415 * multirt route. 5416 */ 5417 first_mp = copy_mp; 5418 copy_mp = NULL; 5419 mp = first_mp; 5420 if (mp->b_datap->db_type == M_CTL) { 5421 mp = mp->b_cont; 5422 } 5423 ASSERT(sire != NULL); 5424 dst = save_dst; 5425 /* 5426 * re-enter the loop 5427 */ 5428 multirt_resolve_next = B_TRUE; 5429 continue; 5430 } 5431 5432 if (sire != NULL) 5433 ire_refrele(sire); 5434 ill_refrele(dst_ill); 5435 ipif_refrele(src_ipif); 5436 return; 5437 } 5438 /* 5439 * Non-external resolver case. 5440 * 5441 * TSol note: Please see the note above the 5442 * IRE_IF_NORESOLVER case. 5443 */ 5444 ga.ga_af = AF_INET6; 5445 ga.ga_addr = dst; 5446 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5447 5448 ire = ire_create_v6( 5449 &dst, /* dest address */ 5450 &ipv6_all_ones, /* mask */ 5451 &src_ipif->ipif_v6src_addr, /* source address */ 5452 &v6gw, /* gateway address */ 5453 &save_ire->ire_max_frag, 5454 NULL, /* Fast Path header */ 5455 dst_ill->ill_rq, /* recv-from queue */ 5456 dst_ill->ill_wq, /* send-to queue */ 5457 IRE_CACHE, 5458 NULL, 5459 src_ipif, 5460 &save_ire->ire_mask_v6, /* Parent mask */ 5461 0, 5462 save_ire->ire_ihandle, /* Interface handle */ 5463 0, /* flags if any */ 5464 &(save_ire->ire_uinfo), 5465 NULL, 5466 gcgrp); 5467 5468 if (ire == NULL) { 5469 if (gcgrp != NULL) { 5470 GCGRP_REFRELE(gcgrp); 5471 gcgrp = NULL; 5472 } 5473 ire_refrele(save_ire); 5474 break; 5475 } 5476 5477 /* reference now held by IRE */ 5478 gcgrp = NULL; 5479 5480 if ((sire != NULL) && 5481 (sire->ire_flags & RTF_MULTIRT)) { 5482 copy_mp = copymsg(first_mp); 5483 if (copy_mp != NULL) 5484 MULTIRT_DEBUG_TAG(copy_mp); 5485 } 5486 5487 ire->ire_marks |= ire_marks; 5488 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5489 switch (err) { 5490 case 0: 5491 /* Prevent save_ire from getting deleted */ 5492 IRB_REFHOLD(save_ire->ire_bucket); 5493 /* Has it been removed already ? */ 5494 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5495 IRB_REFRELE(save_ire->ire_bucket); 5496 ire_refrele(save_ire); 5497 break; 5498 } 5499 5500 /* 5501 * We have a resolved cache entry, 5502 * add in the IRE. 5503 */ 5504 ire_add_then_send(q, ire, first_mp); 5505 if (ip6_asp_table_held) { 5506 ip6_asp_table_refrele(); 5507 ip6_asp_table_held = B_FALSE; 5508 } 5509 5510 /* Assert that it is not deleted yet. */ 5511 ASSERT(save_ire->ire_ptpn != NULL); 5512 IRB_REFRELE(save_ire->ire_bucket); 5513 ire_refrele(save_ire); 5514 /* 5515 * Check if another multirt route 5516 * must be resolved. 5517 */ 5518 ire = NULL; 5519 if (copy_mp != NULL) { 5520 /* 5521 * If we find a resolver, we ignore any 5522 * trailing top priority IRE_CACHE in 5523 * further loops. The reason is the 5524 * same as for noresolver. 5525 */ 5526 multirt_flags &= ~MULTIRT_CACHEGW; 5527 /* 5528 * Search for the next unresolved 5529 * multirt route. 5530 */ 5531 first_mp = copy_mp; 5532 copy_mp = NULL; 5533 mp = first_mp; 5534 if (mp->b_datap->db_type == M_CTL) { 5535 mp = mp->b_cont; 5536 } 5537 ASSERT(sire != NULL); 5538 dst = save_dst; 5539 /* 5540 * re-enter the loop 5541 */ 5542 multirt_resolve_next = B_TRUE; 5543 continue; 5544 } 5545 5546 if (sire != NULL) 5547 ire_refrele(sire); 5548 ill_refrele(dst_ill); 5549 ipif_refrele(src_ipif); 5550 return; 5551 5552 case EINPROGRESS: 5553 /* 5554 * mp was consumed - presumably queued. 5555 * No need for ire, presumably resolution is 5556 * in progress, and ire will be added when the 5557 * address is resolved. 
5558 */ 5559 if (ip6_asp_table_held) { 5560 ip6_asp_table_refrele(); 5561 ip6_asp_table_held = B_FALSE; 5562 } 5563 ASSERT(ire->ire_nce == NULL); 5564 ire_delete(ire); 5565 ire_refrele(save_ire); 5566 /* 5567 * Check if another multirt route 5568 * must be resolved. 5569 */ 5570 ire = NULL; 5571 if (copy_mp != NULL) { 5572 /* 5573 * If we find a resolver, we ignore any 5574 * trailing top priority IRE_CACHE in 5575 * further loops. The reason is the 5576 * same as for noresolver. 5577 */ 5578 multirt_flags &= ~MULTIRT_CACHEGW; 5579 /* 5580 * Search for the next unresolved 5581 * multirt route. 5582 */ 5583 first_mp = copy_mp; 5584 copy_mp = NULL; 5585 mp = first_mp; 5586 if (mp->b_datap->db_type == M_CTL) { 5587 mp = mp->b_cont; 5588 } 5589 ASSERT(sire != NULL); 5590 dst = save_dst; 5591 /* 5592 * re-enter the loop 5593 */ 5594 multirt_resolve_next = B_TRUE; 5595 continue; 5596 } 5597 if (sire != NULL) 5598 ire_refrele(sire); 5599 ill_refrele(dst_ill); 5600 ipif_refrele(src_ipif); 5601 return; 5602 default: 5603 /* Some transient error */ 5604 ASSERT(ire->ire_nce == NULL); 5605 ire_refrele(save_ire); 5606 break; 5607 } 5608 break; 5609 default: 5610 break; 5611 } 5612 if (ip6_asp_table_held) { 5613 ip6_asp_table_refrele(); 5614 ip6_asp_table_held = B_FALSE; 5615 } 5616 } while (multirt_resolve_next); 5617 5618 err_ret: 5619 ip1dbg(("ip_newroute_v6: dropped\n")); 5620 if (src_ipif != NULL) 5621 ipif_refrele(src_ipif); 5622 if (dst_ill != NULL) { 5623 need_rele = B_TRUE; 5624 ill = dst_ill; 5625 } 5626 if (ill != NULL) { 5627 if (mp->b_prev != NULL) { 5628 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5629 } else { 5630 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5631 } 5632 5633 if (need_rele) 5634 ill_refrele(ill); 5635 } else { 5636 if (mp->b_prev != NULL) { 5637 BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); 5638 } else { 5639 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 5640 } 5641 } 5642 /* Did this packet originate externally? 
*/ 5643 if (mp->b_prev) { 5644 mp->b_next = NULL; 5645 mp->b_prev = NULL; 5646 } 5647 if (copy_mp != NULL) { 5648 MULTIRT_DEBUG_UNTAG(copy_mp); 5649 freemsg(copy_mp); 5650 } 5651 MULTIRT_DEBUG_UNTAG(first_mp); 5652 freemsg(first_mp); 5653 if (ire != NULL) 5654 ire_refrele(ire); 5655 if (sire != NULL) 5656 ire_refrele(sire); 5657 return; 5658 5659 icmp_err_ret: 5660 if (ip6_asp_table_held) 5661 ip6_asp_table_refrele(); 5662 if (src_ipif != NULL) 5663 ipif_refrele(src_ipif); 5664 if (dst_ill != NULL) { 5665 need_rele = B_TRUE; 5666 ill = dst_ill; 5667 } 5668 ip1dbg(("ip_newroute_v6: no route\n")); 5669 if (sire != NULL) 5670 ire_refrele(sire); 5671 /* 5672 * We need to set sire to NULL to avoid double freeing if we 5673 * ever goto err_ret from below. 5674 */ 5675 sire = NULL; 5676 ip6h = (ip6_t *)mp->b_rptr; 5677 /* Skip ip6i_t header if present */ 5678 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5679 /* Make sure the IPv6 header is present */ 5680 if ((mp->b_wptr - (uchar_t *)ip6h) < 5681 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5682 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5683 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5684 goto err_ret; 5685 } 5686 } 5687 mp->b_rptr += sizeof (ip6i_t); 5688 ip6h = (ip6_t *)mp->b_rptr; 5689 } 5690 /* Did this packet originate externally? 
*/ 5691 if (mp->b_prev) { 5692 if (ill != NULL) { 5693 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5694 } else { 5695 BUMP_MIB(&ip6_mib, ipIfStatsInNoRoutes); 5696 } 5697 mp->b_next = NULL; 5698 mp->b_prev = NULL; 5699 q = WR(q); 5700 } else { 5701 if (ill != NULL) { 5702 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5703 } else { 5704 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 5705 } 5706 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5707 /* Failed */ 5708 if (copy_mp != NULL) { 5709 MULTIRT_DEBUG_UNTAG(copy_mp); 5710 freemsg(copy_mp); 5711 } 5712 MULTIRT_DEBUG_UNTAG(first_mp); 5713 freemsg(first_mp); 5714 if (ire != NULL) 5715 ire_refrele(ire); 5716 if (need_rele) 5717 ill_refrele(ill); 5718 return; 5719 } 5720 } 5721 5722 if (need_rele) 5723 ill_refrele(ill); 5724 5725 /* 5726 * At this point we will have ire only if RTF_BLACKHOLE 5727 * or RTF_REJECT flags are set on the IRE. It will not 5728 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5729 */ 5730 if (ire != NULL) { 5731 if (ire->ire_flags & RTF_BLACKHOLE) { 5732 ire_refrele(ire); 5733 if (copy_mp != NULL) { 5734 MULTIRT_DEBUG_UNTAG(copy_mp); 5735 freemsg(copy_mp); 5736 } 5737 MULTIRT_DEBUG_UNTAG(first_mp); 5738 freemsg(first_mp); 5739 return; 5740 } 5741 ire_refrele(ire); 5742 } 5743 if (ip_debug > 3) { 5744 /* ip2dbg */ 5745 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5746 AF_INET6, v6dstp); 5747 } 5748 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5749 B_FALSE, B_FALSE, zoneid); 5750 } 5751 5752 /* 5753 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5754 * we need to send out a packet to a destination address for which we do not 5755 * have specific routing information. It is only used for multicast packets. 5756 * 5757 * If unspec_src we allow creating an IRE with source address zero. 5758 * ire_send_v6() will delete it after the packet is sent. 
5759 */ 5760 void 5761 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5762 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5763 { 5764 ire_t *ire = NULL; 5765 ipif_t *src_ipif = NULL; 5766 int err = 0; 5767 ill_t *dst_ill = NULL; 5768 ire_t *save_ire; 5769 ushort_t ire_marks = 0; 5770 ipsec_out_t *io; 5771 ill_t *attach_ill = NULL; 5772 ill_t *ill; 5773 ip6_t *ip6h; 5774 mblk_t *first_mp; 5775 boolean_t ip6i_present; 5776 ire_t *fire = NULL; 5777 mblk_t *copy_mp = NULL; 5778 boolean_t multirt_resolve_next; 5779 in6_addr_t *v6dstp = &v6dst; 5780 boolean_t ipif_held = B_FALSE; 5781 boolean_t ill_held = B_FALSE; 5782 boolean_t ip6_asp_table_held = B_FALSE; 5783 5784 /* 5785 * This loop is run only once in most cases. 5786 * We loop to resolve further routes only when the destination 5787 * can be reached through multiple RTF_MULTIRT-flagged ires. 5788 */ 5789 do { 5790 multirt_resolve_next = B_FALSE; 5791 if (dst_ill != NULL) { 5792 ill_refrele(dst_ill); 5793 dst_ill = NULL; 5794 } 5795 5796 if (src_ipif != NULL) { 5797 ipif_refrele(src_ipif); 5798 src_ipif = NULL; 5799 } 5800 ASSERT(ipif != NULL); 5801 ill = ipif->ipif_ill; 5802 5803 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5804 if (ip_debug > 2) { 5805 /* ip1dbg */ 5806 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5807 AF_INET6, v6dstp); 5808 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5809 ill->ill_name, ipif->ipif_isv6); 5810 } 5811 5812 first_mp = mp; 5813 if (mp->b_datap->db_type == M_CTL) { 5814 mp = mp->b_cont; 5815 io = (ipsec_out_t *)first_mp->b_rptr; 5816 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5817 } else { 5818 io = NULL; 5819 } 5820 5821 /* 5822 * If the interface is a pt-pt interface we look for an 5823 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5824 * local_address and the pt-pt destination address. 5825 * Otherwise we just match the local address. 
5826 */ 5827 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5828 goto err_ret; 5829 } 5830 /* 5831 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5832 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5833 * as it could be NULL. 5834 * 5835 * This information can appear either in an ip6i_t or an 5836 * IPSEC_OUT message. 5837 */ 5838 ip6h = (ip6_t *)mp->b_rptr; 5839 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5840 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5841 if (!ip6i_present || 5842 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5843 attach_ill = ip_grab_attach_ill(ill, first_mp, 5844 (ip6i_present ? 5845 ((ip6i_t *)ip6h)->ip6i_ifindex : 5846 io->ipsec_out_ill_index), B_TRUE); 5847 /* Failure case frees things for us. */ 5848 if (attach_ill == NULL) 5849 return; 5850 5851 /* 5852 * Check if we need an ire that will not be 5853 * looked up by anybody else i.e. HIDDEN. 5854 */ 5855 if (ill_is_probeonly(attach_ill)) 5856 ire_marks = IRE_MARK_HIDDEN; 5857 } 5858 } 5859 5860 /* 5861 * We check if an IRE_OFFSUBNET for the addr that goes through 5862 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5863 * RTF_MULTIRT flags must be honored. 5864 */ 5865 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5866 ip2dbg(("ip_newroute_ipif_v6: " 5867 "ipif_lookup_multi_ire_v6(" 5868 "ipif %p, dst %08x) = fire %p\n", 5869 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5870 (void *)fire)); 5871 5872 /* 5873 * If the application specified the ill (ifindex), we still 5874 * load spread. Only if the packets needs to go out specifically 5875 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5876 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5877 * multirouting, then we don't try to use a different ill for 5878 * load spreading. 
5879 */ 5880 if (attach_ill == NULL) { 5881 /* 5882 * If the interface belongs to an interface group, 5883 * make sure the next possible interface in the group 5884 * is used. This encourages load spreading among peers 5885 * in an interface group. 5886 * 5887 * Note: While we pick a dst_ill we are really only 5888 * interested in the ill for load spreading. The source 5889 * ipif is determined by source address selection below. 5890 */ 5891 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5892 dst_ill = ipif->ipif_ill; 5893 /* For uniformity do a refhold */ 5894 ill_refhold(dst_ill); 5895 } else { 5896 /* refheld by ip_newroute_get_dst_ill_v6 */ 5897 dst_ill = 5898 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5899 } 5900 if (dst_ill == NULL) { 5901 if (ip_debug > 2) { 5902 pr_addr_dbg("ip_newroute_ipif_v6: " 5903 "no dst ill for dst %s\n", 5904 AF_INET6, v6dstp); 5905 } 5906 goto err_ret; 5907 } 5908 } else { 5909 dst_ill = ipif->ipif_ill; 5910 /* 5911 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5912 * and IPV6_BOUND_PIF case. 5913 */ 5914 ASSERT(dst_ill == attach_ill); 5915 /* attach_ill is already refheld */ 5916 } 5917 /* 5918 * Pick a source address which matches the scope of the 5919 * destination address. 5920 * For RTF_SETSRC routes, the source address is imposed by the 5921 * parent ire (fire). 5922 */ 5923 ASSERT(src_ipif == NULL); 5924 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5925 /* 5926 * Check that the ipif matching the requested source 5927 * address still exists. 
5928 */ 5929 src_ipif = 5930 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5931 NULL, zoneid, NULL, NULL, NULL, NULL); 5932 } 5933 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5934 ip6_asp_table_held = B_TRUE; 5935 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5936 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5937 } 5938 5939 if (src_ipif == NULL) { 5940 if (!unspec_src) { 5941 if (ip_debug > 2) { 5942 /* ip1dbg */ 5943 pr_addr_dbg("ip_newroute_ipif_v6: " 5944 "no src for dst %s\n,", 5945 AF_INET6, v6dstp); 5946 printf(" through interface %s\n", 5947 dst_ill->ill_name); 5948 } 5949 goto err_ret; 5950 } 5951 src_ipif = ipif; 5952 ipif_refhold(src_ipif); 5953 } 5954 ire = ipif_to_ire_v6(ipif); 5955 if (ire == NULL) { 5956 if (ip_debug > 2) { 5957 /* ip1dbg */ 5958 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5959 AF_INET6, &ipif->ipif_v6lcl_addr); 5960 printf("ip_newroute_ipif_v6: " 5961 "if %s\n", dst_ill->ill_name); 5962 } 5963 goto err_ret; 5964 } 5965 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5966 goto err_ret; 5967 5968 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5969 5970 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5971 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5972 if (ip_debug > 2) { 5973 /* ip1dbg */ 5974 pr_addr_dbg(" address %s\n", 5975 AF_INET6, &ire->ire_src_addr_v6); 5976 } 5977 save_ire = ire; 5978 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5979 (void *)ire, (void *)ipif)); 5980 5981 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5982 /* 5983 * an IRE_OFFSUBET was looked up 5984 * on that interface. 5985 * this ire has RTF_MULTIRT flag, 5986 * so the resolution loop 5987 * will be re-entered to resolve 5988 * additional routes on other 5989 * interfaces. For that purpose, 5990 * a copy of the packet is 5991 * made at this point. 
5992 */ 5993 fire->ire_last_used_time = lbolt; 5994 copy_mp = copymsg(first_mp); 5995 if (copy_mp) { 5996 MULTIRT_DEBUG_TAG(copy_mp); 5997 } 5998 } 5999 6000 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6001 switch (ire->ire_type) { 6002 case IRE_IF_NORESOLVER: { 6003 /* We have what we need to build an IRE_CACHE. */ 6004 mblk_t *dlureq_mp; 6005 6006 /* 6007 * Create a new dlureq_mp with the 6008 * IPv6 gateway address in destination address in the 6009 * DLPI hdr if the physical length is exactly 16 bytes. 6010 */ 6011 ASSERT(dst_ill->ill_isv6); 6012 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 6013 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 6014 dst_ill->ill_phys_addr_length, 6015 dst_ill->ill_sap, 6016 dst_ill->ill_sap_length); 6017 } else { 6018 /* 6019 * handle the Gated case, where we create 6020 * a NORESOLVER route for loopback. 6021 */ 6022 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 6023 break; 6024 dlureq_mp = ill_dlur_gen(NULL, 6025 dst_ill->ill_phys_addr_length, 6026 dst_ill->ill_sap, 6027 dst_ill->ill_sap_length); 6028 } 6029 6030 if (dlureq_mp == NULL) 6031 break; 6032 /* 6033 * The newly created ire will inherit the flags of the 6034 * parent ire, if any. 6035 */ 6036 ire = ire_create_v6( 6037 v6dstp, /* dest address */ 6038 &ipv6_all_ones, /* mask */ 6039 &src_ipif->ipif_v6src_addr, /* source address */ 6040 NULL, /* gateway address */ 6041 &save_ire->ire_max_frag, 6042 NULL, /* Fast Path header */ 6043 dst_ill->ill_rq, /* recv-from queue */ 6044 dst_ill->ill_wq, /* send-to queue */ 6045 IRE_CACHE, 6046 dlureq_mp, 6047 src_ipif, 6048 NULL, 6049 (fire != NULL) ? /* Parent handle */ 6050 fire->ire_phandle : 0, 6051 save_ire->ire_ihandle, /* Interface handle */ 6052 (fire != NULL) ? 
6053 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6054 0, 6055 &ire_uinfo_null, 6056 NULL, 6057 NULL); 6058 6059 freeb(dlureq_mp); 6060 6061 if (ire == NULL) { 6062 ire_refrele(save_ire); 6063 break; 6064 } 6065 6066 ire->ire_marks |= ire_marks; 6067 6068 err = ndp_noresolver(dst_ill, v6dstp); 6069 if (err != 0) { 6070 ire_refrele(save_ire); 6071 break; 6072 } 6073 6074 /* Prevent save_ire from getting deleted */ 6075 IRB_REFHOLD(save_ire->ire_bucket); 6076 /* Has it been removed already ? */ 6077 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6078 IRB_REFRELE(save_ire->ire_bucket); 6079 ire_refrele(save_ire); 6080 break; 6081 } 6082 6083 ire_add_then_send(q, ire, first_mp); 6084 if (ip6_asp_table_held) { 6085 ip6_asp_table_refrele(); 6086 ip6_asp_table_held = B_FALSE; 6087 } 6088 6089 /* Assert that it is not deleted yet. */ 6090 ASSERT(save_ire->ire_ptpn != NULL); 6091 IRB_REFRELE(save_ire->ire_bucket); 6092 ire_refrele(save_ire); 6093 if (fire != NULL) { 6094 ire_refrele(fire); 6095 fire = NULL; 6096 } 6097 6098 /* 6099 * The resolution loop is re-entered if we 6100 * actually are in a multirouting case. 6101 */ 6102 if (copy_mp != NULL) { 6103 boolean_t need_resolve = 6104 ire_multirt_need_resolve_v6(v6dstp, 6105 MBLK_GETLABEL(copy_mp)); 6106 if (!need_resolve) { 6107 MULTIRT_DEBUG_UNTAG(copy_mp); 6108 freemsg(copy_mp); 6109 copy_mp = NULL; 6110 } else { 6111 /* 6112 * ipif_lookup_group_v6() calls 6113 * ire_lookup_multi_v6() that uses 6114 * ire_ftable_lookup_v6() to find 6115 * an IRE_INTERFACE for the group. 6116 * In the multirt case, 6117 * ire_lookup_multi_v6() then invokes 6118 * ire_multirt_lookup_v6() to find 6119 * the next resolvable ire. 6120 * As a result, we obtain a new 6121 * interface, derived from the 6122 * next ire. 
6123 */ 6124 if (ipif_held) { 6125 ipif_refrele(ipif); 6126 ipif_held = B_FALSE; 6127 } 6128 ipif = ipif_lookup_group_v6(v6dstp, 6129 zoneid); 6130 ip2dbg(("ip_newroute_ipif: " 6131 "multirt dst %08x, ipif %p\n", 6132 ntohl(V4_PART_OF_V6((*v6dstp))), 6133 (void *)ipif)); 6134 if (ipif != NULL) { 6135 ipif_held = B_TRUE; 6136 mp = copy_mp; 6137 copy_mp = NULL; 6138 multirt_resolve_next = 6139 B_TRUE; 6140 continue; 6141 } else { 6142 freemsg(copy_mp); 6143 } 6144 } 6145 } 6146 ill_refrele(dst_ill); 6147 if (ipif_held) { 6148 ipif_refrele(ipif); 6149 ipif_held = B_FALSE; 6150 } 6151 if (src_ipif != NULL) 6152 ipif_refrele(src_ipif); 6153 return; 6154 } 6155 case IRE_IF_RESOLVER: { 6156 6157 ASSERT(dst_ill->ill_isv6); 6158 6159 /* 6160 * We obtain a partial IRE_CACHE which we will pass 6161 * along with the resolver query. When the response 6162 * comes back it will be there ready for us to add. 6163 */ 6164 /* 6165 * the newly created ire will inherit the flags of the 6166 * parent ire, if any. 6167 */ 6168 ire = ire_create_v6( 6169 v6dstp, /* dest address */ 6170 &ipv6_all_ones, /* mask */ 6171 &src_ipif->ipif_v6src_addr, /* source address */ 6172 NULL, /* gateway address */ 6173 &save_ire->ire_max_frag, 6174 NULL, /* Fast Path header */ 6175 dst_ill->ill_rq, /* recv-from queue */ 6176 dst_ill->ill_wq, /* send-to queue */ 6177 IRE_CACHE, 6178 NULL, 6179 src_ipif, 6180 NULL, 6181 (fire != NULL) ? /* Parent handle */ 6182 fire->ire_phandle : 0, 6183 save_ire->ire_ihandle, /* Interface handle */ 6184 (fire != NULL) ? 
6185 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6186 0, 6187 &ire_uinfo_null, 6188 NULL, 6189 NULL); 6190 6191 if (ire == NULL) { 6192 ire_refrele(save_ire); 6193 break; 6194 } 6195 6196 ire->ire_marks |= ire_marks; 6197 6198 /* Resolve and add ire to the ctable */ 6199 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6200 switch (err) { 6201 case 0: 6202 /* Prevent save_ire from getting deleted */ 6203 IRB_REFHOLD(save_ire->ire_bucket); 6204 /* Has it been removed already ? */ 6205 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6206 IRB_REFRELE(save_ire->ire_bucket); 6207 ire_refrele(save_ire); 6208 break; 6209 } 6210 /* 6211 * We have a resolved cache entry, 6212 * add in the IRE. 6213 */ 6214 ire_add_then_send(q, ire, first_mp); 6215 if (ip6_asp_table_held) { 6216 ip6_asp_table_refrele(); 6217 ip6_asp_table_held = B_FALSE; 6218 } 6219 6220 /* Assert that it is not deleted yet. */ 6221 ASSERT(save_ire->ire_ptpn != NULL); 6222 IRB_REFRELE(save_ire->ire_bucket); 6223 ire_refrele(save_ire); 6224 if (fire != NULL) { 6225 ire_refrele(fire); 6226 fire = NULL; 6227 } 6228 6229 /* 6230 * The resolution loop is re-entered if we 6231 * actually are in a multirouting case. 6232 */ 6233 if (copy_mp != NULL) { 6234 boolean_t need_resolve = 6235 ire_multirt_need_resolve_v6(v6dstp, 6236 MBLK_GETLABEL(copy_mp)); 6237 if (!need_resolve) { 6238 MULTIRT_DEBUG_UNTAG(copy_mp); 6239 freemsg(copy_mp); 6240 copy_mp = NULL; 6241 } else { 6242 /* 6243 * ipif_lookup_group_v6() calls 6244 * ire_lookup_multi_v6() that 6245 * uses ire_ftable_lookup_v6() 6246 * to find an IRE_INTERFACE for 6247 * the group. In the multirt 6248 * case, ire_lookup_multi_v6() 6249 * then invokes 6250 * ire_multirt_lookup_v6() to 6251 * find the next resolvable ire. 6252 * As a result, we obtain a new 6253 * interface, derived from the 6254 * next ire. 
6255 */ 6256 if (ipif_held) { 6257 ipif_refrele(ipif); 6258 ipif_held = B_FALSE; 6259 } 6260 ipif = ipif_lookup_group_v6( 6261 v6dstp, zoneid); 6262 ip2dbg(("ip_newroute_ipif: " 6263 "multirt dst %08x, " 6264 "ipif %p\n", 6265 ntohl(V4_PART_OF_V6( 6266 (*v6dstp))), 6267 (void *)ipif)); 6268 if (ipif != NULL) { 6269 ipif_held = B_TRUE; 6270 mp = copy_mp; 6271 copy_mp = NULL; 6272 multirt_resolve_next = 6273 B_TRUE; 6274 continue; 6275 } else { 6276 freemsg(copy_mp); 6277 } 6278 } 6279 } 6280 ill_refrele(dst_ill); 6281 if (ipif_held) { 6282 ipif_refrele(ipif); 6283 ipif_held = B_FALSE; 6284 } 6285 if (src_ipif != NULL) 6286 ipif_refrele(src_ipif); 6287 return; 6288 6289 case EINPROGRESS: 6290 /* 6291 * mp was consumed - presumably queued. 6292 * No need for ire, presumably resolution is 6293 * in progress, and ire will be added when the 6294 * address is resolved. 6295 */ 6296 if (ip6_asp_table_held) { 6297 ip6_asp_table_refrele(); 6298 ip6_asp_table_held = B_FALSE; 6299 } 6300 ire_delete(ire); 6301 ire_refrele(save_ire); 6302 if (fire != NULL) { 6303 ire_refrele(fire); 6304 fire = NULL; 6305 } 6306 6307 /* 6308 * The resolution loop is re-entered if we 6309 * actually are in a multirouting case. 6310 */ 6311 if (copy_mp != NULL) { 6312 boolean_t need_resolve = 6313 ire_multirt_need_resolve_v6(v6dstp, 6314 MBLK_GETLABEL(copy_mp)); 6315 if (!need_resolve) { 6316 MULTIRT_DEBUG_UNTAG(copy_mp); 6317 freemsg(copy_mp); 6318 copy_mp = NULL; 6319 } else { 6320 /* 6321 * ipif_lookup_group_v6() calls 6322 * ire_lookup_multi_v6() that 6323 * uses ire_ftable_lookup_v6() 6324 * to find an IRE_INTERFACE for 6325 * the group. In the multirt 6326 * case, ire_lookup_multi_v6() 6327 * then invokes 6328 * ire_multirt_lookup_v6() to 6329 * find the next resolvable ire. 6330 * As a result, we obtain a new 6331 * interface, derived from the 6332 * next ire. 
6333 */ 6334 if (ipif_held) { 6335 ipif_refrele(ipif); 6336 ipif_held = B_FALSE; 6337 } 6338 ipif = ipif_lookup_group_v6( 6339 v6dstp, zoneid); 6340 ip2dbg(("ip_newroute_ipif: " 6341 "multirt dst %08x, " 6342 "ipif %p\n", 6343 ntohl(V4_PART_OF_V6( 6344 (*v6dstp))), 6345 (void *)ipif)); 6346 if (ipif != NULL) { 6347 ipif_held = B_TRUE; 6348 mp = copy_mp; 6349 copy_mp = NULL; 6350 multirt_resolve_next = 6351 B_TRUE; 6352 continue; 6353 } else { 6354 freemsg(copy_mp); 6355 } 6356 } 6357 } 6358 ill_refrele(dst_ill); 6359 if (ipif_held) { 6360 ipif_refrele(ipif); 6361 ipif_held = B_FALSE; 6362 } 6363 if (src_ipif != NULL) 6364 ipif_refrele(src_ipif); 6365 return; 6366 default: 6367 /* Some transient error */ 6368 ire_refrele(save_ire); 6369 break; 6370 } 6371 break; 6372 } 6373 default: 6374 break; 6375 } 6376 if (ip6_asp_table_held) { 6377 ip6_asp_table_refrele(); 6378 ip6_asp_table_held = B_FALSE; 6379 } 6380 } while (multirt_resolve_next); 6381 6382 err_ret: 6383 if (ip6_asp_table_held) 6384 ip6_asp_table_refrele(); 6385 if (ire != NULL) 6386 ire_refrele(ire); 6387 if (fire != NULL) 6388 ire_refrele(fire); 6389 if (ipif != NULL && ipif_held) 6390 ipif_refrele(ipif); 6391 if (src_ipif != NULL) 6392 ipif_refrele(src_ipif); 6393 /* Multicast - no point in trying to generate ICMP error */ 6394 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6395 if (dst_ill != NULL) { 6396 ill = dst_ill; 6397 ill_held = B_TRUE; 6398 } 6399 if (mp->b_prev || mp->b_next) { 6400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6401 } else { 6402 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6403 } 6404 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6405 mp->b_next = NULL; 6406 mp->b_prev = NULL; 6407 freemsg(first_mp); 6408 if (ill_held) 6409 ill_refrele(ill); 6410 } 6411 6412 /* 6413 * Parse and process any hop-by-hop or destination options. 6414 * 6415 * Assumes that q is an ill read queue so that ICMP errors for link-local 6416 * destinations are sent out the correct interface. 
6417 * 6418 * Returns -1 if there was an error and mp has been consumed. 6419 * Returns 0 if no special action is needed. 6420 * Returns 1 if the packet contained a router alert option for this node 6421 * which is verified to be "interesting/known" for our implementation. 6422 * 6423 * XXX Note: In future as more hbh or dest options are defined, 6424 * it may be better to have different routines for hbh and dest 6425 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6426 * may have same value in different namespaces. Or is it same namespace ?? 6427 * Current code checks for each opt_type (other than pads) if it is in 6428 * the expected nexthdr (hbh or dest) 6429 */ 6430 static int 6431 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6432 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6433 { 6434 uint8_t opt_type; 6435 uint_t optused; 6436 int ret = 0; 6437 mblk_t *first_mp; 6438 const char *errtype; 6439 zoneid_t zoneid; 6440 ill_t *ill = q->q_ptr; 6441 6442 first_mp = mp; 6443 if (mp->b_datap->db_type == M_CTL) { 6444 mp = mp->b_cont; 6445 } 6446 6447 while (optlen != 0) { 6448 opt_type = *optptr; 6449 if (opt_type == IP6OPT_PAD1) { 6450 optused = 1; 6451 } else { 6452 if (optlen < 2) 6453 goto bad_opt; 6454 errtype = "malformed"; 6455 if (opt_type == ip6opt_ls) { 6456 optused = 2 + optptr[1]; 6457 if (optused > optlen) 6458 goto bad_opt; 6459 } else switch (opt_type) { 6460 case IP6OPT_PADN: 6461 /* 6462 * Note:We don't verify that (N-2) pad octets 6463 * are zero as required by spec. Adhere to 6464 * "be liberal in what you accept..." part of 6465 * implementation philosophy (RFC791,RFC1122) 6466 */ 6467 optused = 2 + optptr[1]; 6468 if (optused > optlen) 6469 goto bad_opt; 6470 break; 6471 6472 case IP6OPT_JUMBO: 6473 if (hdr_type != IPPROTO_HOPOPTS) 6474 goto opt_error; 6475 goto opt_error; /* XXX Not implemented! 
*/ 6476 6477 case IP6OPT_ROUTER_ALERT: { 6478 struct ip6_opt_router *or; 6479 6480 if (hdr_type != IPPROTO_HOPOPTS) 6481 goto opt_error; 6482 optused = 2 + optptr[1]; 6483 if (optused > optlen) 6484 goto bad_opt; 6485 or = (struct ip6_opt_router *)optptr; 6486 /* Check total length and alignment */ 6487 if (optused != sizeof (*or) || 6488 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6489 goto opt_error; 6490 /* Check value */ 6491 switch (*((uint16_t *)or->ip6or_value)) { 6492 case IP6_ALERT_MLD: 6493 case IP6_ALERT_RSVP: 6494 ret = 1; 6495 } 6496 break; 6497 } 6498 case IP6OPT_HOME_ADDRESS: { 6499 /* 6500 * Minimal support for the home address option 6501 * (which is required by all IPv6 nodes). 6502 * Implement by just swapping the home address 6503 * and source address. 6504 * XXX Note: this has IPsec implications since 6505 * AH needs to take this into account. 6506 * Also, when IPsec is used we need to ensure 6507 * that this is only processed once 6508 * in the received packet (to avoid swapping 6509 * back and forth). 6510 * NOTE:This option processing is considered 6511 * to be unsafe and prone to a denial of 6512 * service attack. 6513 * The current processing is not safe even with 6514 * IPsec secured IP packets. Since the home 6515 * address option processing requirement still 6516 * is in the IETF draft and in the process of 6517 * being redefined for its usage, it has been 6518 * decided to turn off the option by default. 6519 * If this section of code needs to be executed, 6520 * ndd variable ip6_ignore_home_address_opt 6521 * should be set to 0 at the user's own risk. 6522 */ 6523 struct ip6_opt_home_address *oh; 6524 in6_addr_t tmp; 6525 6526 if (ipv6_ignore_home_address_opt) 6527 goto opt_error; 6528 6529 if (hdr_type != IPPROTO_DSTOPTS) 6530 goto opt_error; 6531 optused = 2 + optptr[1]; 6532 if (optused > optlen) 6533 goto bad_opt; 6534 6535 /* 6536 * We did this dest. opt the first time 6537 * around (i.e. before AH processing). 
6538 * If we've done AH... stop now. 6539 */ 6540 if (first_mp != mp) { 6541 ipsec_in_t *ii; 6542 6543 ii = (ipsec_in_t *)first_mp->b_rptr; 6544 if (ii->ipsec_in_ah_sa != NULL) 6545 break; 6546 } 6547 6548 oh = (struct ip6_opt_home_address *)optptr; 6549 /* Check total length and alignment */ 6550 if (optused < sizeof (*oh) || 6551 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6552 goto opt_error; 6553 /* Swap ip6_src and the home address */ 6554 tmp = ip6h->ip6_src; 6555 /* XXX Note: only 8 byte alignment option */ 6556 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6557 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6558 break; 6559 } 6560 6561 case IP6OPT_TUNNEL_LIMIT: 6562 if (hdr_type != IPPROTO_DSTOPTS) { 6563 goto opt_error; 6564 } 6565 optused = 2 + optptr[1]; 6566 if (optused > optlen) { 6567 goto bad_opt; 6568 } 6569 if (optused != 3) { 6570 goto opt_error; 6571 } 6572 break; 6573 6574 default: 6575 errtype = "unknown"; 6576 /* FALLTHROUGH */ 6577 opt_error: 6578 /* Determine which zone should send error */ 6579 zoneid = ipif_lookup_addr_zoneid_v6( 6580 &ip6h->ip6_dst, ill); 6581 switch (IP6OPT_TYPE(opt_type)) { 6582 case IP6OPT_TYPE_SKIP: 6583 optused = 2 + optptr[1]; 6584 if (optused > optlen) 6585 goto bad_opt; 6586 ip1dbg(("ip_process_options_v6: %s " 6587 "opt 0x%x skipped\n", 6588 errtype, opt_type)); 6589 break; 6590 case IP6OPT_TYPE_DISCARD: 6591 ip1dbg(("ip_process_options_v6: %s " 6592 "opt 0x%x; packet dropped\n", 6593 errtype, opt_type)); 6594 freemsg(first_mp); 6595 return (-1); 6596 case IP6OPT_TYPE_ICMP: 6597 if (zoneid == ALL_ZONES) { 6598 freemsg(first_mp); 6599 return (-1); 6600 } 6601 icmp_param_problem_v6(WR(q), first_mp, 6602 ICMP6_PARAMPROB_OPTION, 6603 (uint32_t)(optptr - 6604 (uint8_t *)ip6h), 6605 B_FALSE, B_FALSE, zoneid); 6606 return (-1); 6607 case IP6OPT_TYPE_FORCEICMP: 6608 if (zoneid == ALL_ZONES) { 6609 freemsg(first_mp); 6610 return (-1); 6611 } 6612 icmp_param_problem_v6(WR(q), first_mp, 6613 ICMP6_PARAMPROB_OPTION, 6614 
(uint32_t)(optptr - 6615 (uint8_t *)ip6h), 6616 B_FALSE, B_TRUE, zoneid); 6617 return (-1); 6618 default: 6619 ASSERT(0); 6620 } 6621 } 6622 } 6623 optlen -= optused; 6624 optptr += optused; 6625 } 6626 return (ret); 6627 6628 bad_opt: 6629 /* Determine which zone should send error */ 6630 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 6631 if (zoneid == ALL_ZONES) { 6632 freemsg(first_mp); 6633 } else { 6634 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6635 (uint32_t)(optptr - (uint8_t *)ip6h), 6636 B_FALSE, B_FALSE, zoneid); 6637 } 6638 return (-1); 6639 } 6640 6641 /* 6642 * Process a routing header that is not yet empty. 6643 * Only handles type 0 routing headers. 6644 */ 6645 static void 6646 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6647 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6648 { 6649 ip6_rthdr0_t *rthdr; 6650 uint_t ehdrlen; 6651 uint_t numaddr; 6652 in6_addr_t *addrptr; 6653 in6_addr_t tmp; 6654 6655 ASSERT(rth->ip6r_segleft != 0); 6656 6657 if (!ipv6_forward_src_routed) { 6658 /* XXX Check for source routed out same interface? 
*/ 6659 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6660 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6661 freemsg(hada_mp); 6662 freemsg(mp); 6663 return; 6664 } 6665 6666 if (rth->ip6r_type != 0) { 6667 if (hada_mp != NULL) 6668 goto hada_drop; 6669 /* Sent by forwarding path, and router is global zone */ 6670 icmp_param_problem_v6(WR(q), mp, 6671 ICMP6_PARAMPROB_HEADER, 6672 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6673 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6674 return; 6675 } 6676 rthdr = (ip6_rthdr0_t *)rth; 6677 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6678 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6679 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6680 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6681 if (rthdr->ip6r0_len & 0x1) { 6682 /* An odd length is impossible */ 6683 if (hada_mp != NULL) 6684 goto hada_drop; 6685 /* Sent by forwarding path, and router is global zone */ 6686 icmp_param_problem_v6(WR(q), mp, 6687 ICMP6_PARAMPROB_HEADER, 6688 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6689 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6690 return; 6691 } 6692 numaddr = rthdr->ip6r0_len / 2; 6693 if (rthdr->ip6r0_segleft > numaddr) { 6694 /* segleft exceeds number of addresses in routing header */ 6695 if (hada_mp != NULL) 6696 goto hada_drop; 6697 /* Sent by forwarding path, and router is global zone */ 6698 icmp_param_problem_v6(WR(q), mp, 6699 ICMP6_PARAMPROB_HEADER, 6700 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6701 (uchar_t *)ip6h), 6702 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6703 return; 6704 } 6705 addrptr += (numaddr - rthdr->ip6r0_segleft); 6706 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6707 IN6_IS_ADDR_MULTICAST(addrptr)) { 6708 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6709 freemsg(hada_mp); 6710 freemsg(mp); 6711 return; 6712 } 6713 /* Swap */ 6714 tmp = *addrptr; 6715 *addrptr = ip6h->ip6_dst; 6716 ip6h->ip6_dst = tmp; 6717 rthdr->ip6r0_segleft--; 6718 /* Don't allow 
any mapped addresses - ip_wput_v6 can't handle them */ 6719 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6720 if (hada_mp != NULL) 6721 goto hada_drop; 6722 /* Sent by forwarding path, and router is global zone */ 6723 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6724 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6725 return; 6726 } 6727 if (ip_check_v6_mblk(mp, ill) == 0) { 6728 ip6h = (ip6_t *)mp->b_rptr; 6729 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6730 } 6731 return; 6732 hada_drop: 6733 /* IPsec kstats: bean counter? */ 6734 freemsg(hada_mp); 6735 freemsg(mp); 6736 } 6737 6738 /* 6739 * Read side put procedure for IPv6 module. 6740 */ 6741 void 6742 ip_rput_v6(queue_t *q, mblk_t *mp) 6743 { 6744 mblk_t *first_mp; 6745 mblk_t *hada_mp = NULL; 6746 ip6_t *ip6h; 6747 boolean_t ll_multicast = B_FALSE; 6748 boolean_t mctl_present = B_FALSE; 6749 ill_t *ill; 6750 struct iocblk *iocp; 6751 uint_t flags = 0; 6752 mblk_t *dl_mp; 6753 6754 ill = (ill_t *)q->q_ptr; 6755 if (ill->ill_state_flags & ILL_CONDEMNED) { 6756 union DL_primitives *dl; 6757 6758 dl = (union DL_primitives *)mp->b_rptr; 6759 /* 6760 * Things are opening or closing - only accept DLPI 6761 * ack messages. If the stream is closing and ip_wsrv 6762 * has completed, ip_close is out of the qwait, but has 6763 * not yet completed qprocsoff. Don't proceed any further 6764 * because the ill has been cleaned up and things hanging 6765 * off the ill have been freed. 6766 */ 6767 if ((mp->b_datap->db_type != M_PCPROTO) || 6768 (dl->dl_primitive == DL_UNITDATA_IND)) { 6769 inet_freemsg(mp); 6770 return; 6771 } 6772 } 6773 6774 dl_mp = NULL; 6775 switch (mp->b_datap->db_type) { 6776 case M_DATA: { 6777 int hlen; 6778 uchar_t *ucp; 6779 struct ether_header *eh; 6780 dl_unitdata_ind_t *dui; 6781 6782 /* 6783 * This is a work-around for CR 6451644, a bug in Nemo. It 6784 * should be removed when that problem is fixed. 
6785 */ 6786 if (ill->ill_mactype == DL_ETHER && 6787 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6788 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6789 ucp[-2] == (IP6_DL_SAP >> 8)) { 6790 if (hlen >= sizeof (struct ether_vlan_header) && 6791 ucp[-5] == 0 && ucp[-6] == 0x81) 6792 ucp -= sizeof (struct ether_vlan_header); 6793 else 6794 ucp -= sizeof (struct ether_header); 6795 /* 6796 * If it's a group address, then fabricate a 6797 * DL_UNITDATA_IND message. 6798 */ 6799 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6800 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6801 BPRI_HI)) != NULL) { 6802 eh = (struct ether_header *)ucp; 6803 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6804 DB_TYPE(dl_mp) = M_PROTO; 6805 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6806 dui->dl_primitive = DL_UNITDATA_IND; 6807 dui->dl_dest_addr_length = 8; 6808 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6809 dui->dl_src_addr_length = 8; 6810 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6811 8; 6812 dui->dl_group_address = 1; 6813 ucp = (uchar_t *)(dui + 1); 6814 if (ill->ill_sap_length > 0) 6815 ucp += ill->ill_sap_length; 6816 bcopy(&eh->ether_dhost, ucp, 6); 6817 bcopy(&eh->ether_shost, ucp + 8, 6); 6818 ucp = (uchar_t *)(dui + 1); 6819 if (ill->ill_sap_length < 0) 6820 ucp += 8 + ill->ill_sap_length; 6821 bcopy(&eh->ether_type, ucp, 2); 6822 bcopy(&eh->ether_type, ucp + 8, 2); 6823 } 6824 } 6825 break; 6826 } 6827 6828 case M_PROTO: 6829 case M_PCPROTO: 6830 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6831 DL_UNITDATA_IND) { 6832 /* Go handle anything other than data elsewhere. */ 6833 ip_rput_dlpi(q, mp); 6834 return; 6835 } 6836 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6837 ll_multicast = dlur->dl_group_address; 6838 #undef dlur 6839 /* Save the DLPI header. 
*/ 6840 dl_mp = mp; 6841 mp = mp->b_cont; 6842 dl_mp->b_cont = NULL; 6843 break; 6844 case M_BREAK: 6845 panic("ip_rput_v6: got an M_BREAK"); 6846 /*NOTREACHED*/ 6847 case M_IOCACK: 6848 iocp = (struct iocblk *)mp->b_rptr; 6849 switch (iocp->ioc_cmd) { 6850 case DL_IOC_HDR_INFO: 6851 ill = (ill_t *)q->q_ptr; 6852 ill_fastpath_ack(ill, mp); 6853 return; 6854 case SIOCSTUNPARAM: 6855 case SIOCGTUNPARAM: 6856 case OSIOCSTUNPARAM: 6857 case OSIOCGTUNPARAM: 6858 /* Go through qwriter */ 6859 break; 6860 default: 6861 putnext(q, mp); 6862 return; 6863 } 6864 /* FALLTHRU */ 6865 case M_ERROR: 6866 case M_HANGUP: 6867 mutex_enter(&ill->ill_lock); 6868 if (ill->ill_state_flags & ILL_CONDEMNED) { 6869 mutex_exit(&ill->ill_lock); 6870 freemsg(mp); 6871 return; 6872 } 6873 ill_refhold_locked(ill); 6874 mutex_exit(&ill->ill_lock); 6875 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6876 return; 6877 case M_CTL: 6878 if ((MBLKL(mp) > sizeof (int)) && 6879 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6880 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6881 mctl_present = B_TRUE; 6882 break; 6883 } 6884 putnext(q, mp); 6885 return; 6886 case M_IOCNAK: 6887 iocp = (struct iocblk *)mp->b_rptr; 6888 switch (iocp->ioc_cmd) { 6889 case DL_IOC_HDR_INFO: 6890 case SIOCSTUNPARAM: 6891 case SIOCGTUNPARAM: 6892 case OSIOCSTUNPARAM: 6893 case OSIOCGTUNPARAM: 6894 mutex_enter(&ill->ill_lock); 6895 if (ill->ill_state_flags & ILL_CONDEMNED) { 6896 mutex_exit(&ill->ill_lock); 6897 freemsg(mp); 6898 return; 6899 } 6900 ill_refhold_locked(ill); 6901 mutex_exit(&ill->ill_lock); 6902 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6903 B_FALSE); 6904 return; 6905 default: 6906 break; 6907 } 6908 /* FALLTHRU */ 6909 default: 6910 putnext(q, mp); 6911 return; 6912 } 6913 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6914 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6915 (mp->b_cont == NULL) ? 
MBLKL(mp) : msgdsize(mp)); 6916 /* 6917 * if db_ref > 1 then copymsg and free original. Packet may be 6918 * changed and do not want other entity who has a reference to this 6919 * message to trip over the changes. This is a blind change because 6920 * trying to catch all places that might change packet is too 6921 * difficult (since it may be a module above this one). 6922 */ 6923 if (mp->b_datap->db_ref > 1) { 6924 mblk_t *mp1; 6925 6926 mp1 = copymsg(mp); 6927 freemsg(mp); 6928 if (mp1 == NULL) { 6929 first_mp = NULL; 6930 goto discard; 6931 } 6932 mp = mp1; 6933 } 6934 first_mp = mp; 6935 if (mctl_present) { 6936 hada_mp = first_mp; 6937 mp = first_mp->b_cont; 6938 } 6939 6940 if (ip_check_v6_mblk(mp, ill) == -1) 6941 return; 6942 6943 ip6h = (ip6_t *)mp->b_rptr; 6944 6945 DTRACE_PROBE4(ip6__physical__in__start, 6946 ill_t *, ill, ill_t *, NULL, 6947 ip6_t *, ip6h, mblk_t *, first_mp); 6948 6949 FW_HOOKS6(ip6_physical_in_event, ipv6firewall_physical_in, 6950 ill, NULL, ip6h, first_mp, mp); 6951 6952 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6953 6954 if (first_mp == NULL) 6955 return; 6956 6957 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6958 IPV6_DEFAULT_VERS_AND_FLOW) { 6959 /* 6960 * It may be a bit too expensive to do this mapped address 6961 * check here, but in the interest of robustness, it seems 6962 * like the correct place. 6963 * TODO: Avoid this check for e.g. connected TCP sockets 6964 */ 6965 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6966 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6967 goto discard; 6968 } 6969 6970 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6971 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6972 goto discard; 6973 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6974 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6975 goto discard; 6976 } 6977 6978 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6979 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6980 } else { 6981 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6982 goto discard; 6983 } 6984 freemsg(dl_mp); 6985 return; 6986 6987 discard: 6988 if (dl_mp != NULL) 6989 freeb(dl_mp); 6990 freemsg(first_mp); 6991 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6992 } 6993 6994 /* 6995 * Walk through the IPv6 packet in mp and see if there's an AH header 6996 * in it. See if the AH header needs to get done before other headers in 6997 * the packet. (Worker function for ipsec_early_ah_v6().) 6998 */ 6999 #define IPSEC_HDR_DONT_PROCESS 0 7000 #define IPSEC_HDR_PROCESS 1 7001 #define IPSEC_MEMORY_ERROR 2 7002 static int 7003 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 7004 { 7005 uint_t length; 7006 uint_t ehdrlen; 7007 uint8_t *whereptr; 7008 uint8_t *endptr; 7009 uint8_t *nexthdrp; 7010 ip6_dest_t *desthdr; 7011 ip6_rthdr_t *rthdr; 7012 ip6_t *ip6h; 7013 7014 /* 7015 * For now just pullup everything. In general, the less pullups, 7016 * the better, but there's so much squirrelling through anyway, 7017 * it's just easier this way. 7018 */ 7019 if (!pullupmsg(mp, -1)) { 7020 return (IPSEC_MEMORY_ERROR); 7021 } 7022 7023 ip6h = (ip6_t *)mp->b_rptr; 7024 length = IPV6_HDR_LEN; 7025 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 7026 endptr = mp->b_wptr; 7027 7028 /* 7029 * We can't just use the argument nexthdr in the place 7030 * of nexthdrp becaue we don't dereference nexthdrp 7031 * till we confirm whether it is a valid address. 7032 */ 7033 nexthdrp = &ip6h->ip6_nxt; 7034 while (whereptr < endptr) { 7035 /* Is there enough left for len + nexthdr? 
*/ 7036 if (whereptr + MIN_EHDR_LEN > endptr) 7037 return (IPSEC_MEMORY_ERROR); 7038 7039 switch (*nexthdrp) { 7040 case IPPROTO_HOPOPTS: 7041 case IPPROTO_DSTOPTS: 7042 /* Assumes the headers are identical for hbh and dst */ 7043 desthdr = (ip6_dest_t *)whereptr; 7044 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7045 if ((uchar_t *)desthdr + ehdrlen > endptr) 7046 return (IPSEC_MEMORY_ERROR); 7047 /* 7048 * Return DONT_PROCESS because of potential Mobile IPv6 7049 * cruft for destination options. 7050 */ 7051 if (*nexthdrp == IPPROTO_DSTOPTS) 7052 return (IPSEC_HDR_DONT_PROCESS); 7053 nexthdrp = &desthdr->ip6d_nxt; 7054 break; 7055 case IPPROTO_ROUTING: 7056 rthdr = (ip6_rthdr_t *)whereptr; 7057 7058 /* 7059 * If there's more hops left on the routing header, 7060 * return now with DON'T PROCESS. 7061 */ 7062 if (rthdr->ip6r_segleft > 0) 7063 return (IPSEC_HDR_DONT_PROCESS); 7064 7065 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7066 if ((uchar_t *)rthdr + ehdrlen > endptr) 7067 return (IPSEC_MEMORY_ERROR); 7068 nexthdrp = &rthdr->ip6r_nxt; 7069 break; 7070 case IPPROTO_FRAGMENT: 7071 /* Wait for reassembly */ 7072 return (IPSEC_HDR_DONT_PROCESS); 7073 case IPPROTO_AH: 7074 *nexthdr = IPPROTO_AH; 7075 return (IPSEC_HDR_PROCESS); 7076 case IPPROTO_NONE: 7077 /* No next header means we're finished */ 7078 default: 7079 return (IPSEC_HDR_DONT_PROCESS); 7080 } 7081 length += ehdrlen; 7082 whereptr += ehdrlen; 7083 } 7084 panic("ipsec_needs_processing_v6"); 7085 /*NOTREACHED*/ 7086 } 7087 7088 /* 7089 * Path for AH if options are present. If this is the first time we are 7090 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7091 * Otherwise, just fanout. Return value answers the boolean question: 7092 * "Did I consume the mblk you sent me?" 7093 * 7094 * Sometimes AH needs to be done before other IPv6 headers for security 7095 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7096 * indicates if that is so, and fans out to the appropriate IPsec protocol 7097 * for the datagram passed in. 7098 */ 7099 static boolean_t 7100 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7101 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7102 { 7103 mblk_t *mp; 7104 uint8_t nexthdr; 7105 ipsec_in_t *ii = NULL; 7106 ah_t *ah; 7107 ipsec_status_t ipsec_rc; 7108 7109 ASSERT((hada_mp == NULL) || (!mctl_present)); 7110 7111 switch (ipsec_needs_processing_v6( 7112 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7113 case IPSEC_MEMORY_ERROR: 7114 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7115 freemsg(hada_mp); 7116 freemsg(first_mp); 7117 return (B_TRUE); 7118 case IPSEC_HDR_DONT_PROCESS: 7119 return (B_FALSE); 7120 } 7121 7122 /* Default means send it to AH! */ 7123 ASSERT(nexthdr == IPPROTO_AH); 7124 if (!mctl_present) { 7125 mp = first_mp; 7126 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7127 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7128 "allocation failure.\n")); 7129 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7130 freemsg(hada_mp); 7131 freemsg(mp); 7132 return (B_TRUE); 7133 } 7134 /* 7135 * Store the ill_index so that when we come back 7136 * from IPSEC we ride on the same queue. 7137 */ 7138 ii = (ipsec_in_t *)first_mp->b_rptr; 7139 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7140 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7141 first_mp->b_cont = mp; 7142 } 7143 /* 7144 * Cache hardware acceleration info. 
7145 */ 7146 if (hada_mp != NULL) { 7147 ASSERT(ii != NULL); 7148 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7149 "caching data attr.\n")); 7150 ii->ipsec_in_accelerated = B_TRUE; 7151 ii->ipsec_in_da = hada_mp; 7152 } 7153 7154 if (!ipsec_loaded()) { 7155 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7156 return (B_TRUE); 7157 } 7158 7159 ah = ipsec_inbound_ah_sa(first_mp); 7160 if (ah == NULL) 7161 return (B_TRUE); 7162 ASSERT(ii->ipsec_in_ah_sa != NULL); 7163 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7164 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7165 7166 switch (ipsec_rc) { 7167 case IPSEC_STATUS_SUCCESS: 7168 /* we're done with IPsec processing, send it up */ 7169 ip_fanout_proto_again(first_mp, ill, ill, ire); 7170 break; 7171 case IPSEC_STATUS_FAILED: 7172 BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); 7173 break; 7174 case IPSEC_STATUS_PENDING: 7175 /* no action needed */ 7176 break; 7177 } 7178 return (B_TRUE); 7179 } 7180 7181 /* 7182 * Validate the IPv6 mblk for alignment. 7183 */ 7184 int 7185 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7186 { 7187 int pkt_len, ip6_len; 7188 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7189 7190 /* check for alignment and full IPv6 header */ 7191 if (!OK_32PTR((uchar_t *)ip6h) || 7192 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7193 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7194 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7195 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7196 freemsg(mp); 7197 return (-1); 7198 } 7199 ip6h = (ip6_t *)mp->b_rptr; 7200 } 7201 7202 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7203 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7204 7205 if (mp->b_cont == NULL) 7206 pkt_len = mp->b_wptr - mp->b_rptr; 7207 else 7208 pkt_len = msgdsize(mp); 7209 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7210 7211 /* 7212 * Check for bogus (too short packet) and packet which 7213 * was padded by the link layer. 
7214 */ 7215 if (ip6_len != pkt_len) { 7216 ssize_t diff; 7217 7218 if (ip6_len > pkt_len) { 7219 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7220 ip6_len, pkt_len)); 7221 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7222 freemsg(mp); 7223 return (-1); 7224 } 7225 diff = (ssize_t)(pkt_len - ip6_len); 7226 7227 if (!adjmsg(mp, -diff)) { 7228 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7229 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7230 freemsg(mp); 7231 return (-1); 7232 } 7233 } 7234 return (0); 7235 } 7236 7237 /* 7238 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7239 * ip_rput_v6 has already verified alignment, the min length, the version, 7240 * and db_ref = 1. 7241 * 7242 * The ill passed in (the arg named inill) is the ill that the packet 7243 * actually arrived on. We need to remember this when saving the 7244 * input interface index into potential IPV6_PKTINFO data in 7245 * ip_add_info_v6(). 7246 * 7247 * This routine doesn't free dl_mp; that's the caller's responsibility on 7248 * return. (Note that the callers are complex enough that there's no tail 7249 * recursion here anyway.) 
7250 */ 7251 void 7252 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7253 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7254 { 7255 ire_t *ire = NULL; 7256 queue_t *rq; 7257 ill_t *ill = inill; 7258 ill_t *outill; 7259 ipif_t *ipif; 7260 uint8_t *whereptr; 7261 uint8_t nexthdr; 7262 uint16_t remlen; 7263 uint_t prev_nexthdr_offset; 7264 uint_t used; 7265 size_t pkt_len; 7266 uint16_t ip6_len; 7267 uint_t hdr_len; 7268 boolean_t mctl_present; 7269 mblk_t *first_mp; 7270 mblk_t *first_mp1; 7271 boolean_t no_forward; 7272 ip6_hbh_t *hbhhdr; 7273 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7274 conn_t *connp; 7275 ilm_t *ilm; 7276 uint32_t ports; 7277 uint_t ipif_id = 0; 7278 zoneid_t zoneid = GLOBAL_ZONEID; 7279 uint16_t hck_flags, reass_hck_flags; 7280 uint32_t reass_sum; 7281 boolean_t cksum_err; 7282 mblk_t *mp1; 7283 7284 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7285 7286 if (hada_mp != NULL) { 7287 /* 7288 * It's an IPsec accelerated packet. 7289 * Keep a pointer to the data attributes around until 7290 * we allocate the ipsecinfo structure. 7291 */ 7292 IPSECHW_DEBUG(IPSECHW_PKT, 7293 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7294 hada_mp->b_cont = NULL; 7295 /* 7296 * Since it is accelerated, it came directly from 7297 * the ill. 
7298 */ 7299 ASSERT(mctl_present == B_FALSE); 7300 ASSERT(mp->b_datap->db_type != M_CTL); 7301 } 7302 7303 ip6h = (ip6_t *)mp->b_rptr; 7304 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7305 pkt_len = ip6_len; 7306 7307 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7308 hck_flags = DB_CKSUMFLAGS(mp); 7309 else 7310 hck_flags = 0; 7311 7312 /* Clear checksum flags in case we need to forward */ 7313 DB_CKSUMFLAGS(mp) = 0; 7314 reass_sum = reass_hck_flags = 0; 7315 7316 nexthdr = ip6h->ip6_nxt; 7317 7318 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7319 (uchar_t *)ip6h); 7320 whereptr = (uint8_t *)&ip6h[1]; 7321 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7322 7323 /* Process hop by hop header options */ 7324 if (nexthdr == IPPROTO_HOPOPTS) { 7325 uint_t ehdrlen; 7326 uint8_t *optptr; 7327 7328 if (remlen < MIN_EHDR_LEN) 7329 goto pkt_too_short; 7330 if (mp->b_cont != NULL && 7331 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7332 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7333 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7334 freemsg(hada_mp); 7335 freemsg(first_mp); 7336 return; 7337 } 7338 ip6h = (ip6_t *)mp->b_rptr; 7339 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7340 } 7341 hbhhdr = (ip6_hbh_t *)whereptr; 7342 nexthdr = hbhhdr->ip6h_nxt; 7343 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7344 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7345 7346 if (remlen < ehdrlen) 7347 goto pkt_too_short; 7348 if (mp->b_cont != NULL && 7349 whereptr + ehdrlen > mp->b_wptr) { 7350 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7351 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7352 freemsg(hada_mp); 7353 freemsg(first_mp); 7354 return; 7355 } 7356 ip6h = (ip6_t *)mp->b_rptr; 7357 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7358 hbhhdr = (ip6_hbh_t *)whereptr; 7359 } 7360 7361 optptr = whereptr + 2; 7362 whereptr += ehdrlen; 7363 remlen -= ehdrlen; 7364 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 
7365 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7366 case -1: 7367 /* 7368 * Packet has been consumed and any 7369 * needed ICMP messages sent. 7370 */ 7371 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7372 freemsg(hada_mp); 7373 return; 7374 case 0: 7375 /* no action needed */ 7376 break; 7377 case 1: 7378 /* Known router alert */ 7379 goto ipv6forus; 7380 } 7381 } 7382 7383 /* 7384 * Attach any necessary label information to this packet. 7385 */ 7386 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7387 if (ip6opt_ls != 0) 7388 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7389 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7390 freemsg(hada_mp); 7391 freemsg(first_mp); 7392 return; 7393 } 7394 7395 /* 7396 * On incoming v6 multicast packets we will bypass the ire table, 7397 * and assume that the read queue corresponds to the targetted 7398 * interface. 7399 * 7400 * The effect of this is the same as the IPv4 original code, but is 7401 * much cleaner I think. See ip_rput for how that was done. 7402 */ 7403 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7404 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7405 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7406 /* 7407 * XXX TODO Give to mrouted to for multicast forwarding. 
7408 */ 7409 ILM_WALKER_HOLD(ill); 7410 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7411 ILM_WALKER_RELE(ill); 7412 if (ilm == NULL) { 7413 if (ip_debug > 3) { 7414 /* ip2dbg */ 7415 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7416 " which is not for us: %s\n", AF_INET6, 7417 &ip6h->ip6_dst); 7418 } 7419 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7420 freemsg(hada_mp); 7421 freemsg(first_mp); 7422 return; 7423 } 7424 if (ip_debug > 3) { 7425 /* ip2dbg */ 7426 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7427 AF_INET6, &ip6h->ip6_dst); 7428 } 7429 rq = ill->ill_rq; 7430 zoneid = GLOBAL_ZONEID; 7431 goto ipv6forus; 7432 } 7433 7434 ipif = ill->ill_ipif; 7435 7436 /* 7437 * If a packet was received on an interface that is a 6to4 tunnel, 7438 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7439 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7440 * the 6to4 prefix of the address configured on the receiving interface. 7441 * Otherwise, the packet was delivered to this interface in error and 7442 * the packet must be dropped. 7443 */ 7444 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7445 7446 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7447 &ip6h->ip6_dst)) { 7448 if (ip_debug > 2) { 7449 /* ip1dbg */ 7450 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7451 "addressed packet which is not for us: " 7452 "%s\n", AF_INET6, &ip6h->ip6_dst); 7453 } 7454 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7455 freemsg(first_mp); 7456 return; 7457 } 7458 } 7459 7460 /* 7461 * Find an ire that matches destination. For link-local addresses 7462 * we have to match the ill. 7463 * TBD for site local addresses. 
7464 */ 7465 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7466 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7467 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7468 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7469 } else { 7470 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7471 MBLK_GETLABEL(mp)); 7472 } 7473 if (ire == NULL) { 7474 /* 7475 * No matching IRE found. Mark this packet as having 7476 * originated externally. 7477 */ 7478 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7479 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7480 if (!(ill->ill_flags & ILLF_ROUTER)) { 7481 BUMP_MIB(ill->ill_ip_mib, 7482 ipIfStatsInAddrErrors); 7483 } 7484 freemsg(hada_mp); 7485 freemsg(first_mp); 7486 return; 7487 } 7488 if (ip6h->ip6_hops <= 1) { 7489 if (hada_mp != NULL) 7490 goto hada_drop; 7491 /* Sent by forwarding path, and router is global zone */ 7492 icmp_time_exceeded_v6(WR(q), first_mp, 7493 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7494 GLOBAL_ZONEID); 7495 return; 7496 } 7497 /* 7498 * Per RFC 3513 section 2.5.2, we must not forward packets with 7499 * an unspecified source address. 7500 */ 7501 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7503 freemsg(hada_mp); 7504 freemsg(first_mp); 7505 return; 7506 } 7507 mp->b_prev = (mblk_t *)(uintptr_t) 7508 ill->ill_phyint->phyint_ifindex; 7509 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7510 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7511 ALL_ZONES); 7512 return; 7513 } 7514 ipif_id = ire->ire_ipif->ipif_seqid; 7515 /* we have a matching IRE */ 7516 if (ire->ire_stq != NULL) { 7517 ill_group_t *ill_group; 7518 ill_group_t *ire_group; 7519 7520 /* 7521 * To be quicker, we may wish not to chase pointers 7522 * (ire->ire_ipif->ipif_ill...) and instead store the 7523 * forwarding policy in the ire. An unfortunate side- 7524 * effect of this would be requiring an ire flush whenever 7525 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7526 * once and store in the boolean no_forward. 7527 * 7528 * This appears twice to keep it out of the non-forwarding, 7529 * yes-it's-for-us-on-the-right-interface case. 7530 */ 7531 no_forward = ((ill->ill_flags & 7532 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7533 7534 7535 ASSERT(first_mp == mp); 7536 /* 7537 * This ire has a send-to queue - forward the packet. 7538 */ 7539 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7540 freemsg(hada_mp); 7541 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7542 if (no_forward) { 7543 BUMP_MIB(ill->ill_ip_mib, 7544 ipIfStatsInAddrErrors); 7545 } 7546 freemsg(mp); 7547 ire_refrele(ire); 7548 return; 7549 } 7550 /* 7551 * ipIfStatsHCInForwDatagrams should only be increment if there 7552 * will be an attempt to forward the packet, which is why we 7553 * increment after the above condition has been checked. 7554 */ 7555 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7556 if (ip6h->ip6_hops <= 1) { 7557 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7558 /* Sent by forwarding path, and router is global zone */ 7559 icmp_time_exceeded_v6(WR(q), mp, 7560 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7561 GLOBAL_ZONEID); 7562 ire_refrele(ire); 7563 return; 7564 } 7565 /* 7566 * Per RFC 3513 section 2.5.2, we must not forward packets with 7567 * an unspecified source address. 
7568 */ 7569 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7570 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7571 freemsg(mp); 7572 ire_refrele(ire); 7573 return; 7574 } 7575 7576 if (is_system_labeled()) { 7577 mblk_t *mp1; 7578 7579 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7580 BUMP_MIB(ill->ill_ip_mib, 7581 ipIfStatsForwProhibits); 7582 freemsg(mp); 7583 ire_refrele(ire); 7584 return; 7585 } 7586 /* Size may have changed */ 7587 mp = mp1; 7588 ip6h = (ip6_t *)mp->b_rptr; 7589 pkt_len = msgdsize(mp); 7590 } 7591 7592 if (pkt_len > ire->ire_max_frag) { 7593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7594 /* Sent by forwarding path, and router is global zone */ 7595 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7596 ll_multicast, B_TRUE, GLOBAL_ZONEID); 7597 ire_refrele(ire); 7598 return; 7599 } 7600 7601 /* 7602 * Check to see if we're forwarding the packet to a 7603 * different link from which it came. If so, check the 7604 * source and destination addresses since routers must not 7605 * forward any packets with link-local source or 7606 * destination addresses to other links. Otherwise (if 7607 * we're forwarding onto the same link), conditionally send 7608 * a redirect message. 7609 */ 7610 ill_group = ill->ill_group; 7611 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7612 if (ire->ire_rfq != q && (ill_group == NULL || 7613 ill_group != ire_group)) { 7614 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7615 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7616 BUMP_MIB(ill->ill_ip_mib, 7617 ipIfStatsInAddrErrors); 7618 freemsg(mp); 7619 ire_refrele(ire); 7620 return; 7621 } 7622 /* TBD add site-local check at site boundary? */ 7623 } else if (ipv6_send_redirects) { 7624 in6_addr_t *v6targ; 7625 in6_addr_t gw_addr_v6; 7626 ire_t *src_ire_v6 = NULL; 7627 7628 /* 7629 * Don't send a redirect when forwarding a source 7630 * routed packet. 
7631 */ 7632 if (ip_source_routed_v6(ip6h, mp)) 7633 goto forward; 7634 7635 mutex_enter(&ire->ire_lock); 7636 gw_addr_v6 = ire->ire_gateway_addr_v6; 7637 mutex_exit(&ire->ire_lock); 7638 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7639 v6targ = &gw_addr_v6; 7640 /* 7641 * We won't send redirects to a router 7642 * that doesn't have a link local 7643 * address, but will forward. 7644 */ 7645 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7646 BUMP_MIB(ill->ill_ip_mib, 7647 ipIfStatsInAddrErrors); 7648 goto forward; 7649 } 7650 } else { 7651 v6targ = &ip6h->ip6_dst; 7652 } 7653 7654 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7655 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7656 ALL_ZONES, 0, NULL, 7657 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7658 7659 if (src_ire_v6 != NULL) { 7660 /* 7661 * The source is directly connected. 7662 */ 7663 mp1 = copymsg(mp); 7664 if (mp1 != NULL) { 7665 icmp_send_redirect_v6(WR(q), 7666 mp1, v6targ, &ip6h->ip6_dst, 7667 ill, B_FALSE); 7668 } 7669 ire_refrele(src_ire_v6); 7670 } 7671 } 7672 7673 forward: 7674 /* Hoplimit verified above */ 7675 ip6h->ip6_hops--; 7676 7677 outill = ire->ire_ipif->ipif_ill; 7678 7679 DTRACE_PROBE4(ip6__forwarding__start, 7680 ill_t *, inill, ill_t *, outill, 7681 ip6_t *, ip6h, mblk_t *, mp); 7682 7683 FW_HOOKS6(ip6_forwarding_event, ipv6firewall_forwarding, 7684 inill, outill, ip6h, mp, mp); 7685 7686 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7687 7688 if (mp != NULL) { 7689 UPDATE_IB_PKT_COUNT(ire); 7690 ire->ire_last_used_time = lbolt; 7691 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7692 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7693 } 7694 IRE_REFRELE(ire); 7695 return; 7696 } 7697 rq = ire->ire_rfq; 7698 7699 /* 7700 * Need to put on correct queue for reassembly to find it. 7701 * No need to use put() since reassembly has its own locks. 
7702 * Note: multicast packets and packets destined to addresses 7703 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7704 * the arriving ill. 7705 */ 7706 if (rq != q) { 7707 boolean_t check_multi = B_TRUE; 7708 ill_group_t *ill_group = NULL; 7709 ill_group_t *ire_group = NULL; 7710 ill_t *ire_ill = NULL; 7711 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7712 7713 /* 7714 * To be quicker, we may wish not to chase pointers 7715 * (ire->ire_ipif->ipif_ill...) and instead store the 7716 * forwarding policy in the ire. An unfortunate side- 7717 * effect of this would be requiring an ire flush whenever 7718 * the ILLF_ROUTER flag changes. For now, chase pointers 7719 * once and store in the boolean no_forward. 7720 */ 7721 no_forward = ((ill->ill_flags & 7722 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7723 7724 ill_group = ill->ill_group; 7725 if (rq != NULL) { 7726 ire_ill = (ill_t *)(rq->q_ptr); 7727 ire_group = ire_ill->ill_group; 7728 } 7729 7730 /* 7731 * If it's part of the same IPMP group, or if it's a legal 7732 * address on the 'usesrc' interface, then bypass strict 7733 * checks. 7734 */ 7735 if (ill_group != NULL && ill_group == ire_group) { 7736 check_multi = B_FALSE; 7737 } else if (ill_ifindex != 0 && ire_ill != NULL && 7738 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7739 check_multi = B_FALSE; 7740 } 7741 7742 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7743 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7744 /* 7745 * This packet came in on an interface other than the 7746 * one associated with the destination address 7747 * and we are strict about matches. 7748 * 7749 * As long as the ills belong to the same group, 7750 * we don't consider them to arriving on the wrong 7751 * interface. Thus, when the switch is doing inbound 7752 * load spreading, we won't drop packets when we 7753 * are doing strict multihoming checks. 
7754 */ 7755 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7756 freemsg(hada_mp); 7757 freemsg(first_mp); 7758 ire_refrele(ire); 7759 return; 7760 } 7761 7762 if (rq != NULL) 7763 q = rq; 7764 7765 ill = (ill_t *)q->q_ptr; 7766 ASSERT(ill); 7767 } 7768 7769 zoneid = ire->ire_zoneid; 7770 UPDATE_IB_PKT_COUNT(ire); 7771 ire->ire_last_used_time = lbolt; 7772 /* Don't use the ire after this point. */ 7773 ire_refrele(ire); 7774 ipv6forus: 7775 /* 7776 * Looks like this packet is for us one way or another. 7777 * This is where we'll process destination headers etc. 7778 */ 7779 for (; ; ) { 7780 switch (nexthdr) { 7781 case IPPROTO_TCP: { 7782 uint16_t *up; 7783 uint32_t sum; 7784 int offset; 7785 7786 hdr_len = pkt_len - remlen; 7787 7788 if (hada_mp != NULL) { 7789 ip0dbg(("tcp hada drop\n")); 7790 goto hada_drop; 7791 } 7792 7793 7794 /* TCP needs all of the TCP header */ 7795 if (remlen < TCP_MIN_HEADER_LENGTH) 7796 goto pkt_too_short; 7797 if (mp->b_cont != NULL && 7798 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7799 if (!pullupmsg(mp, 7800 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7801 BUMP_MIB(ill->ill_ip_mib, 7802 ipIfStatsInDiscards); 7803 freemsg(first_mp); 7804 return; 7805 } 7806 hck_flags = 0; 7807 ip6h = (ip6_t *)mp->b_rptr; 7808 whereptr = (uint8_t *)ip6h + hdr_len; 7809 } 7810 /* 7811 * Extract the offset field from the TCP header. 7812 */ 7813 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7814 if (offset != 5) { 7815 if (offset < 5) { 7816 ip1dbg(("ip_rput_data_v6: short " 7817 "TCP data offset")); 7818 BUMP_MIB(ill->ill_ip_mib, 7819 ipIfStatsInDiscards); 7820 freemsg(first_mp); 7821 return; 7822 } 7823 /* 7824 * There must be TCP options. 7825 * Make sure we can grab them. 
7826 */ 7827 offset <<= 2; 7828 if (remlen < offset) 7829 goto pkt_too_short; 7830 if (mp->b_cont != NULL && 7831 whereptr + offset > mp->b_wptr) { 7832 if (!pullupmsg(mp, 7833 hdr_len + offset)) { 7834 BUMP_MIB(ill->ill_ip_mib, 7835 ipIfStatsInDiscards); 7836 freemsg(first_mp); 7837 return; 7838 } 7839 hck_flags = 0; 7840 ip6h = (ip6_t *)mp->b_rptr; 7841 whereptr = (uint8_t *)ip6h + hdr_len; 7842 } 7843 } 7844 7845 up = (uint16_t *)&ip6h->ip6_src; 7846 /* 7847 * TCP checksum calculation. First sum up the 7848 * pseudo-header fields: 7849 * - Source IPv6 address 7850 * - Destination IPv6 address 7851 * - TCP payload length 7852 * - TCP protocol ID 7853 */ 7854 sum = htons(IPPROTO_TCP + remlen) + 7855 up[0] + up[1] + up[2] + up[3] + 7856 up[4] + up[5] + up[6] + up[7] + 7857 up[8] + up[9] + up[10] + up[11] + 7858 up[12] + up[13] + up[14] + up[15]; 7859 7860 /* Fold initial sum */ 7861 sum = (sum & 0xffff) + (sum >> 16); 7862 7863 mp1 = mp->b_cont; 7864 7865 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7866 IP6_STAT(ip6_in_sw_cksum); 7867 7868 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7869 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7870 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7871 mp, mp1, cksum_err); 7872 7873 if (cksum_err) { 7874 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7875 7876 if (hck_flags & HCK_FULLCKSUM) 7877 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7878 else if (hck_flags & HCK_PARTIALCKSUM) 7879 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7880 else 7881 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7882 7883 freemsg(first_mp); 7884 return; 7885 } 7886 tcp_fanout: 7887 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7888 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7889 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7890 return; 7891 } 7892 case IPPROTO_SCTP: 7893 { 7894 sctp_hdr_t *sctph; 7895 uint32_t calcsum, pktsum; 7896 uint_t hdr_len = pkt_len - remlen; 7897 7898 /* SCTP needs all of the SCTP header */ 7899 if (remlen < sizeof (*sctph)) { 7900 goto 
pkt_too_short; 7901 } 7902 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7903 ASSERT(mp->b_cont != NULL); 7904 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7905 BUMP_MIB(ill->ill_ip_mib, 7906 ipIfStatsInDiscards); 7907 freemsg(mp); 7908 return; 7909 } 7910 ip6h = (ip6_t *)mp->b_rptr; 7911 whereptr = (uint8_t *)ip6h + hdr_len; 7912 } 7913 7914 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7915 /* checksum */ 7916 pktsum = sctph->sh_chksum; 7917 sctph->sh_chksum = 0; 7918 calcsum = sctp_cksum(mp, hdr_len); 7919 if (calcsum != pktsum) { 7920 BUMP_MIB(&sctp_mib, sctpChecksumError); 7921 freemsg(mp); 7922 return; 7923 } 7924 sctph->sh_chksum = pktsum; 7925 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7926 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7927 ports, ipif_id, zoneid, mp)) == NULL) { 7928 ip_fanout_sctp_raw(first_mp, ill, 7929 (ipha_t *)ip6h, B_FALSE, ports, 7930 mctl_present, 7931 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7932 B_TRUE, ipif_id, zoneid); 7933 return; 7934 } 7935 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7936 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7937 B_FALSE, mctl_present); 7938 return; 7939 } 7940 case IPPROTO_UDP: { 7941 uint16_t *up; 7942 uint32_t sum; 7943 7944 hdr_len = pkt_len - remlen; 7945 7946 if (hada_mp != NULL) { 7947 ip0dbg(("udp hada drop\n")); 7948 goto hada_drop; 7949 } 7950 7951 /* Verify that at least the ports are present */ 7952 if (remlen < UDPH_SIZE) 7953 goto pkt_too_short; 7954 if (mp->b_cont != NULL && 7955 whereptr + UDPH_SIZE > mp->b_wptr) { 7956 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7957 BUMP_MIB(ill->ill_ip_mib, 7958 ipIfStatsInDiscards); 7959 freemsg(first_mp); 7960 return; 7961 } 7962 hck_flags = 0; 7963 ip6h = (ip6_t *)mp->b_rptr; 7964 whereptr = (uint8_t *)ip6h + hdr_len; 7965 } 7966 7967 /* 7968 * Before going through the regular checksum 7969 * calculation, make sure the received checksum 7970 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7971 * in a UDP packet (within IPv6 packet) is invalid 7972 * and should be replaced by 0xffff. This makes 7973 * sense as regular checksum calculation will 7974 * pass for both the cases i.e. 0x0000 and 0xffff. 7975 * Removing one of the case makes error detection 7976 * stronger. 7977 */ 7978 7979 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7980 /* 0x0000 checksum is invalid */ 7981 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7982 "checksum value 0x0000\n")); 7983 BUMP_MIB(ill->ill_ip_mib, 7984 udpIfStatsInCksumErrs); 7985 freemsg(first_mp); 7986 return; 7987 } 7988 7989 up = (uint16_t *)&ip6h->ip6_src; 7990 7991 /* 7992 * UDP checksum calculation. First sum up the 7993 * pseudo-header fields: 7994 * - Source IPv6 address 7995 * - Destination IPv6 address 7996 * - UDP payload length 7997 * - UDP protocol ID 7998 */ 7999 8000 sum = htons(IPPROTO_UDP + remlen) + 8001 up[0] + up[1] + up[2] + up[3] + 8002 up[4] + up[5] + up[6] + up[7] + 8003 up[8] + up[9] + up[10] + up[11] + 8004 up[12] + up[13] + up[14] + up[15]; 8005 8006 /* Fold initial sum */ 8007 sum = (sum & 0xffff) + (sum >> 16); 8008 8009 if (reass_hck_flags != 0) { 8010 hck_flags = reass_hck_flags; 8011 8012 IP_CKSUM_RECV_REASS(hck_flags, 8013 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8014 sum, reass_sum, cksum_err); 8015 } else { 8016 mp1 = mp->b_cont; 8017 8018 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 8019 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 8020 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8021 mp, mp1, cksum_err); 8022 } 8023 8024 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 8025 IP6_STAT(ip6_in_sw_cksum); 8026 8027 if (cksum_err) { 8028 BUMP_MIB(ill->ill_ip_mib, 8029 udpIfStatsInCksumErrs); 8030 8031 if (hck_flags & HCK_FULLCKSUM) 8032 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 8033 else if (hck_flags & HCK_PARTIALCKSUM) 8034 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 8035 else 8036 IP6_STAT(ip6_udp_in_sw_cksum_err); 8037 8038 freemsg(first_mp); 8039 
return; 8040 } 8041 goto udp_fanout; 8042 } 8043 case IPPROTO_ICMPV6: { 8044 uint16_t *up; 8045 uint32_t sum; 8046 uint_t hdr_len = pkt_len - remlen; 8047 8048 if (hada_mp != NULL) { 8049 ip0dbg(("icmp hada drop\n")); 8050 goto hada_drop; 8051 } 8052 8053 up = (uint16_t *)&ip6h->ip6_src; 8054 sum = htons(IPPROTO_ICMPV6 + remlen) + 8055 up[0] + up[1] + up[2] + up[3] + 8056 up[4] + up[5] + up[6] + up[7] + 8057 up[8] + up[9] + up[10] + up[11] + 8058 up[12] + up[13] + up[14] + up[15]; 8059 sum = (sum & 0xffff) + (sum >> 16); 8060 sum = IP_CSUM(mp, hdr_len, sum); 8061 if (sum != 0) { 8062 /* IPv6 ICMP checksum failed */ 8063 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8064 "failed %x\n", 8065 sum)); 8066 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8067 BUMP_MIB(ill->ill_icmp6_mib, 8068 ipv6IfIcmpInErrors); 8069 freemsg(first_mp); 8070 return; 8071 } 8072 8073 icmp_fanout: 8074 /* Check variable for testing applications */ 8075 if (ipv6_drop_inbound_icmpv6) { 8076 freemsg(first_mp); 8077 return; 8078 } 8079 /* 8080 * Assume that there is always at least one conn for 8081 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8082 * where there is no conn. 8083 */ 8084 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8085 ASSERT(!(ill->ill_phyint->phyint_flags & 8086 PHYI_LOOPBACK)); 8087 /* 8088 * In the multicast case, applications may have 8089 * joined the group from different zones, so we 8090 * need to deliver the packet to each of them. 8091 * Loop through the multicast memberships 8092 * structures (ilm) on the receive ill and send 8093 * a copy of the packet up each matching one. 
8094 */ 8095 ILM_WALKER_HOLD(ill); 8096 for (ilm = ill->ill_ilm; ilm != NULL; 8097 ilm = ilm->ilm_next) { 8098 if (ilm->ilm_flags & ILM_DELETED) 8099 continue; 8100 if (!IN6_ARE_ADDR_EQUAL( 8101 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8102 continue; 8103 if (!ipif_lookup_zoneid(ill, 8104 ilm->ilm_zoneid, IPIF_UP, NULL)) 8105 continue; 8106 8107 first_mp1 = ip_copymsg(first_mp); 8108 if (first_mp1 == NULL) 8109 continue; 8110 icmp_inbound_v6(q, first_mp1, ill, 8111 hdr_len, mctl_present, 0, 8112 ilm->ilm_zoneid, dl_mp); 8113 } 8114 ILM_WALKER_RELE(ill); 8115 } else { 8116 first_mp1 = ip_copymsg(first_mp); 8117 if (first_mp1 != NULL) 8118 icmp_inbound_v6(q, first_mp1, ill, 8119 hdr_len, mctl_present, 0, zoneid, 8120 dl_mp); 8121 } 8122 } 8123 /* FALLTHRU */ 8124 default: { 8125 /* 8126 * Handle protocols with which IPv6 is less intimate. 8127 */ 8128 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 8129 8130 if (hada_mp != NULL) { 8131 ip0dbg(("default hada drop\n")); 8132 goto hada_drop; 8133 } 8134 8135 /* 8136 * Enable sending ICMP for "Unknown" nexthdr 8137 * case. i.e. where we did not FALLTHRU from 8138 * IPPROTO_ICMPV6 processing case above. 8139 * If we did FALLTHRU, then the packet has already been 8140 * processed for IPPF, don't process it again in 8141 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8142 * flags 8143 */ 8144 if (nexthdr != IPPROTO_ICMPV6) 8145 proto_flags |= IP_FF_SEND_ICMP; 8146 else 8147 proto_flags |= IP6_NO_IPPOLICY; 8148 8149 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8150 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8151 mctl_present, zoneid); 8152 return; 8153 } 8154 8155 case IPPROTO_DSTOPTS: { 8156 uint_t ehdrlen; 8157 uint8_t *optptr; 8158 ip6_dest_t *desthdr; 8159 8160 /* Check if AH is present. 
*/ 8161 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8162 ire, hada_mp, zoneid)) { 8163 ip0dbg(("dst early hada drop\n")); 8164 return; 8165 } 8166 8167 /* 8168 * Reinitialize pointers, as ipsec_early_ah_v6() does 8169 * complete pullups. We don't have to do more pullups 8170 * as a result. 8171 */ 8172 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8173 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8174 ip6h = (ip6_t *)mp->b_rptr; 8175 8176 if (remlen < MIN_EHDR_LEN) 8177 goto pkt_too_short; 8178 8179 desthdr = (ip6_dest_t *)whereptr; 8180 nexthdr = desthdr->ip6d_nxt; 8181 prev_nexthdr_offset = (uint_t)(whereptr - 8182 (uint8_t *)ip6h); 8183 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8184 if (remlen < ehdrlen) 8185 goto pkt_too_short; 8186 optptr = whereptr + 2; 8187 /* 8188 * Note: XXX This code does not seem to make 8189 * distinction between Destination Options Header 8190 * being before/after Routing Header which can 8191 * happen if we are at the end of source route. 8192 * This may become significant in future. 8193 * (No real significant Destination Options are 8194 * defined/implemented yet ). 8195 */ 8196 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8197 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8198 case -1: 8199 /* 8200 * Packet has been consumed and any needed 8201 * ICMP errors sent. 
8202 */ 8203 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8204 freemsg(hada_mp); 8205 return; 8206 case 0: 8207 /* No action needed continue */ 8208 break; 8209 case 1: 8210 /* 8211 * Unnexpected return value 8212 * (Router alert is a Hop-by-Hop option) 8213 */ 8214 #ifdef DEBUG 8215 panic("ip_rput_data_v6: router " 8216 "alert hbh opt indication in dest opt"); 8217 /*NOTREACHED*/ 8218 #else 8219 freemsg(hada_mp); 8220 freemsg(first_mp); 8221 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8222 return; 8223 #endif 8224 } 8225 used = ehdrlen; 8226 break; 8227 } 8228 case IPPROTO_FRAGMENT: { 8229 ip6_frag_t *fraghdr; 8230 size_t no_frag_hdr_len; 8231 8232 if (hada_mp != NULL) { 8233 ip0dbg(("frag hada drop\n")); 8234 goto hada_drop; 8235 } 8236 8237 ASSERT(first_mp == mp); 8238 if (remlen < sizeof (ip6_frag_t)) 8239 goto pkt_too_short; 8240 8241 if (mp->b_cont != NULL && 8242 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8243 if (!pullupmsg(mp, 8244 pkt_len - remlen + sizeof (ip6_frag_t))) { 8245 BUMP_MIB(ill->ill_ip_mib, 8246 ipIfStatsInDiscards); 8247 freemsg(mp); 8248 return; 8249 } 8250 hck_flags = 0; 8251 ip6h = (ip6_t *)mp->b_rptr; 8252 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8253 } 8254 8255 fraghdr = (ip6_frag_t *)whereptr; 8256 used = (uint_t)sizeof (ip6_frag_t); 8257 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8258 8259 /* 8260 * Invoke the CGTP (multirouting) filtering module to 8261 * process the incoming packet. Packets identified as 8262 * duplicates must be discarded. Filtering is active 8263 * only if the the ip_cgtp_filter ndd variable is 8264 * non-zero. 
8265 */ 8266 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8267 int cgtp_flt_pkt = 8268 ip_cgtp_filter_ops->cfo_filter_v6( 8269 inill->ill_rq, ip6h, fraghdr); 8270 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8271 freemsg(mp); 8272 return; 8273 } 8274 } 8275 8276 /* Restore the flags */ 8277 DB_CKSUMFLAGS(mp) = hck_flags; 8278 8279 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8280 remlen - used, &prev_nexthdr_offset, 8281 &reass_sum, &reass_hck_flags); 8282 if (mp == NULL) { 8283 /* Reassembly is still pending */ 8284 return; 8285 } 8286 /* The first mblk are the headers before the frag hdr */ 8287 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8288 8289 first_mp = mp; /* mp has most likely changed! */ 8290 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8291 ip6h = (ip6_t *)mp->b_rptr; 8292 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8293 whereptr = mp->b_rptr + no_frag_hdr_len; 8294 remlen = ntohs(ip6h->ip6_plen) + 8295 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8296 pkt_len = msgdsize(mp); 8297 used = 0; 8298 break; 8299 } 8300 case IPPROTO_HOPOPTS: 8301 if (hada_mp != NULL) { 8302 ip0dbg(("hop hada drop\n")); 8303 goto hada_drop; 8304 } 8305 /* 8306 * Illegal header sequence. 8307 * (Hop-by-hop headers are processed above 8308 * and required to immediately follow IPv6 header) 8309 */ 8310 icmp_param_problem_v6(WR(q), first_mp, 8311 ICMP6_PARAMPROB_NEXTHEADER, 8312 prev_nexthdr_offset, 8313 B_FALSE, B_FALSE, zoneid); 8314 return; 8315 8316 case IPPROTO_ROUTING: { 8317 uint_t ehdrlen; 8318 ip6_rthdr_t *rthdr; 8319 8320 /* Check if AH is present. */ 8321 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8322 ire, hada_mp, zoneid)) { 8323 ip0dbg(("routing hada drop\n")); 8324 return; 8325 } 8326 8327 /* 8328 * Reinitialize pointers, as ipsec_early_ah_v6() does 8329 * complete pullups. We don't have to do more pullups 8330 * as a result. 
8331 */ 8332 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8333 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8334 ip6h = (ip6_t *)mp->b_rptr; 8335 8336 if (remlen < MIN_EHDR_LEN) 8337 goto pkt_too_short; 8338 rthdr = (ip6_rthdr_t *)whereptr; 8339 nexthdr = rthdr->ip6r_nxt; 8340 prev_nexthdr_offset = (uint_t)(whereptr - 8341 (uint8_t *)ip6h); 8342 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8343 if (remlen < ehdrlen) 8344 goto pkt_too_short; 8345 if (rthdr->ip6r_segleft != 0) { 8346 /* Not end of source route */ 8347 if (ll_multicast) { 8348 BUMP_MIB(ill->ill_ip_mib, 8349 ipIfStatsForwProhibits); 8350 freemsg(hada_mp); 8351 freemsg(mp); 8352 return; 8353 } 8354 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8355 flags, hada_mp, dl_mp); 8356 return; 8357 } 8358 used = ehdrlen; 8359 break; 8360 } 8361 case IPPROTO_AH: 8362 case IPPROTO_ESP: { 8363 /* 8364 * Fast path for AH/ESP. If this is the first time 8365 * we are sending a datagram to AH/ESP, allocate 8366 * a IPSEC_IN message and prepend it. Otherwise, 8367 * just fanout. 8368 */ 8369 8370 ipsec_in_t *ii; 8371 int ipsec_rc; 8372 8373 if (!mctl_present) { 8374 ASSERT(first_mp == mp); 8375 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8376 NULL) { 8377 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8378 "allocation failure.\n")); 8379 BUMP_MIB(ill->ill_ip_mib, 8380 ipIfStatsInDiscards); 8381 freemsg(mp); 8382 return; 8383 } 8384 /* 8385 * Store the ill_index so that when we come back 8386 * from IPSEC we ride on the same queue. 8387 */ 8388 ii = (ipsec_in_t *)first_mp->b_rptr; 8389 ii->ipsec_in_ill_index = 8390 ill->ill_phyint->phyint_ifindex; 8391 ii->ipsec_in_rill_index = 8392 ii->ipsec_in_ill_index; 8393 first_mp->b_cont = mp; 8394 /* 8395 * Cache hardware acceleration info. 
8396 */ 8397 if (hada_mp != NULL) { 8398 IPSECHW_DEBUG(IPSECHW_PKT, 8399 ("ip_rput_data_v6: " 8400 "caching data attr.\n")); 8401 ii->ipsec_in_accelerated = B_TRUE; 8402 ii->ipsec_in_da = hada_mp; 8403 hada_mp = NULL; 8404 } 8405 } else { 8406 ii = (ipsec_in_t *)first_mp->b_rptr; 8407 } 8408 8409 if (!ipsec_loaded()) { 8410 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8411 ire->ire_zoneid); 8412 return; 8413 } 8414 8415 /* select inbound SA and have IPsec process the pkt */ 8416 if (nexthdr == IPPROTO_ESP) { 8417 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8418 if (esph == NULL) 8419 return; 8420 ASSERT(ii->ipsec_in_esp_sa != NULL); 8421 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8422 NULL); 8423 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8424 first_mp, esph); 8425 } else { 8426 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8427 if (ah == NULL) 8428 return; 8429 ASSERT(ii->ipsec_in_ah_sa != NULL); 8430 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8431 NULL); 8432 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8433 first_mp, ah); 8434 } 8435 8436 switch (ipsec_rc) { 8437 case IPSEC_STATUS_SUCCESS: 8438 break; 8439 case IPSEC_STATUS_FAILED: 8440 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8441 /* FALLTHRU */ 8442 case IPSEC_STATUS_PENDING: 8443 return; 8444 } 8445 /* we're done with IPsec processing, send it up */ 8446 ip_fanout_proto_again(first_mp, ill, inill, ire); 8447 return; 8448 } 8449 case IPPROTO_NONE: 8450 /* All processing is done. Count as "delivered". 
*/ 8451 freemsg(hada_mp); 8452 freemsg(first_mp); 8453 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8454 return; 8455 } 8456 whereptr += used; 8457 ASSERT(remlen >= used); 8458 remlen -= used; 8459 } 8460 /* NOTREACHED */ 8461 8462 pkt_too_short: 8463 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8464 ip6_len, pkt_len, remlen)); 8465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8466 freemsg(hada_mp); 8467 freemsg(first_mp); 8468 return; 8469 udp_fanout: 8470 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8471 connp = NULL; 8472 } else { 8473 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8474 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8475 CONN_DEC_REF(connp); 8476 connp = NULL; 8477 } 8478 } 8479 8480 if (connp == NULL) { 8481 uint32_t ports; 8482 8483 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8484 UDP_PORTS_OFFSET); 8485 IP6_STAT(ip6_udp_slow_path); 8486 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8487 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8488 zoneid); 8489 return; 8490 } 8491 8492 if (CONN_UDP_FLOWCTLD(connp)) { 8493 freemsg(first_mp); 8494 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8495 CONN_DEC_REF(connp); 8496 return; 8497 } 8498 8499 /* Initiate IPPF processing */ 8500 if (IP6_IN_IPP(flags)) { 8501 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8502 if (mp == NULL) { 8503 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8504 CONN_DEC_REF(connp); 8505 return; 8506 } 8507 } 8508 8509 if (connp->conn_ipv6_recvpktinfo || 8510 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8511 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8512 if (mp == NULL) { 8513 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8514 CONN_DEC_REF(connp); 8515 return; 8516 } 8517 } 8518 8519 IP6_STAT(ip6_udp_fast_path); 8520 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8521 8522 /* Send it upstream */ 8523 CONN_UDP_RECV(connp, mp); 8524 8525 CONN_DEC_REF(connp); 8526 
freemsg(hada_mp); 8527 return; 8528 8529 hada_drop: 8530 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8531 /* IPsec kstats: bump counter here */ 8532 freemsg(hada_mp); 8533 freemsg(first_mp); 8534 } 8535 8536 /* 8537 * Reassemble fragment. 8538 * When it returns a completed message the first mblk will only contain 8539 * the headers prior to the fragment header. 8540 * 8541 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8542 * of the preceding header. This is needed to patch the previous header's 8543 * nexthdr field when reassembly completes. 8544 */ 8545 static mblk_t * 8546 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8547 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8548 uint32_t *cksum_val, uint16_t *cksum_flags) 8549 { 8550 ill_t *ill = (ill_t *)q->q_ptr; 8551 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8552 uint16_t offset; 8553 boolean_t more_frags; 8554 uint8_t nexthdr = fraghdr->ip6f_nxt; 8555 in6_addr_t *v6dst_ptr; 8556 in6_addr_t *v6src_ptr; 8557 uint_t end; 8558 uint_t hdr_length; 8559 size_t count; 8560 ipf_t *ipf; 8561 ipf_t **ipfp; 8562 ipfb_t *ipfb; 8563 mblk_t *mp1; 8564 uint8_t ecn_info = 0; 8565 size_t msg_len; 8566 mblk_t *tail_mp; 8567 mblk_t *t_mp; 8568 boolean_t pruned = B_FALSE; 8569 uint32_t sum_val; 8570 uint16_t sum_flags; 8571 8572 8573 if (cksum_val != NULL) 8574 *cksum_val = 0; 8575 if (cksum_flags != NULL) 8576 *cksum_flags = 0; 8577 8578 /* 8579 * We utilize hardware computed checksum info only for UDP since 8580 * IP fragmentation is a normal occurence for the protocol. In 8581 * addition, checksum offload support for IP fragments carrying 8582 * UDP payload is commonly implemented across network adapters. 
8583 */ 8584 ASSERT(ill != NULL); 8585 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8586 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8587 mblk_t *mp1 = mp->b_cont; 8588 int32_t len; 8589 8590 /* Record checksum information from the packet */ 8591 sum_val = (uint32_t)DB_CKSUM16(mp); 8592 sum_flags = DB_CKSUMFLAGS(mp); 8593 8594 /* fragmented payload offset from beginning of mblk */ 8595 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8596 8597 if ((sum_flags & HCK_PARTIALCKSUM) && 8598 (mp1 == NULL || mp1->b_cont == NULL) && 8599 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8600 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8601 uint32_t adj; 8602 /* 8603 * Partial checksum has been calculated by hardware 8604 * and attached to the packet; in addition, any 8605 * prepended extraneous data is even byte aligned. 8606 * If any such data exists, we adjust the checksum; 8607 * this would also handle any postpended data. 8608 */ 8609 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8610 mp, mp1, len, adj); 8611 8612 /* One's complement subtract extraneous checksum */ 8613 if (adj >= sum_val) 8614 sum_val = ~(adj - sum_val) & 0xFFFF; 8615 else 8616 sum_val -= adj; 8617 } 8618 } else { 8619 sum_val = 0; 8620 sum_flags = 0; 8621 } 8622 8623 /* Clear hardware checksumming flag */ 8624 DB_CKSUMFLAGS(mp) = 0; 8625 8626 /* 8627 * Note: Fragment offset in header is in 8-octet units. 8628 * Clearing least significant 3 bits not only extracts 8629 * it but also gets it in units of octets. 8630 */ 8631 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8632 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8633 8634 /* 8635 * Is the more frags flag on and the payload length not a multiple 8636 * of eight? 
8637 */ 8638 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8639 zoneid_t zoneid; 8640 8641 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8642 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8643 if (zoneid == ALL_ZONES) { 8644 freemsg(mp); 8645 return (NULL); 8646 } 8647 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8648 (uint32_t)((char *)&ip6h->ip6_plen - 8649 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8650 return (NULL); 8651 } 8652 8653 v6src_ptr = &ip6h->ip6_src; 8654 v6dst_ptr = &ip6h->ip6_dst; 8655 end = remlen; 8656 8657 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8658 end += offset; 8659 8660 /* 8661 * Would fragment cause reassembled packet to have a payload length 8662 * greater than IP_MAXPACKET - the max payload size? 8663 */ 8664 if (end > IP_MAXPACKET) { 8665 zoneid_t zoneid; 8666 8667 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8668 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8669 if (zoneid == ALL_ZONES) { 8670 freemsg(mp); 8671 return (NULL); 8672 } 8673 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8674 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8675 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8676 return (NULL); 8677 } 8678 8679 /* 8680 * This packet just has one fragment. Reassembly not 8681 * needed. 8682 */ 8683 if (!more_frags && offset == 0) { 8684 goto reass_done; 8685 } 8686 8687 /* 8688 * Drop the fragmented as early as possible, if 8689 * we don't have resource(s) to re-assemble. 8690 */ 8691 if (ip_reass_queue_bytes == 0) { 8692 freemsg(mp); 8693 return (NULL); 8694 } 8695 8696 /* Record the ECN field info. */ 8697 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8698 /* 8699 * If this is not the first fragment, dump the unfragmentable 8700 * portion of the packet. 8701 */ 8702 if (offset) 8703 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8704 8705 /* 8706 * Fragmentation reassembly. 
Each ILL has a hash table for 8707 * queueing packets undergoing reassembly for all IPIFs 8708 * associated with the ILL. The hash is based on the packet 8709 * IP ident field. The ILL frag hash table was allocated 8710 * as a timer block at the time the ILL was created. Whenever 8711 * there is anything on the reassembly queue, the timer will 8712 * be running. 8713 */ 8714 msg_len = MBLKSIZE(mp); 8715 tail_mp = mp; 8716 while (tail_mp->b_cont != NULL) { 8717 tail_mp = tail_mp->b_cont; 8718 msg_len += MBLKSIZE(tail_mp); 8719 } 8720 /* 8721 * If the reassembly list for this ILL will get too big 8722 * prune it. 8723 */ 8724 8725 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8726 ip_reass_queue_bytes) { 8727 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8728 : (ip_reass_queue_bytes - msg_len)); 8729 pruned = B_TRUE; 8730 } 8731 8732 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8733 mutex_enter(&ipfb->ipfb_lock); 8734 8735 ipfp = &ipfb->ipfb_ipf; 8736 /* Try to find an existing fragment queue for this packet. */ 8737 for (;;) { 8738 ipf = ipfp[0]; 8739 if (ipf) { 8740 /* 8741 * It has to match on ident, source address, and 8742 * dest address. 8743 */ 8744 if (ipf->ipf_ident == ident && 8745 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8746 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8747 8748 /* 8749 * If we have received too many 8750 * duplicate fragments for this packet 8751 * free it. 8752 */ 8753 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8754 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8755 freemsg(mp); 8756 mutex_exit(&ipfb->ipfb_lock); 8757 return (NULL); 8758 } 8759 8760 break; 8761 } 8762 ipfp = &ipf->ipf_hash_next; 8763 continue; 8764 } 8765 8766 8767 /* 8768 * If we pruned the list, do we want to store this new 8769 * fragment?. We apply an optimization here based on the 8770 * fact that most fragments will be received in order. 
8771 * So if the offset of this incoming fragment is zero, 8772 * it is the first fragment of a new packet. We will 8773 * keep it. Otherwise drop the fragment, as we have 8774 * probably pruned the packet already (since the 8775 * packet cannot be found). 8776 */ 8777 8778 if (pruned && offset != 0) { 8779 mutex_exit(&ipfb->ipfb_lock); 8780 freemsg(mp); 8781 return (NULL); 8782 } 8783 8784 /* New guy. Allocate a frag message. */ 8785 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8786 if (!mp1) { 8787 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8788 freemsg(mp); 8789 partial_reass_done: 8790 mutex_exit(&ipfb->ipfb_lock); 8791 return (NULL); 8792 } 8793 8794 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8795 /* 8796 * Too many fragmented packets in this hash bucket. 8797 * Free the oldest. 8798 */ 8799 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8800 } 8801 8802 mp1->b_cont = mp; 8803 8804 /* Initialize the fragment header. */ 8805 ipf = (ipf_t *)mp1->b_rptr; 8806 ipf->ipf_mp = mp1; 8807 ipf->ipf_ptphn = ipfp; 8808 ipfp[0] = ipf; 8809 ipf->ipf_hash_next = NULL; 8810 ipf->ipf_ident = ident; 8811 ipf->ipf_v6src = *v6src_ptr; 8812 ipf->ipf_v6dst = *v6dst_ptr; 8813 /* Record reassembly start time. 
*/ 8814 ipf->ipf_timestamp = gethrestime_sec(); 8815 /* Record ipf generation and account for frag header */ 8816 ipf->ipf_gen = ill->ill_ipf_gen++; 8817 ipf->ipf_count = MBLKSIZE(mp1); 8818 ipf->ipf_protocol = nexthdr; 8819 ipf->ipf_nf_hdr_len = 0; 8820 ipf->ipf_prev_nexthdr_offset = 0; 8821 ipf->ipf_last_frag_seen = B_FALSE; 8822 ipf->ipf_ecn = ecn_info; 8823 ipf->ipf_num_dups = 0; 8824 ipfb->ipfb_frag_pkts++; 8825 ipf->ipf_checksum = 0; 8826 ipf->ipf_checksum_flags = 0; 8827 8828 /* Store checksum value in fragment header */ 8829 if (sum_flags != 0) { 8830 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8831 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8832 ipf->ipf_checksum = sum_val; 8833 ipf->ipf_checksum_flags = sum_flags; 8834 } 8835 8836 /* 8837 * We handle reassembly two ways. In the easy case, 8838 * where all the fragments show up in order, we do 8839 * minimal bookkeeping, and just clip new pieces on 8840 * the end. If we ever see a hole, then we go off 8841 * to ip_reassemble which has to mark the pieces and 8842 * keep track of the number of holes, etc. Obviously, 8843 * the point of having both mechanisms is so we can 8844 * handle the easy case as efficiently as possible. 8845 */ 8846 if (offset == 0) { 8847 /* Easy case, in-order reassembly so far. */ 8848 /* Update the byte count */ 8849 ipf->ipf_count += msg_len; 8850 ipf->ipf_tail_mp = tail_mp; 8851 /* 8852 * Keep track of next expected offset in 8853 * ipf_end. 8854 */ 8855 ipf->ipf_end = end; 8856 ipf->ipf_nf_hdr_len = hdr_length; 8857 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8858 } else { 8859 /* Hard case, hole at the beginning. */ 8860 ipf->ipf_tail_mp = NULL; 8861 /* 8862 * ipf_end == 0 means that we have given up 8863 * on easy reassembly. 8864 */ 8865 ipf->ipf_end = 0; 8866 8867 /* Forget checksum offload from now on */ 8868 ipf->ipf_checksum_flags = 0; 8869 8870 /* 8871 * ipf_hole_cnt is set by ip_reassemble. 8872 * ipf_count is updated by ip_reassemble. 
8873 * No need to check for return value here 8874 * as we don't expect reassembly to complete or 8875 * fail for the first fragment itself. 8876 */ 8877 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8878 msg_len); 8879 } 8880 /* Update per ipfb and ill byte counts */ 8881 ipfb->ipfb_count += ipf->ipf_count; 8882 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8883 ill->ill_frag_count += ipf->ipf_count; 8884 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8885 /* If the frag timer wasn't already going, start it. */ 8886 mutex_enter(&ill->ill_lock); 8887 ill_frag_timer_start(ill); 8888 mutex_exit(&ill->ill_lock); 8889 goto partial_reass_done; 8890 } 8891 8892 /* 8893 * If the packet's flag has changed (it could be coming up 8894 * from an interface different than the previous, therefore 8895 * possibly different checksum capability), then forget about 8896 * any stored checksum states. Otherwise add the value to 8897 * the existing one stored in the fragment header. 8898 */ 8899 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8900 sum_val += ipf->ipf_checksum; 8901 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8902 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8903 ipf->ipf_checksum = sum_val; 8904 } else if (ipf->ipf_checksum_flags != 0) { 8905 /* Forget checksum offload from now on */ 8906 ipf->ipf_checksum_flags = 0; 8907 } 8908 8909 /* 8910 * We have a new piece of a datagram which is already being 8911 * reassembled. Update the ECN info if all IP fragments 8912 * are ECN capable. If there is one which is not, clear 8913 * all the info. If there is at least one which has CE 8914 * code point, IP needs to report that up to transport. 
8915 */ 8916 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8917 if (ecn_info == IPH_ECN_CE) 8918 ipf->ipf_ecn = IPH_ECN_CE; 8919 } else { 8920 ipf->ipf_ecn = IPH_ECN_NECT; 8921 } 8922 8923 if (offset && ipf->ipf_end == offset) { 8924 /* The new fragment fits at the end */ 8925 ipf->ipf_tail_mp->b_cont = mp; 8926 /* Update the byte count */ 8927 ipf->ipf_count += msg_len; 8928 /* Update per ipfb and ill byte counts */ 8929 ipfb->ipfb_count += msg_len; 8930 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8931 ill->ill_frag_count += msg_len; 8932 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8933 if (more_frags) { 8934 /* More to come. */ 8935 ipf->ipf_end = end; 8936 ipf->ipf_tail_mp = tail_mp; 8937 goto partial_reass_done; 8938 } 8939 } else { 8940 /* 8941 * Go do the hard cases. 8942 * Call ip_reassemble(). 8943 */ 8944 int ret; 8945 8946 if (offset == 0) { 8947 if (ipf->ipf_prev_nexthdr_offset == 0) { 8948 ipf->ipf_nf_hdr_len = hdr_length; 8949 ipf->ipf_prev_nexthdr_offset = 8950 *prev_nexthdr_offset; 8951 } 8952 } 8953 /* Save current byte count */ 8954 count = ipf->ipf_count; 8955 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8956 8957 /* Count of bytes added and subtracted (freeb()ed) */ 8958 count = ipf->ipf_count - count; 8959 if (count) { 8960 /* Update per ipfb and ill byte counts */ 8961 ipfb->ipfb_count += count; 8962 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8963 ill->ill_frag_count += count; 8964 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8965 } 8966 if (ret == IP_REASS_PARTIAL) { 8967 goto partial_reass_done; 8968 } else if (ret == IP_REASS_FAILED) { 8969 /* Reassembly failed. 
Free up all resources */ 8970 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8971 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8972 IP_REASS_SET_START(t_mp, 0); 8973 IP_REASS_SET_END(t_mp, 0); 8974 } 8975 freemsg(mp); 8976 goto partial_reass_done; 8977 } 8978 8979 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8980 } 8981 /* 8982 * We have completed reassembly. Unhook the frag header from 8983 * the reassembly list. 8984 * 8985 * Grab the unfragmentable header length next header value out 8986 * of the first fragment 8987 */ 8988 ASSERT(ipf->ipf_nf_hdr_len != 0); 8989 hdr_length = ipf->ipf_nf_hdr_len; 8990 8991 /* 8992 * Before we free the frag header, record the ECN info 8993 * to report back to the transport. 8994 */ 8995 ecn_info = ipf->ipf_ecn; 8996 8997 /* 8998 * Store the nextheader field in the header preceding the fragment 8999 * header 9000 */ 9001 nexthdr = ipf->ipf_protocol; 9002 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 9003 ipfp = ipf->ipf_ptphn; 9004 9005 /* We need to supply these to caller */ 9006 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 9007 sum_val = ipf->ipf_checksum; 9008 else 9009 sum_val = 0; 9010 9011 mp1 = ipf->ipf_mp; 9012 count = ipf->ipf_count; 9013 ipf = ipf->ipf_hash_next; 9014 if (ipf) 9015 ipf->ipf_ptphn = ipfp; 9016 ipfp[0] = ipf; 9017 ill->ill_frag_count -= count; 9018 ASSERT(ipfb->ipfb_count >= count); 9019 ipfb->ipfb_count -= count; 9020 ipfb->ipfb_frag_pkts--; 9021 mutex_exit(&ipfb->ipfb_lock); 9022 /* Ditch the frag header. */ 9023 mp = mp1->b_cont; 9024 freeb(mp1); 9025 9026 /* 9027 * Make sure the packet is good by doing some sanity 9028 * check. If bad we can silentely drop the packet. 
9029 */ 9030 reass_done: 9031 if (hdr_length < sizeof (ip6_frag_t)) { 9032 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 9033 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 9034 freemsg(mp); 9035 return (NULL); 9036 } 9037 9038 /* 9039 * Remove the fragment header from the initial header by 9040 * splitting the mblk into the non-fragmentable header and 9041 * everthing after the fragment extension header. This has the 9042 * side effect of putting all the headers that need destination 9043 * processing into the b_cont block-- on return this fact is 9044 * used in order to avoid having to look at the extensions 9045 * already processed. 9046 * 9047 * Note that this code assumes that the unfragmentable portion 9048 * of the header is in the first mblk and increments 9049 * the read pointer past it. If this assumption is broken 9050 * this code fails badly. 9051 */ 9052 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9053 mblk_t *nmp; 9054 9055 if (!(nmp = dupb(mp))) { 9056 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 9057 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9058 freemsg(mp); 9059 return (NULL); 9060 } 9061 nmp->b_cont = mp->b_cont; 9062 mp->b_cont = nmp; 9063 nmp->b_rptr += hdr_length; 9064 } 9065 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9066 9067 ip6h = (ip6_t *)mp->b_rptr; 9068 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9069 9070 /* Restore original IP length in header. */ 9071 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9072 /* Record the ECN info. */ 9073 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9074 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9075 9076 /* Reassembly is successful; return checksum information if needed */ 9077 if (cksum_val != NULL) 9078 *cksum_val = sum_val; 9079 if (cksum_flags != NULL) 9080 *cksum_flags = sum_flags; 9081 9082 return (mp); 9083 } 9084 9085 /* 9086 * Walk through the options to see if there is a routing header. 
9087 * If present get the destination which is the last address of 9088 * the option. 9089 */ 9090 in6_addr_t 9091 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9092 { 9093 uint8_t nexthdr; 9094 uint8_t *whereptr; 9095 ip6_hbh_t *hbhhdr; 9096 ip6_dest_t *dsthdr; 9097 ip6_rthdr0_t *rthdr; 9098 ip6_frag_t *fraghdr; 9099 int ehdrlen; 9100 int left; 9101 in6_addr_t *ap, rv; 9102 9103 if (is_fragment != NULL) 9104 *is_fragment = B_FALSE; 9105 9106 rv = ip6h->ip6_dst; 9107 9108 nexthdr = ip6h->ip6_nxt; 9109 whereptr = (uint8_t *)&ip6h[1]; 9110 for (;;) { 9111 9112 ASSERT(nexthdr != IPPROTO_RAW); 9113 switch (nexthdr) { 9114 case IPPROTO_HOPOPTS: 9115 hbhhdr = (ip6_hbh_t *)whereptr; 9116 nexthdr = hbhhdr->ip6h_nxt; 9117 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9118 break; 9119 case IPPROTO_DSTOPTS: 9120 dsthdr = (ip6_dest_t *)whereptr; 9121 nexthdr = dsthdr->ip6d_nxt; 9122 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9123 break; 9124 case IPPROTO_ROUTING: 9125 rthdr = (ip6_rthdr0_t *)whereptr; 9126 nexthdr = rthdr->ip6r0_nxt; 9127 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9128 9129 left = rthdr->ip6r0_segleft; 9130 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9131 rv = *(ap + left - 1); 9132 /* 9133 * If the caller doesn't care whether the packet 9134 * is a fragment or not, we can stop here since 9135 * we have our destination. 9136 */ 9137 if (is_fragment == NULL) 9138 goto done; 9139 break; 9140 case IPPROTO_FRAGMENT: 9141 fraghdr = (ip6_frag_t *)whereptr; 9142 nexthdr = fraghdr->ip6f_nxt; 9143 ehdrlen = sizeof (ip6_frag_t); 9144 if (is_fragment != NULL) 9145 *is_fragment = B_TRUE; 9146 goto done; 9147 default : 9148 goto done; 9149 } 9150 whereptr += ehdrlen; 9151 } 9152 9153 done: 9154 return (rv); 9155 } 9156 9157 /* 9158 * ip_source_routed_v6: 9159 * This function is called by redirect code in ip_rput_data_v6 to 9160 * know whether this packet is source routed through this node i.e 9161 * whether this node (router) is part of the journey. 
This 9162 * function is called under two cases : 9163 * 9164 * case 1 : Routing header was processed by this node and 9165 * ip_process_rthdr replaced ip6_dst with the next hop 9166 * and we are forwarding the packet to the next hop. 9167 * 9168 * case 2 : Routing header was not processed by this node and we 9169 * are just forwarding the packet. 9170 * 9171 * For case (1) we don't want to send redirects. For case(2) we 9172 * want to send redirects. 9173 */ 9174 static boolean_t 9175 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9176 { 9177 uint8_t nexthdr; 9178 in6_addr_t *addrptr; 9179 ip6_rthdr0_t *rthdr; 9180 uint8_t numaddr; 9181 ip6_hbh_t *hbhhdr; 9182 uint_t ehdrlen; 9183 uint8_t *byteptr; 9184 9185 ip2dbg(("ip_source_routed_v6\n")); 9186 nexthdr = ip6h->ip6_nxt; 9187 ehdrlen = IPV6_HDR_LEN; 9188 9189 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9190 while (nexthdr == IPPROTO_HOPOPTS || 9191 nexthdr == IPPROTO_DSTOPTS) { 9192 byteptr = (uint8_t *)ip6h + ehdrlen; 9193 /* 9194 * Check if we have already processed 9195 * packets or we are just a forwarding 9196 * router which only pulled up msgs up 9197 * to IPV6HDR and one HBH ext header 9198 */ 9199 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9200 ip2dbg(("ip_source_routed_v6: Extension" 9201 " headers not processed\n")); 9202 return (B_FALSE); 9203 } 9204 hbhhdr = (ip6_hbh_t *)byteptr; 9205 nexthdr = hbhhdr->ip6h_nxt; 9206 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9207 } 9208 switch (nexthdr) { 9209 case IPPROTO_ROUTING: 9210 byteptr = (uint8_t *)ip6h + ehdrlen; 9211 /* 9212 * If for some reason, we haven't pulled up 9213 * the routing hdr data mblk, then we must 9214 * not have processed it at all. So for sure 9215 * we are not part of the source routed journey. 
9216 */ 9217 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9218 ip2dbg(("ip_source_routed_v6: Routing" 9219 " header not processed\n")); 9220 return (B_FALSE); 9221 } 9222 rthdr = (ip6_rthdr0_t *)byteptr; 9223 /* 9224 * Either we are an intermediate router or the 9225 * last hop before destination and we have 9226 * already processed the routing header. 9227 * If segment_left is greater than or equal to zero, 9228 * then we must be the (numaddr - segleft) entry 9229 * of the routing header. Although ip6r0_segleft 9230 * is a unit8_t variable, we still check for zero 9231 * or greater value, if in case the data type 9232 * is changed someday in future. 9233 */ 9234 if (rthdr->ip6r0_segleft > 0 || 9235 rthdr->ip6r0_segleft == 0) { 9236 ire_t *ire = NULL; 9237 9238 numaddr = rthdr->ip6r0_len / 2; 9239 addrptr = (in6_addr_t *)((char *)rthdr + 9240 sizeof (*rthdr)); 9241 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9242 if (addrptr != NULL) { 9243 ire = ire_ctable_lookup_v6(addrptr, NULL, 9244 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9245 MATCH_IRE_TYPE); 9246 if (ire != NULL) { 9247 ire_refrele(ire); 9248 return (B_TRUE); 9249 } 9250 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9251 } 9252 } 9253 /* FALLTHRU */ 9254 default: 9255 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9256 return (B_FALSE); 9257 } 9258 } 9259 9260 /* 9261 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9262 * Assumes that the following set of headers appear in the first 9263 * mblk: 9264 * ip6i_t (if present) CAN also appear as a separate mblk. 9265 * ip6_t 9266 * Any extension headers 9267 * TCP/UDP/SCTP header (if present) 9268 * The routine can handle an ICMPv6 header that is not in the first mblk. 9269 * 9270 * The order to determine the outgoing interface is as follows: 9271 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9272 * 2. If conn_nofailover_ill is set then use that ill. 9273 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9274 * 4. If q is an ill queue and (link local or multicast destination) then 9275 * use that ill. 9276 * 5. If IPV6_BOUND_IF has been set use that ill. 9277 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9278 * look for the best IRE match for the unspecified group to determine 9279 * the ill. 9280 * 7. For unicast: Just do an IRE lookup for the best match. 9281 * 9282 * arg2 is always a queue_t *. 9283 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9284 * the zoneid. 9285 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9286 */ 9287 void 9288 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9289 { 9290 conn_t *connp = NULL; 9291 queue_t *q = (queue_t *)arg2; 9292 ire_t *ire = NULL; 9293 ire_t *sctp_ire = NULL; 9294 ip6_t *ip6h; 9295 in6_addr_t *v6dstp; 9296 ill_t *ill = NULL; 9297 ipif_t *ipif; 9298 ip6i_t *ip6i; 9299 int cksum_request; /* -1 => normal. */ 9300 /* 1 => Skip TCP/UDP/SCTP checksum */ 9301 /* Otherwise contains insert offset for checksum */ 9302 int unspec_src; 9303 boolean_t do_outrequests; /* Increment OutRequests? */ 9304 mib2_ipIfStatsEntry_t *mibptr; 9305 int match_flags = MATCH_IRE_ILL_GROUP; 9306 boolean_t attach_if = B_FALSE; 9307 mblk_t *first_mp; 9308 boolean_t mctl_present; 9309 ipsec_out_t *io; 9310 boolean_t drop_if_delayed = B_FALSE; 9311 boolean_t multirt_need_resolve = B_FALSE; 9312 mblk_t *copy_mp = NULL; 9313 int err; 9314 int ip6i_flags = 0; 9315 zoneid_t zoneid; 9316 ill_t *saved_ill = NULL; 9317 boolean_t conn_lock_held; 9318 boolean_t need_decref = B_FALSE; 9319 9320 /* 9321 * Highest bit in version field is Reachability Confirmation bit 9322 * used by NUD in ip_xmit_v6(). 
9323 */ 9324 #ifdef _BIG_ENDIAN 9325 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9326 #else 9327 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9328 #endif 9329 9330 /* 9331 * M_CTL comes from 6 places 9332 * 9333 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9334 * both V4 and V6 datagrams. 9335 * 9336 * 2) AH/ESP sends down M_CTL after doing their job with both 9337 * V4 and V6 datagrams. 9338 * 9339 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9340 * attached. 9341 * 9342 * 4) Notifications from an external resolver (for XRESOLV ifs) 9343 * 9344 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9345 * IPsec hardware acceleration support. 9346 * 9347 * 6) TUN_HELLO. 9348 * 9349 * We need to handle (1)'s IPv6 case and (3) here. For the 9350 * IPv4 case in (1), and (2), IPSEC processing has already 9351 * started. The code in ip_wput() already knows how to handle 9352 * continuing IPSEC processing (for IPv4 and IPv6). All other 9353 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9354 * for handling. 9355 */ 9356 first_mp = mp; 9357 mctl_present = B_FALSE; 9358 io = NULL; 9359 9360 /* Multidata transmit? */ 9361 if (DB_TYPE(mp) == M_MULTIDATA) { 9362 /* 9363 * We should never get here, since all Multidata messages 9364 * originating from tcp should have been directed over to 9365 * tcp_multisend() in the first place. 9366 */ 9367 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 9368 freemsg(mp); 9369 return; 9370 } else if (DB_TYPE(mp) == M_CTL) { 9371 uint32_t mctltype = 0; 9372 uint32_t mlen = MBLKL(first_mp); 9373 9374 mp = mp->b_cont; 9375 mctl_present = B_TRUE; 9376 io = (ipsec_out_t *)first_mp->b_rptr; 9377 9378 /* 9379 * Validate this M_CTL message. The only three types of 9380 * M_CTL messages we expect to see in this code path are 9381 * ipsec_out_t or ipsec_in_t structures (allocated as 9382 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9383 * The ipsec_out_type and ipsec_in_type overlap in the two 9384 * data structures, and they are either set to IPSEC_OUT 9385 * or IPSEC_IN depending on which data structure it is. 9386 * ipsec_ctl_t is an IPSEC_CTL. 9387 * 9388 * All other M_CTL messages are sent to ip_wput_nondata() 9389 * for handling. 9390 */ 9391 if (mlen >= sizeof (io->ipsec_out_type)) 9392 mctltype = io->ipsec_out_type; 9393 9394 if ((mlen == sizeof (ipsec_ctl_t)) && 9395 (mctltype == IPSEC_CTL)) { 9396 ip_output(arg, first_mp, arg2, caller); 9397 return; 9398 } 9399 9400 if ((mlen < sizeof (ipsec_info_t)) || 9401 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9402 mp == NULL) { 9403 ip_wput_nondata(NULL, q, first_mp, NULL); 9404 return; 9405 } 9406 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9407 if (q->q_next == NULL) { 9408 ip6h = (ip6_t *)mp->b_rptr; 9409 /* 9410 * For a freshly-generated TCP dgram that needs IPV6 9411 * processing, don't call ip_wput immediately. We can 9412 * tell this by the ipsec_out_proc_begin. In-progress 9413 * IPSEC_OUT messages have proc_begin set to TRUE, 9414 * and we want to send all IPSEC_IN messages to 9415 * ip_wput() for IPsec processing or finishing. 9416 */ 9417 if (mctltype == IPSEC_IN || 9418 IPVER(ip6h) != IPV6_VERSION || 9419 io->ipsec_out_proc_begin) { 9420 mibptr = &ip6_mib; 9421 goto notv6; 9422 } 9423 } 9424 } else if (DB_TYPE(mp) != M_DATA) { 9425 ip_wput_nondata(NULL, q, mp, NULL); 9426 return; 9427 } 9428 9429 ip6h = (ip6_t *)mp->b_rptr; 9430 9431 if (IPVER(ip6h) != IPV6_VERSION) { 9432 mibptr = &ip6_mib; 9433 goto notv6; 9434 } 9435 9436 if (q->q_next != NULL) { 9437 ill = (ill_t *)q->q_ptr; 9438 /* 9439 * We don't know if this ill will be used for IPv6 9440 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9441 * ipif_set_values() sets the ill_isv6 flag to true if 9442 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9443 * just drop the packet. 
9444 */ 9445 if (!ill->ill_isv6) { 9446 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9447 "ILLF_IPV6 was set\n")); 9448 freemsg(first_mp); 9449 return; 9450 } 9451 /* For uniformity do a refhold */ 9452 mutex_enter(&ill->ill_lock); 9453 if (!ILL_CAN_LOOKUP(ill)) { 9454 mutex_exit(&ill->ill_lock); 9455 freemsg(first_mp); 9456 return; 9457 } 9458 ill_refhold_locked(ill); 9459 mutex_exit(&ill->ill_lock); 9460 mibptr = ill->ill_ip_mib; 9461 9462 ASSERT(mibptr != NULL); 9463 unspec_src = 0; 9464 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9465 do_outrequests = B_FALSE; 9466 zoneid = (zoneid_t)(uintptr_t)arg; 9467 } else { 9468 connp = (conn_t *)arg; 9469 ASSERT(connp != NULL); 9470 zoneid = connp->conn_zoneid; 9471 9472 /* is queue flow controlled? */ 9473 if ((q->q_first || connp->conn_draining) && 9474 (caller == IP_WPUT)) { 9475 /* 9476 * 1) TCP sends down M_CTL for detached connections. 9477 * 2) AH/ESP sends down M_CTL. 9478 * 9479 * We don't flow control either of the above. Only 9480 * UDP and others are flow controlled for which we 9481 * can't have a M_CTL. 9482 */ 9483 ASSERT(first_mp == mp); 9484 (void) putq(q, mp); 9485 return; 9486 } 9487 mibptr = &ip6_mib; 9488 unspec_src = connp->conn_unspec_src; 9489 do_outrequests = B_TRUE; 9490 if (mp->b_flag & MSGHASREF) { 9491 mp->b_flag &= ~MSGHASREF; 9492 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9493 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9494 need_decref = B_TRUE; 9495 } 9496 9497 /* 9498 * If there is a policy, try to attach an ipsec_out in 9499 * the front. At the end, first_mp either points to a 9500 * M_DATA message or IPSEC_OUT message linked to a 9501 * M_DATA message. We have to do it now as we might 9502 * lose the "conn" if we go through ip_newroute. 9503 */ 9504 if (!mctl_present && 9505 (connp->conn_out_enforce_policy || 9506 connp->conn_latch != NULL)) { 9507 ASSERT(first_mp == mp); 9508 /* XXX Any better way to get the protocol fast ? 
*/ 9509 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9510 connp->conn_ulp)) == NULL)) { 9511 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9512 if (need_decref) 9513 CONN_DEC_REF(connp); 9514 return; 9515 } else { 9516 ASSERT(mp->b_datap->db_type == M_CTL); 9517 first_mp = mp; 9518 mp = mp->b_cont; 9519 mctl_present = B_TRUE; 9520 io = (ipsec_out_t *)first_mp->b_rptr; 9521 } 9522 } 9523 } 9524 9525 /* check for alignment and full IPv6 header */ 9526 if (!OK_32PTR((uchar_t *)ip6h) || 9527 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9528 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9529 if (do_outrequests) 9530 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9531 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9532 freemsg(first_mp); 9533 if (ill != NULL) 9534 ill_refrele(ill); 9535 if (need_decref) 9536 CONN_DEC_REF(connp); 9537 return; 9538 } 9539 v6dstp = &ip6h->ip6_dst; 9540 cksum_request = -1; 9541 ip6i = NULL; 9542 9543 /* 9544 * Once neighbor discovery has completed, ndp_process() will provide 9545 * locally generated packets for which processing can be reattempted. 9546 * In these cases, connp is NULL and the original zone is part of a 9547 * prepended ipsec_out_t. 9548 */ 9549 if (io != NULL) { 9550 /* 9551 * When coming from icmp_input_v6, the zoneid might not match 9552 * for the loopback case, because inside icmp_input_v6 the 9553 * queue_t is a conn queue from the sending side. 9554 */ 9555 zoneid = io->ipsec_out_zoneid; 9556 ASSERT(zoneid != ALL_ZONES); 9557 } 9558 9559 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9560 /* 9561 * This is an ip6i_t header followed by an ip6_hdr. 9562 * Check which fields are set. 9563 * 9564 * When the packet comes from a transport we should have 9565 * all needed headers in the first mblk. However, when 9566 * going through ip_newroute*_v6 the ip6i might be in 9567 * a separate mblk when we return here. In that case 9568 * we pullup everything to ensure that extension and transport 9569 * headers "stay" in the first mblk. 
9570 */ 9571 ip6i = (ip6i_t *)ip6h; 9572 ip6i_flags = ip6i->ip6i_flags; 9573 9574 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9575 ((mp->b_wptr - (uchar_t *)ip6i) >= 9576 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9577 9578 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9579 if (!pullupmsg(mp, -1)) { 9580 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9581 if (do_outrequests) { 9582 BUMP_MIB(mibptr, 9583 ipIfStatsHCOutRequests); 9584 } 9585 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9586 freemsg(first_mp); 9587 if (ill != NULL) 9588 ill_refrele(ill); 9589 if (need_decref) 9590 CONN_DEC_REF(connp); 9591 return; 9592 } 9593 ip6h = (ip6_t *)mp->b_rptr; 9594 v6dstp = &ip6h->ip6_dst; 9595 ip6i = (ip6i_t *)ip6h; 9596 } 9597 ip6h = (ip6_t *)&ip6i[1]; 9598 9599 /* 9600 * Advance rptr past the ip6i_t to get ready for 9601 * transmitting the packet. However, if the packet gets 9602 * passed to ip_newroute*_v6 then rptr is moved back so 9603 * that the ip6i_t header can be inspected when the 9604 * packet comes back here after passing through 9605 * ire_add_then_send. 9606 */ 9607 mp->b_rptr = (uchar_t *)ip6h; 9608 9609 /* 9610 * IP6I_ATTACH_IF is set in this function when we had a 9611 * conn and it was either bound to the IPFF_NOFAILOVER address 9612 * or IPV6_BOUND_PIF was set. These options override other 9613 * options that set the ifindex. We come here with 9614 * IP6I_ATTACH_IF set when we can't find the ire and 9615 * ip_newroute_v6 is feeding the packet for second time. 
9616 */ 9617 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9618 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9619 ASSERT(ip6i->ip6i_ifindex != 0); 9620 if (ill != NULL) 9621 ill_refrele(ill); 9622 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9623 NULL, NULL, NULL, NULL); 9624 if (ill == NULL) { 9625 if (do_outrequests) { 9626 BUMP_MIB(mibptr, 9627 ipIfStatsHCOutRequests); 9628 } 9629 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9630 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9631 ip6i->ip6i_ifindex)); 9632 if (need_decref) 9633 CONN_DEC_REF(connp); 9634 freemsg(first_mp); 9635 return; 9636 } 9637 mibptr = ill->ill_ip_mib; 9638 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9639 /* 9640 * Preserve the index so that when we return 9641 * from IPSEC processing, we know where to 9642 * send the packet. 9643 */ 9644 if (mctl_present) { 9645 ASSERT(io != NULL); 9646 io->ipsec_out_ill_index = 9647 ip6i->ip6i_ifindex; 9648 } 9649 } 9650 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9651 /* 9652 * This is a multipathing probe packet that has 9653 * been delayed in ND resolution. 
Drop the 9654 * packet for the reasons mentioned in 9655 * nce_queue_mp() 9656 */ 9657 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9658 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9659 freemsg(first_mp); 9660 ill_refrele(ill); 9661 if (need_decref) 9662 CONN_DEC_REF(connp); 9663 return; 9664 } 9665 } 9666 } 9667 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9668 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9669 9670 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9671 if (secpolicy_net_rawaccess(cr) != 0) { 9672 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9673 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9674 NULL, zoneid, NULL, 9675 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9676 if (ire == NULL) { 9677 if (do_outrequests) 9678 BUMP_MIB(mibptr, 9679 ipIfStatsHCOutRequests); 9680 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9681 ip1dbg(("ip_wput_v6: bad source " 9682 "addr\n")); 9683 freemsg(first_mp); 9684 if (ill != NULL) 9685 ill_refrele(ill); 9686 if (need_decref) 9687 CONN_DEC_REF(connp); 9688 return; 9689 } 9690 ire_refrele(ire); 9691 } 9692 /* No need to verify again when using ip_newroute */ 9693 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9694 } 9695 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9696 /* 9697 * Make sure they match since ip_newroute*_v6 etc might 9698 * (unknown to them) inspect ip6i_nexthop when 9699 * they think they access ip6_dst. 9700 */ 9701 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9702 } 9703 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9704 cksum_request = 1; 9705 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9706 cksum_request = ip6i->ip6i_checksum_off; 9707 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9708 unspec_src = 1; 9709 9710 if (do_outrequests && ill != NULL) { 9711 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9712 do_outrequests = B_FALSE; 9713 } 9714 /* 9715 * Store ip6i_t info that we need after we come back 9716 * from IPSEC processing. 
9717 */ 9718 if (mctl_present) { 9719 ASSERT(io != NULL); 9720 io->ipsec_out_unspec_src = unspec_src; 9721 } 9722 } 9723 if (connp != NULL && connp->conn_dontroute) 9724 ip6h->ip6_hops = 1; 9725 9726 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9727 goto ipv6multicast; 9728 9729 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9730 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9731 ill_t *conn_outgoing_pill; 9732 9733 conn_outgoing_pill = conn_get_held_ill(connp, 9734 &connp->conn_outgoing_pill, &err); 9735 if (err == ILL_LOOKUP_FAILED) { 9736 if (ill != NULL) 9737 ill_refrele(ill); 9738 if (need_decref) 9739 CONN_DEC_REF(connp); 9740 freemsg(first_mp); 9741 return; 9742 } 9743 if (conn_outgoing_pill != NULL) { 9744 if (ill != NULL) 9745 ill_refrele(ill); 9746 ill = conn_outgoing_pill; 9747 attach_if = B_TRUE; 9748 match_flags = MATCH_IRE_ILL; 9749 mibptr = ill->ill_ip_mib; 9750 9751 /* 9752 * Check if we need an ire that will not be 9753 * looked up by anybody else i.e. HIDDEN. 9754 */ 9755 if (ill_is_probeonly(ill)) 9756 match_flags |= MATCH_IRE_MARK_HIDDEN; 9757 goto send_from_ill; 9758 } 9759 } 9760 9761 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9762 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9763 ill_t *conn_nofailover_ill; 9764 9765 conn_nofailover_ill = conn_get_held_ill(connp, 9766 &connp->conn_nofailover_ill, &err); 9767 if (err == ILL_LOOKUP_FAILED) { 9768 if (ill != NULL) 9769 ill_refrele(ill); 9770 if (need_decref) 9771 CONN_DEC_REF(connp); 9772 freemsg(first_mp); 9773 return; 9774 } 9775 if (conn_nofailover_ill != NULL) { 9776 if (ill != NULL) 9777 ill_refrele(ill); 9778 ill = conn_nofailover_ill; 9779 attach_if = B_TRUE; 9780 /* 9781 * Assumes that ipc_nofailover_ill is used only for 9782 * multipathing probe packets. These packets are better 9783 * dropped, if they are delayed in ND resolution, for 9784 * the reasons described in nce_queue_mp(). 
9785 * IP6I_DROP_IFDELAYED will be set later on in this 9786 * function for this packet. 9787 */ 9788 drop_if_delayed = B_TRUE; 9789 match_flags = MATCH_IRE_ILL; 9790 mibptr = ill->ill_ip_mib; 9791 9792 /* 9793 * Check if we need an ire that will not be 9794 * looked up by anybody else i.e. HIDDEN. 9795 */ 9796 if (ill_is_probeonly(ill)) 9797 match_flags |= MATCH_IRE_MARK_HIDDEN; 9798 goto send_from_ill; 9799 } 9800 } 9801 9802 /* 9803 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9804 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9805 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9806 */ 9807 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9808 ASSERT(ip6i->ip6i_ifindex != 0); 9809 attach_if = B_TRUE; 9810 ASSERT(ill != NULL); 9811 match_flags = MATCH_IRE_ILL; 9812 9813 /* 9814 * Check if we need an ire that will not be 9815 * looked up by anybody else i.e. HIDDEN. 9816 */ 9817 if (ill_is_probeonly(ill)) 9818 match_flags |= MATCH_IRE_MARK_HIDDEN; 9819 goto send_from_ill; 9820 } 9821 9822 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9823 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9824 ASSERT(ill != NULL); 9825 goto send_from_ill; 9826 } 9827 9828 /* 9829 * 4. If q is an ill queue and (link local or multicast destination) 9830 * then use that ill. 9831 */ 9832 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9833 goto send_from_ill; 9834 } 9835 9836 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9837 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9838 ill_t *conn_outgoing_ill; 9839 9840 conn_outgoing_ill = conn_get_held_ill(connp, 9841 &connp->conn_outgoing_ill, &err); 9842 if (err == ILL_LOOKUP_FAILED) { 9843 if (ill != NULL) 9844 ill_refrele(ill); 9845 if (need_decref) 9846 CONN_DEC_REF(connp); 9847 freemsg(first_mp); 9848 return; 9849 } 9850 if (ill != NULL) 9851 ill_refrele(ill); 9852 ill = conn_outgoing_ill; 9853 mibptr = ill->ill_ip_mib; 9854 goto send_from_ill; 9855 } 9856 9857 /* 9858 * 6. For unicast: Just do an IRE lookup for the best match. 9859 * If we get here for a link-local address it is rather random 9860 * what interface we pick on a multihomed host. 9861 * *If* there is an IRE_CACHE (and the link-local address 9862 * isn't duplicated on multi links) this will find the IRE_CACHE. 9863 * Otherwise it will use one of the matching IRE_INTERFACE routes 9864 * for the link-local prefix. Hence, applications 9865 * *should* be encouraged to specify an outgoing interface when sending 9866 * to a link local address. 9867 */ 9868 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9869 !connp->conn_fully_bound)) { 9870 /* 9871 * We cache IRE_CACHEs to avoid lookups. We don't do 9872 * this for the tcp global queue and listen end point 9873 * as it does not really have a real destination to 9874 * talk to. 9875 */ 9876 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9877 } else { 9878 /* 9879 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9880 * grab a lock here to check for CONDEMNED as it is okay 9881 * to send a packet or two with the IRE_CACHE that is going 9882 * away. 9883 */ 9884 mutex_enter(&connp->conn_lock); 9885 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9886 if (ire != NULL && 9887 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9888 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9889 9890 IRE_REFHOLD(ire); 9891 mutex_exit(&connp->conn_lock); 9892 9893 } else { 9894 boolean_t cached = B_FALSE; 9895 9896 connp->conn_ire_cache = NULL; 9897 mutex_exit(&connp->conn_lock); 9898 /* Release the old ire */ 9899 if (ire != NULL && sctp_ire == NULL) 9900 IRE_REFRELE_NOTR(ire); 9901 9902 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9903 MBLK_GETLABEL(mp)); 9904 if (ire != NULL) { 9905 IRE_REFHOLD_NOTR(ire); 9906 9907 mutex_enter(&connp->conn_lock); 9908 if (!(connp->conn_state_flags & CONN_CLOSING) && 9909 (connp->conn_ire_cache == NULL)) { 9910 rw_enter(&ire->ire_bucket->irb_lock, 9911 RW_READER); 9912 if (!(ire->ire_marks & 9913 IRE_MARK_CONDEMNED)) { 9914 connp->conn_ire_cache = ire; 9915 cached = B_TRUE; 9916 } 9917 rw_exit(&ire->ire_bucket->irb_lock); 9918 } 9919 mutex_exit(&connp->conn_lock); 9920 9921 /* 9922 * We can continue to use the ire but since it 9923 * was not cached, we should drop the extra 9924 * reference. 9925 */ 9926 if (!cached) 9927 IRE_REFRELE_NOTR(ire); 9928 } 9929 } 9930 } 9931 9932 if (ire != NULL) { 9933 if (do_outrequests) { 9934 /* Handle IRE_LOCAL's that might appear here */ 9935 if (ire->ire_type == IRE_CACHE) { 9936 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9937 ill_ip_mib; 9938 } else { 9939 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9940 } 9941 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9942 } 9943 ASSERT(!attach_if); 9944 9945 /* 9946 * Check if the ire has the RTF_MULTIRT flag, inherited 9947 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9948 */ 9949 if (ire->ire_flags & RTF_MULTIRT) { 9950 /* 9951 * Force hop limit of multirouted packets if required. 9952 * The hop limit of such packets is bounded by the 9953 * ip_multirt_ttl ndd variable. 9954 * NDP packets must have a hop limit of 255; don't 9955 * change the hop limit in that case. 
9956 */ 9957 if ((ip_multirt_ttl > 0) && 9958 (ip6h->ip6_hops > ip_multirt_ttl) && 9959 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9960 if (ip_debug > 3) { 9961 ip2dbg(("ip_wput_v6: forcing multirt " 9962 "hop limit to %d (was %d) ", 9963 ip_multirt_ttl, ip6h->ip6_hops)); 9964 pr_addr_dbg("v6dst %s\n", AF_INET6, 9965 &ire->ire_addr_v6); 9966 } 9967 ip6h->ip6_hops = ip_multirt_ttl; 9968 } 9969 9970 /* 9971 * We look at this point if there are pending 9972 * unresolved routes. ire_multirt_need_resolve_v6() 9973 * checks in O(n) that all IRE_OFFSUBNET ire 9974 * entries for the packet's destination and 9975 * flagged RTF_MULTIRT are currently resolved. 9976 * If some remain unresolved, we do a copy 9977 * of the current message. It will be used 9978 * to initiate additional route resolutions. 9979 */ 9980 multirt_need_resolve = 9981 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9982 MBLK_GETLABEL(first_mp)); 9983 ip2dbg(("ip_wput_v6: ire %p, " 9984 "multirt_need_resolve %d, first_mp %p\n", 9985 (void *)ire, multirt_need_resolve, 9986 (void *)first_mp)); 9987 if (multirt_need_resolve) { 9988 copy_mp = copymsg(first_mp); 9989 if (copy_mp != NULL) { 9990 MULTIRT_DEBUG_TAG(copy_mp); 9991 } 9992 } 9993 } 9994 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9995 connp, caller, 0, ip6i_flags, zoneid); 9996 if (need_decref) { 9997 CONN_DEC_REF(connp); 9998 connp = NULL; 9999 } 10000 IRE_REFRELE(ire); 10001 10002 /* 10003 * Try to resolve another multiroute if 10004 * ire_multirt_need_resolve_v6() deemed it necessary. 10005 * copy_mp will be consumed (sent or freed) by 10006 * ip_newroute_v6(). 
10007 */ 10008 if (copy_mp != NULL) { 10009 if (mctl_present) { 10010 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10011 } else { 10012 ip6h = (ip6_t *)copy_mp->b_rptr; 10013 } 10014 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10015 &ip6h->ip6_src, NULL, zoneid); 10016 } 10017 if (ill != NULL) 10018 ill_refrele(ill); 10019 return; 10020 } 10021 10022 /* 10023 * No full IRE for this destination. Send it to 10024 * ip_newroute_v6 to see if anything else matches. 10025 * Mark this packet as having originated on this 10026 * machine. 10027 * Update rptr if there was an ip6i_t header. 10028 */ 10029 mp->b_prev = NULL; 10030 mp->b_next = NULL; 10031 if (ip6i != NULL) 10032 mp->b_rptr -= sizeof (ip6i_t); 10033 10034 if (unspec_src) { 10035 if (ip6i == NULL) { 10036 /* 10037 * Add ip6i_t header to carry unspec_src 10038 * until the packet comes back in ip_wput_v6. 10039 */ 10040 mp = ip_add_info_v6(mp, NULL, v6dstp); 10041 if (mp == NULL) { 10042 if (do_outrequests) 10043 BUMP_MIB(mibptr, 10044 ipIfStatsHCOutRequests); 10045 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10046 if (mctl_present) 10047 freeb(first_mp); 10048 if (ill != NULL) 10049 ill_refrele(ill); 10050 if (need_decref) 10051 CONN_DEC_REF(connp); 10052 return; 10053 } 10054 ip6i = (ip6i_t *)mp->b_rptr; 10055 10056 if (mctl_present) { 10057 ASSERT(first_mp != mp); 10058 first_mp->b_cont = mp; 10059 } else { 10060 first_mp = mp; 10061 } 10062 10063 if ((mp->b_wptr - (uchar_t *)ip6i) == 10064 sizeof (ip6i_t)) { 10065 /* 10066 * ndp_resolver called from ip_newroute_v6 10067 * expects pulled up message. 
10068 */ 10069 if (!pullupmsg(mp, -1)) { 10070 ip1dbg(("ip_wput_v6: pullupmsg" 10071 " failed\n")); 10072 if (do_outrequests) { 10073 BUMP_MIB(mibptr, 10074 ipIfStatsHCOutRequests); 10075 } 10076 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10077 freemsg(first_mp); 10078 if (ill != NULL) 10079 ill_refrele(ill); 10080 if (need_decref) 10081 CONN_DEC_REF(connp); 10082 return; 10083 } 10084 ip6i = (ip6i_t *)mp->b_rptr; 10085 } 10086 ip6h = (ip6_t *)&ip6i[1]; 10087 v6dstp = &ip6h->ip6_dst; 10088 } 10089 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10090 if (mctl_present) { 10091 ASSERT(io != NULL); 10092 io->ipsec_out_unspec_src = unspec_src; 10093 } 10094 } 10095 if (do_outrequests) 10096 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10097 if (need_decref) 10098 CONN_DEC_REF(connp); 10099 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 10100 if (ill != NULL) 10101 ill_refrele(ill); 10102 return; 10103 10104 10105 /* 10106 * Handle multicast packets with or without an conn. 10107 * Assumes that the transports set ip6_hops taking 10108 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10109 * into account. 10110 */ 10111 ipv6multicast: 10112 ip2dbg(("ip_wput_v6: multicast\n")); 10113 10114 /* 10115 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10116 * 2. If conn_nofailover_ill is set then use that ill. 10117 * 10118 * Hold the conn_lock till we refhold the ill of interest that is 10119 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10120 * while holding any locks, postpone the refrele until after the 10121 * conn_lock is dropped. 
10122 */ 10123 if (connp != NULL) { 10124 mutex_enter(&connp->conn_lock); 10125 conn_lock_held = B_TRUE; 10126 } else { 10127 conn_lock_held = B_FALSE; 10128 } 10129 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10130 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10131 if (err == ILL_LOOKUP_FAILED) { 10132 ip1dbg(("ip_output_v6: multicast" 10133 " conn_outgoing_pill no ipif\n")); 10134 multicast_discard: 10135 ASSERT(saved_ill == NULL); 10136 if (conn_lock_held) 10137 mutex_exit(&connp->conn_lock); 10138 if (ill != NULL) 10139 ill_refrele(ill); 10140 freemsg(first_mp); 10141 if (do_outrequests) 10142 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10143 if (need_decref) 10144 CONN_DEC_REF(connp); 10145 return; 10146 } 10147 saved_ill = ill; 10148 ill = connp->conn_outgoing_pill; 10149 attach_if = B_TRUE; 10150 match_flags = MATCH_IRE_ILL; 10151 mibptr = ill->ill_ip_mib; 10152 10153 /* 10154 * Check if we need an ire that will not be 10155 * looked up by anybody else i.e. HIDDEN. 10156 */ 10157 if (ill_is_probeonly(ill)) 10158 match_flags |= MATCH_IRE_MARK_HIDDEN; 10159 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10160 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10161 if (err == ILL_LOOKUP_FAILED) { 10162 ip1dbg(("ip_output_v6: multicast" 10163 " conn_nofailover_ill no ipif\n")); 10164 goto multicast_discard; 10165 } 10166 saved_ill = ill; 10167 ill = connp->conn_nofailover_ill; 10168 attach_if = B_TRUE; 10169 match_flags = MATCH_IRE_ILL; 10170 10171 /* 10172 * Check if we need an ire that will not be 10173 * looked up by anybody else i.e. HIDDEN. 10174 */ 10175 if (ill_is_probeonly(ill)) 10176 match_flags |= MATCH_IRE_MARK_HIDDEN; 10177 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10178 /* 10179 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10180 * we should have an ip6i_t with IP6I_ATTACH_IF if 10181 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10182 * used on this endpoint. 10183 */ 10184 ASSERT(ip6i->ip6i_ifindex != 0); 10185 attach_if = B_TRUE; 10186 ASSERT(ill != NULL); 10187 match_flags = MATCH_IRE_ILL; 10188 10189 /* 10190 * Check if we need an ire that will not be 10191 * looked up by anybody else i.e. HIDDEN. 10192 */ 10193 if (ill_is_probeonly(ill)) 10194 match_flags |= MATCH_IRE_MARK_HIDDEN; 10195 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10196 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10197 10198 ASSERT(ill != NULL); 10199 } else if (ill != NULL) { 10200 /* 10201 * 4. If q is an ill queue and (link local or multicast 10202 * destination) then use that ill. 10203 * We don't need the ipif initialization here. 10204 * This useless assert below is just to prevent lint from 10205 * reporting a null body if statement. 10206 */ 10207 ASSERT(ill != NULL); 10208 } else if (connp != NULL) { 10209 /* 10210 * 5. If IPV6_BOUND_IF has been set use that ill. 10211 * 10212 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10213 * Otherwise look for the best IRE match for the unspecified 10214 * group to determine the ill. 10215 * 10216 * conn_multicast_ill is used for only IPv6 packets. 10217 * conn_multicast_ipif is used for only IPv4 packets. 10218 * Thus a PF_INET6 socket send both IPv4 and IPv6 10219 * multicast packets using different IP*_MULTICAST_IF 10220 * interfaces. 
10221 */ 10222 if (connp->conn_outgoing_ill != NULL) { 10223 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10224 if (err == ILL_LOOKUP_FAILED) { 10225 ip1dbg(("ip_output_v6: multicast" 10226 " conn_outgoing_ill no ipif\n")); 10227 goto multicast_discard; 10228 } 10229 ill = connp->conn_outgoing_ill; 10230 } else if (connp->conn_multicast_ill != NULL) { 10231 err = ill_check_and_refhold(connp->conn_multicast_ill); 10232 if (err == ILL_LOOKUP_FAILED) { 10233 ip1dbg(("ip_output_v6: multicast" 10234 " conn_multicast_ill no ipif\n")); 10235 goto multicast_discard; 10236 } 10237 ill = connp->conn_multicast_ill; 10238 } else { 10239 mutex_exit(&connp->conn_lock); 10240 conn_lock_held = B_FALSE; 10241 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10242 if (ipif == NULL) { 10243 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10244 goto multicast_discard; 10245 } 10246 /* 10247 * We have a ref to this ipif, so we can safely 10248 * access ipif_ill. 10249 */ 10250 ill = ipif->ipif_ill; 10251 mutex_enter(&ill->ill_lock); 10252 if (!ILL_CAN_LOOKUP(ill)) { 10253 mutex_exit(&ill->ill_lock); 10254 ipif_refrele(ipif); 10255 ill = NULL; 10256 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10257 goto multicast_discard; 10258 } 10259 ill_refhold_locked(ill); 10260 mutex_exit(&ill->ill_lock); 10261 ipif_refrele(ipif); 10262 /* 10263 * Save binding until IPV6_MULTICAST_IF 10264 * changes it 10265 */ 10266 mutex_enter(&connp->conn_lock); 10267 connp->conn_multicast_ill = ill; 10268 connp->conn_orig_multicast_ifindex = 10269 ill->ill_phyint->phyint_ifindex; 10270 mutex_exit(&connp->conn_lock); 10271 } 10272 } 10273 if (conn_lock_held) 10274 mutex_exit(&connp->conn_lock); 10275 10276 if (saved_ill != NULL) 10277 ill_refrele(saved_ill); 10278 10279 ASSERT(ill != NULL); 10280 /* 10281 * For multicast loopback interfaces replace the multicast address 10282 * with a unicast address for the ire lookup. 
10283 */ 10284 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10285 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10286 10287 mibptr = ill->ill_ip_mib; 10288 if (do_outrequests) { 10289 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10290 do_outrequests = B_FALSE; 10291 } 10292 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10293 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10294 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10295 10296 /* 10297 * As we may lose the conn by the time we reach ip_wput_ire_v6 10298 * we copy conn_multicast_loop and conn_dontroute on to an 10299 * ipsec_out. In case if this datagram goes out secure, 10300 * we need the ill_index also. Copy that also into the 10301 * ipsec_out. 10302 */ 10303 if (mctl_present) { 10304 io = (ipsec_out_t *)first_mp->b_rptr; 10305 ASSERT(first_mp->b_datap->db_type == M_CTL); 10306 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10307 } else { 10308 ASSERT(mp == first_mp); 10309 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10310 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10311 freemsg(mp); 10312 if (ill != NULL) 10313 ill_refrele(ill); 10314 if (need_decref) 10315 CONN_DEC_REF(connp); 10316 return; 10317 } 10318 io = (ipsec_out_t *)first_mp->b_rptr; 10319 /* This is not a secure packet */ 10320 io->ipsec_out_secure = B_FALSE; 10321 io->ipsec_out_use_global_policy = B_TRUE; 10322 io->ipsec_out_zoneid = 10323 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10324 first_mp->b_cont = mp; 10325 mctl_present = B_TRUE; 10326 } 10327 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10328 io->ipsec_out_unspec_src = unspec_src; 10329 if (connp != NULL) 10330 io->ipsec_out_dontroute = connp->conn_dontroute; 10331 10332 send_from_ill: 10333 ASSERT(ill != NULL); 10334 ASSERT(mibptr == ill->ill_ip_mib); 10335 if (do_outrequests) { 10336 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10337 do_outrequests = B_FALSE; 10338 } 10339 10340 if (io != NULL) 10341 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10342 10343 /* 10344 * When a specific ill is specified (using IPV6_PKTINFO, 10345 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10346 * on routing entries (ftable and ctable) that have a matching 10347 * ire->ire_ipif->ipif_ill. Thus this can only be used 10348 * for destinations that are on-link for the specific ill 10349 * and that can appear on multiple links. Thus it is useful 10350 * for multicast destinations, link-local destinations, and 10351 * at some point perhaps for site-local destinations (if the 10352 * node sits at a site boundary). 10353 * We create the cache entries in the regular ctable since 10354 * it can not "confuse" things for other destinations. 10355 * table. 10356 * 10357 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10358 * It is used only when ire_cache_lookup is used above. 10359 */ 10360 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10361 zoneid, MBLK_GETLABEL(mp), match_flags); 10362 if (ire != NULL) { 10363 /* 10364 * Check if the ire has the RTF_MULTIRT flag, inherited 10365 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10366 */ 10367 if (ire->ire_flags & RTF_MULTIRT) { 10368 /* 10369 * Force hop limit of multirouted packets if required. 10370 * The hop limit of such packets is bounded by the 10371 * ip_multirt_ttl ndd variable. 
10372 * NDP packets must have a hop limit of 255; don't 10373 * change the hop limit in that case. 10374 */ 10375 if ((ip_multirt_ttl > 0) && 10376 (ip6h->ip6_hops > ip_multirt_ttl) && 10377 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10378 if (ip_debug > 3) { 10379 ip2dbg(("ip_wput_v6: forcing multirt " 10380 "hop limit to %d (was %d) ", 10381 ip_multirt_ttl, ip6h->ip6_hops)); 10382 pr_addr_dbg("v6dst %s\n", AF_INET6, 10383 &ire->ire_addr_v6); 10384 } 10385 ip6h->ip6_hops = ip_multirt_ttl; 10386 } 10387 10388 /* 10389 * We look at this point if there are pending 10390 * unresolved routes. ire_multirt_need_resolve_v6() 10391 * checks in O(n) that all IRE_OFFSUBNET ire 10392 * entries for the packet's destination and 10393 * flagged RTF_MULTIRT are currently resolved. 10394 * If some remain unresolved, we make a copy 10395 * of the current message. It will be used 10396 * to initiate additional route resolutions. 10397 */ 10398 multirt_need_resolve = 10399 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10400 MBLK_GETLABEL(first_mp)); 10401 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10402 "multirt_need_resolve %d, first_mp %p\n", 10403 (void *)ire, multirt_need_resolve, 10404 (void *)first_mp)); 10405 if (multirt_need_resolve) { 10406 copy_mp = copymsg(first_mp); 10407 if (copy_mp != NULL) { 10408 MULTIRT_DEBUG_TAG(copy_mp); 10409 } 10410 } 10411 } 10412 10413 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10414 ill->ill_name, (void *)ire, 10415 ill->ill_phyint->phyint_ifindex)); 10416 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10417 connp, caller, 10418 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10419 ip6i_flags, zoneid); 10420 ire_refrele(ire); 10421 if (need_decref) { 10422 CONN_DEC_REF(connp); 10423 connp = NULL; 10424 } 10425 10426 /* 10427 * Try to resolve another multiroute if 10428 * ire_multirt_need_resolve_v6() deemed it necessary. 10429 * copy_mp will be consumed (sent or freed) by 10430 * ip_newroute_[ipif_]v6(). 
10431 */ 10432 if (copy_mp != NULL) { 10433 if (mctl_present) { 10434 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10435 } else { 10436 ip6h = (ip6_t *)copy_mp->b_rptr; 10437 } 10438 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10439 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10440 zoneid); 10441 if (ipif == NULL) { 10442 ip1dbg(("ip_wput_v6: No ipif for " 10443 "multicast\n")); 10444 MULTIRT_DEBUG_UNTAG(copy_mp); 10445 freemsg(copy_mp); 10446 return; 10447 } 10448 ip_newroute_ipif_v6(q, copy_mp, ipif, 10449 ip6h->ip6_dst, unspec_src, zoneid); 10450 ipif_refrele(ipif); 10451 } else { 10452 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10453 &ip6h->ip6_src, ill, zoneid); 10454 } 10455 } 10456 ill_refrele(ill); 10457 return; 10458 } 10459 if (need_decref) { 10460 CONN_DEC_REF(connp); 10461 connp = NULL; 10462 } 10463 10464 /* Update rptr if there was an ip6i_t header. */ 10465 if (ip6i != NULL) 10466 mp->b_rptr -= sizeof (ip6i_t); 10467 if (unspec_src || attach_if) { 10468 if (ip6i == NULL) { 10469 /* 10470 * Add ip6i_t header to carry unspec_src 10471 * or attach_if until the packet comes back in 10472 * ip_wput_v6. 10473 */ 10474 if (mctl_present) { 10475 first_mp->b_cont = 10476 ip_add_info_v6(mp, NULL, v6dstp); 10477 mp = first_mp->b_cont; 10478 if (mp == NULL) 10479 freeb(first_mp); 10480 } else { 10481 first_mp = mp = ip_add_info_v6(mp, NULL, 10482 v6dstp); 10483 } 10484 if (mp == NULL) { 10485 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10486 ill_refrele(ill); 10487 return; 10488 } 10489 ip6i = (ip6i_t *)mp->b_rptr; 10490 if ((mp->b_wptr - (uchar_t *)ip6i) == 10491 sizeof (ip6i_t)) { 10492 /* 10493 * ndp_resolver called from ip_newroute_v6 10494 * expects a pulled up message. 
10495 */ 10496 if (!pullupmsg(mp, -1)) { 10497 ip1dbg(("ip_wput_v6: pullupmsg" 10498 " failed\n")); 10499 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10500 freemsg(first_mp); 10501 return; 10502 } 10503 ip6i = (ip6i_t *)mp->b_rptr; 10504 } 10505 ip6h = (ip6_t *)&ip6i[1]; 10506 v6dstp = &ip6h->ip6_dst; 10507 } 10508 if (unspec_src) 10509 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10510 if (attach_if) { 10511 /* 10512 * Bind to nofailover/BOUND_PIF overrides ifindex. 10513 */ 10514 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10515 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10516 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10517 if (drop_if_delayed) { 10518 /* This is a multipathing probe packet */ 10519 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10520 } 10521 } 10522 if (mctl_present) { 10523 ASSERT(io != NULL); 10524 io->ipsec_out_unspec_src = unspec_src; 10525 } 10526 } 10527 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10528 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10529 unspec_src, zoneid); 10530 } else { 10531 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10532 zoneid); 10533 } 10534 ill_refrele(ill); 10535 return; 10536 10537 notv6: 10538 /* 10539 * XXX implement a IPv4 and IPv6 packet counter per conn and 10540 * switch when ratio exceeds e.g. 10:1 10541 */ 10542 if (q->q_next == NULL) { 10543 connp = Q_TO_CONN(q); 10544 10545 if (IPCL_IS_TCP(connp)) { 10546 /* change conn_send for the tcp_v4_connections */ 10547 connp->conn_send = ip_output; 10548 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10549 /* The 'q' is the default SCTP queue */ 10550 connp = (conn_t *)arg; 10551 } else { 10552 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10553 } 10554 } 10555 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10556 (void) ip_output(arg, first_mp, arg2, caller); 10557 if (ill != NULL) 10558 ill_refrele(ill); 10559 } 10560 10561 /* 10562 * If this is a conn_t queue, then we pass in the conn. This includes the 10563 * zoneid. 
10564 * Otherwise, this is a message for an ill_t queue, 10565 * in which case we use the global zoneid since those are all part of 10566 * the global zone. 10567 */ 10568 static void 10569 ip_wput_v6(queue_t *q, mblk_t *mp) 10570 { 10571 if (CONN_Q(q)) 10572 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10573 else 10574 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10575 } 10576 10577 static void 10578 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10579 { 10580 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10581 io->ipsec_out_attach_if = B_TRUE; 10582 io->ipsec_out_ill_index = attach_index; 10583 } 10584 10585 /* 10586 * NULL send-to queue - packet is to be delivered locally. 10587 */ 10588 void 10589 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10590 ire_t *ire, int fanout_flags) 10591 { 10592 uint32_t ports; 10593 mblk_t *mp = first_mp, *first_mp1; 10594 boolean_t mctl_present; 10595 uint8_t nexthdr; 10596 uint16_t hdr_length; 10597 ipsec_out_t *io; 10598 mib2_ipIfStatsEntry_t *mibptr; 10599 ilm_t *ilm; 10600 uint_t nexthdr_offset; 10601 10602 if (DB_TYPE(mp) == M_CTL) { 10603 io = (ipsec_out_t *)mp->b_rptr; 10604 if (!io->ipsec_out_secure) { 10605 mp = mp->b_cont; 10606 freeb(first_mp); 10607 first_mp = mp; 10608 mctl_present = B_FALSE; 10609 } else { 10610 mctl_present = B_TRUE; 10611 mp = first_mp->b_cont; 10612 ipsec_out_to_in(first_mp); 10613 } 10614 } else { 10615 mctl_present = B_FALSE; 10616 } 10617 10618 DTRACE_PROBE4(ip6__loopback__in__start, 10619 ill_t *, ill, ill_t *, NULL, 10620 ip6_t *, ip6h, mblk_t *, first_mp); 10621 10622 FW_HOOKS6(ip6_loopback_in_event, ipv6firewall_loopback_in, 10623 ill, NULL, ip6h, first_mp, mp); 10624 10625 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10626 10627 if (first_mp == NULL) 10628 return; 10629 10630 nexthdr = ip6h->ip6_nxt; 10631 mibptr = ill->ill_ip_mib; 10632 10633 /* Fastpath */ 10634 switch (nexthdr) { 10635 case IPPROTO_TCP: 10636 case IPPROTO_UDP: 10637 case 
IPPROTO_ICMPV6: 10638 case IPPROTO_SCTP: 10639 hdr_length = IPV6_HDR_LEN; 10640 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10641 (uchar_t *)ip6h); 10642 break; 10643 default: { 10644 uint8_t *nexthdrp; 10645 10646 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10647 &hdr_length, &nexthdrp)) { 10648 /* Malformed packet */ 10649 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10650 freemsg(first_mp); 10651 return; 10652 } 10653 nexthdr = *nexthdrp; 10654 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10655 break; 10656 } 10657 } 10658 10659 UPDATE_OB_PKT_COUNT(ire); 10660 ire->ire_last_used_time = lbolt; 10661 10662 /* 10663 * Remove reacability confirmation bit from version field 10664 * before looping back the packet. 10665 */ 10666 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10667 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10668 } 10669 10670 switch (nexthdr) { 10671 case IPPROTO_TCP: 10672 if (DB_TYPE(mp) == M_DATA) { 10673 /* 10674 * M_DATA mblk, so init mblk (chain) for 10675 * no struio(). 10676 */ 10677 mblk_t *mp1 = mp; 10678 10679 do { 10680 mp1->b_datap->db_struioflag = 0; 10681 } while ((mp1 = mp1->b_cont) != NULL); 10682 } 10683 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10684 TCP_PORTS_OFFSET); 10685 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10686 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10687 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10688 hdr_length, mctl_present, ire->ire_zoneid); 10689 return; 10690 10691 case IPPROTO_UDP: 10692 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10693 UDP_PORTS_OFFSET); 10694 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10695 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10696 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10697 return; 10698 10699 case IPPROTO_SCTP: 10700 { 10701 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10702 10703 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10704 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10705 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10706 mctl_present, 
IP6_NO_IPPOLICY, ipif_seqid, 10707 ire->ire_zoneid); 10708 return; 10709 } 10710 case IPPROTO_ICMPV6: { 10711 icmp6_t *icmp6; 10712 10713 /* check for full IPv6+ICMPv6 header */ 10714 if ((mp->b_wptr - mp->b_rptr) < 10715 (hdr_length + ICMP6_MINLEN)) { 10716 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10717 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10718 " failed\n")); 10719 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10720 freemsg(first_mp); 10721 return; 10722 } 10723 ip6h = (ip6_t *)mp->b_rptr; 10724 } 10725 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10726 10727 /* Update output mib stats */ 10728 icmp_update_out_mib_v6(ill, icmp6); 10729 10730 /* Check variable for testing applications */ 10731 if (ipv6_drop_inbound_icmpv6) { 10732 freemsg(first_mp); 10733 return; 10734 } 10735 /* 10736 * Assume that there is always at least one conn for 10737 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10738 * where there is no conn. 10739 */ 10740 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10741 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10742 /* 10743 * In the multicast case, applications may have 10744 * joined the group from different zones, so we 10745 * need to deliver the packet to each of them. 10746 * Loop through the multicast memberships 10747 * structures (ilm) on the receive ill and send 10748 * a copy of the packet up each matching one. 10749 * However, we don't do this for multicasts sent 10750 * on the loopback interface (PHYI_LOOPBACK flag 10751 * set) as they must stay in the sender's zone. 
10752 */ 10753 ILM_WALKER_HOLD(ill); 10754 for (ilm = ill->ill_ilm; ilm != NULL; 10755 ilm = ilm->ilm_next) { 10756 if (ilm->ilm_flags & ILM_DELETED) 10757 continue; 10758 if (!IN6_ARE_ADDR_EQUAL( 10759 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10760 continue; 10761 if ((fanout_flags & 10762 IP_FF_NO_MCAST_LOOP) && 10763 ilm->ilm_zoneid == ire->ire_zoneid) 10764 continue; 10765 if (!ipif_lookup_zoneid(ill, 10766 ilm->ilm_zoneid, IPIF_UP, NULL)) 10767 continue; 10768 10769 first_mp1 = ip_copymsg(first_mp); 10770 if (first_mp1 == NULL) 10771 continue; 10772 icmp_inbound_v6(q, first_mp1, ill, 10773 hdr_length, mctl_present, 10774 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10775 NULL); 10776 } 10777 ILM_WALKER_RELE(ill); 10778 } else { 10779 first_mp1 = ip_copymsg(first_mp); 10780 if (first_mp1 != NULL) 10781 icmp_inbound_v6(q, first_mp1, ill, 10782 hdr_length, mctl_present, 10783 IP6_NO_IPPOLICY, ire->ire_zoneid, 10784 NULL); 10785 } 10786 } 10787 /* FALLTHRU */ 10788 default: { 10789 /* 10790 * Handle protocols with which IPv6 is less intimate. 10791 */ 10792 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10793 10794 /* 10795 * Enable sending ICMP for "Unknown" nexthdr 10796 * case. i.e. where we did not FALLTHRU from 10797 * IPPROTO_ICMPV6 processing case above. 10798 */ 10799 if (nexthdr != IPPROTO_ICMPV6) 10800 fanout_flags |= IP_FF_SEND_ICMP; 10801 /* 10802 * Note: There can be more than one stream bound 10803 * to a particular protocol. When this is the case, 10804 * each one gets a copy of any incoming packets. 10805 */ 10806 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10807 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10808 mctl_present, ire->ire_zoneid); 10809 return; 10810 } 10811 } 10812 } 10813 10814 /* 10815 * Send packet using IRE. 10816 * Checksumming is controlled by cksum_request: 10817 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10818 * 1 => Skip TCP/UDP/SCTP checksum 10819 * Otherwise => checksum_request contains insert offset for checksum 10820 * 10821 * Assumes that the following set of headers appear in the first 10822 * mblk: 10823 * ip6_t 10824 * Any extension headers 10825 * TCP/UDP/SCTP header (if present) 10826 * The routine can handle an ICMPv6 header that is not in the first mblk. 10827 * 10828 * NOTE : This function does not ire_refrele the ire passed in as the 10829 * argument unlike ip_wput_ire where the REFRELE is done. 10830 * Refer to ip_wput_ire for more on this. 10831 */ 10832 static void 10833 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10834 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10835 zoneid_t zoneid) 10836 { 10837 ip6_t *ip6h; 10838 uint8_t nexthdr; 10839 uint16_t hdr_length; 10840 uint_t reachable = 0x0; 10841 ill_t *ill; 10842 mib2_ipIfStatsEntry_t *mibptr; 10843 mblk_t *first_mp; 10844 boolean_t mctl_present; 10845 ipsec_out_t *io; 10846 boolean_t conn_dontroute; /* conn value for multicast */ 10847 boolean_t conn_multicast_loop; /* conn value for multicast */ 10848 boolean_t multicast_forward; /* Should we forward ? */ 10849 int max_frag; 10850 10851 ill = ire_to_ill(ire); 10852 first_mp = mp; 10853 multicast_forward = B_FALSE; 10854 10855 if (mp->b_datap->db_type != M_CTL) { 10856 ip6h = (ip6_t *)first_mp->b_rptr; 10857 } else { 10858 io = (ipsec_out_t *)first_mp->b_rptr; 10859 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10860 /* 10861 * Grab the zone id now because the M_CTL can be discarded by 10862 * ip_wput_ire_parse_ipsec_out() below. 10863 */ 10864 ASSERT(zoneid == io->ipsec_out_zoneid); 10865 ASSERT(zoneid != ALL_ZONES); 10866 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10867 /* 10868 * For the multicast case, ipsec_out carries conn_dontroute and 10869 * conn_multicast_loop as conn may not be available here. 
We 10870 * need this for multicast loopback and forwarding which is done 10871 * later in the code. 10872 */ 10873 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10874 conn_dontroute = io->ipsec_out_dontroute; 10875 conn_multicast_loop = io->ipsec_out_multicast_loop; 10876 /* 10877 * If conn_dontroute is not set or conn_multicast_loop 10878 * is set, we need to do forwarding/loopback. For 10879 * datagrams from ip_wput_multicast, conn_dontroute is 10880 * set to B_TRUE and conn_multicast_loop is set to 10881 * B_FALSE so that we neither do forwarding nor 10882 * loopback. 10883 */ 10884 if (!conn_dontroute || conn_multicast_loop) 10885 multicast_forward = B_TRUE; 10886 } 10887 } 10888 10889 /* 10890 * If the sender didn't supply the hop limit and there is a default 10891 * unicast hop limit associated with the output interface, we use 10892 * that if the packet is unicast. Interface specific unicast hop 10893 * limits as set via the SIOCSLIFLNKINFO ioctl. 10894 */ 10895 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10896 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10897 ip6h->ip6_hops = ill->ill_max_hops; 10898 } 10899 10900 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10901 ire->ire_zoneid != ALL_ZONES) { 10902 /* 10903 * When a zone sends a packet to another zone, we try to deliver 10904 * the packet under the same conditions as if the destination 10905 * was a real node on the network. To do so, we look for a 10906 * matching route in the forwarding table. 10907 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10908 * ip_newroute_v6() does. 10909 * Note that IRE_LOCAL are special, since they are used 10910 * when the zoneid doesn't match in some cases. This means that 10911 * we need to handle ipha_src differently since ire_src_addr 10912 * belongs to the receiving zone instead of the sending zone. 
10913 * When ip_restrict_interzone_loopback is set, then 10914 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10915 * for loopback between zones when the logical "Ethernet" would 10916 * have looped them back. 10917 */ 10918 ire_t *src_ire; 10919 10920 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10921 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10922 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10923 if (src_ire != NULL && 10924 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10925 (!ip_restrict_interzone_loopback || 10926 ire_local_same_ill_group(ire, src_ire))) { 10927 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10928 !unspec_src) { 10929 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10930 } 10931 ire_refrele(src_ire); 10932 } else { 10933 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10934 if (src_ire != NULL) { 10935 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10936 ire_refrele(src_ire); 10937 freemsg(first_mp); 10938 return; 10939 } 10940 ire_refrele(src_ire); 10941 } 10942 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10943 /* Failed */ 10944 freemsg(first_mp); 10945 return; 10946 } 10947 icmp_unreachable_v6(q, first_mp, 10948 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10949 zoneid); 10950 return; 10951 } 10952 } 10953 10954 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10955 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10956 connp, unspec_src, zoneid); 10957 if (mp == NULL) { 10958 return; 10959 } 10960 } 10961 10962 first_mp = mp; 10963 if (mp->b_datap->db_type == M_CTL) { 10964 io = (ipsec_out_t *)mp->b_rptr; 10965 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10966 mp = mp->b_cont; 10967 mctl_present = B_TRUE; 10968 } else { 10969 mctl_present = B_FALSE; 10970 } 10971 10972 ip6h = (ip6_t *)mp->b_rptr; 10973 nexthdr = ip6h->ip6_nxt; 10974 mibptr = ill->ill_ip_mib; 10975 10976 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10977 ipif_t *ipif; 10978 10979 /* 10980 * Select 
the source address using ipif_select_source_v6. 10981 */ 10982 if (attach_index != 0) { 10983 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10984 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10985 } else { 10986 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10987 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10988 } 10989 if (ipif == NULL) { 10990 if (ip_debug > 2) { 10991 /* ip1dbg */ 10992 pr_addr_dbg("ip_wput_ire_v6: no src for " 10993 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10994 printf("ip_wput_ire_v6: interface name %s\n", 10995 ill->ill_name); 10996 } 10997 freemsg(first_mp); 10998 return; 10999 } 11000 ip6h->ip6_src = ipif->ipif_v6src_addr; 11001 ipif_refrele(ipif); 11002 } 11003 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11004 if ((connp != NULL && connp->conn_multicast_loop) || 11005 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11006 ilm_t *ilm; 11007 11008 ILM_WALKER_HOLD(ill); 11009 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 11010 ILM_WALKER_RELE(ill); 11011 if (ilm != NULL) { 11012 mblk_t *nmp; 11013 int fanout_flags = 0; 11014 11015 if (connp != NULL && 11016 !connp->conn_multicast_loop) { 11017 fanout_flags |= IP_FF_NO_MCAST_LOOP; 11018 } 11019 ip1dbg(("ip_wput_ire_v6: " 11020 "Loopback multicast\n")); 11021 nmp = ip_copymsg(first_mp); 11022 if (nmp != NULL) { 11023 ip6_t *nip6h; 11024 mblk_t *mp_ip6h; 11025 11026 if (mctl_present) { 11027 nip6h = (ip6_t *) 11028 nmp->b_cont->b_rptr; 11029 mp_ip6h = nmp->b_cont; 11030 } else { 11031 nip6h = (ip6_t *)nmp->b_rptr; 11032 mp_ip6h = nmp; 11033 } 11034 11035 DTRACE_PROBE4( 11036 ip6__loopback__out__start, 11037 ill_t *, NULL, 11038 ill_t *, ill, 11039 ip6_t *, nip6h, 11040 mblk_t *, nmp); 11041 11042 FW_HOOKS6(ip6_loopback_out_event, 11043 ipv6firewall_loopback_out, 11044 NULL, ill, nip6h, nmp, mp_ip6h); 11045 11046 DTRACE_PROBE1( 11047 ip6__loopback__out__end, 11048 mblk_t *, nmp); 11049 11050 if (nmp != NULL) { 11051 /* 11052 * Deliver locally and to 11053 * 
every local zone, except 11054 * the sending zone when 11055 * IPV6_MULTICAST_LOOP is 11056 * disabled. 11057 */ 11058 ip_wput_local_v6(RD(q), ill, 11059 nip6h, nmp, 11060 ire, fanout_flags); 11061 } 11062 } else { 11063 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11064 ip1dbg(("ip_wput_ire_v6: " 11065 "copymsg failed\n")); 11066 } 11067 } 11068 } 11069 if (ip6h->ip6_hops == 0 || 11070 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11071 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11072 /* 11073 * Local multicast or just loopback on loopback 11074 * interface. 11075 */ 11076 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11077 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11078 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11079 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11080 freemsg(first_mp); 11081 return; 11082 } 11083 } 11084 11085 if (ire->ire_stq != NULL) { 11086 uint32_t sum; 11087 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11088 ill_phyint->phyint_ifindex; 11089 queue_t *dev_q = ire->ire_stq->q_next; 11090 11091 /* 11092 * non-NULL send-to queue - packet is to be sent 11093 * out an interface. 11094 */ 11095 11096 /* Driver is flow-controlling? */ 11097 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11098 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11099 /* 11100 * Queue packet if we have an conn to give back 11101 * pressure. We can't queue packets intended for 11102 * hardware acceleration since we've tossed that 11103 * state already. If the packet is being fed back 11104 * from ire_send_v6, we don't know the position in 11105 * the queue to enqueue the packet and we discard 11106 * the packet. 
11107 */ 11108 if (ip_output_queue && connp != NULL && 11109 !mctl_present && caller != IRE_SEND) { 11110 if (caller == IP_WSRV) { 11111 connp->conn_did_putbq = 1; 11112 (void) putbq(connp->conn_wq, mp); 11113 conn_drain_insert(connp); 11114 /* 11115 * caller == IP_WSRV implies we are 11116 * the service thread, and the 11117 * queue is already noenabled. 11118 * The check for canput and 11119 * the putbq is not atomic. 11120 * So we need to check again. 11121 */ 11122 if (canput(dev_q)) 11123 connp->conn_did_putbq = 0; 11124 } else { 11125 (void) putq(connp->conn_wq, mp); 11126 } 11127 return; 11128 } 11129 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11130 freemsg(first_mp); 11131 return; 11132 } 11133 11134 /* 11135 * Look for reachability confirmations from the transport. 11136 */ 11137 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11138 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11139 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11140 if (mctl_present) 11141 io->ipsec_out_reachable = B_TRUE; 11142 } 11143 /* Fastpath */ 11144 switch (nexthdr) { 11145 case IPPROTO_TCP: 11146 case IPPROTO_UDP: 11147 case IPPROTO_ICMPV6: 11148 case IPPROTO_SCTP: 11149 hdr_length = IPV6_HDR_LEN; 11150 break; 11151 default: { 11152 uint8_t *nexthdrp; 11153 11154 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11155 &hdr_length, &nexthdrp)) { 11156 /* Malformed packet */ 11157 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11158 freemsg(first_mp); 11159 return; 11160 } 11161 nexthdr = *nexthdrp; 11162 break; 11163 } 11164 } 11165 11166 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11167 uint16_t *up; 11168 uint16_t *insp; 11169 11170 /* 11171 * The packet header is processed once for all, even 11172 * in the multirouting case. We disable hardware 11173 * checksum if the packet is multirouted, as it will be 11174 * replicated via several interfaces, and not all of 11175 * them may have this capability. 
11176 */ 11177 if (cksum_request == 1 && 11178 !(ire->ire_flags & RTF_MULTIRT)) { 11179 /* Skip the transport checksum */ 11180 goto cksum_done; 11181 } 11182 /* 11183 * Do user-configured raw checksum. 11184 * Compute checksum and insert at offset "cksum_request" 11185 */ 11186 11187 /* check for enough headers for checksum */ 11188 cksum_request += hdr_length; /* offset from rptr */ 11189 if ((mp->b_wptr - mp->b_rptr) < 11190 (cksum_request + sizeof (int16_t))) { 11191 if (!pullupmsg(mp, 11192 cksum_request + sizeof (int16_t))) { 11193 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11194 " failed\n")); 11195 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11196 freemsg(first_mp); 11197 return; 11198 } 11199 ip6h = (ip6_t *)mp->b_rptr; 11200 } 11201 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11202 ASSERT(((uintptr_t)insp & 0x1) == 0); 11203 up = (uint16_t *)&ip6h->ip6_src; 11204 /* 11205 * icmp has placed length and routing 11206 * header adjustment in *insp. 11207 */ 11208 sum = htons(nexthdr) + 11209 up[0] + up[1] + up[2] + up[3] + 11210 up[4] + up[5] + up[6] + up[7] + 11211 up[8] + up[9] + up[10] + up[11] + 11212 up[12] + up[13] + up[14] + up[15]; 11213 sum = (sum & 0xffff) + (sum >> 16); 11214 *insp = IP_CSUM(mp, hdr_length, sum); 11215 if (*insp == 0) 11216 *insp = 0xFFFF; 11217 } else if (nexthdr == IPPROTO_TCP) { 11218 uint16_t *up; 11219 11220 /* 11221 * Check for full IPv6 header + enough TCP header 11222 * to get at the checksum field. 
11223 */ 11224 if ((mp->b_wptr - mp->b_rptr) < 11225 (hdr_length + TCP_CHECKSUM_OFFSET + 11226 TCP_CHECKSUM_SIZE)) { 11227 if (!pullupmsg(mp, hdr_length + 11228 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11229 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11230 " failed\n")); 11231 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11232 freemsg(first_mp); 11233 return; 11234 } 11235 ip6h = (ip6_t *)mp->b_rptr; 11236 } 11237 11238 up = (uint16_t *)&ip6h->ip6_src; 11239 /* 11240 * Note: The TCP module has stored the length value 11241 * into the tcp checksum field, so we don't 11242 * need to explicitly sum it in here. 11243 */ 11244 sum = up[0] + up[1] + up[2] + up[3] + 11245 up[4] + up[5] + up[6] + up[7] + 11246 up[8] + up[9] + up[10] + up[11] + 11247 up[12] + up[13] + up[14] + up[15]; 11248 11249 /* Fold the initial sum */ 11250 sum = (sum & 0xffff) + (sum >> 16); 11251 11252 up = (uint16_t *)(((uchar_t *)ip6h) + 11253 hdr_length + TCP_CHECKSUM_OFFSET); 11254 11255 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11256 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11257 ire->ire_max_frag, mctl_present, sum); 11258 11259 /* Software checksum? 
*/ 11260 if (DB_CKSUMFLAGS(mp) == 0) { 11261 IP6_STAT(ip6_out_sw_cksum); 11262 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11263 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11264 hdr_length); 11265 } 11266 } else if (nexthdr == IPPROTO_UDP) { 11267 uint16_t *up; 11268 11269 /* 11270 * check for full IPv6 header + enough UDP header 11271 * to get at the UDP checksum field 11272 */ 11273 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11274 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11275 if (!pullupmsg(mp, hdr_length + 11276 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11277 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11278 " failed\n")); 11279 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11280 freemsg(first_mp); 11281 return; 11282 } 11283 ip6h = (ip6_t *)mp->b_rptr; 11284 } 11285 up = (uint16_t *)&ip6h->ip6_src; 11286 /* 11287 * Note: The UDP module has stored the length value 11288 * into the udp checksum field, so we don't 11289 * need to explicitly sum it in here. 11290 */ 11291 sum = up[0] + up[1] + up[2] + up[3] + 11292 up[4] + up[5] + up[6] + up[7] + 11293 up[8] + up[9] + up[10] + up[11] + 11294 up[12] + up[13] + up[14] + up[15]; 11295 11296 /* Fold the initial sum */ 11297 sum = (sum & 0xffff) + (sum >> 16); 11298 11299 up = (uint16_t *)(((uchar_t *)ip6h) + 11300 hdr_length + UDP_CHECKSUM_OFFSET); 11301 11302 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11303 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11304 ire->ire_max_frag, mctl_present, sum); 11305 11306 /* Software checksum? 
*/ 11307 if (DB_CKSUMFLAGS(mp) == 0) { 11308 IP6_STAT(ip6_out_sw_cksum); 11309 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11310 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11311 hdr_length); 11312 } 11313 } else if (nexthdr == IPPROTO_ICMPV6) { 11314 uint16_t *up; 11315 icmp6_t *icmp6; 11316 11317 /* check for full IPv6+ICMPv6 header */ 11318 if ((mp->b_wptr - mp->b_rptr) < 11319 (hdr_length + ICMP6_MINLEN)) { 11320 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11321 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11322 " failed\n")); 11323 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11324 freemsg(first_mp); 11325 return; 11326 } 11327 ip6h = (ip6_t *)mp->b_rptr; 11328 } 11329 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11330 up = (uint16_t *)&ip6h->ip6_src; 11331 /* 11332 * icmp has placed length and routing 11333 * header adjustment in icmp6_cksum. 11334 */ 11335 sum = htons(IPPROTO_ICMPV6) + 11336 up[0] + up[1] + up[2] + up[3] + 11337 up[4] + up[5] + up[6] + up[7] + 11338 up[8] + up[9] + up[10] + up[11] + 11339 up[12] + up[13] + up[14] + up[15]; 11340 sum = (sum & 0xffff) + (sum >> 16); 11341 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11342 if (icmp6->icmp6_cksum == 0) 11343 icmp6->icmp6_cksum = 0xFFFF; 11344 11345 /* Update output mib stats */ 11346 icmp_update_out_mib_v6(ill, icmp6); 11347 } else if (nexthdr == IPPROTO_SCTP) { 11348 sctp_hdr_t *sctph; 11349 11350 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11351 if (!pullupmsg(mp, hdr_length + 11352 sizeof (*sctph))) { 11353 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11354 " failed\n")); 11355 BUMP_MIB(ill->ill_ip_mib, 11356 ipIfStatsOutDiscards); 11357 freemsg(mp); 11358 return; 11359 } 11360 ip6h = (ip6_t *)mp->b_rptr; 11361 } 11362 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11363 sctph->sh_chksum = 0; 11364 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11365 } 11366 11367 cksum_done: 11368 /* 11369 * We force the insertion of a fragment header using the 11370 * IPH_FRAG_HDR flag in two 
cases: 11371 * - after reception of an ICMPv6 "packet too big" message 11372 * with a MTU < 1280 (cf. RFC 2460 section 5) 11373 * - for multirouted IPv6 packets, so that the receiver can 11374 * discard duplicates according to their fragment identifier 11375 * 11376 * Two flags modifed from the API can modify this behavior. 11377 * The first is IPV6_USE_MIN_MTU. With this API the user 11378 * can specify how to manage PMTUD for unicast and multicast. 11379 * 11380 * IPV6_DONTFRAG disallows fragmentation. 11381 */ 11382 max_frag = ire->ire_max_frag; 11383 switch (IP6I_USE_MIN_MTU_API(flags)) { 11384 case IPV6_USE_MIN_MTU_DEFAULT: 11385 case IPV6_USE_MIN_MTU_UNICAST: 11386 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11387 max_frag = IPV6_MIN_MTU; 11388 } 11389 break; 11390 11391 case IPV6_USE_MIN_MTU_NEVER: 11392 max_frag = IPV6_MIN_MTU; 11393 break; 11394 } 11395 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11396 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11397 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11398 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11399 max_frag, B_FALSE, B_TRUE, zoneid); 11400 return; 11401 } 11402 11403 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11404 (mp->b_cont ? 
msgdsize(mp) : 11405 mp->b_wptr - (uchar_t *)ip6h)) { 11406 ip0dbg(("Packet length mismatch: %d, %ld\n", 11407 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11408 msgdsize(mp))); 11409 freemsg(first_mp); 11410 return; 11411 } 11412 /* Do IPSEC processing first */ 11413 if (mctl_present) { 11414 if (attach_index != 0) 11415 ipsec_out_attach_if(io, attach_index); 11416 ipsec_out_process(q, first_mp, ire, ill_index); 11417 return; 11418 } 11419 ASSERT(mp->b_prev == NULL); 11420 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11421 ntohs(ip6h->ip6_plen) + 11422 IPV6_HDR_LEN, max_frag)); 11423 ASSERT(mp == first_mp); 11424 /* Initiate IPPF processing */ 11425 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11426 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11427 if (mp == NULL) { 11428 return; 11429 } 11430 } 11431 ip_wput_frag_v6(mp, ire, reachable, connp, 11432 caller, max_frag); 11433 return; 11434 } 11435 /* Do IPSEC processing first */ 11436 if (mctl_present) { 11437 int extra_len = ipsec_out_extra_length(first_mp); 11438 11439 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11440 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11441 /* 11442 * IPsec headers will push the packet over the 11443 * MTU limit. Issue an ICMPv6 Packet Too Big 11444 * message for this packet if the upper-layer 11445 * that issued this packet will be able to 11446 * react to the icmp_pkt2big_v6() that we'll 11447 * generate. 11448 */ 11449 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11450 max_frag, B_FALSE, B_TRUE, zoneid); 11451 return; 11452 } 11453 if (attach_index != 0) 11454 ipsec_out_attach_if(io, attach_index); 11455 ipsec_out_process(q, first_mp, ire, ill_index); 11456 return; 11457 } 11458 /* 11459 * XXX multicast: add ip_mforward_v6() here. 11460 * Check conn_dontroute 11461 */ 11462 #ifdef lint 11463 /* 11464 * XXX The only purpose of this statement is to avoid lint 11465 * errors. See the above "XXX multicast". When that gets 11466 * fixed, remove this whole #ifdef lint section. 
11467 */ 11468 ip3dbg(("multicast forward is %s.\n", 11469 (multicast_forward ? "TRUE" : "FALSE"))); 11470 #endif 11471 11472 UPDATE_OB_PKT_COUNT(ire); 11473 ire->ire_last_used_time = lbolt; 11474 ASSERT(mp == first_mp); 11475 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11476 } else { 11477 DTRACE_PROBE4(ip6__loopback__out__start, 11478 ill_t *, NULL, ill_t *, ill, 11479 ip6_t *, ip6h, mblk_t *, first_mp); 11480 FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, 11481 NULL, ill, ip6h, first_mp, mp); 11482 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11483 if (first_mp != NULL) 11484 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11485 } 11486 } 11487 11488 /* 11489 * Outbound IPv6 fragmentation routine using MDT. 11490 */ 11491 static void 11492 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11493 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11494 { 11495 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11496 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11497 mblk_t *hdr_mp, *md_mp = NULL; 11498 int i1; 11499 multidata_t *mmd; 11500 unsigned char *hdr_ptr, *pld_ptr; 11501 ip_pdescinfo_t pdi; 11502 uint32_t ident; 11503 size_t len; 11504 uint16_t offset; 11505 queue_t *stq = ire->ire_stq; 11506 ill_t *ill = (ill_t *)stq->q_ptr; 11507 11508 ASSERT(DB_TYPE(mp) == M_DATA); 11509 ASSERT(MBLKL(mp) > unfragmentable_len); 11510 11511 /* 11512 * Move read ptr past unfragmentable portion, we don't want this part 11513 * of the data in our fragments. 11514 */ 11515 mp->b_rptr += unfragmentable_len; 11516 11517 /* Calculate how many packets we will send out */ 11518 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11519 pkts = (i1 + max_chunk - 1) / max_chunk; 11520 ASSERT(pkts > 1); 11521 11522 /* Allocate a message block which will hold all the IP Headers. 
*/ 11523 wroff = ip_wroff_extra; 11524 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11525 11526 i1 = pkts * hdr_chunk_len; 11527 /* 11528 * Create the header buffer, Multidata and destination address 11529 * and SAP attribute that should be associated with it. 11530 */ 11531 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11532 ((hdr_mp->b_wptr += i1), 11533 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11534 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11535 freemsg(mp); 11536 if (md_mp == NULL) { 11537 freemsg(hdr_mp); 11538 } else { 11539 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11540 freemsg(md_mp); 11541 } 11542 IP6_STAT(ip6_frag_mdt_allocfail); 11543 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11544 return; 11545 } 11546 IP6_STAT(ip6_frag_mdt_allocd); 11547 11548 /* 11549 * Add a payload buffer to the Multidata; this operation must not 11550 * fail, or otherwise our logic in this routine is broken. There 11551 * is no memory allocation done by the routine, so any returned 11552 * failure simply tells us that we've done something wrong. 11553 * 11554 * A failure tells us that either we're adding the same payload 11555 * buffer more than once, or we're trying to add more buffers than 11556 * allowed. None of the above cases should happen, and we panic 11557 * because either there's horrible heap corruption, and/or 11558 * programming mistake. 11559 */ 11560 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11561 goto pbuf_panic; 11562 } 11563 11564 hdr_ptr = hdr_mp->b_rptr; 11565 pld_ptr = mp->b_rptr; 11566 11567 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11568 11569 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11570 11571 /* 11572 * len is the total length of the fragmentable data in this 11573 * datagram. For each fragment sent, we will decrement len 11574 * by the amount of fragmentable data sent in that fragment 11575 * until len reaches zero. 
11576 */ 11577 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11578 11579 offset = 0; 11580 prev_nexthdr_offset += wroff; 11581 11582 while (len != 0) { 11583 size_t mlen; 11584 ip6_t *fip6h; 11585 ip6_frag_t *fraghdr; 11586 int error; 11587 11588 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11589 mlen = MIN(len, max_chunk); 11590 len -= mlen; 11591 11592 fip6h = (ip6_t *)(hdr_ptr + wroff); 11593 ASSERT(OK_32PTR(fip6h)); 11594 bcopy(ip6h, fip6h, unfragmentable_len); 11595 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11596 11597 fip6h->ip6_plen = htons((uint16_t)(mlen + 11598 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11599 11600 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11601 unfragmentable_len); 11602 fraghdr->ip6f_nxt = nexthdr; 11603 fraghdr->ip6f_reserved = 0; 11604 fraghdr->ip6f_offlg = htons(offset) | 11605 ((len != 0) ? IP6F_MORE_FRAG : 0); 11606 fraghdr->ip6f_ident = ident; 11607 11608 /* 11609 * Record offset and size of header and data of the next packet 11610 * in the multidata message. 11611 */ 11612 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11613 unfragmentable_len + sizeof (ip6_frag_t), 0); 11614 PDESC_PLD_INIT(&pdi); 11615 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11616 ASSERT(i1 > 0); 11617 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11618 if (i1 == mlen) { 11619 pld_ptr += mlen; 11620 } else { 11621 i1 = mlen - i1; 11622 mp = mp->b_cont; 11623 ASSERT(mp != NULL); 11624 ASSERT(MBLKL(mp) >= i1); 11625 /* 11626 * Attach the next payload message block to the 11627 * multidata message. 
11628 */ 11629 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11630 goto pbuf_panic; 11631 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11632 pld_ptr = mp->b_rptr + i1; 11633 } 11634 11635 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11636 KM_NOSLEEP)) == NULL) { 11637 /* 11638 * Any failure other than ENOMEM indicates that we 11639 * have passed in invalid pdesc info or parameters 11640 * to mmd_addpdesc, which must not happen. 11641 * 11642 * EINVAL is a result of failure on boundary checks 11643 * against the pdesc info contents. It should not 11644 * happen, and we panic because either there's 11645 * horrible heap corruption, and/or programming 11646 * mistake. 11647 */ 11648 if (error != ENOMEM) { 11649 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11650 "pdesc logic error detected for " 11651 "mmd %p pinfo %p (%d)\n", 11652 (void *)mmd, (void *)&pdi, error); 11653 /* NOTREACHED */ 11654 } 11655 IP6_STAT(ip6_frag_mdt_addpdescfail); 11656 /* Free unattached payload message blocks as well */ 11657 md_mp->b_cont = mp->b_cont; 11658 goto free_mmd; 11659 } 11660 11661 /* Advance fragment offset. */ 11662 offset += mlen; 11663 11664 /* Advance to location for next header in the buffer. */ 11665 hdr_ptr += hdr_chunk_len; 11666 11667 /* Did we reach the next payload message block? */ 11668 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11669 mp = mp->b_cont; 11670 /* 11671 * Attach the next message block with payload 11672 * data to the multidata message. 
11673 */ 11674 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11675 goto pbuf_panic; 11676 pld_ptr = mp->b_rptr; 11677 } 11678 } 11679 11680 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11681 ASSERT(mp->b_wptr == pld_ptr); 11682 11683 /* Update IP statistics */ 11684 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11685 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11686 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11687 /* 11688 * The ipv6 header len is accounted for in unfragmentable_len so 11689 * when calculating the fragmentation overhead just add the frag 11690 * header len. 11691 */ 11692 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11693 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11694 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11695 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11696 11697 ire->ire_ob_pkt_count += pkts; 11698 if (ire->ire_ipif != NULL) 11699 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11700 11701 ire->ire_last_used_time = lbolt; 11702 /* Send it down */ 11703 putnext(stq, md_mp); 11704 return; 11705 11706 pbuf_panic: 11707 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11708 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11709 pbuf_idx); 11710 /* NOTREACHED */ 11711 } 11712 11713 /* 11714 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11715 * We have not optimized this in terms of number of mblks 11716 * allocated. For instance, for each fragment sent we always allocate a 11717 * mblk to hold the IPv6 header and fragment header. 11718 * 11719 * Assumes that all the extension headers are contained in the first mblk. 11720 * 11721 * The fragment header is inserted after an hop-by-hop options header 11722 * and after [an optional destinations header followed by] a routing header. 11723 * 11724 * NOTE : This function does not ire_refrele the ire passed in as 11725 * the argument. 
11726 */ 11727 void 11728 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11729 int caller, int max_frag) 11730 { 11731 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11732 ip6_t *fip6h; 11733 mblk_t *hmp; 11734 mblk_t *hmp0; 11735 mblk_t *dmp; 11736 ip6_frag_t *fraghdr; 11737 size_t unfragmentable_len; 11738 size_t len; 11739 size_t mlen; 11740 size_t max_chunk; 11741 uint32_t ident; 11742 uint16_t off_flags; 11743 uint16_t offset = 0; 11744 ill_t *ill; 11745 uint8_t nexthdr; 11746 uint_t prev_nexthdr_offset; 11747 uint8_t *ptr; 11748 11749 ASSERT(ire->ire_type == IRE_CACHE); 11750 ill = (ill_t *)ire->ire_stq->q_ptr; 11751 11752 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11753 11754 /* 11755 * Determine the length of the unfragmentable portion of this 11756 * datagram. This consists of the IPv6 header, a potential 11757 * hop-by-hop options header, a potential pre-routing-header 11758 * destination options header, and a potential routing header. 11759 */ 11760 nexthdr = ip6h->ip6_nxt; 11761 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11762 ptr = (uint8_t *)&ip6h[1]; 11763 11764 if (nexthdr == IPPROTO_HOPOPTS) { 11765 ip6_hbh_t *hbh_hdr; 11766 uint_t hdr_len; 11767 11768 hbh_hdr = (ip6_hbh_t *)ptr; 11769 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11770 nexthdr = hbh_hdr->ip6h_nxt; 11771 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11772 - (uint8_t *)ip6h; 11773 ptr += hdr_len; 11774 } 11775 if (nexthdr == IPPROTO_DSTOPTS) { 11776 ip6_dest_t *dest_hdr; 11777 uint_t hdr_len; 11778 11779 dest_hdr = (ip6_dest_t *)ptr; 11780 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11781 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11782 nexthdr = dest_hdr->ip6d_nxt; 11783 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11784 - (uint8_t *)ip6h; 11785 ptr += hdr_len; 11786 } 11787 } 11788 if (nexthdr == IPPROTO_ROUTING) { 11789 ip6_rthdr_t *rthdr; 11790 uint_t hdr_len; 11791 11792 rthdr = (ip6_rthdr_t *)ptr; 11793 nexthdr = 
rthdr->ip6r_nxt; 11794 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11795 - (uint8_t *)ip6h; 11796 hdr_len = 8 * (rthdr->ip6r_len + 1); 11797 ptr += hdr_len; 11798 } 11799 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11800 11801 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11802 sizeof (ip6_frag_t)) & ~7; 11803 11804 /* Check if we can use MDT to send out the frags. */ 11805 ASSERT(!IRE_IS_LOCAL(ire)); 11806 if (ip_multidata_outbound && reachable == 0 && 11807 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11808 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11809 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11810 nexthdr, prev_nexthdr_offset); 11811 return; 11812 } 11813 11814 /* 11815 * Allocate an mblk with enough room for the link-layer 11816 * header, the unfragmentable part of the datagram, and the 11817 * fragment header. This (or a copy) will be used as the 11818 * first mblk for each fragment we send. 11819 */ 11820 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11821 BPRI_HI); 11822 if (hmp == NULL) { 11823 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11824 freemsg(mp); 11825 return; 11826 } 11827 hmp->b_rptr += ip_wroff_extra; 11828 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11829 11830 fip6h = (ip6_t *)hmp->b_rptr; 11831 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11832 11833 bcopy(ip6h, fip6h, unfragmentable_len); 11834 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11835 11836 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11837 11838 fraghdr->ip6f_nxt = nexthdr; 11839 fraghdr->ip6f_reserved = 0; 11840 fraghdr->ip6f_offlg = 0; 11841 fraghdr->ip6f_ident = htonl(ident); 11842 11843 /* 11844 * len is the total length of the fragmentable data in this 11845 * datagram. For each fragment sent, we will decrement len 11846 * by the amount of fragmentable data sent in that fragment 11847 * until len reaches zero. 
11848 */ 11849 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11850 11851 /* 11852 * Move read ptr past unfragmentable portion, we don't want this part 11853 * of the data in our fragments. 11854 */ 11855 mp->b_rptr += unfragmentable_len; 11856 11857 while (len != 0) { 11858 mlen = MIN(len, max_chunk); 11859 len -= mlen; 11860 if (len != 0) { 11861 /* Not last */ 11862 hmp0 = copyb(hmp); 11863 if (hmp0 == NULL) { 11864 freeb(hmp); 11865 freemsg(mp); 11866 BUMP_MIB(ill->ill_ip_mib, 11867 ipIfStatsOutFragFails); 11868 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11869 return; 11870 } 11871 off_flags = IP6F_MORE_FRAG; 11872 } else { 11873 /* Last fragment */ 11874 hmp0 = hmp; 11875 hmp = NULL; 11876 off_flags = 0; 11877 } 11878 fip6h = (ip6_t *)(hmp0->b_rptr); 11879 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11880 11881 fip6h->ip6_plen = htons((uint16_t)(mlen + 11882 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11883 /* 11884 * Note: Optimization alert. 11885 * In IPv6 (and IPv4) protocol header, Fragment Offset 11886 * ("offset") is 13 bits wide and in 8-octet units. 11887 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11888 * it occupies the most significant 13 bits. 11889 * (least significant 13 bits in IPv4). 11890 * We do not do any shifts here. Not shifting is same effect 11891 * as taking offset value in octet units, dividing by 8 and 11892 * then shifting 3 bits left to line it up in place in proper 11893 * place protocol header. 
11894 */ 11895 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11896 11897 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11898 /* mp has already been freed by ip_carve_mp() */ 11899 if (hmp != NULL) 11900 freeb(hmp); 11901 freeb(hmp0); 11902 ip1dbg(("ip_carve_mp: failed\n")); 11903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11904 return; 11905 } 11906 hmp0->b_cont = dmp; 11907 /* Get the priority marking, if any */ 11908 hmp0->b_band = dmp->b_band; 11909 UPDATE_OB_PKT_COUNT(ire); 11910 ire->ire_last_used_time = lbolt; 11911 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11912 caller, NULL); 11913 reachable = 0; /* No need to redo state machine in loop */ 11914 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11915 offset += mlen; 11916 } 11917 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11918 } 11919 11920 /* 11921 * Determine if the ill and multicast aspects of that packets 11922 * "matches" the conn. 11923 */ 11924 boolean_t 11925 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11926 zoneid_t zoneid) 11927 { 11928 ill_t *in_ill; 11929 boolean_t wantpacket = B_TRUE; 11930 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11931 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11932 11933 /* 11934 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11935 * unicast and multicast reception to conn_incoming_ill. 11936 * conn_wantpacket_v6 is called both for unicast and 11937 * multicast. 11938 * 11939 * 1) The unicast copy of the packet can come anywhere in 11940 * the ill group if it is part of the group. Thus, we 11941 * need to check to see whether the ill group matches 11942 * if in_ill is part of a group. 11943 * 11944 * 2) ip_rput does not suppress duplicate multicast packets. 
11945 * If there are two interfaces in a ill group and we have 11946 * 2 applications (conns) joined a multicast group G on 11947 * both the interfaces, ilm_lookup_ill filter in ip_rput 11948 * will give us two packets because we join G on both the 11949 * interfaces rather than nominating just one interface 11950 * for receiving multicast like broadcast above. So, 11951 * we have to call ilg_lookup_ill to filter out duplicate 11952 * copies, if ill is part of a group, to supress duplicates. 11953 */ 11954 in_ill = connp->conn_incoming_ill; 11955 if (in_ill != NULL) { 11956 mutex_enter(&connp->conn_lock); 11957 in_ill = connp->conn_incoming_ill; 11958 mutex_enter(&ill->ill_lock); 11959 /* 11960 * No IPMP, and the packet did not arrive on conn_incoming_ill 11961 * OR, IPMP in use and the packet arrived on an IPMP group 11962 * different from the conn_incoming_ill's IPMP group. 11963 * Reject the packet. 11964 */ 11965 if ((in_ill->ill_group == NULL && in_ill != ill) || 11966 (in_ill->ill_group != NULL && 11967 in_ill->ill_group != ill->ill_group)) { 11968 wantpacket = B_FALSE; 11969 } 11970 mutex_exit(&ill->ill_lock); 11971 mutex_exit(&connp->conn_lock); 11972 if (!wantpacket) 11973 return (B_FALSE); 11974 } 11975 11976 if (connp->conn_multi_router) 11977 return (B_TRUE); 11978 11979 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11980 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11981 /* 11982 * Unicast case: we match the conn only if it's in the specified 11983 * zone. 11984 */ 11985 return (IPCL_ZONE_MATCH(connp, zoneid)); 11986 } 11987 11988 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11989 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11990 /* 11991 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11992 * disabled, therefore we don't dispatch the multicast packet to 11993 * the sending zone. 
11994 */ 11995 return (B_FALSE); 11996 } 11997 11998 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11999 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 12000 /* 12001 * Multicast packet on the loopback interface: we only match 12002 * conns who joined the group in the specified zone. 12003 */ 12004 return (B_FALSE); 12005 } 12006 12007 mutex_enter(&connp->conn_lock); 12008 wantpacket = 12009 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12010 mutex_exit(&connp->conn_lock); 12011 12012 return (wantpacket); 12013 } 12014 12015 12016 /* 12017 * Transmit a packet and update any NUD state based on the flags 12018 * XXX need to "recover" any ip6i_t when doing putq! 12019 * 12020 * NOTE : This function does not ire_refrele the ire passed in as the 12021 * argument. 12022 */ 12023 void 12024 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12025 int caller, ipsec_out_t *io) 12026 { 12027 mblk_t *mp1; 12028 nce_t *nce = ire->ire_nce; 12029 ill_t *ill; 12030 ill_t *out_ill; 12031 uint64_t delta; 12032 ip6_t *ip6h; 12033 queue_t *stq = ire->ire_stq; 12034 ire_t *ire1 = NULL; 12035 ire_t *save_ire = ire; 12036 boolean_t multirt_send = B_FALSE; 12037 mblk_t *next_mp = NULL; 12038 12039 ip6h = (ip6_t *)mp->b_rptr; 12040 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12041 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12042 ASSERT(nce != NULL); 12043 ASSERT(mp->b_datap->db_type == M_DATA); 12044 ASSERT(stq != NULL); 12045 12046 ill = ire_to_ill(ire); 12047 if (!ill) { 12048 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12049 freemsg(mp); 12050 return; 12051 } 12052 12053 /* 12054 * If a packet is to be sent out an interface that is a 6to4 12055 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12056 * destination, must be checked to have a 6to4 prefix 12057 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12058 * address configured on the sending interface. 
Otherwise, 12059 * the packet was delivered to this interface in error and the 12060 * packet must be dropped. 12061 */ 12062 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12063 ipif_t *ipif = ill->ill_ipif; 12064 12065 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12066 &ip6h->ip6_dst)) { 12067 if (ip_debug > 2) { 12068 /* ip1dbg */ 12069 pr_addr_dbg("ip_xmit_v6: attempting to " 12070 "send 6to4 addressed IPv6 " 12071 "destination (%s) out the wrong " 12072 "interface.\n", AF_INET6, 12073 &ip6h->ip6_dst); 12074 } 12075 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12076 freemsg(mp); 12077 return; 12078 } 12079 } 12080 12081 /* Flow-control check has been done in ip_wput_ire_v6 */ 12082 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12083 caller == IP_WSRV || canput(stq->q_next)) { 12084 uint32_t ill_index; 12085 12086 /* 12087 * In most cases, the emission loop below is entered only 12088 * once. Only in the case where the ire holds the 12089 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12090 * flagged ires in the bucket, and send the packet 12091 * through all crossed RTF_MULTIRT routes. 12092 */ 12093 if (ire->ire_flags & RTF_MULTIRT) { 12094 /* 12095 * Multirouting case. The bucket where ire is stored 12096 * probably holds other RTF_MULTIRT flagged ires 12097 * to the destination. In this call to ip_xmit_v6, 12098 * we attempt to send the packet through all 12099 * those ires. Thus, we first ensure that ire is the 12100 * first RTF_MULTIRT ire in the bucket, 12101 * before walking the ire list. 12102 */ 12103 ire_t *first_ire; 12104 irb_t *irb = ire->ire_bucket; 12105 ASSERT(irb != NULL); 12106 multirt_send = B_TRUE; 12107 12108 /* Make sure we do not omit any multiroute ire. 
*/ 12109 IRB_REFHOLD(irb); 12110 for (first_ire = irb->irb_ire; 12111 first_ire != NULL; 12112 first_ire = first_ire->ire_next) { 12113 if ((first_ire->ire_flags & RTF_MULTIRT) && 12114 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12115 &ire->ire_addr_v6)) && 12116 !(first_ire->ire_marks & 12117 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12118 break; 12119 } 12120 12121 if ((first_ire != NULL) && (first_ire != ire)) { 12122 IRE_REFHOLD(first_ire); 12123 /* ire will be released by the caller */ 12124 ire = first_ire; 12125 nce = ire->ire_nce; 12126 stq = ire->ire_stq; 12127 ill = ire_to_ill(ire); 12128 } 12129 IRB_REFRELE(irb); 12130 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12131 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12132 ILL_MDT_USABLE(ill)) { 12133 /* 12134 * This tcp connection was marked as MDT-capable, but 12135 * it has been turned off due changes in the interface. 12136 * Now that the interface support is back, turn it on 12137 * by notifying tcp. We don't directly modify tcp_mdt, 12138 * since we leave all the details to the tcp code that 12139 * knows better. 12140 */ 12141 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12142 12143 if (mdimp == NULL) { 12144 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12145 "connp %p (ENOMEM)\n", (void *)connp)); 12146 } else { 12147 CONN_INC_REF(connp); 12148 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12149 connp, SQTAG_TCP_INPUT_MCTL); 12150 } 12151 } 12152 12153 do { 12154 mblk_t *mp_ip6h; 12155 12156 if (multirt_send) { 12157 irb_t *irb; 12158 /* 12159 * We are in a multiple send case, need to get 12160 * the next ire and make a duplicate of the 12161 * packet. ire1 holds here the next ire to 12162 * process in the bucket. If multirouting is 12163 * expected, any non-RTF_MULTIRT ire that has 12164 * the right destination address is ignored. 
12165 */ 12166 irb = ire->ire_bucket; 12167 ASSERT(irb != NULL); 12168 12169 IRB_REFHOLD(irb); 12170 for (ire1 = ire->ire_next; 12171 ire1 != NULL; 12172 ire1 = ire1->ire_next) { 12173 if (!(ire1->ire_flags & RTF_MULTIRT)) 12174 continue; 12175 if (!IN6_ARE_ADDR_EQUAL( 12176 &ire1->ire_addr_v6, 12177 &ire->ire_addr_v6)) 12178 continue; 12179 if (ire1->ire_marks & 12180 (IRE_MARK_CONDEMNED| 12181 IRE_MARK_HIDDEN)) 12182 continue; 12183 12184 /* Got one */ 12185 if (ire1 != save_ire) { 12186 IRE_REFHOLD(ire1); 12187 } 12188 break; 12189 } 12190 IRB_REFRELE(irb); 12191 12192 if (ire1 != NULL) { 12193 next_mp = copyb(mp); 12194 if ((next_mp == NULL) || 12195 ((mp->b_cont != NULL) && 12196 ((next_mp->b_cont = 12197 dupmsg(mp->b_cont)) == 12198 NULL))) { 12199 freemsg(next_mp); 12200 next_mp = NULL; 12201 ire_refrele(ire1); 12202 ire1 = NULL; 12203 } 12204 } 12205 12206 /* Last multiroute ire; don't loop anymore. */ 12207 if (ire1 == NULL) { 12208 multirt_send = B_FALSE; 12209 } 12210 } 12211 12212 ill_index = 12213 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12214 12215 /* Initiate IPPF processing */ 12216 if (IP6_OUT_IPP(flags)) { 12217 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12218 if (mp == NULL) { 12219 BUMP_MIB(ill->ill_ip_mib, 12220 ipIfStatsOutDiscards); 12221 if (next_mp != NULL) 12222 freemsg(next_mp); 12223 if (ire != save_ire) { 12224 ire_refrele(ire); 12225 } 12226 return; 12227 } 12228 ip6h = (ip6_t *)mp->b_rptr; 12229 } 12230 mp_ip6h = mp; 12231 12232 /* 12233 * Check for fastpath, we need to hold nce_lock to 12234 * prevent fastpath update from chaining nce_fp_mp. 
12235 */ 12236 12237 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12238 mutex_enter(&nce->nce_lock); 12239 if ((mp1 = nce->nce_fp_mp) != NULL) { 12240 uint32_t hlen; 12241 uchar_t *rptr; 12242 12243 hlen = MBLKL(mp1); 12244 rptr = mp->b_rptr - hlen; 12245 /* 12246 * make sure there is room for the fastpath 12247 * datalink header 12248 */ 12249 if (rptr < mp->b_datap->db_base) { 12250 mp1 = copyb(mp1); 12251 mutex_exit(&nce->nce_lock); 12252 if (mp1 == NULL) { 12253 BUMP_MIB(ill->ill_ip_mib, 12254 ipIfStatsOutDiscards); 12255 freemsg(mp); 12256 if (next_mp != NULL) 12257 freemsg(next_mp); 12258 if (ire != save_ire) { 12259 ire_refrele(ire); 12260 } 12261 return; 12262 } 12263 mp1->b_cont = mp; 12264 12265 /* Get the priority marking, if any */ 12266 mp1->b_band = mp->b_band; 12267 mp = mp1; 12268 } else { 12269 mp->b_rptr = rptr; 12270 /* 12271 * fastpath - pre-pend datalink 12272 * header 12273 */ 12274 bcopy(mp1->b_rptr, rptr, hlen); 12275 mutex_exit(&nce->nce_lock); 12276 } 12277 } else { 12278 /* 12279 * Get the DL_UNITDATA_REQ. 12280 */ 12281 mp1 = nce->nce_res_mp; 12282 if (mp1 == NULL) { 12283 mutex_exit(&nce->nce_lock); 12284 ip1dbg(("ip_xmit_v6: No resolution " 12285 "block ire = %p\n", (void *)ire)); 12286 freemsg(mp); 12287 if (next_mp != NULL) 12288 freemsg(next_mp); 12289 if (ire != save_ire) { 12290 ire_refrele(ire); 12291 } 12292 return; 12293 } 12294 /* 12295 * Prepend the DL_UNITDATA_REQ. 
12296 */ 12297 mp1 = copyb(mp1); 12298 mutex_exit(&nce->nce_lock); 12299 if (mp1 == NULL) { 12300 BUMP_MIB(ill->ill_ip_mib, 12301 ipIfStatsOutDiscards); 12302 freemsg(mp); 12303 if (next_mp != NULL) 12304 freemsg(next_mp); 12305 if (ire != save_ire) { 12306 ire_refrele(ire); 12307 } 12308 return; 12309 } 12310 mp1->b_cont = mp; 12311 12312 /* Get the priority marking, if any */ 12313 mp1->b_band = mp->b_band; 12314 mp = mp1; 12315 } 12316 12317 out_ill = (ill_t *)stq->q_ptr; 12318 12319 DTRACE_PROBE4(ip6__physical__out__start, 12320 ill_t *, NULL, ill_t *, out_ill, 12321 ip6_t *, ip6h, mblk_t *, mp); 12322 12323 FW_HOOKS6(ip6_physical_out_event, 12324 ipv6firewall_physical_out, 12325 NULL, out_ill, ip6h, mp, mp_ip6h); 12326 12327 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12328 12329 if (mp == NULL) { 12330 if (multirt_send) { 12331 ASSERT(ire1 != NULL); 12332 if (ire != save_ire) { 12333 ire_refrele(ire); 12334 } 12335 /* 12336 * Proceed with the next RTF_MULTIRT 12337 * ire, also set up the send-to queue 12338 * accordingly. 12339 */ 12340 ire = ire1; 12341 ire1 = NULL; 12342 stq = ire->ire_stq; 12343 nce = ire->ire_nce; 12344 ill = ire_to_ill(ire); 12345 mp = next_mp; 12346 next_mp = NULL; 12347 continue; 12348 } else { 12349 ASSERT(next_mp == NULL); 12350 ASSERT(ire1 == NULL); 12351 break; 12352 } 12353 } 12354 12355 /* 12356 * Update ire and MIB counters; for save_ire, this has 12357 * been done by the caller. 12358 */ 12359 if (ire != save_ire) { 12360 UPDATE_OB_PKT_COUNT(ire); 12361 ire->ire_last_used_time = lbolt; 12362 12363 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12364 BUMP_MIB(ill->ill_ip_mib, 12365 ipIfStatsHCOutMcastPkts); 12366 UPDATE_MIB(ill->ill_ip_mib, 12367 ipIfStatsHCOutMcastOctets, 12368 ntohs(ip6h->ip6_plen) + 12369 IPV6_HDR_LEN); 12370 } 12371 } 12372 12373 /* 12374 * Send it down. XXX Do we want to flow control AH/ESP 12375 * packets that carry TCP payloads? 
We don't flow 12376 * control TCP packets, but we should also not 12377 * flow-control TCP packets that have been protected. 12378 * We don't have an easy way to find out if an AH/ESP 12379 * packet was originally TCP or not currently. 12380 */ 12381 if (io == NULL) { 12382 BUMP_MIB(ill->ill_ip_mib, 12383 ipIfStatsHCOutTransmits); 12384 UPDATE_MIB(ill->ill_ip_mib, 12385 ipIfStatsHCOutOctets, 12386 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12387 putnext(stq, mp); 12388 } else { 12389 /* 12390 * Safety Pup says: make sure this is 12391 * going to the right interface! 12392 */ 12393 if (io->ipsec_out_capab_ill_index != 12394 ill_index) { 12395 /* IPsec kstats: bump lose counter */ 12396 freemsg(mp1); 12397 } else { 12398 BUMP_MIB(ill->ill_ip_mib, 12399 ipIfStatsHCOutTransmits); 12400 UPDATE_MIB(ill->ill_ip_mib, 12401 ipIfStatsHCOutOctets, 12402 ntohs(ip6h->ip6_plen) + 12403 IPV6_HDR_LEN); 12404 ipsec_hw_putnext(stq, mp); 12405 } 12406 } 12407 12408 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12409 if (ire != save_ire) { 12410 ire_refrele(ire); 12411 } 12412 if (multirt_send) { 12413 ASSERT(ire1 != NULL); 12414 /* 12415 * Proceed with the next RTF_MULTIRT 12416 * ire, also set up the send-to queue 12417 * accordingly. 12418 */ 12419 ire = ire1; 12420 ire1 = NULL; 12421 stq = ire->ire_stq; 12422 nce = ire->ire_nce; 12423 ill = ire_to_ill(ire); 12424 mp = next_mp; 12425 next_mp = NULL; 12426 continue; 12427 } 12428 ASSERT(next_mp == NULL); 12429 ASSERT(ire1 == NULL); 12430 return; 12431 } 12432 12433 ASSERT(nce->nce_state != ND_INCOMPLETE); 12434 12435 /* 12436 * Check for upper layer advice 12437 */ 12438 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12439 /* 12440 * It should be o.k. to check the state without 12441 * a lock here, at most we lose an advice. 
12442 */ 12443 nce->nce_last = TICK_TO_MSEC(lbolt64); 12444 if (nce->nce_state != ND_REACHABLE) { 12445 12446 mutex_enter(&nce->nce_lock); 12447 nce->nce_state = ND_REACHABLE; 12448 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12449 mutex_exit(&nce->nce_lock); 12450 (void) untimeout(nce->nce_timeout_id); 12451 if (ip_debug > 2) { 12452 /* ip1dbg */ 12453 pr_addr_dbg("ip_xmit_v6: state" 12454 " for %s changed to" 12455 " REACHABLE\n", AF_INET6, 12456 &ire->ire_addr_v6); 12457 } 12458 } 12459 if (ire != save_ire) { 12460 ire_refrele(ire); 12461 } 12462 if (multirt_send) { 12463 ASSERT(ire1 != NULL); 12464 /* 12465 * Proceed with the next RTF_MULTIRT 12466 * ire, also set up the send-to queue 12467 * accordingly. 12468 */ 12469 ire = ire1; 12470 ire1 = NULL; 12471 stq = ire->ire_stq; 12472 nce = ire->ire_nce; 12473 ill = ire_to_ill(ire); 12474 mp = next_mp; 12475 next_mp = NULL; 12476 continue; 12477 } 12478 ASSERT(next_mp == NULL); 12479 ASSERT(ire1 == NULL); 12480 return; 12481 } 12482 12483 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12484 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12485 " ill_reachable_time = %d \n", delta, 12486 ill->ill_reachable_time)); 12487 if (delta > (uint64_t)ill->ill_reachable_time) { 12488 nce = ire->ire_nce; 12489 mutex_enter(&nce->nce_lock); 12490 switch (nce->nce_state) { 12491 case ND_REACHABLE: 12492 case ND_STALE: 12493 /* 12494 * ND_REACHABLE is identical to 12495 * ND_STALE in this specific case. If 12496 * reachable time has expired for this 12497 * neighbor (delta is greater than 12498 * reachable time), conceptually, the 12499 * neighbor cache is no longer in 12500 * REACHABLE state, but already in 12501 * STALE state. So the correct 12502 * transition here is to ND_DELAY. 
12503 */ 12504 nce->nce_state = ND_DELAY; 12505 mutex_exit(&nce->nce_lock); 12506 NDP_RESTART_TIMER(nce, 12507 delay_first_probe_time); 12508 if (ip_debug > 3) { 12509 /* ip2dbg */ 12510 pr_addr_dbg("ip_xmit_v6: state" 12511 " for %s changed to" 12512 " DELAY\n", AF_INET6, 12513 &ire->ire_addr_v6); 12514 } 12515 break; 12516 case ND_DELAY: 12517 case ND_PROBE: 12518 mutex_exit(&nce->nce_lock); 12519 /* Timers have already started */ 12520 break; 12521 case ND_UNREACHABLE: 12522 /* 12523 * ndp timer has detected that this nce 12524 * is unreachable and initiated deleting 12525 * this nce and all its associated IREs. 12526 * This is a race where we found the 12527 * ire before it was deleted and have 12528 * just sent out a packet using this 12529 * unreachable nce. 12530 */ 12531 mutex_exit(&nce->nce_lock); 12532 break; 12533 default: 12534 ASSERT(0); 12535 } 12536 } 12537 12538 if (multirt_send) { 12539 ASSERT(ire1 != NULL); 12540 /* 12541 * Proceed with the next RTF_MULTIRT ire, 12542 * Also set up the send-to queue accordingly. 12543 */ 12544 if (ire != save_ire) { 12545 ire_refrele(ire); 12546 } 12547 ire = ire1; 12548 ire1 = NULL; 12549 stq = ire->ire_stq; 12550 nce = ire->ire_nce; 12551 ill = ire_to_ill(ire); 12552 mp = next_mp; 12553 next_mp = NULL; 12554 } 12555 } while (multirt_send); 12556 /* 12557 * In the multirouting case, release the last ire used for 12558 * emission. save_ire will be released by the caller. 12559 */ 12560 if (ire != save_ire) { 12561 ire_refrele(ire); 12562 } 12563 } else { 12564 /* 12565 * Queue packet if we have an conn to give back pressure. 12566 * We can't queue packets intended for hardware acceleration 12567 * since we've tossed that state already. If the packet is 12568 * being fed back from ire_send_v6, we don't know the 12569 * position in the queue to enqueue the packet and we discard 12570 * the packet. 
12571 */ 12572 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12573 (caller != IRE_SEND)) { 12574 if (caller == IP_WSRV) { 12575 connp->conn_did_putbq = 1; 12576 (void) putbq(connp->conn_wq, mp); 12577 conn_drain_insert(connp); 12578 /* 12579 * caller == IP_WSRV implies we are 12580 * the service thread, and the 12581 * queue is already noenabled. 12582 * The check for canput and 12583 * the putbq is not atomic. 12584 * So we need to check again. 12585 */ 12586 if (canput(stq->q_next)) 12587 connp->conn_did_putbq = 0; 12588 } else { 12589 (void) putq(connp->conn_wq, mp); 12590 } 12591 return; 12592 } 12593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12594 freemsg(mp); 12595 return; 12596 } 12597 } 12598 12599 /* 12600 * pr_addr_dbg function provides the needed buffer space to call 12601 * inet_ntop() function's 3rd argument. This function should be 12602 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12603 * stack buffer space in it's own stack frame. This function uses 12604 * a buffer from it's own stack and prints the information. 12605 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12606 * 12607 * Note: This function can call inet_ntop() once. 12608 */ 12609 void 12610 pr_addr_dbg(char *fmt1, int af, const void *addr) 12611 { 12612 char buf[INET6_ADDRSTRLEN]; 12613 12614 if (fmt1 == NULL) { 12615 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12616 return; 12617 } 12618 12619 /* 12620 * This does not compare debug level and just prints 12621 * out. Thus it is the responsibility of the caller 12622 * to check the appropriate debug-level before calling 12623 * this function. 12624 */ 12625 if (ip_debug > 0) { 12626 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12627 } 12628 12629 12630 } 12631 12632 12633 /* 12634 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12635 * if needed and extension headers) that will be needed based on the 12636 * ip6_pkt_t structure passed by the caller. 
12637 * 12638 * The returned length does not include the length of the upper level 12639 * protocol (ULP) header. 12640 */ 12641 int 12642 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12643 { 12644 int len; 12645 12646 len = IPV6_HDR_LEN; 12647 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12648 len += sizeof (ip6i_t); 12649 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12650 ASSERT(ipp->ipp_hopoptslen != 0); 12651 len += ipp->ipp_hopoptslen; 12652 } 12653 if (ipp->ipp_fields & IPPF_RTHDR) { 12654 ASSERT(ipp->ipp_rthdrlen != 0); 12655 len += ipp->ipp_rthdrlen; 12656 } 12657 /* 12658 * En-route destination options 12659 * Only do them if there's a routing header as well 12660 */ 12661 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12662 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12663 ASSERT(ipp->ipp_rtdstoptslen != 0); 12664 len += ipp->ipp_rtdstoptslen; 12665 } 12666 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12667 ASSERT(ipp->ipp_dstoptslen != 0); 12668 len += ipp->ipp_dstoptslen; 12669 } 12670 return (len); 12671 } 12672 12673 /* 12674 * All-purpose routine to build a header chain of an IPv6 header 12675 * followed by any required extension headers and a proto header, 12676 * preceeded (where necessary) by an ip6i_t private header. 12677 * 12678 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12679 * will be filled in appropriately. 12680 * Thus the caller must fill in the rest of the IPv6 header, such as 12681 * traffic class/flowid, source address (if not set here), hoplimit (if not 12682 * set here) and destination address. 12683 * 12684 * The extension headers and ip6i_t header will all be fully filled in. 
12685 */ 12686 void 12687 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12688 ip6_pkt_t *ipp, uint8_t protocol) 12689 { 12690 uint8_t *nxthdr_ptr; 12691 uint8_t *cp; 12692 ip6i_t *ip6i; 12693 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12694 12695 /* 12696 * If sending private ip6i_t header down (checksum info, nexthop, 12697 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12698 * then fill it in. (The checksum info will be filled in by icmp). 12699 */ 12700 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12701 ip6i = (ip6i_t *)ip6h; 12702 ip6h = (ip6_t *)&ip6i[1]; 12703 12704 ip6i->ip6i_flags = 0; 12705 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12706 if (ipp->ipp_fields & IPPF_IFINDEX || 12707 ipp->ipp_fields & IPPF_SCOPE_ID) { 12708 ASSERT(ipp->ipp_ifindex != 0); 12709 ip6i->ip6i_flags |= IP6I_IFINDEX; 12710 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12711 } 12712 if (ipp->ipp_fields & IPPF_ADDR) { 12713 /* 12714 * Enable per-packet source address verification if 12715 * IPV6_PKTINFO specified the source address. 12716 * ip6_src is set in the transport's _wput function. 12717 */ 12718 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12719 &ipp->ipp_addr)); 12720 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12721 } 12722 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12723 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12724 /* 12725 * We need to set this flag so that IP doesn't 12726 * rewrite the IPv6 header's hoplimit with the 12727 * current default value. 
12728 */ 12729 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12730 } 12731 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12732 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12733 &ipp->ipp_nexthop)); 12734 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12735 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12736 } 12737 /* 12738 * tell IP this is an ip6i_t private header 12739 */ 12740 ip6i->ip6i_nxt = IPPROTO_RAW; 12741 } 12742 /* Initialize IPv6 header */ 12743 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12744 if (ipp->ipp_fields & IPPF_TCLASS) { 12745 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12746 (ipp->ipp_tclass << 20); 12747 } 12748 if (ipp->ipp_fields & IPPF_ADDR) 12749 ip6h->ip6_src = ipp->ipp_addr; 12750 12751 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12752 cp = (uint8_t *)&ip6h[1]; 12753 /* 12754 * Here's where we have to start stringing together 12755 * any extension headers in the right order: 12756 * Hop-by-hop, destination, routing, and final destination opts. 12757 */ 12758 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12759 /* Hop-by-hop options */ 12760 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12761 12762 *nxthdr_ptr = IPPROTO_HOPOPTS; 12763 nxthdr_ptr = &hbh->ip6h_nxt; 12764 12765 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12766 cp += ipp->ipp_hopoptslen; 12767 } 12768 /* 12769 * En-route destination options 12770 * Only do them if there's a routing header as well 12771 */ 12772 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12773 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12774 ip6_dest_t *dst = (ip6_dest_t *)cp; 12775 12776 *nxthdr_ptr = IPPROTO_DSTOPTS; 12777 nxthdr_ptr = &dst->ip6d_nxt; 12778 12779 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12780 cp += ipp->ipp_rtdstoptslen; 12781 } 12782 /* 12783 * Routing header next 12784 */ 12785 if (ipp->ipp_fields & IPPF_RTHDR) { 12786 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12787 12788 *nxthdr_ptr = IPPROTO_ROUTING; 12789 nxthdr_ptr = &rt->ip6r_nxt; 12790 12791 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12792 cp += ipp->ipp_rthdrlen; 12793 } 
12794 /* 12795 * Do ultimate destination options 12796 */ 12797 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12798 ip6_dest_t *dest = (ip6_dest_t *)cp; 12799 12800 *nxthdr_ptr = IPPROTO_DSTOPTS; 12801 nxthdr_ptr = &dest->ip6d_nxt; 12802 12803 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12804 cp += ipp->ipp_dstoptslen; 12805 } 12806 /* 12807 * Now set the last header pointer to the proto passed in 12808 */ 12809 *nxthdr_ptr = protocol; 12810 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12811 } 12812 12813 /* 12814 * Return a pointer to the routing header extension header 12815 * in the IPv6 header(s) chain passed in. 12816 * If none found, return NULL 12817 * Assumes that all extension headers are in same mblk as the v6 header 12818 */ 12819 ip6_rthdr_t * 12820 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12821 { 12822 ip6_dest_t *desthdr; 12823 ip6_frag_t *fraghdr; 12824 uint_t hdrlen; 12825 uint8_t nexthdr; 12826 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12827 12828 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12829 return ((ip6_rthdr_t *)ptr); 12830 12831 /* 12832 * The routing header will precede all extension headers 12833 * other than the hop-by-hop and destination options 12834 * extension headers, so if we see anything other than those, 12835 * we're done and didn't find it. 12836 * We could see a destination options header alone but no 12837 * routing header, in which case we'll return NULL as soon as 12838 * we see anything after that. 12839 * Hop-by-hop and destination option headers are identical, 12840 * so we can use either one we want as a template. 12841 */ 12842 nexthdr = ip6h->ip6_nxt; 12843 while (ptr < endptr) { 12844 /* Is there enough left for len + nexthdr? 
*/ 12845 if (ptr + MIN_EHDR_LEN > endptr) 12846 return (NULL); 12847 12848 switch (nexthdr) { 12849 case IPPROTO_HOPOPTS: 12850 case IPPROTO_DSTOPTS: 12851 /* Assumes the headers are identical for hbh and dst */ 12852 desthdr = (ip6_dest_t *)ptr; 12853 hdrlen = 8 * (desthdr->ip6d_len + 1); 12854 nexthdr = desthdr->ip6d_nxt; 12855 break; 12856 12857 case IPPROTO_ROUTING: 12858 return ((ip6_rthdr_t *)ptr); 12859 12860 case IPPROTO_FRAGMENT: 12861 fraghdr = (ip6_frag_t *)ptr; 12862 hdrlen = sizeof (ip6_frag_t); 12863 nexthdr = fraghdr->ip6f_nxt; 12864 break; 12865 12866 default: 12867 return (NULL); 12868 } 12869 ptr += hdrlen; 12870 } 12871 return (NULL); 12872 } 12873 12874 /* 12875 * Called for source-routed packets originating on this node. 12876 * Manipulates the original routing header by moving every entry up 12877 * one slot, placing the first entry in the v6 header's v6_dst field, 12878 * and placing the ultimate destination in the routing header's last 12879 * slot. 12880 * 12881 * Returns the checksum diference between the ultimate destination 12882 * (last hop in the routing header when the packet is sent) and 12883 * the first hop (ip6_dst when the packet is sent) 12884 */ 12885 uint32_t 12886 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12887 { 12888 uint_t numaddr; 12889 uint_t i; 12890 in6_addr_t *addrptr; 12891 in6_addr_t tmp; 12892 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12893 uint32_t cksm; 12894 uint32_t addrsum = 0; 12895 uint16_t *ptr; 12896 12897 /* 12898 * Perform any processing needed for source routing. 12899 * We know that all extension headers will be in the same mblk 12900 * as the IPv6 header. 12901 */ 12902 12903 /* 12904 * If no segments left in header, or the header length field is zero, 12905 * don't move hop addresses around; 12906 * Checksum difference is zero. 
12907 */ 12908 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12909 return (0); 12910 12911 ptr = (uint16_t *)&ip6h->ip6_dst; 12912 cksm = 0; 12913 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12914 cksm += ptr[i]; 12915 } 12916 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12917 12918 /* 12919 * Here's where the fun begins - we have to 12920 * move all addresses up one spot, take the 12921 * first hop and make it our first ip6_dst, 12922 * and place the ultimate destination in the 12923 * newly-opened last slot. 12924 */ 12925 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12926 numaddr = rthdr->ip6r0_len / 2; 12927 tmp = *addrptr; 12928 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12929 *addrptr = addrptr[1]; 12930 } 12931 *addrptr = ip6h->ip6_dst; 12932 ip6h->ip6_dst = tmp; 12933 12934 /* 12935 * From the checksummed ultimate destination subtract the checksummed 12936 * current ip6_dst (the first hop address). Return that number. 12937 * (In the v4 case, the second part of this is done in each routine 12938 * that calls ip_massage_options(). We do it all in this one place 12939 * for v6). 12940 */ 12941 ptr = (uint16_t *)&ip6h->ip6_dst; 12942 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12943 addrsum += ptr[i]; 12944 } 12945 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12946 if ((int)cksm < 0) 12947 cksm--; 12948 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12949 12950 return (cksm); 12951 } 12952 12953 /* 12954 * See if the upper-level protocol indicated by 'proto' will be able 12955 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12956 * ICMP6_PACKET_TOO_BIG (IPv6). 12957 */ 12958 static boolean_t 12959 ip_ulp_cando_pkt2big(int proto) 12960 { 12961 /* 12962 * For now, only TCP can handle this. 12963 * Tunnels may be able to also, but since tun isn't working over 12964 * IPv6 yet, don't worry about it for now. 
12965 */ 12966 return (proto == IPPROTO_TCP); 12967 } 12968 12969 12970 /* 12971 * Propagate a multicast group membership operation (join/leave) (*fn) on 12972 * all interfaces crossed by the related multirt routes. 12973 * The call is considered successful if the operation succeeds 12974 * on at least one interface. 12975 * The function is called if the destination address in the packet to send 12976 * is multirouted. 12977 */ 12978 int 12979 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12980 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12981 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12982 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12983 { 12984 ire_t *ire_gw; 12985 irb_t *irb; 12986 int index, error = 0; 12987 opt_restart_t *or; 12988 12989 irb = ire->ire_bucket; 12990 ASSERT(irb != NULL); 12991 12992 ASSERT(DB_TYPE(first_mp) == M_CTL); 12993 or = (opt_restart_t *)first_mp->b_rptr; 12994 12995 IRB_REFHOLD(irb); 12996 for (; ire != NULL; ire = ire->ire_next) { 12997 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12998 continue; 12999 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 13000 continue; 13001 13002 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 13003 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 13004 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 13005 /* No resolver exists for the gateway; skip this ire. */ 13006 if (ire_gw == NULL) 13007 continue; 13008 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 13009 /* 13010 * A resolver exists: we can get the interface on which we have 13011 * to apply the operation. 
13012 */ 13013 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13014 first_mp); 13015 if (error == 0) 13016 or->or_private = CGTP_MCAST_SUCCESS; 13017 13018 if (ip_debug > 0) { 13019 ulong_t off; 13020 char *ksym; 13021 13022 ksym = kobj_getsymname((uintptr_t)fn, &off); 13023 ip2dbg(("ip_multirt_apply_membership_v6: " 13024 "called %s, multirt group 0x%08x via itf 0x%08x, " 13025 "error %d [success %u]\n", 13026 ksym ? ksym : "?", 13027 ntohl(V4_PART_OF_V6((*v6grp))), 13028 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13029 error, or->or_private)); 13030 } 13031 13032 ire_refrele(ire_gw); 13033 if (error == EINPROGRESS) { 13034 IRB_REFRELE(irb); 13035 return (error); 13036 } 13037 } 13038 IRB_REFRELE(irb); 13039 /* 13040 * Consider the call as successful if we succeeded on at least 13041 * one interface. Otherwise, return the last encountered error. 13042 */ 13043 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13044 } 13045 13046 void 13047 ip6_kstat_init(void) 13048 { 13049 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 13050 "net", KSTAT_TYPE_NAMED, 13051 sizeof (ip6_statistics) / sizeof (kstat_named_t), 13052 KSTAT_FLAG_VIRTUAL)) != NULL) { 13053 ip6_kstat->ks_data = &ip6_statistics; 13054 kstat_install(ip6_kstat); 13055 } 13056 } 13057 13058 /* 13059 * The following two functions set and get the value for the 13060 * IPV6_SRC_PREFERENCES socket option. 13061 */ 13062 int 13063 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13064 { 13065 /* 13066 * We only support preferences that are covered by 13067 * IPV6_PREFER_SRC_MASK. 13068 */ 13069 if (prefs & ~IPV6_PREFER_SRC_MASK) 13070 return (EINVAL); 13071 13072 /* 13073 * Look for conflicting preferences or default preferences. If 13074 * both bits of a related pair are clear, the application wants the 13075 * system's default value for that pair. Both bits in a pair can't 13076 * be set. 
13077 */ 13078 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13079 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13080 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13081 IPV6_PREFER_SRC_MIPMASK) { 13082 return (EINVAL); 13083 } 13084 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13085 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13086 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13087 IPV6_PREFER_SRC_TMPMASK) { 13088 return (EINVAL); 13089 } 13090 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13091 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13092 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13093 IPV6_PREFER_SRC_CGAMASK) { 13094 return (EINVAL); 13095 } 13096 13097 connp->conn_src_preferences = prefs; 13098 return (0); 13099 } 13100 13101 size_t 13102 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13103 { 13104 *val = connp->conn_src_preferences; 13105 return (sizeof (connp->conn_src_preferences)); 13106 } 13107 13108 int 13109 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13110 { 13111 ill_t *ill; 13112 ire_t *ire; 13113 int error; 13114 13115 /* 13116 * Verify the source address and ifindex. Privileged users can use 13117 * any source address. For ancillary data the source address is 13118 * checked in ip_wput_v6. 13119 */ 13120 if (pkti->ipi6_ifindex != 0) { 13121 ASSERT(connp != NULL); 13122 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13123 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 13124 if (ill == NULL) { 13125 /* 13126 * We just want to know if the interface exists, we 13127 * don't really care about the ill pointer itself. 
13128 */ 13129 if (error != EINPROGRESS) 13130 return (error); 13131 error = 0; /* Ensure we don't use it below */ 13132 } else { 13133 ill_refrele(ill); 13134 } 13135 } 13136 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13137 secpolicy_net_rawaccess(cr) != 0) { 13138 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13139 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13140 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 13141 if (ire != NULL) 13142 ire_refrele(ire); 13143 else 13144 return (ENXIO); 13145 } 13146 return (0); 13147 } 13148 13149 /* 13150 * Get the size of the IP options (including the IP headers size) 13151 * without including the AH header's size. If till_ah is B_FALSE, 13152 * and if AH header is present, dest options beyond AH header will 13153 * also be included in the returned size. 13154 */ 13155 int 13156 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13157 { 13158 ip6_t *ip6h; 13159 uint8_t nexthdr; 13160 uint8_t *whereptr; 13161 ip6_hbh_t *hbhhdr; 13162 ip6_dest_t *dsthdr; 13163 ip6_rthdr_t *rthdr; 13164 int ehdrlen; 13165 int size; 13166 ah_t *ah; 13167 13168 ip6h = (ip6_t *)mp->b_rptr; 13169 size = IPV6_HDR_LEN; 13170 nexthdr = ip6h->ip6_nxt; 13171 whereptr = (uint8_t *)&ip6h[1]; 13172 for (;;) { 13173 /* Assume IP has already stripped it */ 13174 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13175 switch (nexthdr) { 13176 case IPPROTO_HOPOPTS: 13177 hbhhdr = (ip6_hbh_t *)whereptr; 13178 nexthdr = hbhhdr->ip6h_nxt; 13179 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13180 break; 13181 case IPPROTO_DSTOPTS: 13182 dsthdr = (ip6_dest_t *)whereptr; 13183 nexthdr = dsthdr->ip6d_nxt; 13184 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13185 break; 13186 case IPPROTO_ROUTING: 13187 rthdr = (ip6_rthdr_t *)whereptr; 13188 nexthdr = rthdr->ip6r_nxt; 13189 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13190 break; 13191 default : 13192 if (till_ah) { 13193 ASSERT(nexthdr == IPPROTO_AH); 13194 return (size); 13195 } 13196 /* 13197 * If we don't have a AH 
header to traverse, 13198 * return now. This happens normally for 13199 * outbound datagrams where we have not inserted 13200 * the AH header. 13201 */ 13202 if (nexthdr != IPPROTO_AH) { 13203 return (size); 13204 } 13205 13206 /* 13207 * We don't include the AH header's size 13208 * to be symmetrical with other cases where 13209 * we either don't have a AH header (outbound) 13210 * or peek into the AH header yet (inbound and 13211 * not pulled up yet). 13212 */ 13213 ah = (ah_t *)whereptr; 13214 nexthdr = ah->ah_nexthdr; 13215 ehdrlen = (ah->ah_length << 2) + 8; 13216 13217 if (nexthdr == IPPROTO_DSTOPTS) { 13218 if (whereptr + ehdrlen >= mp->b_wptr) { 13219 /* 13220 * The destination options header 13221 * is not part of the first mblk. 13222 */ 13223 whereptr = mp->b_cont->b_rptr; 13224 } else { 13225 whereptr += ehdrlen; 13226 } 13227 13228 dsthdr = (ip6_dest_t *)whereptr; 13229 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13230 size += ehdrlen; 13231 } 13232 return (size); 13233 } 13234 whereptr += ehdrlen; 13235 size += ehdrlen; 13236 } 13237 } 13238