1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * IP statistics. 115 */ 116 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 117 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 118 119 typedef struct ip6_stat { 120 kstat_named_t ip6_udp_fast_path; 121 kstat_named_t ip6_udp_slow_path; 122 kstat_named_t ip6_udp_fannorm; 123 kstat_named_t ip6_udp_fanmb; 124 kstat_named_t ip6_out_sw_cksum; 125 kstat_named_t ip6_in_sw_cksum; 126 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 127 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 128 kstat_named_t ip6_tcp_in_sw_cksum_err; 129 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 130 kstat_named_t ip6_udp_in_full_hw_cksum_err; 131 kstat_named_t ip6_udp_in_part_hw_cksum_err; 132 kstat_named_t ip6_udp_in_sw_cksum_err; 133 kstat_named_t ip6_udp_out_sw_cksum_bytes; 134 kstat_named_t ip6_frag_mdt_pkt_out; 135 kstat_named_t ip6_frag_mdt_discarded; 136 kstat_named_t ip6_frag_mdt_allocfail; 137 kstat_named_t ip6_frag_mdt_addpdescfail; 138 kstat_named_t ip6_frag_mdt_allocd; 139 } ip6_stat_t; 140 141 static ip6_stat_t ip6_statistics = { 142 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 143 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 144 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 145 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 146 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 147 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 148 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 149 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 150 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 151 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 152 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 153 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 154 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 155 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 156 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 157 { "ip6_frag_mdt_discarded", 
KSTAT_DATA_UINT64 }, 158 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 159 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 160 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 161 }; 162 163 static kstat_t *ip6_kstat; 164 165 /* 166 * Naming conventions: 167 * These rules should be judiciously applied 168 * if there is a need to identify something as IPv6 versus IPv4 169 * IPv6 funcions will end with _v6 in the ip module. 170 * IPv6 funcions will end with _ipv6 in the transport modules. 171 * IPv6 macros: 172 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 173 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 174 * And then there are ..V4_PART_OF_V6. 175 * The intent is that macros in the ip module end with _V6. 176 * IPv6 global variables will start with ipv6_ 177 * IPv6 structures will start with ipv6 178 * IPv6 defined constants should start with IPV6_ 179 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 180 */ 181 182 /* 183 * IPv6 mibs when the interface (ill) is not known. 184 * When the ill is known the per-interface mib in the ill is used. 185 */ 186 mib2_ipv6IfStatsEntry_t ip6_mib; 187 mib2_ipv6IfIcmpEntry_t icmp6_mib; 188 189 /* 190 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 191 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 192 * from IANA. This mechanism will remain in effect until an official 193 * number is obtained. 
194 */ 195 uchar_t ip6opt_ls; 196 197 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 198 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 199 200 const in6_addr_t ipv6_all_ones = 201 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 202 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 203 204 #ifdef _BIG_ENDIAN 205 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 206 #else /* _BIG_ENDIAN */ 207 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 208 #endif /* _BIG_ENDIAN */ 209 210 #ifdef _BIG_ENDIAN 211 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 212 #else /* _BIG_ENDIAN */ 213 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 214 #endif /* _BIG_ENDIAN */ 215 216 #ifdef _BIG_ENDIAN 217 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 218 #else /* _BIG_ENDIAN */ 219 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 220 #endif /* _BIG_ENDIAN */ 221 222 #ifdef _BIG_ENDIAN 223 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 224 #else /* _BIG_ENDIAN */ 225 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 226 #endif /* _BIG_ENDIAN */ 227 228 #ifdef _BIG_ENDIAN 229 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 230 #else /* _BIG_ENDIAN */ 231 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 232 #endif /* _BIG_ENDIAN */ 233 234 #ifdef _BIG_ENDIAN 235 const in6_addr_t ipv6_solicited_node_mcast = 236 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 237 #else /* _BIG_ENDIAN */ 238 const in6_addr_t ipv6_solicited_node_mcast = 239 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 240 #endif /* _BIG_ENDIAN */ 241 242 /* 243 * Used by icmp_send_redirect_v6 for picking random src. 
244 */ 245 uint_t icmp_redirect_v6_src_index; 246 247 /* Leave room for ip_newroute to tack on the src and target addresses */ 248 #define OK_RESOLVER_MP_V6(mp) \ 249 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 250 251 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 252 boolean_t, zoneid_t); 253 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 254 const in6_addr_t *, boolean_t, zoneid_t); 255 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 256 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 257 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 258 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 259 boolean_t, boolean_t, boolean_t, boolean_t); 260 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 261 iulp_t *); 262 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 263 uint16_t, boolean_t, boolean_t, boolean_t); 264 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 265 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 266 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 267 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 268 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 269 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 270 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 271 uint8_t *, uint_t, uint8_t); 272 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 273 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 274 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 275 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 276 conn_t *, int, int, int, zoneid_t); 277 static boolean_t ip_ulp_cando_pkt2big(int); 278 279 void ip_rput_v6(queue_t *, mblk_t *); 280 static void ip_wput_v6(queue_t *, mblk_t *); 281 282 /* 283 * A template for an IPv6 AR_ENTRY_QUERY 284 */ 285 static areq_t ipv6_areq_template = { 286 
AR_ENTRY_QUERY, /* cmd */ 287 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 288 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 289 IP6_DL_SAP, /* protocol, from arps perspective */ 290 sizeof (areq_t), /* target addr offset */ 291 IPV6_ADDR_LEN, /* target addr_length */ 292 0, /* flags */ 293 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 294 IPV6_ADDR_LEN, /* sender addr length */ 295 6, /* xmit_count */ 296 1000, /* (re)xmit_interval in milliseconds */ 297 4 /* max # of requests to buffer */ 298 /* anything else filled in by the code */ 299 }; 300 301 struct qinit rinit_ipv6 = { 302 (pfi_t)ip_rput_v6, 303 NULL, 304 ip_open, 305 ip_close, 306 NULL, 307 &ip_mod_info 308 }; 309 310 struct qinit winit_ipv6 = { 311 (pfi_t)ip_wput_v6, 312 (pfi_t)ip_wsrv, 313 ip_open, 314 ip_close, 315 NULL, 316 &ip_mod_info 317 }; 318 319 /* 320 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 321 * The message has already been checksummed and if needed, 322 * a copy has been made to be sent any interested ICMP client (conn) 323 * Note that this is different than icmp_inbound() which does the fanout 324 * to conn's as well as local processing of the ICMP packets. 325 * 326 * All error messages are passed to the matching transport stream. 327 * 328 * Zones notes: 329 * The packet is only processed in the context of the specified zone: typically 330 * only this zone will reply to an echo request. This means that the caller must 331 * call icmp_inbound_v6() for each relevant zone. 
332 */ 333 static void 334 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 335 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 336 { 337 icmp6_t *icmp6; 338 ip6_t *ip6h; 339 boolean_t interested; 340 ip6i_t *ip6i; 341 in6_addr_t origsrc; 342 ire_t *ire; 343 mblk_t *first_mp; 344 ipsec_in_t *ii; 345 346 ASSERT(ill != NULL); 347 first_mp = mp; 348 if (mctl_present) { 349 mp = first_mp->b_cont; 350 ASSERT(mp != NULL); 351 352 ii = (ipsec_in_t *)first_mp->b_rptr; 353 ASSERT(ii->ipsec_in_type == IPSEC_IN); 354 } 355 356 ip6h = (ip6_t *)mp->b_rptr; 357 358 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 359 360 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 361 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 362 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 363 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 364 freemsg(first_mp); 365 return; 366 } 367 ip6h = (ip6_t *)mp->b_rptr; 368 } 369 if (icmp_accept_clear_messages == 0) { 370 first_mp = ipsec_check_global_policy(first_mp, NULL, 371 NULL, ip6h, mctl_present); 372 if (first_mp == NULL) 373 return; 374 } 375 376 /* 377 * On a labeled system, we have to check whether the zone itself is 378 * permitted to receive raw traffic. 379 */ 380 if (is_system_labeled()) { 381 if (zoneid == ALL_ZONES) 382 zoneid = tsol_packet_to_zoneid(mp); 383 if (!tsol_can_accept_raw(mp, B_FALSE)) { 384 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 385 zoneid)); 386 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 387 freemsg(first_mp); 388 return; 389 } 390 } 391 392 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 393 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 394 icmp6->icmp6_code)); 395 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 396 397 /* Initiate IPPF processing here */ 398 if (IP6_IN_IPP(flags)) { 399 400 /* 401 * If the ifindex changes due to SIOCSLIFINDEX 402 * packet may return to IP on the wrong ill. 
403 */ 404 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 405 if (mp == NULL) { 406 if (mctl_present) { 407 freeb(first_mp); 408 } 409 return; 410 } 411 } 412 413 switch (icmp6->icmp6_type) { 414 case ICMP6_DST_UNREACH: 415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 416 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 417 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 418 break; 419 420 case ICMP6_TIME_EXCEEDED: 421 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 422 break; 423 424 case ICMP6_PARAM_PROB: 425 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 426 break; 427 428 case ICMP6_PACKET_TOO_BIG: 429 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 430 zoneid); 431 return; 432 case ICMP6_ECHO_REQUEST: 433 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 434 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 435 !ipv6_resp_echo_mcast) 436 break; 437 438 /* 439 * We must have exclusive use of the mblk to convert it to 440 * a response. 441 * If not, we copy it. 442 */ 443 if (mp->b_datap->db_ref > 1) { 444 mblk_t *mp1; 445 446 mp1 = copymsg(mp); 447 freemsg(mp); 448 if (mp1 == NULL) { 449 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 450 if (mctl_present) 451 freeb(first_mp); 452 return; 453 } 454 mp = mp1; 455 ip6h = (ip6_t *)mp->b_rptr; 456 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 457 if (mctl_present) 458 first_mp->b_cont = mp; 459 else 460 first_mp = mp; 461 } 462 463 /* 464 * Turn the echo into an echo reply. 465 * Remove any extension headers (do not reverse a source route) 466 * and clear the flow id (keep traffic class for now). 
467 */ 468 if (hdr_length != IPV6_HDR_LEN) { 469 int i; 470 471 for (i = 0; i < IPV6_HDR_LEN; i++) 472 mp->b_rptr[hdr_length - i - 1] = 473 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 474 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 475 ip6h = (ip6_t *)mp->b_rptr; 476 ip6h->ip6_nxt = IPPROTO_ICMPV6; 477 hdr_length = IPV6_HDR_LEN; 478 } 479 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 480 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 481 482 ip6h->ip6_plen = 483 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 484 origsrc = ip6h->ip6_src; 485 /* 486 * Reverse the source and destination addresses. 487 * If the return address is a multicast, zero out the source 488 * (ip_wput_v6 will set an address). 489 */ 490 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 491 ip6h->ip6_src = ipv6_all_zeros; 492 ip6h->ip6_dst = origsrc; 493 } else { 494 ip6h->ip6_src = ip6h->ip6_dst; 495 ip6h->ip6_dst = origsrc; 496 } 497 498 /* set the hop limit */ 499 ip6h->ip6_hops = ipv6_def_hops; 500 501 /* 502 * Prepare for checksum by putting icmp length in the icmp 503 * checksum field. The checksum is calculated in ip_wput_v6. 504 */ 505 icmp6->icmp6_cksum = ip6h->ip6_plen; 506 /* 507 * ICMP echo replies should go out on the same interface 508 * the request came on as probes used by in.mpathd for 509 * detecting NIC failures are ECHO packets. We turn-off load 510 * spreading by allocating a ip6i and setting ip6i_attach_if 511 * to B_TRUE which is handled both by ip_wput_v6 and 512 * ip_newroute_v6. If we don't turnoff load spreading, 513 * the packets might get dropped if there are no 514 * non-FAILED/INACTIVE interfaces for it to go out on and 515 * in.mpathd would wrongly detect a failure or mis-detect 516 * a NIC failure as a link failure. As load spreading can 517 * happen only if ill_group is not NULL, we do only for 518 * that case and this does not affect the normal case. 519 * 520 * We force this only on echo packets that came from on-link 521 * hosts. 
We restrict this to link-local addresses which 522 * is used by in.mpathd for probing. In the IPv6 case, 523 * default routes typically have an ire_ipif pointer and 524 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 525 * might work. As a default route out of this interface 526 * may not be present, enforcing this packet to go out in 527 * this case may not work. 528 */ 529 if (ill->ill_group != NULL && 530 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 531 /* 532 * If we are sending replies to ourselves, don't 533 * set ATTACH_IF as we may not be able to find 534 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 535 * causes ip_wput_v6 to look for an IRE_LOCAL on 536 * "ill" which it may not find and will try to 537 * create an IRE_CACHE for our local address. Once 538 * we do this, we will try to forward all packets 539 * meant to our LOCAL address. 540 */ 541 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 542 NULL); 543 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 544 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 545 if (mp == NULL) { 546 BUMP_MIB(ill->ill_icmp6_mib, 547 ipv6IfIcmpInErrors); 548 if (ire != NULL) 549 ire_refrele(ire); 550 if (mctl_present) 551 freeb(first_mp); 552 return; 553 } else if (mctl_present) { 554 first_mp->b_cont = mp; 555 } else { 556 first_mp = mp; 557 } 558 ip6i = (ip6i_t *)mp->b_rptr; 559 ip6i->ip6i_flags = IP6I_ATTACH_IF; 560 ip6i->ip6i_ifindex = 561 ill->ill_phyint->phyint_ifindex; 562 } 563 if (ire != NULL) 564 ire_refrele(ire); 565 } 566 567 if (!mctl_present) { 568 /* 569 * This packet should go out the same way as it 570 * came in i.e in clear. To make sure that global 571 * policy will not be applied to this in ip_wput, 572 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
573 */ 574 ASSERT(first_mp == mp); 575 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 576 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 577 freemsg(mp); 578 return; 579 } 580 ii = (ipsec_in_t *)first_mp->b_rptr; 581 582 /* This is not a secure packet */ 583 ii->ipsec_in_secure = B_FALSE; 584 first_mp->b_cont = mp; 585 } 586 ii->ipsec_in_zoneid = zoneid; 587 ASSERT(zoneid != ALL_ZONES); 588 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 589 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 590 return; 591 } 592 put(WR(q), first_mp); 593 return; 594 595 case ICMP6_ECHO_REPLY: 596 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 597 break; 598 599 case ND_ROUTER_SOLICIT: 600 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 601 break; 602 603 case ND_ROUTER_ADVERT: 604 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 605 break; 606 607 case ND_NEIGHBOR_SOLICIT: 608 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 609 if (mctl_present) 610 freeb(first_mp); 611 /* XXX may wish to pass first_mp up to ndp_input someday. */ 612 ndp_input(ill, mp, dl_mp); 613 return; 614 615 case ND_NEIGHBOR_ADVERT: 616 BUMP_MIB(ill->ill_icmp6_mib, 617 ipv6IfIcmpInNeighborAdvertisements); 618 if (mctl_present) 619 freeb(first_mp); 620 /* XXX may wish to pass first_mp up to ndp_input someday. */ 621 ndp_input(ill, mp, dl_mp); 622 return; 623 624 case ND_REDIRECT: { 625 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 626 627 if (ipv6_ignore_redirect) 628 break; 629 630 /* 631 * As there is no upper client to deliver, we don't 632 * need the first_mp any more. 633 */ 634 if (mctl_present) 635 freeb(first_mp); 636 if (!pullupmsg(mp, -1) || 637 !icmp_redirect_ok_v6(ill, mp)) { 638 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 639 break; 640 } 641 icmp_redirect_v6(q, mp, ill); 642 return; 643 } 644 645 /* 646 * The next three icmp messages will be handled by MLD. 
647 * Pass all valid MLD packets up to any process(es) 648 * listening on a raw ICMP socket. MLD messages are 649 * freed by mld_input function. 650 */ 651 case MLD_LISTENER_QUERY: 652 case MLD_LISTENER_REPORT: 653 case MLD_LISTENER_REDUCTION: 654 if (mctl_present) 655 freeb(first_mp); 656 mld_input(q, mp, ill); 657 return; 658 default: 659 break; 660 } 661 if (interested) { 662 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 663 mctl_present, zoneid); 664 } else { 665 freemsg(first_mp); 666 } 667 } 668 669 /* 670 * Process received IPv6 ICMP Packet too big. 671 * After updating any IRE it does the fanout to any matching transport streams. 672 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 673 */ 674 /* ARGSUSED */ 675 static void 676 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 677 boolean_t mctl_present, zoneid_t zoneid) 678 { 679 ip6_t *ip6h; 680 ip6_t *inner_ip6h; 681 icmp6_t *icmp6; 682 uint16_t hdr_length; 683 uint32_t mtu; 684 ire_t *ire, *first_ire; 685 mblk_t *first_mp; 686 687 first_mp = mp; 688 if (mctl_present) 689 mp = first_mp->b_cont; 690 /* 691 * We must have exclusive use of the mblk to update the MTU 692 * in the packet. 693 * If not, we copy it. 694 * 695 * If there's an M_CTL present, we know that allocated first_mp 696 * earlier in this function, so we know first_mp has refcnt of one. 
697 */ 698 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 699 if (mp->b_datap->db_ref > 1) { 700 mblk_t *mp1; 701 702 mp1 = copymsg(mp); 703 freemsg(mp); 704 if (mp1 == NULL) { 705 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 706 if (mctl_present) 707 freeb(first_mp); 708 return; 709 } 710 mp = mp1; 711 if (mctl_present) 712 first_mp->b_cont = mp; 713 else 714 first_mp = mp; 715 } 716 ip6h = (ip6_t *)mp->b_rptr; 717 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 718 hdr_length = ip_hdr_length_v6(mp, ip6h); 719 else 720 hdr_length = IPV6_HDR_LEN; 721 722 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 723 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 724 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 725 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 726 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 727 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 728 freemsg(first_mp); 729 return; 730 } 731 ip6h = (ip6_t *)mp->b_rptr; 732 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 733 inner_ip6h = (ip6_t *)&icmp6[1]; 734 } 735 736 /* 737 * For link local destinations matching simply on IRE type is not 738 * sufficient. Same link local addresses for different ILL's is 739 * possible. 
740 */ 741 742 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 743 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 744 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 745 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 746 747 if (first_ire == NULL) { 748 if (ip_debug > 2) { 749 /* ip1dbg */ 750 pr_addr_dbg("icmp_inbound_too_big_v6:" 751 "no ire for dst %s\n", AF_INET6, 752 &inner_ip6h->ip6_dst); 753 } 754 freemsg(first_mp); 755 return; 756 } 757 758 mtu = ntohl(icmp6->icmp6_mtu); 759 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 760 for (ire = first_ire; ire != NULL && 761 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 762 ire = ire->ire_next) { 763 mutex_enter(&ire->ire_lock); 764 if (mtu < IPV6_MIN_MTU) { 765 ip1dbg(("Received mtu less than IPv6 " 766 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 767 mtu = IPV6_MIN_MTU; 768 /* 769 * If an mtu less than IPv6 min mtu is received, 770 * we must include a fragment header in 771 * subsequent packets. 772 */ 773 ire->ire_frag_flag |= IPH_FRAG_HDR; 774 } 775 ip1dbg(("Received mtu from router: %d\n", mtu)); 776 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 777 /* Record the new max frag size for the ULP. */ 778 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 779 /* 780 * If we need a fragment header in every packet 781 * (above case or multirouting), make sure the 782 * ULP takes it into account when computing the 783 * payload size. 
784 */ 785 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 786 sizeof (ip6_frag_t)); 787 } else { 788 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 789 } 790 mutex_exit(&ire->ire_lock); 791 } 792 rw_exit(&first_ire->ire_bucket->irb_lock); 793 ire_refrele(first_ire); 794 } else { 795 irb_t *irb = NULL; 796 /* 797 * for non-link local destinations we match only on the IRE type 798 */ 799 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 800 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 801 if (ire == NULL) { 802 if (ip_debug > 2) { 803 /* ip1dbg */ 804 pr_addr_dbg("icmp_inbound_too_big_v6:" 805 "no ire for dst %s\n", 806 AF_INET6, &inner_ip6h->ip6_dst); 807 } 808 freemsg(first_mp); 809 return; 810 } 811 irb = ire->ire_bucket; 812 ire_refrele(ire); 813 rw_enter(&irb->irb_lock, RW_READER); 814 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 815 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 816 &inner_ip6h->ip6_dst)) { 817 mtu = ntohl(icmp6->icmp6_mtu); 818 mutex_enter(&ire->ire_lock); 819 if (mtu < IPV6_MIN_MTU) { 820 ip1dbg(("Received mtu less than IPv6" 821 "min mtu %d: %d\n", 822 IPV6_MIN_MTU, mtu)); 823 mtu = IPV6_MIN_MTU; 824 /* 825 * If an mtu less than IPv6 min mtu is 826 * received, we must include a fragment 827 * header in subsequent packets. 828 */ 829 ire->ire_frag_flag |= IPH_FRAG_HDR; 830 } 831 832 ip1dbg(("Received mtu from router: %d\n", mtu)); 833 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 834 /* Record the new max frag size for the ULP. */ 835 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 836 /* 837 * If we need a fragment header in 838 * every packet (above case or 839 * multirouting), make sure the ULP 840 * takes it into account when computing 841 * the payload size. 
842 */ 843 icmp6->icmp6_mtu = 844 htonl(ire->ire_max_frag - 845 sizeof (ip6_frag_t)); 846 } else { 847 icmp6->icmp6_mtu = 848 htonl(ire->ire_max_frag); 849 } 850 mutex_exit(&ire->ire_lock); 851 } 852 } 853 rw_exit(&irb->irb_lock); 854 } 855 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 856 mctl_present, zoneid); 857 } 858 859 static void 860 pkt_too_big(conn_t *connp, void *arg) 861 { 862 mblk_t *mp; 863 864 if (!connp->conn_ipv6_recvpathmtu) 865 return; 866 867 /* create message and drop it on this connections read queue */ 868 if ((mp = dupb((mblk_t *)arg)) == NULL) { 869 return; 870 } 871 mp->b_datap->db_type = M_CTL; 872 873 putnext(connp->conn_rq, mp); 874 } 875 876 /* 877 * Fanout received ICMPv6 error packets to the transports. 878 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 879 */ 880 void 881 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 882 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 883 { 884 uint16_t *up; /* Pointer to ports in ULP header */ 885 uint32_t ports; /* reversed ports for fanout */ 886 ip6_t rip6h; /* With reversed addresses */ 887 uint16_t hdr_length; 888 uint8_t *nexthdrp; 889 uint8_t nexthdr; 890 mblk_t *first_mp; 891 ipsec_in_t *ii; 892 tcpha_t *tcpha; 893 conn_t *connp; 894 895 first_mp = mp; 896 if (mctl_present) { 897 mp = first_mp->b_cont; 898 ASSERT(mp != NULL); 899 900 ii = (ipsec_in_t *)first_mp->b_rptr; 901 ASSERT(ii->ipsec_in_type == IPSEC_IN); 902 } else { 903 ii = NULL; 904 } 905 906 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 907 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 908 909 /* 910 * Need to pullup everything in order to use 911 * ip_hdr_length_nexthdr_v6() 912 */ 913 if (mp->b_cont != NULL) { 914 if (!pullupmsg(mp, -1)) { 915 ip1dbg(("icmp_inbound_error_fanout_v6: " 916 "pullupmsg failed\n")); 917 goto drop_pkt; 918 } 919 ip6h = (ip6_t *)mp->b_rptr; 920 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 921 } 922 923 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 924 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 925 goto drop_pkt; 926 927 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 928 goto drop_pkt; 929 nexthdr = *nexthdrp; 930 931 /* Set message type, must be done after pullups */ 932 mp->b_datap->db_type = M_CTL; 933 934 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 935 /* 936 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 937 * sockets. 938 * 939 * Note I don't like walking every connection to deliver 940 * this information to a set of listeners. A separate 941 * list could be kept to keep the cost of this down. 942 */ 943 ipcl_walk(pkt_too_big, (void *)mp); 944 } 945 946 /* Try to pass the ICMP message to clients who need it */ 947 switch (nexthdr) { 948 case IPPROTO_UDP: { 949 /* 950 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 951 * UDP header to get the port information. 952 */ 953 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 954 mp->b_wptr) { 955 break; 956 } 957 /* 958 * Attempt to find a client stream based on port. 959 * Note that we do a reverse lookup since the header is 960 * in the form we sent it out. 961 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 962 * and we only set the src and dst addresses and nexthdr. 963 */ 964 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 965 rip6h.ip6_src = ip6h->ip6_dst; 966 rip6h.ip6_dst = ip6h->ip6_src; 967 rip6h.ip6_nxt = nexthdr; 968 ((uint16_t *)&ports)[0] = up[1]; 969 ((uint16_t *)&ports)[1] = up[0]; 970 971 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 972 IP6_NO_IPPOLICY, mctl_present, zoneid); 973 return; 974 } 975 case IPPROTO_TCP: { 976 /* 977 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 978 * the TCP header to get the port information. 
979 */ 980 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 981 mp->b_wptr) { 982 break; 983 } 984 985 /* 986 * Attempt to find a client stream based on port. 987 * Note that we do a reverse lookup since the header is 988 * in the form we sent it out. 989 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 990 * we only set the src and dst addresses and nexthdr. 991 */ 992 993 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 994 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 995 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 996 if (connp == NULL) { 997 goto drop_pkt; 998 } 999 1000 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 1001 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 1002 return; 1003 1004 } 1005 case IPPROTO_SCTP: 1006 /* 1007 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1008 * the SCTP header to get the port information. 1009 */ 1010 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1011 mp->b_wptr) { 1012 break; 1013 } 1014 1015 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1016 ((uint16_t *)&ports)[0] = up[1]; 1017 ((uint16_t *)&ports)[1] = up[0]; 1018 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1019 IP6_NO_IPPOLICY, 0, zoneid); 1020 return; 1021 case IPPROTO_ESP: 1022 case IPPROTO_AH: { 1023 int ipsec_rc; 1024 1025 /* 1026 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1027 * We will re-use the IPSEC_IN if it is already present as 1028 * AH/ESP will not affect any fields in the IPSEC_IN for 1029 * ICMP errors. If there is no IPSEC_IN, allocate a new 1030 * one and attach it in the front. 1031 */ 1032 if (ii != NULL) { 1033 /* 1034 * ip_fanout_proto_again converts the ICMP errors 1035 * that come back from AH/ESP to M_DATA so that 1036 * if it is non-AH/ESP and we do a pullupmsg in 1037 * this function, it would work. Convert it back 1038 * to M_CTL before we send up as this is a ICMP 1039 * error. This could have been generated locally or 1040 * by some router. 
Validate the inner IPSEC 1041 * headers. 1042 * 1043 * NOTE : ill_index is used by ip_fanout_proto_again 1044 * to locate the ill. 1045 */ 1046 ASSERT(ill != NULL); 1047 ii->ipsec_in_ill_index = 1048 ill->ill_phyint->phyint_ifindex; 1049 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1050 first_mp->b_cont->b_datap->db_type = M_CTL; 1051 } else { 1052 /* 1053 * IPSEC_IN is not present. We attach a ipsec_in 1054 * message and send up to IPSEC for validating 1055 * and removing the IPSEC headers. Clear 1056 * ipsec_in_secure so that when we return 1057 * from IPSEC, we don't mistakenly think that this 1058 * is a secure packet came from the network. 1059 * 1060 * NOTE : ill_index is used by ip_fanout_proto_again 1061 * to locate the ill. 1062 */ 1063 ASSERT(first_mp == mp); 1064 first_mp = ipsec_in_alloc(B_FALSE); 1065 if (first_mp == NULL) { 1066 freemsg(mp); 1067 BUMP_MIB(&ip_mib, ipInDiscards); 1068 return; 1069 } 1070 ii = (ipsec_in_t *)first_mp->b_rptr; 1071 1072 /* This is not a secure packet */ 1073 ii->ipsec_in_secure = B_FALSE; 1074 first_mp->b_cont = mp; 1075 mp->b_datap->db_type = M_CTL; 1076 ASSERT(ill != NULL); 1077 ii->ipsec_in_ill_index = 1078 ill->ill_phyint->phyint_ifindex; 1079 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1080 } 1081 1082 if (!ipsec_loaded()) { 1083 ip_proto_not_sup(q, first_mp, 0, zoneid); 1084 return; 1085 } 1086 1087 if (nexthdr == IPPROTO_ESP) 1088 ipsec_rc = ipsecesp_icmp_error(first_mp); 1089 else 1090 ipsec_rc = ipsecah_icmp_error(first_mp); 1091 if (ipsec_rc == IPSEC_STATUS_FAILED) 1092 return; 1093 1094 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1095 return; 1096 } 1097 case IPPROTO_ENCAP: 1098 case IPPROTO_IPV6: 1099 if ((uint8_t *)ip6h + hdr_length + 1100 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1101 sizeof (ip6_t)) > mp->b_wptr) 1102 goto drop_pkt; 1103 1104 if (nexthdr == IPPROTO_ENCAP || 1105 !IN6_ARE_ADDR_EQUAL( 1106 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1107 &ip6h->ip6_src) || 1108 !IN6_ARE_ADDR_EQUAL( 1109 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1110 &ip6h->ip6_dst)) { 1111 /* 1112 * For tunnels that have used IPsec protection, 1113 * we need to adjust the MTU to take into account 1114 * the IPsec overhead. 1115 */ 1116 if (ii != NULL) 1117 icmp6->icmp6_mtu = htons( 1118 ntohs(icmp6->icmp6_mtu) - 1119 ipsec_in_extra_length(first_mp)); 1120 } else { 1121 /* 1122 * Self-encapsulated case. As in the ipv4 case, 1123 * we need to strip the 2nd IP header. Since mp 1124 * is already pulled-up, we can simply bcopy 1125 * the 3rd header + data over the 2nd header. 1126 */ 1127 uint16_t unused_len; 1128 ip6_t *inner_ip6h = (ip6_t *) 1129 ((uchar_t *)ip6h + hdr_length); 1130 1131 /* 1132 * Make sure we don't do recursion more than once. 1133 */ 1134 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1135 &unused_len, &nexthdrp) || 1136 *nexthdrp == IPPROTO_IPV6) { 1137 goto drop_pkt; 1138 } 1139 1140 /* 1141 * We are about to modify the packet. Make a copy if 1142 * someone else has a reference to it. 1143 */ 1144 if (DB_REF(mp) > 1) { 1145 mblk_t *mp1; 1146 uint16_t icmp6_offset; 1147 1148 mp1 = copymsg(mp); 1149 if (mp1 == NULL) { 1150 goto drop_pkt; 1151 } 1152 icmp6_offset = (uint16_t) 1153 ((uchar_t *)icmp6 - mp->b_rptr); 1154 freemsg(mp); 1155 mp = mp1; 1156 1157 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1158 ip6h = (ip6_t *)&icmp6[1]; 1159 inner_ip6h = (ip6_t *) 1160 ((uchar_t *)ip6h + hdr_length); 1161 1162 if (mctl_present) 1163 first_mp->b_cont = mp; 1164 else 1165 first_mp = mp; 1166 } 1167 1168 /* 1169 * Need to set db_type back to M_DATA before 1170 * refeeding mp into this function. 
1171 */ 1172 DB_TYPE(mp) = M_DATA; 1173 1174 /* 1175 * Copy the 3rd header + remaining data on top 1176 * of the 2nd header. 1177 */ 1178 bcopy(inner_ip6h, ip6h, 1179 mp->b_wptr - (uchar_t *)inner_ip6h); 1180 1181 /* 1182 * Subtract length of the 2nd header. 1183 */ 1184 mp->b_wptr -= hdr_length; 1185 1186 /* 1187 * Now recurse, and see what I _really_ should be 1188 * doing here. 1189 */ 1190 icmp_inbound_error_fanout_v6(q, first_mp, 1191 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1192 zoneid); 1193 return; 1194 } 1195 /* FALLTHRU */ 1196 default: 1197 /* 1198 * The rip6h header is only used for the lookup and we 1199 * only set the src and dst addresses and nexthdr. 1200 */ 1201 rip6h.ip6_src = ip6h->ip6_dst; 1202 rip6h.ip6_dst = ip6h->ip6_src; 1203 rip6h.ip6_nxt = nexthdr; 1204 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1205 IP6_NO_IPPOLICY, mctl_present, zoneid); 1206 return; 1207 } 1208 /* NOTREACHED */ 1209 drop_pkt: 1210 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1211 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1212 freemsg(first_mp); 1213 } 1214 1215 /* 1216 * Validate the incoming redirect message, if valid redirect 1217 * processing is done later. This is separated from the actual 1218 * redirect processing to avoid becoming single threaded when not 1219 * necessary. (i.e invalid packet) 1220 * Assumes that any AH or ESP headers have already been removed. 1221 * The mp has already been pulled up. 
1222 */ 1223 boolean_t 1224 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1225 { 1226 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1227 nd_redirect_t *rd; 1228 ire_t *ire; 1229 uint16_t len; 1230 uint16_t hdr_length; 1231 1232 ASSERT(mp->b_cont == NULL); 1233 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1234 hdr_length = ip_hdr_length_v6(mp, ip6h); 1235 else 1236 hdr_length = IPV6_HDR_LEN; 1237 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1238 len = mp->b_wptr - mp->b_rptr - hdr_length; 1239 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1240 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1241 (rd->nd_rd_code != 0) || 1242 (len < sizeof (nd_redirect_t)) || 1243 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1244 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1245 return (B_FALSE); 1246 } 1247 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1248 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1249 return (B_FALSE); 1250 } 1251 1252 /* 1253 * Verify that the IP source address of the redirect is 1254 * the same as the current first-hop router for the specified 1255 * ICMP destination address. Just to be cautious, this test 1256 * will be done again before we add the redirect, in case 1257 * router goes away between now and then. 1258 */ 1259 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1260 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1261 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1262 if (ire == NULL) 1263 return (B_FALSE); 1264 ire_refrele(ire); 1265 if (len > sizeof (nd_redirect_t)) { 1266 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1267 len - sizeof (nd_redirect_t))) 1268 return (B_FALSE); 1269 } 1270 return (B_TRUE); 1271 } 1272 1273 /* 1274 * Process received IPv6 ICMP Redirect messages. 1275 * Assumes that the icmp packet has already been verfied to be 1276 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
 *
 * mp is consumed on every path: it is freed either on the early return
 * when no ipif can be found or at the fail_redirect label, which the
 * success path also falls through to.
 */
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h;
	uint16_t	hdr_length;
	nd_redirect_t	*rd;
	ire_t		*ire;
	ire_t		*prev_ire;
	ire_t		*redir_ire;
	in6_addr_t	*src, *dst, *gateway;
	nd_opt_hdr_t	*opt;
	nce_t		*nce;
	int		nce_flags = 0;
	int		err = 0;
	boolean_t	redirect_to_router = B_FALSE;
	int		len;
	iulp_t		ulp_info = { 0 };
	ill_t		*prev_ire_ill;
	ipif_t		*ipif;

	/* Locate the redirect header behind the IPv6 (+ extension) headers */
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
		hdr_length = ip_hdr_length_v6(mp, ip6h);
	else
		hdr_length = IPV6_HDR_LEN;

	rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
	src = &ip6h->ip6_src;		/* sender of the redirect */
	dst = &rd->nd_rd_dst;		/* destination being redirected */
	gateway = &rd->nd_rd_target;	/* new first hop for dst */
	/* A target that differs from the destination is treated as a router */
	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
		redirect_to_router = B_TRUE;
		nce_flags |= NCE_F_ISROUTER;
	}
	/*
	 * Make sure we had a route for the dest in question and that
	 * route was pointing to the old gateway (the source of the
	 * redirect packet.)
	 */
	ipif = ipif_get_next_ipif(NULL, ill);
	if (ipif == NULL) {
		freemsg(mp);
		return;
	}
	prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL,
	    ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP);
	ipif_refrele(ipif);
	/*
	 * Check that
	 *	the redirect was not from ourselves
	 *	old gateway is still directly reachable
	 */
	if (prev_ire == NULL ||
	    prev_ire->ire_type == IRE_LOCAL) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		goto fail_redirect;
	}
	prev_ire_ill = ire_to_ill(prev_ire);
	ASSERT(prev_ire_ill != NULL);
	if (prev_ire_ill->ill_flags & ILLF_NONUD)
		nce_flags |= NCE_F_NONUD;

	/*
	 * Should we use the old ULP info to create the new gateway?  From
	 * a user's perspective, we should inherit the info so that it
	 * is a "smooth" transition.  If we do not do that, then new
	 * connections going thru the new gateway will have no route metrics,
	 * which is counter-intuitive to user.  From a network point of
	 * view, this may or may not make sense even though the new gateway
	 * is still directly connected to us so the route metrics should not
	 * change much.
	 *
	 * But if the old ire_uinfo is not initialized, we do another
	 * recursive lookup on the dest using the new gateway.  There may
	 * be a route to that.  If so, use it to initialize the redirect
	 * route.
	 */
	if (prev_ire->ire_uinfo.iulp_set) {
		bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t));
	} else if (redirect_to_router) {
		/*
		 * Only do the following if the redirection is really to
		 * a router.
		 */
		ire_t *tmp_ire;
		ire_t *sire;

		/*
		 * NOTE(review): sire is only written through the &sire
		 * argument; this relies on ire_ftable_lookup_v6() always
		 * storing to it (including NULL on failure) - confirm.
		 */
		tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire,
		    ALL_ZONES, 0, NULL,
		    (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT));
		if (sire != NULL) {
			bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t));
			ASSERT(tmp_ire != NULL);
			ire_refrele(tmp_ire);
			ire_refrele(sire);
		} else if (tmp_ire != NULL) {
			bcopy(&tmp_ire->ire_uinfo, &ulp_info,
			    sizeof (iulp_t));
			ire_refrele(tmp_ire);
		}
	}

	/*
	 * Look for a target link-layer address option in the bytes that
	 * follow the fixed redirect header and, if one is present, create
	 * or refresh the neighbor cache entry for the new gateway.
	 */
	len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t);
	opt = (nd_opt_hdr_t *)&rd[1];
	opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
	if (opt != NULL) {
		err = ndp_lookup_then_add(ill,
		    (uchar_t *)&opt[1],		/* Link layer address */
		    gateway,
		    &ipv6_all_ones,		/* prefix mask */
		    &ipv6_all_zeros,		/* Mapping mask */
		    0,
		    nce_flags,
		    ND_STALE,
		    &nce,
		    NULL,
		    NULL);
		switch (err) {
		case 0:
			NCE_REFRELE(nce);
			break;
		case EEXIST:
			/*
			 * Check to see if link layer address has changed and
			 * process the nce_state accordingly.
			 */
			ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE);
			NCE_REFRELE(nce);
			break;
		default:
			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
			    err));
			goto fail_redirect;
		}
	}
	if (redirect_to_router) {
		/* icmp_redirect_ok_v6() must have already verified this */
		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));

		/*
		 * Create a Route Association.  This will allow us to remember
		 * a router told us to use the particular gateway.
		 */
		ire = ire_create_v6(
		    dst,
		    &ipv6_all_ones,		/* mask */
		    &prev_ire->ire_src_addr_v6,	/* source addr */
		    gateway,			/* gateway addr */
		    &prev_ire->ire_max_frag,	/* max frag */
		    NULL,			/* Fast Path header */
		    NULL,			/* no rfq */
		    NULL,			/* no stq */
		    IRE_HOST_REDIRECT,
		    NULL,
		    prev_ire->ire_ipif,
		    NULL,
		    0,
		    0,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
		    &ulp_info,
		    NULL,
		    NULL);
	} else {
		/*
		 * Just create an on link entry, may or may not be a router
		 * If there is no link layer address option ire_add() won't
		 * add this.
		 */
		ire = ire_create_v6(
		    dst,				/* gateway == dst */
		    &ipv6_all_ones,			/* mask */
		    &prev_ire->ire_src_addr_v6,		/* source addr */
		    &ipv6_all_zeros,			/* gateway addr */
		    &prev_ire->ire_max_frag,		/* max frag */
		    NULL,				/* Fast Path header */
		    prev_ire->ire_rfq,			/* ire rfq */
		    prev_ire->ire_stq,			/* ire stq */
		    IRE_CACHE,
		    NULL,
		    prev_ire->ire_ipif,
		    &ipv6_all_ones,
		    0,
		    0,
		    0,
		    &ulp_info,
		    NULL,
		    NULL);
	}
	if (ire == NULL)
		goto fail_redirect;

	/*
	 * XXX If there is no nce i.e there is no target link layer address
	 * option with the redirect message, ire_add will fail. In that
	 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need
	 * to fix this.
	 */
	if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) {

		/* tell routing sockets that we received a redirect */
		ip_rts_change_v6(RTM_REDIRECT,
		    &rd->nd_rd_dst,
		    &rd->nd_rd_target,
		    &ipv6_all_ones, 0, &ire->ire_src_addr_v6,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR));

		/*
		 * Delete any existing IRE_HOST_REDIRECT for this destination.
		 * This together with the added IRE has the effect of
		 * modifying an existing redirect.
		 */
		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT,
		    ire->ire_ipif, NULL, ALL_ZONES, 0, NULL,
		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP));

		ire_refrele(ire);		/* Held in ire_add_v6 */

		if (redir_ire != NULL) {
			ire_delete(redir_ire);
			ire_refrele(redir_ire);
		}
	}

	/*
	 * The old route is done with: delete it if it was a cache entry,
	 * drop our reference, and clear the pointer so that the shared
	 * exit code below does not release it a second time.
	 */
	if (prev_ire->ire_type == IRE_CACHE)
		ire_delete(prev_ire);
	ire_refrele(prev_ire);
	prev_ire = NULL;

fail_redirect:
	if (prev_ire != NULL)
		ire_refrele(prev_ire);
	freemsg(mp);
}

/*
 * Map a write-side queue to an ill.
 *
 * When q has something below it (q_next != NULL) its q_ptr is used as the
 * ill and a reference is taken if ILL_CAN_LOOKUP() permits; otherwise the
 * loopback ill is looked up by name.  Returns NULL, after logging through
 * ip0dbg, when no ill could be obtained.  The callers in this file
 * ill_refrele() a non-NULL result, so the by-name lookup presumably
 * returns a held ill as well - confirm against ill_lookup_on_name().
 */
static ill_t *
ip_queue_to_ill_v6(queue_t *q)
{
	ill_t	*ill;

	ASSERT(WR(q) == q);

	if (q->q_next != NULL) {
		ill = (ill_t *)q->q_ptr;
		if (ILL_CAN_LOOKUP(ill))
			ill_refhold(ill);
		else
			ill = NULL;
	} else {
		ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE,
		    NULL, NULL, NULL, NULL, NULL);
	}
	if (ill == NULL)
		ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
	return (ill);
}

/*
 * Assigns an appropriate source address to the packet.
 * If origdst is one of our IP addresses then use it as the source.
 * If the queue is an ill queue then select a source from that ill.
 * Otherwise pick a source based on a route lookup back to the origsrc.
 *
 * src is the return parameter. Returns a pointer to src or NULL if failure.
1543 */ 1544 static in6_addr_t * 1545 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1546 in6_addr_t *src, zoneid_t zoneid) 1547 { 1548 ill_t *ill; 1549 ire_t *ire; 1550 ipif_t *ipif; 1551 1552 ASSERT(!(wq->q_flag & QREADR)); 1553 if (wq->q_next != NULL) { 1554 ill = (ill_t *)wq->q_ptr; 1555 } else { 1556 ill = NULL; 1557 } 1558 1559 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1560 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1561 if (ire != NULL) { 1562 /* Destined to one of our addresses */ 1563 *src = *origdst; 1564 ire_refrele(ire); 1565 return (src); 1566 } 1567 if (ire != NULL) { 1568 ire_refrele(ire); 1569 ire = NULL; 1570 } 1571 if (ill == NULL) { 1572 /* What is the route back to the original source? */ 1573 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1574 NULL, NULL, zoneid, NULL, 1575 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1576 if (ire == NULL) { 1577 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1578 return (NULL); 1579 } 1580 /* 1581 * Does not matter whether we use ire_stq or ire_ipif here. 1582 * Just pick an ill for ICMP replies. 1583 */ 1584 ASSERT(ire->ire_ipif != NULL); 1585 ill = ire->ire_ipif->ipif_ill; 1586 ire_refrele(ire); 1587 } 1588 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1589 IPV6_PREFER_SRC_DEFAULT, zoneid); 1590 if (ipif != NULL) { 1591 *src = ipif->ipif_v6src_addr; 1592 ipif_refrele(ipif); 1593 return (src); 1594 } 1595 /* 1596 * Unusual case - can't find a usable source address to reach the 1597 * original source. Use what in the route to the source. 
1598 */ 1599 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1600 NULL, NULL, zoneid, NULL, 1601 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1602 if (ire == NULL) { 1603 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1604 return (NULL); 1605 } 1606 ASSERT(ire != NULL); 1607 *src = ire->ire_src_addr_v6; 1608 ire_refrele(ire); 1609 return (src); 1610 } 1611 1612 /* 1613 * Build and ship an IPv6 ICMP message using the packet data in mp, 1614 * and the ICMP header pointed to by "stuff". (May be called as 1615 * writer.) 1616 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1617 * verify that an icmp error packet can be sent. 1618 * 1619 * If q is an ill write side queue (which is the case when packets 1620 * arrive from ip_rput) then ip_wput code will ensure that packets to 1621 * link-local destinations are sent out that ill. 1622 * 1623 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1624 * source address (see above function). 1625 */ 1626 static void 1627 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1628 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) 1629 { 1630 ip6_t *ip6h; 1631 in6_addr_t v6dst; 1632 size_t len_needed; 1633 size_t msg_len; 1634 mblk_t *mp1; 1635 icmp6_t *icmp6; 1636 ill_t *ill; 1637 in6_addr_t v6src; 1638 mblk_t *ipsec_mp; 1639 ipsec_out_t *io; 1640 1641 ill = ip_queue_to_ill_v6(q); 1642 if (ill == NULL) { 1643 freemsg(mp); 1644 return; 1645 } 1646 1647 if (mctl_present) { 1648 /* 1649 * If it is : 1650 * 1651 * 1) a IPSEC_OUT, then this is caused by outbound 1652 * datagram originating on this host. IPSEC processing 1653 * may or may not have been done. Refer to comments above 1654 * icmp_inbound_error_fanout for details. 1655 * 1656 * 2) a IPSEC_IN if we are generating a icmp_message 1657 * for an incoming datagram destined for us i.e called 1658 * from ip_fanout_send_icmp. 
1659 */ 1660 ipsec_info_t *in; 1661 1662 ipsec_mp = mp; 1663 mp = ipsec_mp->b_cont; 1664 1665 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1666 ip6h = (ip6_t *)mp->b_rptr; 1667 1668 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1669 in->ipsec_info_type == IPSEC_IN); 1670 1671 if (in->ipsec_info_type == IPSEC_IN) { 1672 /* 1673 * Convert the IPSEC_IN to IPSEC_OUT. 1674 */ 1675 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1676 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1677 ill_refrele(ill); 1678 return; 1679 } 1680 } else { 1681 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1682 io = (ipsec_out_t *)in; 1683 /* 1684 * Clear out ipsec_out_proc_begin, so we do a fresh 1685 * ire lookup. 1686 */ 1687 io->ipsec_out_proc_begin = B_FALSE; 1688 } 1689 } else { 1690 /* 1691 * This is in clear. The icmp message we are building 1692 * here should go out in clear. 1693 */ 1694 ipsec_in_t *ii; 1695 ASSERT(mp->b_datap->db_type == M_DATA); 1696 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1697 freemsg(mp); 1698 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1699 ill_refrele(ill); 1700 return; 1701 } 1702 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1703 1704 /* This is not a secure packet */ 1705 ii->ipsec_in_secure = B_FALSE; 1706 /* 1707 * For trusted extensions using a shared IP address we can 1708 * send using any zoneid. 1709 */ 1710 if (zoneid == ALL_ZONES) 1711 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1712 else 1713 ii->ipsec_in_zoneid = zoneid; 1714 ipsec_mp->b_cont = mp; 1715 ip6h = (ip6_t *)mp->b_rptr; 1716 /* 1717 * Convert the IPSEC_IN to IPSEC_OUT. 
1718 */ 1719 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1720 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1721 ill_refrele(ill); 1722 return; 1723 } 1724 } 1725 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1726 1727 if (v6src_ptr != NULL) { 1728 v6src = *v6src_ptr; 1729 } else { 1730 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1731 &v6src, zoneid) == NULL) { 1732 freemsg(ipsec_mp); 1733 ill_refrele(ill); 1734 return; 1735 } 1736 } 1737 v6dst = ip6h->ip6_src; 1738 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1739 msg_len = msgdsize(mp); 1740 if (msg_len > len_needed) { 1741 if (!adjmsg(mp, len_needed - msg_len)) { 1742 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1743 freemsg(ipsec_mp); 1744 ill_refrele(ill); 1745 return; 1746 } 1747 msg_len = len_needed; 1748 } 1749 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1750 if (mp1 == NULL) { 1751 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1752 freemsg(ipsec_mp); 1753 ill_refrele(ill); 1754 return; 1755 } 1756 ill_refrele(ill); 1757 mp1->b_cont = mp; 1758 mp = mp1; 1759 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1760 io->ipsec_out_type == IPSEC_OUT); 1761 ipsec_mp->b_cont = mp; 1762 1763 /* 1764 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1765 * node generates be accepted in peace by all on-host destinations. 1766 * If we do NOT assume that all on-host destinations trust 1767 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1768 * (Look for ipsec_out_icmp_loopback). 
1769 */ 1770 io->ipsec_out_icmp_loopback = B_TRUE; 1771 1772 ip6h = (ip6_t *)mp->b_rptr; 1773 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1774 1775 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1776 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1777 ip6h->ip6_hops = ipv6_def_hops; 1778 ip6h->ip6_dst = v6dst; 1779 ip6h->ip6_src = v6src; 1780 msg_len += IPV6_HDR_LEN + len; 1781 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1782 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1783 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1784 } 1785 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1786 icmp6 = (icmp6_t *)&ip6h[1]; 1787 bcopy(stuff, (char *)icmp6, len); 1788 /* 1789 * Prepare for checksum by putting icmp length in the icmp 1790 * checksum field. The checksum is calculated in ip_wput_v6. 1791 */ 1792 icmp6->icmp6_cksum = ip6h->ip6_plen; 1793 if (icmp6->icmp6_type == ND_REDIRECT) { 1794 ip6h->ip6_hops = IPV6_MAX_HOPS; 1795 } 1796 /* Send to V6 writeside put routine */ 1797 put(q, ipsec_mp); 1798 } 1799 1800 /* 1801 * Update the output mib when ICMPv6 packets are sent. 
1802 */ 1803 static void 1804 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1805 { 1806 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1807 1808 switch (icmp6->icmp6_type) { 1809 case ICMP6_DST_UNREACH: 1810 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1811 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1812 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1813 break; 1814 1815 case ICMP6_TIME_EXCEEDED: 1816 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1817 break; 1818 1819 case ICMP6_PARAM_PROB: 1820 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1821 break; 1822 1823 case ICMP6_PACKET_TOO_BIG: 1824 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1825 break; 1826 1827 case ICMP6_ECHO_REQUEST: 1828 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1829 break; 1830 1831 case ICMP6_ECHO_REPLY: 1832 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1833 break; 1834 1835 case ND_ROUTER_SOLICIT: 1836 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1837 break; 1838 1839 case ND_ROUTER_ADVERT: 1840 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1841 break; 1842 1843 case ND_NEIGHBOR_SOLICIT: 1844 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1845 break; 1846 1847 case ND_NEIGHBOR_ADVERT: 1848 BUMP_MIB(ill->ill_icmp6_mib, 1849 ipv6IfIcmpOutNeighborAdvertisements); 1850 break; 1851 1852 case ND_REDIRECT: 1853 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1854 break; 1855 1856 case MLD_LISTENER_QUERY: 1857 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1858 break; 1859 1860 case MLD_LISTENER_REPORT: 1861 case MLD_V2_LISTENER_REPORT: 1862 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1863 break; 1864 1865 case MLD_LISTENER_REDUCTION: 1866 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1867 break; 1868 } 1869 } 1870 1871 /* 1872 * Check if it is ok to send an ICMPv6 error packet in 1873 * response to the IP packet in 
mp. 1874 * Free the message and return null if no 1875 * ICMP error packet should be sent. 1876 */ 1877 static mblk_t * 1878 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1879 boolean_t llbcast, boolean_t mcast_ok) 1880 { 1881 ip6_t *ip6h; 1882 1883 if (!mp) 1884 return (NULL); 1885 1886 ip6h = (ip6_t *)mp->b_rptr; 1887 1888 /* Check if source address uniquely identifies the host */ 1889 1890 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1891 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1892 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1893 freemsg(mp); 1894 return (NULL); 1895 } 1896 1897 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1898 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1899 icmp6_t *icmp6; 1900 1901 if (mp->b_wptr - mp->b_rptr < len_needed) { 1902 if (!pullupmsg(mp, len_needed)) { 1903 ill_t *ill; 1904 1905 ill = ip_queue_to_ill_v6(q); 1906 if (ill == NULL) { 1907 BUMP_MIB(&icmp6_mib, 1908 ipv6IfIcmpInErrors); 1909 } else { 1910 BUMP_MIB(ill->ill_icmp6_mib, 1911 ipv6IfIcmpInErrors); 1912 ill_refrele(ill); 1913 } 1914 freemsg(mp); 1915 return (NULL); 1916 } 1917 ip6h = (ip6_t *)mp->b_rptr; 1918 } 1919 icmp6 = (icmp6_t *)&ip6h[1]; 1920 /* Explicitly do not generate errors in response to redirects */ 1921 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1922 icmp6->icmp6_type == ND_REDIRECT) { 1923 freemsg(mp); 1924 return (NULL); 1925 } 1926 } 1927 /* 1928 * Check that the destination is not multicast and that the packet 1929 * was not sent on link layer broadcast or multicast. (Exception 1930 * is Packet too big message as per the draft - when mcast_ok is set.) 1931 */ 1932 if (!mcast_ok && 1933 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1934 freemsg(mp); 1935 return (NULL); 1936 } 1937 if (icmp_err_rate_limit()) { 1938 /* 1939 * Only send ICMP error packets every so often. 1940 * This should be done on a per port/source basis, 1941 * but for now this will suffice. 
1942 */ 1943 freemsg(mp); 1944 return (NULL); 1945 } 1946 return (mp); 1947 } 1948 1949 /* 1950 * Generate an ICMPv6 redirect message. 1951 * Include target link layer address option if it exits. 1952 * Always include redirect header. 1953 */ 1954 static void 1955 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1956 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1957 { 1958 nd_redirect_t *rd; 1959 nd_opt_rd_hdr_t *rdh; 1960 uchar_t *buf; 1961 nce_t *nce = NULL; 1962 nd_opt_hdr_t *opt; 1963 int len; 1964 int ll_opt_len = 0; 1965 int max_redir_hdr_data_len; 1966 int pkt_len; 1967 in6_addr_t *srcp; 1968 1969 /* 1970 * We are called from ip_rput where we could 1971 * not have attached an IPSEC_IN. 1972 */ 1973 ASSERT(mp->b_datap->db_type == M_DATA); 1974 1975 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1976 if (mp == NULL) 1977 return; 1978 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1979 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1980 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1981 ill->ill_phys_addr_length + 7)/8 * 8; 1982 } 1983 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1984 ASSERT(len % 4 == 0); 1985 buf = kmem_alloc(len, KM_NOSLEEP); 1986 if (buf == NULL) { 1987 if (nce != NULL) 1988 NCE_REFRELE(nce); 1989 freemsg(mp); 1990 return; 1991 } 1992 1993 rd = (nd_redirect_t *)buf; 1994 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1995 rd->nd_rd_code = 0; 1996 rd->nd_rd_reserved = 0; 1997 rd->nd_rd_target = *targetp; 1998 rd->nd_rd_dst = *dest; 1999 2000 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 2001 if (nce != NULL && ll_opt_len != 0) { 2002 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2003 opt->nd_opt_len = ll_opt_len/8; 2004 bcopy((char *)nce->nce_res_mp->b_rptr + 2005 NCE_LL_ADDR_OFFSET(ill), &opt[1], 2006 ill->ill_phys_addr_length); 2007 } 2008 if (nce != NULL) 2009 NCE_REFRELE(nce); 2010 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 2011 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 2012 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2013 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2014 pkt_len = msgdsize(mp); 2015 /* Make sure mp is 8 byte aligned */ 2016 if (pkt_len > max_redir_hdr_data_len) { 2017 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2018 sizeof (nd_opt_rd_hdr_t))/8; 2019 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2020 } else { 2021 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2022 (void) adjmsg(mp, -(pkt_len % 8)); 2023 } 2024 rdh->nd_opt_rh_reserved1 = 0; 2025 rdh->nd_opt_rh_reserved2 = 0; 2026 /* ipif_v6src_addr contains the link-local source address */ 2027 rw_enter(&ill_g_lock, RW_READER); 2028 if (ill->ill_group != NULL) { 2029 /* 2030 * The receiver of the redirect will verify whether it 2031 * had a route through us (srcp that we will use in 2032 * the redirect) or not. As we load spread even link-locals, 2033 * we don't know which source address the receiver of 2034 * redirect has in its route for communicating with us. 2035 * Thus we randomly choose a source here and finally we 2036 * should get to the right one and it will eventually 2037 * accept the redirect from us. We can't call 2038 * ip_lookup_scope_v6 because we don't have the right 2039 * link-local address here. Thus we randomly choose one. 2040 */ 2041 int cnt = ill->ill_group->illgrp_ill_count; 2042 2043 ill = ill->ill_group->illgrp_ill; 2044 cnt = ++icmp_redirect_v6_src_index % cnt; 2045 while (cnt--) 2046 ill = ill->ill_group_next; 2047 srcp = &ill->ill_ipif->ipif_v6src_addr; 2048 } else { 2049 srcp = &ill->ill_ipif->ipif_v6src_addr; 2050 } 2051 rw_exit(&ill_g_lock); 2052 /* Redirects sent by router, and router is global zone */ 2053 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); 2054 kmem_free(buf, len); 2055 } 2056 2057 2058 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2059 void 2060 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2061 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2062 { 2063 icmp6_t icmp6; 2064 boolean_t mctl_present; 2065 mblk_t *first_mp; 2066 2067 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2068 2069 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2070 if (mp == NULL) { 2071 if (mctl_present) 2072 freeb(first_mp); 2073 return; 2074 } 2075 bzero(&icmp6, sizeof (icmp6_t)); 2076 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2077 icmp6.icmp6_code = code; 2078 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2079 zoneid); 2080 } 2081 2082 /* 2083 * Generate an ICMP unreachable message. 2084 */ 2085 void 2086 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2087 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2088 { 2089 icmp6_t icmp6; 2090 boolean_t mctl_present; 2091 mblk_t *first_mp; 2092 2093 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2094 2095 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2096 if (mp == NULL) { 2097 if (mctl_present) 2098 freeb(first_mp); 2099 return; 2100 } 2101 bzero(&icmp6, sizeof (icmp6_t)); 2102 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2103 icmp6.icmp6_code = code; 2104 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2105 zoneid); 2106 } 2107 2108 /* 2109 * Generate an ICMP pkt too big message. 
2110 */ 2111 static void 2112 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2113 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2114 { 2115 icmp6_t icmp6; 2116 mblk_t *first_mp; 2117 boolean_t mctl_present; 2118 2119 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2120 2121 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2122 if (mp == NULL) { 2123 if (mctl_present) 2124 freeb(first_mp); 2125 return; 2126 } 2127 bzero(&icmp6, sizeof (icmp6_t)); 2128 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2129 icmp6.icmp6_code = 0; 2130 icmp6.icmp6_mtu = htonl(mtu); 2131 2132 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2133 zoneid); 2134 } 2135 2136 /* 2137 * Generate an ICMP parameter problem message. (May be called as writer.) 2138 * 'offset' is the offset from the beginning of the packet in error. 2139 */ 2140 static void 2141 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2142 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2143 { 2144 icmp6_t icmp6; 2145 boolean_t mctl_present; 2146 mblk_t *first_mp; 2147 2148 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2149 2150 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2151 if (mp == NULL) { 2152 if (mctl_present) 2153 freeb(first_mp); 2154 return; 2155 } 2156 bzero((char *)&icmp6, sizeof (icmp6_t)); 2157 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2158 icmp6.icmp6_code = code; 2159 icmp6.icmp6_pptr = htonl(offset); 2160 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2161 zoneid); 2162 } 2163 2164 /* 2165 * This code will need to take into account the possibility of binding 2166 * to a link local address on a multi-homed host, in which case the 2167 * outgoing interface (from the conn) will need to be used when getting 2168 * an ire for the dst. 
Going through proper outgoing interface and 2169 * choosing the source address corresponding to the outgoing interface 2170 * is necessary when the destination address is a link-local address and 2171 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2172 * This can happen when active connection is setup; thus ipp pointer 2173 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2174 * pointer is passed as ipp pointer. 2175 */ 2176 mblk_t * 2177 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2178 { 2179 ssize_t len; 2180 int protocol; 2181 struct T_bind_req *tbr; 2182 sin6_t *sin6; 2183 ipa6_conn_t *ac6; 2184 in6_addr_t *v6srcp; 2185 in6_addr_t *v6dstp; 2186 uint16_t lport; 2187 uint16_t fport; 2188 uchar_t *ucp; 2189 mblk_t *mp1; 2190 boolean_t ire_requested; 2191 boolean_t ipsec_policy_set; 2192 int error = 0; 2193 boolean_t local_bind; 2194 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2195 ipa6_conn_x_t *acx6; 2196 boolean_t verify_dst; 2197 2198 ASSERT(connp->conn_af_isv6); 2199 len = mp->b_wptr - mp->b_rptr; 2200 if (len < (sizeof (*tbr) + 1)) { 2201 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2202 "ip_bind_v6: bogus msg, len %ld", len); 2203 goto bad_addr; 2204 } 2205 /* Back up and extract the protocol identifier. */ 2206 mp->b_wptr--; 2207 tbr = (struct T_bind_req *)mp->b_rptr; 2208 /* Reset the message type in preparation for shipping it back. */ 2209 mp->b_datap->db_type = M_PCPROTO; 2210 2211 protocol = *mp->b_wptr & 0xFF; 2212 connp->conn_ulp = (uint8_t)protocol; 2213 2214 /* 2215 * Check for a zero length address. This is from a protocol that 2216 * wants to register to receive all packets of its type. 2217 */ 2218 if (tbr->ADDR_length == 0) { 2219 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2220 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2221 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2222 /* 2223 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2224 * Do not allow others to bind to these. 2225 */ 2226 goto bad_addr; 2227 } 2228 2229 /* 2230 * 2231 * The udp module never sends down a zero-length address, 2232 * and allowing this on a labeled system will break MLP 2233 * functionality. 2234 */ 2235 if (is_system_labeled() && protocol == IPPROTO_UDP) 2236 goto bad_addr; 2237 2238 /* Allow ipsec plumbing */ 2239 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2240 protocol != IPPROTO_ESP) 2241 goto bad_addr; 2242 2243 connp->conn_srcv6 = ipv6_all_zeros; 2244 ipcl_proto_insert_v6(connp, protocol); 2245 2246 tbr->PRIM_type = T_BIND_ACK; 2247 return (mp); 2248 } 2249 2250 /* Extract the address pointer from the message. */ 2251 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2252 tbr->ADDR_length); 2253 if (ucp == NULL) { 2254 ip1dbg(("ip_bind_v6: no address\n")); 2255 goto bad_addr; 2256 } 2257 if (!OK_32PTR(ucp)) { 2258 ip1dbg(("ip_bind_v6: unaligned address\n")); 2259 goto bad_addr; 2260 } 2261 mp1 = mp->b_cont; /* trailing mp if any */ 2262 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2263 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2264 2265 switch (tbr->ADDR_length) { 2266 default: 2267 ip1dbg(("ip_bind_v6: bad address length %d\n", 2268 (int)tbr->ADDR_length)); 2269 goto bad_addr; 2270 2271 case IPV6_ADDR_LEN: 2272 /* Verification of local address only */ 2273 v6srcp = (in6_addr_t *)ucp; 2274 lport = 0; 2275 local_bind = B_TRUE; 2276 break; 2277 2278 case sizeof (sin6_t): 2279 sin6 = (sin6_t *)ucp; 2280 v6srcp = &sin6->sin6_addr; 2281 lport = sin6->sin6_port; 2282 local_bind = B_TRUE; 2283 break; 2284 2285 case sizeof (ipa6_conn_t): 2286 /* 2287 * Verify that both the source and destination addresses 2288 * are valid. 2289 * Note that we allow connect to broadcast and multicast 2290 * addresses when ire_requested is set. Thus the ULP 2291 * has to check for IRE_BROADCAST and multicast. 
2292 */ 2293 ac6 = (ipa6_conn_t *)ucp; 2294 v6srcp = &ac6->ac6_laddr; 2295 v6dstp = &ac6->ac6_faddr; 2296 fport = ac6->ac6_fport; 2297 /* For raw socket, the local port is not set. */ 2298 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2299 connp->conn_lport; 2300 local_bind = B_FALSE; 2301 /* Always verify destination reachability. */ 2302 verify_dst = B_TRUE; 2303 break; 2304 2305 case sizeof (ipa6_conn_x_t): 2306 /* 2307 * Verify that the source address is valid. 2308 * Note that we allow connect to broadcast and multicast 2309 * addresses when ire_requested is set. Thus the ULP 2310 * has to check for IRE_BROADCAST and multicast. 2311 */ 2312 acx6 = (ipa6_conn_x_t *)ucp; 2313 ac6 = &acx6->ac6x_conn; 2314 v6srcp = &ac6->ac6_laddr; 2315 v6dstp = &ac6->ac6_faddr; 2316 fport = ac6->ac6_fport; 2317 lport = ac6->ac6_lport; 2318 local_bind = B_FALSE; 2319 /* 2320 * Client that passed ipa6_conn_x_t to us specifies whether to 2321 * verify destination reachability. 2322 */ 2323 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2324 break; 2325 } 2326 if (local_bind) { 2327 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2328 /* Bind to IPv4 address */ 2329 ipaddr_t v4src; 2330 2331 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2332 2333 error = ip_bind_laddr(connp, mp, v4src, lport, 2334 ire_requested, ipsec_policy_set, 2335 tbr->ADDR_length != IPV6_ADDR_LEN); 2336 if (error != 0) 2337 goto bad_addr; 2338 connp->conn_pkt_isv6 = B_FALSE; 2339 } else { 2340 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2341 error = 0; 2342 goto bad_addr; 2343 } 2344 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2345 ire_requested, ipsec_policy_set, 2346 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2347 if (error != 0) 2348 goto bad_addr; 2349 connp->conn_pkt_isv6 = B_TRUE; 2350 } 2351 if (protocol == IPPROTO_TCP) 2352 connp->conn_recv = tcp_conn_request; 2353 } else { 2354 /* 2355 * Bind to local and remote address. 
Local might be 2356 * unspecified in which case it will be extracted from 2357 * ire_src_addr_v6 2358 */ 2359 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2360 /* Connect to IPv4 address */ 2361 ipaddr_t v4src; 2362 ipaddr_t v4dst; 2363 2364 /* Is the source unspecified or mapped? */ 2365 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2366 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2367 ip1dbg(("ip_bind_v6: " 2368 "dst is mapped, but not the src\n")); 2369 goto bad_addr; 2370 } 2371 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2372 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2373 2374 /* 2375 * XXX Fix needed. Need to pass ipsec_policy_set 2376 * instead of B_FALSE. 2377 */ 2378 2379 /* Always verify destination reachability. */ 2380 error = ip_bind_connected(connp, mp, &v4src, lport, 2381 v4dst, fport, ire_requested, ipsec_policy_set, 2382 B_TRUE, B_TRUE); 2383 if (error != 0) 2384 goto bad_addr; 2385 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2386 connp->conn_pkt_isv6 = B_FALSE; 2387 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2388 ip1dbg(("ip_bind_v6: " 2389 "src is mapped, but not the dst\n")); 2390 goto bad_addr; 2391 } else { 2392 error = ip_bind_connected_v6(connp, mp, v6srcp, 2393 lport, v6dstp, ipp, fport, ire_requested, 2394 ipsec_policy_set, B_TRUE, verify_dst); 2395 if (error != 0) 2396 goto bad_addr; 2397 connp->conn_pkt_isv6 = B_TRUE; 2398 } 2399 if (protocol == IPPROTO_TCP) 2400 connp->conn_recv = tcp_input; 2401 } 2402 /* Update qinfo if v4/v6 changed */ 2403 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2404 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2405 if (connp->conn_pkt_isv6) 2406 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2407 else 2408 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2409 } 2410 2411 /* 2412 * Pass the IPSEC headers size in ire_ipsec_overhead. 2413 * We can't do this in ip_bind_insert_ire because the policy 2414 * may not have been inherited at that point in time and hence 2415 * conn_out_enforce_policy may not be set. 
2416 */ 2417 mp1 = mp->b_cont; 2418 if (ire_requested && connp->conn_out_enforce_policy && 2419 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2420 ire_t *ire = (ire_t *)mp1->b_rptr; 2421 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2422 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2423 } 2424 2425 /* Send it home. */ 2426 mp->b_datap->db_type = M_PCPROTO; 2427 tbr->PRIM_type = T_BIND_ACK; 2428 return (mp); 2429 2430 bad_addr: 2431 if (error == EINPROGRESS) 2432 return (NULL); 2433 if (error > 0) 2434 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2435 else 2436 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2437 return (mp); 2438 } 2439 2440 /* 2441 * Here address is verified to be a valid local address. 2442 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2443 * address is also considered a valid local address. 2444 * In the case of a multicast address, however, the 2445 * upper protocol is expected to reset the src address 2446 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2447 * no packets are emitted with multicast address as 2448 * source address. 2449 * The addresses valid for bind are: 2450 * (1) - in6addr_any 2451 * (2) - IP address of an UP interface 2452 * (3) - IP address of a DOWN interface 2453 * (4) - a multicast address. In this case 2454 * the conn will only receive packets destined to 2455 * the specified multicast address. Note: the 2456 * application still has to issue an 2457 * IPV6_JOIN_GROUP socket option. 2458 * 2459 * In all the above cases, the bound address must be valid in the current zone. 2460 * When the address is loopback or multicast, there might be many matching IREs 2461 * so bind has to look up based on the zone. 
2462 */ 2463 static int 2464 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2465 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2466 boolean_t fanout_insert) 2467 { 2468 int error = 0; 2469 ire_t *src_ire = NULL; 2470 ipif_t *ipif = NULL; 2471 mblk_t *policy_mp; 2472 zoneid_t zoneid; 2473 2474 if (ipsec_policy_set) 2475 policy_mp = mp->b_cont; 2476 2477 /* 2478 * If it was previously connected, conn_fully_bound would have 2479 * been set. 2480 */ 2481 connp->conn_fully_bound = B_FALSE; 2482 2483 zoneid = connp->conn_zoneid; 2484 2485 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2486 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2487 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2488 /* 2489 * If an address other than in6addr_any is requested, 2490 * we verify that it is a valid address for bind 2491 * Note: Following code is in if-else-if form for 2492 * readability compared to a condition check. 2493 */ 2494 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2495 if (IRE_IS_LOCAL(src_ire)) { 2496 /* 2497 * (2) Bind to address of local UP interface 2498 */ 2499 ipif = src_ire->ire_ipif; 2500 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2501 ipif_t *multi_ipif = NULL; 2502 ire_t *save_ire; 2503 /* 2504 * (4) bind to multicast address. 2505 * Fake out the IRE returned to upper 2506 * layer to be a broadcast IRE in 2507 * ip_bind_insert_ire_v6(). 2508 * Pass other information that matches 2509 * the ipif (e.g. the source address). 
2510 * conn_multicast_ill is only used for 2511 * IPv6 packets 2512 */ 2513 mutex_enter(&connp->conn_lock); 2514 if (connp->conn_multicast_ill != NULL) { 2515 (void) ipif_lookup_zoneid( 2516 connp->conn_multicast_ill, zoneid, 0, 2517 &multi_ipif); 2518 } else { 2519 /* 2520 * Look for default like 2521 * ip_wput_v6 2522 */ 2523 multi_ipif = ipif_lookup_group_v6( 2524 &ipv6_unspecified_group, zoneid); 2525 } 2526 mutex_exit(&connp->conn_lock); 2527 save_ire = src_ire; 2528 src_ire = NULL; 2529 if (multi_ipif == NULL || !ire_requested || 2530 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2531 src_ire = save_ire; 2532 error = EADDRNOTAVAIL; 2533 } else { 2534 ASSERT(src_ire != NULL); 2535 if (save_ire != NULL) 2536 ire_refrele(save_ire); 2537 } 2538 if (multi_ipif != NULL) 2539 ipif_refrele(multi_ipif); 2540 } else { 2541 *mp->b_wptr++ = (char)connp->conn_ulp; 2542 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2543 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2544 if (ipif == NULL) { 2545 if (error == EINPROGRESS) { 2546 if (src_ire != NULL) 2547 ire_refrele(src_ire); 2548 return (error); 2549 } 2550 /* 2551 * Not a valid address for bind 2552 */ 2553 error = EADDRNOTAVAIL; 2554 } else { 2555 ipif_refrele(ipif); 2556 } 2557 /* 2558 * Just to keep it consistent with the processing in 2559 * ip_bind_v6(). 2560 */ 2561 mp->b_wptr--; 2562 } 2563 2564 if (error != 0) { 2565 /* Red Alert! Attempting to be a bogon! */ 2566 if (ip_debug > 2) { 2567 /* ip1dbg */ 2568 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2569 " address %s\n", AF_INET6, v6src); 2570 } 2571 goto bad_addr; 2572 } 2573 } 2574 2575 /* 2576 * Allow setting new policies. For example, disconnects come 2577 * down as ipa_t bind. As we would have set conn_policy_cached 2578 * to B_TRUE before, we should set it to B_FALSE, so that policy 2579 * can change after the disconnect. 
2580 */ 2581 connp->conn_policy_cached = B_FALSE; 2582 2583 /* If not fanout_insert this was just an address verification */ 2584 if (fanout_insert) { 2585 /* 2586 * The addresses have been verified. Time to insert in 2587 * the correct fanout list. 2588 */ 2589 connp->conn_srcv6 = *v6src; 2590 connp->conn_remv6 = ipv6_all_zeros; 2591 connp->conn_lport = lport; 2592 connp->conn_fport = 0; 2593 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2594 } 2595 if (error == 0) { 2596 if (ire_requested) { 2597 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2598 error = -1; 2599 goto bad_addr; 2600 } 2601 } else if (ipsec_policy_set) { 2602 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2603 error = -1; 2604 goto bad_addr; 2605 } 2606 } 2607 } 2608 bad_addr: 2609 if (error != 0) { 2610 if (connp->conn_anon_port) { 2611 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2612 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2613 B_FALSE); 2614 } 2615 connp->conn_mlp_type = mlptSingle; 2616 } 2617 2618 if (src_ire != NULL) 2619 ire_refrele(src_ire); 2620 2621 if (ipsec_policy_set) { 2622 ASSERT(policy_mp != NULL); 2623 freeb(policy_mp); 2624 /* 2625 * As of now assume that nothing else accompanies 2626 * IPSEC_POLICY_SET. 
2627 */ 2628 mp->b_cont = NULL; 2629 } 2630 return (error); 2631 } 2632 2633 /* ARGSUSED */ 2634 static void 2635 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2636 void *dummy_arg) 2637 { 2638 conn_t *connp = NULL; 2639 t_scalar_t prim; 2640 2641 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2642 2643 if (CONN_Q(q)) 2644 connp = Q_TO_CONN(q); 2645 ASSERT(connp != NULL); 2646 2647 prim = ((union T_primitives *)mp->b_rptr)->type; 2648 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2649 2650 if (IPCL_IS_TCP(connp)) { 2651 /* Pass sticky_ipp for scope_id and pktinfo */ 2652 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2653 } else { 2654 /* For UDP and ICMP */ 2655 mp = ip_bind_v6(q, mp, connp, NULL); 2656 } 2657 if (mp != NULL) { 2658 if (IPCL_IS_TCP(connp)) { 2659 CONN_INC_REF(connp); 2660 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2661 connp, SQTAG_TCP_RPUTOTHER); 2662 } else if (IPCL_IS_UDP(connp)) { 2663 udp_resume_bind(connp, mp); 2664 } else { 2665 qreply(q, mp); 2666 CONN_OPER_PENDING_DONE(connp); 2667 } 2668 } 2669 } 2670 2671 /* 2672 * Verify that both the source and destination addresses 2673 * are valid. If verify_dst, then destination address must also be reachable, 2674 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2675 * It takes ip6_pkt_t * as one of the arguments to determine correct 2676 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2677 * destination address. Note that parameter ipp is only useful for TCP connect 2678 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2679 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2680 * 2681 */ 2682 static int 2683 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2684 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2685 boolean_t ire_requested, boolean_t ipsec_policy_set, 2686 boolean_t fanout_insert, boolean_t verify_dst) 2687 { 2688 ire_t *src_ire; 2689 ire_t *dst_ire; 2690 int error = 0; 2691 int protocol; 2692 mblk_t *policy_mp; 2693 ire_t *sire = NULL; 2694 ire_t *md_dst_ire = NULL; 2695 ill_t *md_ill = NULL; 2696 ill_t *dst_ill = NULL; 2697 ipif_t *src_ipif = NULL; 2698 zoneid_t zoneid; 2699 boolean_t ill_held = B_FALSE; 2700 2701 src_ire = dst_ire = NULL; 2702 /* 2703 * NOTE: The protocol is beyond the wptr because that's how 2704 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2705 */ 2706 protocol = *mp->b_wptr & 0xFF; 2707 2708 /* 2709 * If we never got a disconnect before, clear it now. 2710 */ 2711 connp->conn_fully_bound = B_FALSE; 2712 2713 if (ipsec_policy_set) { 2714 policy_mp = mp->b_cont; 2715 } 2716 2717 zoneid = connp->conn_zoneid; 2718 2719 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2720 ipif_t *ipif; 2721 2722 /* 2723 * Use an "emulated" IRE_BROADCAST to tell the transport it 2724 * is a multicast. 2725 * Pass other information that matches 2726 * the ipif (e.g. the source address). 
2727 * 2728 * conn_multicast_ill is only used for IPv6 packets 2729 */ 2730 mutex_enter(&connp->conn_lock); 2731 if (connp->conn_multicast_ill != NULL) { 2732 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2733 zoneid, 0, &ipif); 2734 } else { 2735 /* Look for default like ip_wput_v6 */ 2736 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2737 } 2738 mutex_exit(&connp->conn_lock); 2739 if (ipif == NULL || !ire_requested || 2740 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2741 if (ipif != NULL) 2742 ipif_refrele(ipif); 2743 if (ip_debug > 2) { 2744 /* ip1dbg */ 2745 pr_addr_dbg("ip_bind_connected_v6: bad " 2746 "connected multicast %s\n", AF_INET6, 2747 v6dst); 2748 } 2749 error = ENETUNREACH; 2750 goto bad_addr; 2751 } 2752 if (ipif != NULL) 2753 ipif_refrele(ipif); 2754 } else { 2755 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2756 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2757 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2758 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2759 /* 2760 * We also prevent ire's with src address INADDR_ANY to 2761 * be used, which are created temporarily for 2762 * sending out packets from endpoints that have 2763 * conn_unspec_src set. 2764 */ 2765 if (dst_ire == NULL || 2766 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2767 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2768 /* 2769 * When verifying destination reachability, we always 2770 * complain. 2771 * 2772 * When not verifying destination reachability but we 2773 * found an IRE, i.e. the destination is reachable, 2774 * then the other tests still apply and we complain. 
2775 */ 2776 if (verify_dst || (dst_ire != NULL)) { 2777 if (ip_debug > 2) { 2778 /* ip1dbg */ 2779 pr_addr_dbg("ip_bind_connected_v6: bad" 2780 " connected dst %s\n", AF_INET6, 2781 v6dst); 2782 } 2783 if (dst_ire == NULL || 2784 !(dst_ire->ire_type & IRE_HOST)) { 2785 error = ENETUNREACH; 2786 } else { 2787 error = EHOSTUNREACH; 2788 } 2789 goto bad_addr; 2790 } 2791 } 2792 } 2793 2794 /* 2795 * We now know that routing will allow us to reach the destination. 2796 * Check whether Trusted Solaris policy allows communication with this 2797 * host, and pretend that the destination is unreachable if not. 2798 * 2799 * This is never a problem for TCP, since that transport is known to 2800 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2801 * handling. If the remote is unreachable, it will be detected at that 2802 * point, so there's no reason to check it here. 2803 * 2804 * Note that for sendto (and other datagram-oriented friends), this 2805 * check is done as part of the data path label computation instead. 2806 * The check here is just to make non-TCP connect() report the right 2807 * error. 2808 */ 2809 if (dst_ire != NULL && is_system_labeled() && 2810 !IPCL_IS_TCP(connp) && 2811 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2812 connp->conn_mac_exempt) != 0) { 2813 error = EHOSTUNREACH; 2814 if (ip_debug > 2) { 2815 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2816 AF_INET6, v6dst); 2817 } 2818 goto bad_addr; 2819 } 2820 2821 /* 2822 * If the app does a connect(), it means that it will most likely 2823 * send more than 1 packet to the destination. It makes sense 2824 * to clear the temporary flag. 
2825 */ 2826 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2827 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2828 irb_t *irb = dst_ire->ire_bucket; 2829 2830 rw_enter(&irb->irb_lock, RW_WRITER); 2831 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2832 irb->irb_tmp_ire_cnt--; 2833 rw_exit(&irb->irb_lock); 2834 } 2835 2836 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2837 2838 /* 2839 * See if we should notify ULP about MDT; we do this whether or not 2840 * ire_requested is TRUE, in order to handle active connects; MDT 2841 * eligibility tests for passive connects are handled separately 2842 * through tcp_adapt_ire(). We do this before the source address 2843 * selection, because dst_ire may change after a call to 2844 * ipif_select_source_v6(). This is a best-effort check, as the 2845 * packet for this connection may not actually go through 2846 * dst_ire->ire_stq, and the exact IRE can only be known after 2847 * calling ip_newroute_v6(). This is why we further check on the 2848 * IRE during Multidata packet transmission in tcp_multisend(). 
2849 */ 2850 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2851 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2852 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2853 ILL_MDT_CAPABLE(md_ill)) { 2854 md_dst_ire = dst_ire; 2855 IRE_REFHOLD(md_dst_ire); 2856 } 2857 2858 if (dst_ire != NULL && 2859 dst_ire->ire_type == IRE_LOCAL && 2860 dst_ire->ire_zoneid != zoneid && 2861 dst_ire->ire_zoneid != ALL_ZONES) { 2862 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2863 zoneid, 0, NULL, 2864 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2865 MATCH_IRE_RJ_BHOLE); 2866 if (src_ire == NULL) { 2867 error = EHOSTUNREACH; 2868 goto bad_addr; 2869 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2870 if (!(src_ire->ire_type & IRE_HOST)) 2871 error = ENETUNREACH; 2872 else 2873 error = EHOSTUNREACH; 2874 goto bad_addr; 2875 } 2876 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2877 src_ipif = src_ire->ire_ipif; 2878 ipif_refhold(src_ipif); 2879 *v6src = src_ipif->ipif_v6lcl_addr; 2880 } 2881 ire_refrele(src_ire); 2882 src_ire = NULL; 2883 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2884 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2885 *v6src = sire->ire_src_addr_v6; 2886 ire_refrele(dst_ire); 2887 dst_ire = sire; 2888 sire = NULL; 2889 } else if (dst_ire->ire_type == IRE_CACHE && 2890 (dst_ire->ire_flags & RTF_SETSRC)) { 2891 ASSERT(dst_ire->ire_zoneid == zoneid || 2892 dst_ire->ire_zoneid == ALL_ZONES); 2893 *v6src = dst_ire->ire_src_addr_v6; 2894 } else { 2895 /* 2896 * Pick a source address so that a proper inbound load 2897 * spreading would happen. Use dst_ill specified by the 2898 * app. when socket option or scopeid is set. 
2899 */ 2900 int err; 2901 2902 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2903 uint_t if_index; 2904 2905 /* 2906 * Scope id or IPV6_PKTINFO 2907 */ 2908 2909 if_index = ipp->ipp_ifindex; 2910 dst_ill = ill_lookup_on_ifindex( 2911 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2912 if (dst_ill == NULL) { 2913 ip1dbg(("ip_bind_connected_v6:" 2914 " bad ifindex %d\n", if_index)); 2915 error = EADDRNOTAVAIL; 2916 goto bad_addr; 2917 } 2918 ill_held = B_TRUE; 2919 } else if (connp->conn_outgoing_ill != NULL) { 2920 /* 2921 * For IPV6_BOUND_IF socket option, 2922 * conn_outgoing_ill should be set 2923 * already in TCP or UDP/ICMP. 2924 */ 2925 dst_ill = conn_get_held_ill(connp, 2926 &connp->conn_outgoing_ill, &err); 2927 if (err == ILL_LOOKUP_FAILED) { 2928 ip1dbg(("ip_bind_connected_v6:" 2929 "no ill for bound_if\n")); 2930 error = EADDRNOTAVAIL; 2931 goto bad_addr; 2932 } 2933 ill_held = B_TRUE; 2934 } else if (dst_ire->ire_stq != NULL) { 2935 /* No need to hold ill here */ 2936 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2937 } else { 2938 /* No need to hold ill here */ 2939 dst_ill = dst_ire->ire_ipif->ipif_ill; 2940 } 2941 if (!ip6_asp_can_lookup()) { 2942 *mp->b_wptr++ = (char)protocol; 2943 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2944 ip_bind_connected_resume_v6); 2945 error = EINPROGRESS; 2946 goto refrele_and_quit; 2947 } 2948 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2949 RESTRICT_TO_NONE, connp->conn_src_preferences, 2950 zoneid); 2951 ip6_asp_table_refrele(); 2952 if (src_ipif == NULL) { 2953 pr_addr_dbg("ip_bind_connected_v6: " 2954 "no usable source address for " 2955 "connection to %s\n", AF_INET6, v6dst); 2956 error = EADDRNOTAVAIL; 2957 goto bad_addr; 2958 } 2959 *v6src = src_ipif->ipif_v6lcl_addr; 2960 } 2961 } 2962 2963 /* 2964 * We do ire_route_lookup_v6() here (and not an interface lookup) 2965 * as we assert that v6src should only come from an 2966 * UP interface for hard binding. 
2967 */ 2968 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2969 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2970 2971 /* src_ire must be a local|loopback */ 2972 if (!IRE_IS_LOCAL(src_ire)) { 2973 if (ip_debug > 2) { 2974 /* ip1dbg */ 2975 pr_addr_dbg("ip_bind_connected_v6: bad " 2976 "connected src %s\n", AF_INET6, v6src); 2977 } 2978 error = EADDRNOTAVAIL; 2979 goto bad_addr; 2980 } 2981 2982 /* 2983 * If the source address is a loopback address, the 2984 * destination had best be local or multicast. 2985 * The transports that can't handle multicast will reject 2986 * those addresses. 2987 */ 2988 if (src_ire->ire_type == IRE_LOOPBACK && 2989 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2990 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2991 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2992 error = -1; 2993 goto bad_addr; 2994 } 2995 /* 2996 * Allow setting new policies. For example, disconnects come 2997 * down as ipa_t bind. As we would have set conn_policy_cached 2998 * to B_TRUE before, we should set it to B_FALSE, so that policy 2999 * can change after the disconnect. 3000 */ 3001 connp->conn_policy_cached = B_FALSE; 3002 3003 /* 3004 * The addresses have been verified. Initialize the conn 3005 * before calling the policy as they expect the conns 3006 * initialized. 3007 */ 3008 connp->conn_srcv6 = *v6src; 3009 connp->conn_remv6 = *v6dst; 3010 connp->conn_lport = lport; 3011 connp->conn_fport = fport; 3012 3013 ASSERT(!(ipsec_policy_set && ire_requested)); 3014 if (ire_requested) { 3015 iulp_t *ulp_info = NULL; 3016 3017 /* 3018 * Note that sire will not be NULL if this is an off-link 3019 * connection and there is not cache for that dest yet. 3020 * 3021 * XXX Because of an existing bug, if there are multiple 3022 * default routes, the IRE returned now may not be the actual 3023 * default route used (default routes are chosen in a 3024 * round robin fashion). 
So if the metrics for different 3025 * default routes are different, we may return the wrong 3026 * metrics. This will not be a problem if the existing 3027 * bug is fixed. 3028 */ 3029 if (sire != NULL) 3030 ulp_info = &(sire->ire_uinfo); 3031 3032 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3033 error = -1; 3034 goto bad_addr; 3035 } 3036 } else if (ipsec_policy_set) { 3037 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3038 error = -1; 3039 goto bad_addr; 3040 } 3041 } 3042 3043 /* 3044 * Cache IPsec policy in this conn. If we have per-socket policy, 3045 * we'll cache that. If we don't, we'll inherit global policy. 3046 * 3047 * We can't insert until the conn reflects the policy. Note that 3048 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3049 * connections where we don't have a policy. This is to prevent 3050 * global policy lookups in the inbound path. 3051 * 3052 * If we insert before we set conn_policy_cached, 3053 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3054 * because global policy cound be non-empty. We normally call 3055 * ipsec_check_policy() for conn_policy_cached connections only if 3056 * conn_in_enforce_policy is set. But in this case, 3057 * conn_policy_cached can get set anytime since we made the 3058 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3059 * is called, which will make the above assumption false. Thus, we 3060 * need to insert after we set conn_policy_cached. 3061 */ 3062 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3063 goto bad_addr; 3064 3065 /* If not fanout_insert this was just an address verification */ 3066 if (fanout_insert) { 3067 /* 3068 * The addresses have been verified. Time to insert in 3069 * the correct fanout list. 3070 */ 3071 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3072 connp->conn_ports, 3073 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3074 } 3075 if (error == 0) { 3076 connp->conn_fully_bound = B_TRUE; 3077 /* 3078 * Our initial checks for MDT have passed; the IRE is not 3079 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3080 * be supporting MDT. Pass the IRE, IPC and ILL into 3081 * ip_mdinfo_return(), which performs further checks 3082 * against them and upon success, returns the MDT info 3083 * mblk which we will attach to the bind acknowledgment. 3084 */ 3085 if (md_dst_ire != NULL) { 3086 mblk_t *mdinfo_mp; 3087 3088 ASSERT(md_ill != NULL); 3089 ASSERT(md_ill->ill_mdt_capab != NULL); 3090 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3091 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3092 linkb(mp, mdinfo_mp); 3093 } 3094 } 3095 bad_addr: 3096 if (ipsec_policy_set) { 3097 ASSERT(policy_mp != NULL); 3098 freeb(policy_mp); 3099 /* 3100 * As of now assume that nothing else accompanies 3101 * IPSEC_POLICY_SET. 3102 */ 3103 mp->b_cont = NULL; 3104 } 3105 refrele_and_quit: 3106 if (src_ire != NULL) 3107 IRE_REFRELE(src_ire); 3108 if (dst_ire != NULL) 3109 IRE_REFRELE(dst_ire); 3110 if (sire != NULL) 3111 IRE_REFRELE(sire); 3112 if (src_ipif != NULL) 3113 ipif_refrele(src_ipif); 3114 if (md_dst_ire != NULL) 3115 IRE_REFRELE(md_dst_ire); 3116 if (ill_held && dst_ill != NULL) 3117 ill_refrele(dst_ill); 3118 return (error); 3119 } 3120 3121 /* 3122 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3123 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3124 */ 3125 static boolean_t 3126 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3127 iulp_t *ulp_info) 3128 { 3129 mblk_t *mp1; 3130 ire_t *ret_ire; 3131 3132 mp1 = mp->b_cont; 3133 ASSERT(mp1 != NULL); 3134 3135 if (ire != NULL) { 3136 /* 3137 * mp1 initialized above to IRE_DB_REQ_TYPE 3138 * appended mblk. Its <upper protocol>'s 3139 * job to make sure there is room. 
3140 */ 3141 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3142 return (B_FALSE); 3143 3144 mp1->b_datap->db_type = IRE_DB_TYPE; 3145 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3146 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3147 ret_ire = (ire_t *)mp1->b_rptr; 3148 if (IN6_IS_ADDR_MULTICAST(dst) || 3149 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3150 ret_ire->ire_type = IRE_BROADCAST; 3151 ret_ire->ire_addr_v6 = *dst; 3152 } 3153 if (ulp_info != NULL) { 3154 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3155 sizeof (iulp_t)); 3156 } 3157 ret_ire->ire_mp = mp1; 3158 } else { 3159 /* 3160 * No IRE was found. Remove IRE mblk. 3161 */ 3162 mp->b_cont = mp1->b_cont; 3163 freeb(mp1); 3164 } 3165 return (B_TRUE); 3166 } 3167 3168 /* 3169 * Add an ip6i_t header to the front of the mblk. 3170 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3171 * Returns NULL if allocation fails (and frees original message). 3172 * Used in outgoing path when going through ip_newroute_*v6(). 3173 * Used in incoming path to pass ifindex to transports. 
3174 */ 3175 mblk_t * 3176 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3177 { 3178 mblk_t *mp1; 3179 ip6i_t *ip6i; 3180 ip6_t *ip6h; 3181 3182 ip6h = (ip6_t *)mp->b_rptr; 3183 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3184 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3185 mp->b_datap->db_ref > 1) { 3186 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3187 if (mp1 == NULL) { 3188 freemsg(mp); 3189 return (NULL); 3190 } 3191 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3192 mp1->b_cont = mp; 3193 mp = mp1; 3194 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3195 } 3196 mp->b_rptr = (uchar_t *)ip6i; 3197 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3198 ip6i->ip6i_nxt = IPPROTO_RAW; 3199 if (ill != NULL) { 3200 ip6i->ip6i_flags = IP6I_IFINDEX; 3201 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3202 } else { 3203 ip6i->ip6i_flags = 0; 3204 } 3205 ip6i->ip6i_nexthop = *dst; 3206 return (mp); 3207 } 3208 3209 /* 3210 * Handle protocols with which IP is less intimate. There 3211 * can be more than one stream bound to a particular 3212 * protocol. When this is the case, normally each one gets a copy 3213 * of any incoming packets. 3214 * However, if the packet was tunneled and not multicast we only send to it 3215 * the first match. 3216 * 3217 * Zones notes: 3218 * Packets will be distributed to streams in all zones. This is really only 3219 * useful for ICMPv6 as only applications in the global zone can create raw 3220 * sockets for other protocols. 
3221 */ 3222 static void 3223 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3224 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3225 boolean_t mctl_present, zoneid_t zoneid) 3226 { 3227 queue_t *rq; 3228 mblk_t *mp1, *first_mp1; 3229 in6_addr_t dst = ip6h->ip6_dst; 3230 in6_addr_t src = ip6h->ip6_src; 3231 boolean_t one_only; 3232 mblk_t *first_mp = mp; 3233 boolean_t secure, shared_addr; 3234 conn_t *connp, *first_connp, *next_connp; 3235 connf_t *connfp; 3236 3237 if (mctl_present) { 3238 mp = first_mp->b_cont; 3239 secure = ipsec_in_is_secure(first_mp); 3240 ASSERT(mp != NULL); 3241 } else { 3242 secure = B_FALSE; 3243 } 3244 3245 /* 3246 * If the packet was tunneled and not multicast we only send to it 3247 * the first match. 3248 */ 3249 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3250 !IN6_IS_ADDR_MULTICAST(&dst)); 3251 3252 shared_addr = (zoneid == ALL_ZONES); 3253 if (shared_addr) { 3254 /* 3255 * We don't allow multilevel ports for raw IP, so no need to 3256 * check for that here. 3257 */ 3258 zoneid = tsol_packet_to_zoneid(mp); 3259 } 3260 3261 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3262 mutex_enter(&connfp->connf_lock); 3263 connp = connfp->connf_head; 3264 for (connp = connfp->connf_head; connp != NULL; 3265 connp = connp->conn_next) { 3266 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3267 zoneid) && 3268 (!is_system_labeled() || 3269 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3270 connp))) 3271 break; 3272 } 3273 3274 if (connp == NULL || connp->conn_upq == NULL) { 3275 /* 3276 * No one bound to this port. Is 3277 * there a client that wants all 3278 * unclaimed datagrams? 
3279 */ 3280 mutex_exit(&connfp->connf_lock); 3281 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3282 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3283 nexthdr_offset, mctl_present, zoneid)) { 3284 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3285 } 3286 3287 return; 3288 } 3289 3290 CONN_INC_REF(connp); 3291 first_connp = connp; 3292 3293 /* 3294 * XXX: Fix the multiple protocol listeners case. We should not 3295 * be walking the conn->next list here. 3296 */ 3297 if (one_only) { 3298 /* 3299 * Only send message to one tunnel driver by immediately 3300 * terminating the loop. 3301 */ 3302 connp = NULL; 3303 } else { 3304 connp = connp->conn_next; 3305 3306 } 3307 for (;;) { 3308 while (connp != NULL) { 3309 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3310 flags, zoneid) && 3311 (!is_system_labeled() || 3312 tsol_receive_local(mp, &dst, IPV6_VERSION, 3313 shared_addr, connp))) 3314 break; 3315 connp = connp->conn_next; 3316 } 3317 3318 /* 3319 * Just copy the data part alone. The mctl part is 3320 * needed just for verifying policy and it is never 3321 * sent up. 3322 */ 3323 if (connp == NULL || connp->conn_upq == NULL || 3324 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3325 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3326 /* 3327 * No more intested clients or memory 3328 * allocation failed 3329 */ 3330 connp = first_connp; 3331 break; 3332 } 3333 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3334 CONN_INC_REF(connp); 3335 mutex_exit(&connfp->connf_lock); 3336 rq = connp->conn_rq; 3337 /* 3338 * For link-local always add ifindex so that transport can set 3339 * sin6_scope_id. Avoid it for ICMP error fanout. 
3340 */ 3341 if ((connp->conn_ipv6_recvpktinfo || 3342 IN6_IS_ADDR_LINKLOCAL(&src)) && 3343 (flags & IP_FF_IP6INFO)) { 3344 /* Add header */ 3345 mp1 = ip_add_info_v6(mp1, inill, &dst); 3346 } 3347 if (mp1 == NULL) { 3348 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3349 } else if (!canputnext(rq)) { 3350 if (flags & IP_FF_RAWIP) { 3351 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3352 } else { 3353 BUMP_MIB(ill->ill_icmp6_mib, 3354 ipv6IfIcmpInOverflows); 3355 } 3356 3357 freemsg(mp1); 3358 } else { 3359 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3360 first_mp1 = ipsec_check_inbound_policy 3361 (first_mp1, connp, NULL, ip6h, 3362 mctl_present); 3363 } 3364 if (first_mp1 != NULL) { 3365 if (mctl_present) 3366 freeb(first_mp1); 3367 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3368 putnext(rq, mp1); 3369 } 3370 } 3371 mutex_enter(&connfp->connf_lock); 3372 /* Follow the next pointer before releasing the conn. */ 3373 next_connp = connp->conn_next; 3374 CONN_DEC_REF(connp); 3375 connp = next_connp; 3376 } 3377 3378 /* Last one. Send it upstream. */ 3379 mutex_exit(&connfp->connf_lock); 3380 3381 /* Initiate IPPF processing */ 3382 if (IP6_IN_IPP(flags)) { 3383 uint_t ifindex; 3384 3385 mutex_enter(&ill->ill_lock); 3386 ifindex = ill->ill_phyint->phyint_ifindex; 3387 mutex_exit(&ill->ill_lock); 3388 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3389 if (mp == NULL) { 3390 CONN_DEC_REF(connp); 3391 if (mctl_present) 3392 freeb(first_mp); 3393 return; 3394 } 3395 } 3396 3397 /* 3398 * For link-local always add ifindex so that transport can set 3399 * sin6_scope_id. Avoid it for ICMP error fanout. 
3400 */ 3401 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3402 (flags & IP_FF_IP6INFO)) { 3403 /* Add header */ 3404 mp = ip_add_info_v6(mp, inill, &dst); 3405 if (mp == NULL) { 3406 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3407 CONN_DEC_REF(connp); 3408 if (mctl_present) 3409 freeb(first_mp); 3410 return; 3411 } else if (mctl_present) { 3412 first_mp->b_cont = mp; 3413 } else { 3414 first_mp = mp; 3415 } 3416 } 3417 3418 rq = connp->conn_rq; 3419 if (!canputnext(rq)) { 3420 if (flags & IP_FF_RAWIP) { 3421 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3422 } else { 3423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3424 } 3425 3426 freemsg(first_mp); 3427 } else { 3428 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3429 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3430 NULL, ip6h, mctl_present); 3431 if (first_mp == NULL) { 3432 CONN_DEC_REF(connp); 3433 return; 3434 } 3435 } 3436 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3437 putnext(rq, mp); 3438 if (mctl_present) 3439 freeb(first_mp); 3440 } 3441 CONN_DEC_REF(connp); 3442 } 3443 3444 /* 3445 * Send an ICMP error after patching up the packet appropriately. Returns 3446 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3447 */ 3448 int 3449 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3450 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3451 boolean_t mctl_present, zoneid_t zoneid) 3452 { 3453 ip6_t *ip6h; 3454 mblk_t *first_mp; 3455 boolean_t secure; 3456 unsigned char db_type; 3457 3458 first_mp = mp; 3459 if (mctl_present) { 3460 mp = mp->b_cont; 3461 secure = ipsec_in_is_secure(first_mp); 3462 ASSERT(mp != NULL); 3463 } else { 3464 /* 3465 * If this is an ICMP error being reported - which goes 3466 * up as M_CTLs, we need to convert them to M_DATA till 3467 * we finish checking with global policy because 3468 * ipsec_check_global_policy() assumes M_DATA as clear 3469 * and M_CTL as secure. 
3470 */ 3471 db_type = mp->b_datap->db_type; 3472 mp->b_datap->db_type = M_DATA; 3473 secure = B_FALSE; 3474 } 3475 /* 3476 * We are generating an icmp error for some inbound packet. 3477 * Called from all ip_fanout_(udp, tcp, proto) functions. 3478 * Before we generate an error, check with global policy 3479 * to see whether this is allowed to enter the system. As 3480 * there is no "conn", we are checking with global policy. 3481 */ 3482 ip6h = (ip6_t *)mp->b_rptr; 3483 if (secure || ipsec_inbound_v6_policy_present) { 3484 first_mp = ipsec_check_global_policy(first_mp, NULL, 3485 NULL, ip6h, mctl_present); 3486 if (first_mp == NULL) 3487 return (0); 3488 } 3489 3490 if (!mctl_present) 3491 mp->b_datap->db_type = db_type; 3492 3493 if (flags & IP_FF_SEND_ICMP) { 3494 if (flags & IP_FF_HDR_COMPLETE) { 3495 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3496 freemsg(first_mp); 3497 return (1); 3498 } 3499 } 3500 switch (icmp_type) { 3501 case ICMP6_DST_UNREACH: 3502 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3503 B_FALSE, B_FALSE, zoneid); 3504 break; 3505 case ICMP6_PARAM_PROB: 3506 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3507 nexthdr_offset, B_FALSE, B_FALSE, zoneid); 3508 break; 3509 default: 3510 #ifdef DEBUG 3511 panic("ip_fanout_send_icmp_v6: wrong type"); 3512 /*NOTREACHED*/ 3513 #else 3514 freemsg(first_mp); 3515 break; 3516 #endif 3517 } 3518 } else { 3519 freemsg(first_mp); 3520 return (0); 3521 } 3522 3523 return (1); 3524 } 3525 3526 3527 /* 3528 * Fanout for TCP packets 3529 * The caller puts <fport, lport> in the ports parameter. 
3530 */ 3531 static void 3532 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3533 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3534 { 3535 mblk_t *first_mp; 3536 boolean_t secure; 3537 conn_t *connp; 3538 tcph_t *tcph; 3539 boolean_t syn_present = B_FALSE; 3540 3541 first_mp = mp; 3542 if (mctl_present) { 3543 mp = first_mp->b_cont; 3544 secure = ipsec_in_is_secure(first_mp); 3545 ASSERT(mp != NULL); 3546 } else { 3547 secure = B_FALSE; 3548 } 3549 3550 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3551 3552 if (connp == NULL || 3553 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3554 /* 3555 * No hard-bound match. Send Reset. 3556 */ 3557 dblk_t *dp = mp->b_datap; 3558 uint32_t ill_index; 3559 3560 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3561 3562 /* Initiate IPPf processing, if needed. */ 3563 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3564 ill_index = ill->ill_phyint->phyint_ifindex; 3565 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3566 if (first_mp == NULL) { 3567 if (connp != NULL) 3568 CONN_DEC_REF(connp); 3569 return; 3570 } 3571 } 3572 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3573 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3574 if (connp != NULL) 3575 CONN_DEC_REF(connp); 3576 return; 3577 } 3578 3579 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3580 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3581 if (connp->conn_flags & IPCL_TCP) { 3582 squeue_t *sqp; 3583 3584 /* 3585 * For fused tcp loopback, assign the eager's 3586 * squeue to be that of the active connect's. 
3587 */ 3588 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3589 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3590 !IP6_IN_IPP(flags)) { 3591 ASSERT(Q_TO_CONN(q) != NULL); 3592 sqp = Q_TO_CONN(q)->conn_sqp; 3593 } else { 3594 sqp = IP_SQUEUE_GET(lbolt); 3595 } 3596 3597 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3598 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3599 3600 /* 3601 * db_cksumstuff is unused in the incoming 3602 * path; Thus store the ifindex here. It will 3603 * be cleared in tcp_conn_create_v6(). 3604 */ 3605 DB_CKSUMSTUFF(mp) = 3606 (intptr_t)ill->ill_phyint->phyint_ifindex; 3607 syn_present = B_TRUE; 3608 } 3609 } 3610 3611 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3612 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3613 if ((flags & TH_RST) || (flags & TH_URG)) { 3614 CONN_DEC_REF(connp); 3615 freemsg(first_mp); 3616 return; 3617 } 3618 if (flags & TH_ACK) { 3619 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3620 CONN_DEC_REF(connp); 3621 return; 3622 } 3623 3624 CONN_DEC_REF(connp); 3625 freemsg(first_mp); 3626 return; 3627 } 3628 3629 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3630 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3631 NULL, ip6h, mctl_present); 3632 if (first_mp == NULL) { 3633 CONN_DEC_REF(connp); 3634 return; 3635 } 3636 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3637 ASSERT(syn_present); 3638 if (mctl_present) { 3639 ASSERT(first_mp != mp); 3640 first_mp->b_datap->db_struioflag |= 3641 STRUIO_POLICY; 3642 } else { 3643 ASSERT(first_mp == mp); 3644 mp->b_datap->db_struioflag &= 3645 ~STRUIO_EAGER; 3646 mp->b_datap->db_struioflag |= 3647 STRUIO_POLICY; 3648 } 3649 } else { 3650 /* 3651 * Discard first_mp early since we're dealing with a 3652 * fully-connected conn_t and tcp doesn't do policy in 3653 * this case. Also, if someone is bound to IPPROTO_TCP 3654 * over raw IP, they don't expect to see a M_CTL. 
3655 */ 3656 if (mctl_present) { 3657 freeb(first_mp); 3658 mctl_present = B_FALSE; 3659 } 3660 first_mp = mp; 3661 } 3662 } 3663 3664 /* Initiate IPPF processing */ 3665 if (IP6_IN_IPP(flags)) { 3666 uint_t ifindex; 3667 3668 mutex_enter(&ill->ill_lock); 3669 ifindex = ill->ill_phyint->phyint_ifindex; 3670 mutex_exit(&ill->ill_lock); 3671 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3672 if (mp == NULL) { 3673 CONN_DEC_REF(connp); 3674 if (mctl_present) { 3675 freeb(first_mp); 3676 } 3677 return; 3678 } else if (mctl_present) { 3679 /* 3680 * ip_add_info_v6 might return a new mp. 3681 */ 3682 ASSERT(first_mp != mp); 3683 first_mp->b_cont = mp; 3684 } else { 3685 first_mp = mp; 3686 } 3687 } 3688 3689 /* 3690 * For link-local always add ifindex so that TCP can bind to that 3691 * interface. Avoid it for ICMP error fanout. 3692 */ 3693 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3694 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3695 (flags & IP_FF_IP6INFO))) { 3696 /* Add header */ 3697 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3698 if (mp == NULL) { 3699 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3700 CONN_DEC_REF(connp); 3701 if (mctl_present) 3702 freeb(first_mp); 3703 return; 3704 } else if (mctl_present) { 3705 ASSERT(first_mp != mp); 3706 first_mp->b_cont = mp; 3707 } else { 3708 first_mp = mp; 3709 } 3710 } 3711 3712 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3713 if (IPCL_IS_TCP(connp)) { 3714 (*ip_input_proc)(connp->conn_sqp, first_mp, 3715 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3716 } else { 3717 putnext(connp->conn_rq, first_mp); 3718 CONN_DEC_REF(connp); 3719 } 3720 } 3721 3722 /* 3723 * Fanout for UDP packets. 3724 * The caller puts <fport, lport> in the ports parameter. 3725 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3726 * 3727 * If SO_REUSEADDR is set all multicast and broadcast packets 3728 * will be delivered to all streams bound to the same port. 
*
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t	dstport, srcport;
	in6_addr_t	dst;
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	connf_t		*connfp;
	conn_t		*first_conn;
	conn_t		*next_conn;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	src;
	boolean_t	shared_addr;

	/*
	 * first_mp names the head of the message (the M_CTL when one is
	 * present); mp names the data part.
	 */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/* Extract ports in net byte order */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && IPCL_ZONE_MATCH(connp, zoneid) &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		/* Receiver is flow-controlled: drop the whole message. */
		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags)) {
			uint_t	ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);

		IP6_STAT(ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data part went up; release the M_CTL. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/*
	 * Multicast destination.  Find the first interested conn; note
	 * that, unlike the unicast search above, no IPCL_ZONE_MATCH()
	 * test is applied here.
	 */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	/*
	 * Every additional match gets its own copy of the message.  The
	 * original is kept for first_conn and delivered after the loop.
	 */
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp))
		    == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			/*
			 * ip_add_info_v6() already freed the data part;
			 * only the copied M_CTL is left to release.
			 */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) ||
		    secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

			/* Send it upstream */
			CONN_UDP_RECV(connp, mp1);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ipv6_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
		/* Data part only; the M_CTL is released at the end. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);
	}
	IP6_STAT(ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 */
	if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid)) {
			BUMP_MIB(&ip_mib, udpNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 *	- Set extension header pointers to appropriate locations
 *	- Determine IPv6 header length and return it
 *	- Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * Only the bytes of mp up to b_wptr are examined; continuation mblks
 * are not followed.  nexthdrp may be NULL if the caller does not want
 * the last nexthdr value.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  It also
 * skips fragment headers.  This routine deals with malformed packets
 * of various sorts in which case the returned length is up to the
 * malformed part.
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* The length field counts 8-octet units past the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			/*
			 * Fragment headers are skipped.  Currently, only
			 * IP cares for their existence.  If anyone other
			 * than IP ever has the need to know about the
			 * location of fragment headers, support can be
			 * added to the ip6_pkt_t at that time.
			 */
			tmpfraghdr = (ip6_frag_t *)whereptr;
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			break;
		case IPPROTO_NONE:
		default:
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in the fields of an IPv6 header that may have been left
 * incomplete.  An unspecified source address is replaced with the
 * address of the IRE returned by ire_lookup_local_v6() for zoneid; the
 * version/class/flow word and the hop limit are then set to their
 * defaults.
 *
 * Returns 0 on success, or 1 (with the header left unmodified) if no
 * local IRE could be found for the source address.
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid)
{
	ire_t	*ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4202 */ 4203 boolean_t 4204 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4205 uint8_t **nexthdrpp) 4206 { 4207 uint16_t length; 4208 uint_t ehdrlen; 4209 uint8_t *nexthdrp; 4210 uint8_t *whereptr; 4211 uint8_t *endptr; 4212 ip6_dest_t *desthdr; 4213 ip6_rthdr_t *rthdr; 4214 ip6_frag_t *fraghdr; 4215 4216 length = IPV6_HDR_LEN; 4217 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4218 endptr = mp->b_wptr; 4219 4220 nexthdrp = &ip6h->ip6_nxt; 4221 while (whereptr < endptr) { 4222 /* Is there enough left for len + nexthdr? */ 4223 if (whereptr + MIN_EHDR_LEN > endptr) 4224 break; 4225 4226 switch (*nexthdrp) { 4227 case IPPROTO_HOPOPTS: 4228 case IPPROTO_DSTOPTS: 4229 /* Assumes the headers are identical for hbh and dst */ 4230 desthdr = (ip6_dest_t *)whereptr; 4231 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4232 if ((uchar_t *)desthdr + ehdrlen > endptr) 4233 return (B_FALSE); 4234 nexthdrp = &desthdr->ip6d_nxt; 4235 break; 4236 case IPPROTO_ROUTING: 4237 rthdr = (ip6_rthdr_t *)whereptr; 4238 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4239 if ((uchar_t *)rthdr + ehdrlen > endptr) 4240 return (B_FALSE); 4241 nexthdrp = &rthdr->ip6r_nxt; 4242 break; 4243 case IPPROTO_FRAGMENT: 4244 fraghdr = (ip6_frag_t *)whereptr; 4245 ehdrlen = sizeof (ip6_frag_t); 4246 if ((uchar_t *)&fraghdr[1] > endptr) 4247 return (B_FALSE); 4248 nexthdrp = &fraghdr->ip6f_nxt; 4249 break; 4250 case IPPROTO_NONE: 4251 /* No next header means we're finished */ 4252 default: 4253 *hdr_length_ptr = length; 4254 *nexthdrpp = nexthdrp; 4255 return (B_TRUE); 4256 } 4257 length += ehdrlen; 4258 whereptr += ehdrlen; 4259 *hdr_length_ptr = length; 4260 *nexthdrpp = nexthdrp; 4261 } 4262 switch (*nexthdrp) { 4263 case IPPROTO_HOPOPTS: 4264 case IPPROTO_DSTOPTS: 4265 case IPPROTO_ROUTING: 4266 case IPPROTO_FRAGMENT: 4267 /* 4268 * If any know extension headers are still to be processed, 4269 * the packet's malformed (or at least all the IP header(s) are 4270 * not in the same 
mblk - and that should never happen. 4271 */ 4272 return (B_FALSE); 4273 4274 default: 4275 /* 4276 * If we get here, we know that all of the IP headers were in 4277 * the same mblk, even if the ULP header is in the next mblk. 4278 */ 4279 *hdr_length_ptr = length; 4280 *nexthdrpp = nexthdrp; 4281 return (B_TRUE); 4282 } 4283 } 4284 4285 /* 4286 * Return the length of the IPv6 related headers (including extension headers) 4287 * Returns a length even if the packet is malformed. 4288 */ 4289 int 4290 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4291 { 4292 uint16_t hdr_len; 4293 uint8_t *nexthdrp; 4294 4295 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4296 return (hdr_len); 4297 } 4298 4299 /* 4300 * Select an ill for the packet by considering load spreading across 4301 * a different ill in the group if dst_ill is part of some group. 4302 */ 4303 static ill_t * 4304 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4305 { 4306 ill_t *ill; 4307 4308 /* 4309 * We schedule irrespective of whether the source address is 4310 * INADDR_UNSPECIED or not. 4311 */ 4312 ill = illgrp_scheduler(dst_ill); 4313 if (ill == NULL) 4314 return (NULL); 4315 4316 /* 4317 * For groups with names ip_sioctl_groupname ensures that all 4318 * ills are of same type. For groups without names, ifgrp_insert 4319 * ensures this. 4320 */ 4321 ASSERT(dst_ill->ill_type == ill->ill_type); 4322 4323 return (ill); 4324 } 4325 4326 /* 4327 * IPv6 - 4328 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4329 * to send out a packet to a destination address for which we do not have 4330 * specific routing information. 4331 * 4332 * Handle non-multicast packets. If ill is non-NULL the match is done 4333 * for that ill. 4334 * 4335 * When a specific ill is specified (using IPV6_PKTINFO, 4336 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4337 * on routing entries (ftable and ctable) that have a matching 4338 * ire->ire_ipif->ipif_ill. 
Thus this can only be used 4339 * for destinations that are on-link for the specific ill 4340 * and that can appear on multiple links. Thus it is useful 4341 * for multicast destinations, link-local destinations, and 4342 * at some point perhaps for site-local destinations (if the 4343 * node sits at a site boundary). 4344 * We create the cache entries in the regular ctable since 4345 * it can not "confuse" things for other destinations. 4346 * table. 4347 * 4348 * When ill is part of an ill group, we subject the packets 4349 * to load spreading even if the ill is specified by the 4350 * means described above. We disable only for IPV6_BOUND_PIF 4351 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4352 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4353 * set. 4354 * 4355 * NOTE : These are the scopes of some of the variables that point at IRE, 4356 * which need to be followed while making any future modifications 4357 * to avoid memory leaks. 4358 * 4359 * - ire and sire are the entries looked up initially by 4360 * ire_ftable_lookup_v6. 4361 * - ipif_ire is used to hold the interface ire associated with 4362 * the new cache ire. But its scope is limited, so we always REFRELE 4363 * it before branching out to error paths. 4364 * - save_ire is initialized before ire_create, so that ire returned 4365 * by ire_create will not over-write the ire. We REFRELE save_ire 4366 * before breaking out of the switch. 4367 * 4368 * Thus on failures, we have to REFRELE only ire and sire, if they 4369 * are not NULL. 4370 * 4371 * v6srcp may be used in the future. Currently unused.
4372 */ 4373 /* ARGSUSED */ 4374 void 4375 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4376 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4377 { 4378 in6_addr_t v6gw; 4379 in6_addr_t dst; 4380 ire_t *ire = NULL; 4381 ipif_t *src_ipif = NULL; 4382 ill_t *dst_ill = NULL; 4383 ire_t *sire = NULL; 4384 ire_t *save_ire; 4385 mblk_t *dlureq_mp; 4386 ip6_t *ip6h; 4387 int err = 0; 4388 mblk_t *first_mp; 4389 ipsec_out_t *io; 4390 ill_t *attach_ill = NULL; 4391 ushort_t ire_marks = 0; 4392 int match_flags; 4393 boolean_t ip6i_present; 4394 ire_t *first_sire = NULL; 4395 mblk_t *copy_mp = NULL; 4396 mblk_t *xmit_mp = NULL; 4397 in6_addr_t save_dst; 4398 uint32_t multirt_flags = 4399 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4400 boolean_t multirt_is_resolvable; 4401 boolean_t multirt_resolve_next; 4402 boolean_t need_rele = B_FALSE; 4403 boolean_t do_attach_ill = B_FALSE; 4404 boolean_t ip6_asp_table_held = B_FALSE; 4405 tsol_ire_gw_secattr_t *attrp = NULL; 4406 tsol_gcgrp_t *gcgrp = NULL; 4407 tsol_gcgrp_addr_t ga; 4408 4409 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4410 4411 first_mp = mp; 4412 if (mp->b_datap->db_type == M_CTL) { 4413 mp = mp->b_cont; 4414 io = (ipsec_out_t *)first_mp->b_rptr; 4415 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4416 } else { 4417 io = NULL; 4418 } 4419 4420 /* 4421 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4422 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4423 * could be NULL. 4424 * 4425 * This information can appear either in an ip6i_t or an IPSEC_OUT 4426 * message. 4427 */ 4428 ip6h = (ip6_t *)mp->b_rptr; 4429 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4430 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4431 if (!ip6i_present || 4432 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4433 attach_ill = ip_grab_attach_ill(ill, first_mp, 4434 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4435 io->ipsec_out_ill_index), B_TRUE); 4436 /* Failure case frees things for us. */ 4437 if (attach_ill == NULL) 4438 return; 4439 4440 /* 4441 * Check if we need an ire that will not be 4442 * looked up by anybody else i.e. HIDDEN. 4443 */ 4444 if (ill_is_probeonly(attach_ill)) 4445 ire_marks = IRE_MARK_HIDDEN; 4446 } 4447 } 4448 4449 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4450 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4451 goto icmp_err_ret; 4452 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4453 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4454 goto icmp_err_ret; 4455 } 4456 4457 /* 4458 * If this IRE is created for forwarding or it is not for 4459 * TCP traffic, mark it as temporary. 4460 * 4461 * Is it sufficient just to check the next header?? 4462 */ 4463 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4464 ire_marks |= IRE_MARK_TEMPORARY; 4465 4466 /* 4467 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4468 * chain until it gets the most specific information available. 4469 * For example, we know that there is no IRE_CACHE for this dest, 4470 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4471 * ire_ftable_lookup_v6 will look up the gateway, etc. 4472 */ 4473 4474 if (ill == NULL) { 4475 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4476 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4477 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4478 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4479 match_flags); 4480 /* 4481 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4482 * in a NULL ill, but the packet could be a neighbor 4483 * solicitation/advertisment and could have a valid attach_ill. 4484 */ 4485 if (attach_ill != NULL) 4486 ill_refrele(attach_ill); 4487 } else { 4488 if (attach_ill != NULL) { 4489 /* 4490 * attach_ill is set only for communicating with 4491 * on-link hosts. So, don't look for DEFAULT. 
4492 * ip_wput_v6 passes the right ill in this case and 4493 * hence we can assert. 4494 */ 4495 ASSERT(ill == attach_ill); 4496 ill_refrele(attach_ill); 4497 do_attach_ill = B_TRUE; 4498 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4499 } else { 4500 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4501 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4502 } 4503 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4504 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4505 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4506 } 4507 4508 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4509 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4510 4511 if (zoneid == ALL_ZONES && ire != NULL) { 4512 /* 4513 * In the forwarding case, we can use a route from any zone 4514 * since we won't change the source address. We can easily 4515 * assert that the source address is already set when there's no 4516 * ip6_info header - otherwise we'd have to call pullupmsg(). 4517 */ 4518 ASSERT(ip6i_present || 4519 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4520 zoneid = ire->ire_zoneid; 4521 } 4522 4523 /* 4524 * We enter a loop that will be run only once in most cases. 4525 * The loop is re-entered in the case where the destination 4526 * can be reached through multiple RTF_MULTIRT-flagged routes. 4527 * The intention is to compute multiple routes to a single 4528 * destination in a single ip_newroute_v6 call. 4529 * The information is contained in sire->ire_flags. 
4530 */ 4531 do { 4532 multirt_resolve_next = B_FALSE; 4533 4534 if (dst_ill != NULL) { 4535 ill_refrele(dst_ill); 4536 dst_ill = NULL; 4537 } 4538 if (src_ipif != NULL) { 4539 ipif_refrele(src_ipif); 4540 src_ipif = NULL; 4541 } 4542 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4543 ip3dbg(("ip_newroute_v6: starting new resolution " 4544 "with first_mp %p, tag %d\n", 4545 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4546 4547 /* 4548 * We check if there are trailing unresolved routes for 4549 * the destination contained in sire. 4550 */ 4551 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4552 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4553 4554 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4555 "ire %p, sire %p\n", 4556 multirt_is_resolvable, (void *)ire, (void *)sire)); 4557 4558 if (!multirt_is_resolvable) { 4559 /* 4560 * No more multirt routes to resolve; give up 4561 * (all routes resolved or no more resolvable 4562 * routes). 4563 */ 4564 if (ire != NULL) { 4565 ire_refrele(ire); 4566 ire = NULL; 4567 } 4568 } else { 4569 ASSERT(sire != NULL); 4570 ASSERT(ire != NULL); 4571 /* 4572 * We simply use first_sire as a flag that 4573 * indicates if a resolvable multirt route has 4574 * already been found during the preceding 4575 * loops. If it is not the case, we may have 4576 * to send an ICMP error to report that the 4577 * destination is unreachable. We do not 4578 * IRE_REFHOLD first_sire. 4579 */ 4580 if (first_sire == NULL) { 4581 first_sire = sire; 4582 } 4583 } 4584 } 4585 if ((ire == NULL) || (ire == sire)) { 4586 /* 4587 * either ire == NULL (the destination cannot be 4588 * resolved) or ire == sire (the gateway cannot be 4589 * resolved). At this point, there are no more routes 4590 * to resolve for the destination, thus we exit. 
4591 */ 4592 if (ip_debug > 3) { 4593 /* ip2dbg */ 4594 pr_addr_dbg("ip_newroute_v6: " 4595 "can't resolve %s\n", AF_INET6, v6dstp); 4596 } 4597 ip3dbg(("ip_newroute_v6: " 4598 "ire %p, sire %p, first_sire %p\n", 4599 (void *)ire, (void *)sire, (void *)first_sire)); 4600 4601 if (sire != NULL) { 4602 ire_refrele(sire); 4603 sire = NULL; 4604 } 4605 4606 if (first_sire != NULL) { 4607 /* 4608 * At least one multirt route has been found 4609 * in the same ip_newroute() call; there is no 4610 * need to report an ICMP error. 4611 * first_sire was not IRE_REFHOLDed. 4612 */ 4613 MULTIRT_DEBUG_UNTAG(first_mp); 4614 freemsg(first_mp); 4615 return; 4616 } 4617 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4618 RTA_DST); 4619 goto icmp_err_ret; 4620 } 4621 4622 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4623 4624 /* 4625 * Verify that the returned IRE does not have either the 4626 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4627 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4628 */ 4629 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4630 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4631 goto icmp_err_ret; 4632 4633 /* 4634 * Increment the ire_ob_pkt_count field for ire if it is an 4635 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4636 * increment the same for the parent IRE, sire, if it is some 4637 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4638 * and HOST_REDIRECT). 4639 */ 4640 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4641 UPDATE_OB_PKT_COUNT(ire); 4642 ire->ire_last_used_time = lbolt; 4643 } 4644 4645 if (sire != NULL) { 4646 mutex_enter(&sire->ire_lock); 4647 v6gw = sire->ire_gateway_addr_v6; 4648 mutex_exit(&sire->ire_lock); 4649 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4650 IRE_INTERFACE)) == 0); 4651 UPDATE_OB_PKT_COUNT(sire); 4652 sire->ire_last_used_time = lbolt; 4653 } else { 4654 v6gw = ipv6_all_zeros; 4655 } 4656 4657 /* 4658 * We have a route to reach the destination. 
4659 * 4660 * 1) If the interface is part of ill group, try to get a new 4661 * ill taking load spreading into account. 4662 * 4663 * 2) After selecting the ill, get a source address that might 4664 * create good inbound load spreading and that matches the 4665 * right scope. ipif_select_source_v6 does this for us. 4666 * 4667 * If the application specified the ill (ifindex), we still 4668 * load spread. Only if the packets needs to go out specifically 4669 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4670 * IPV6_BOUND_PIF we don't try to use a different ill for load 4671 * spreading. 4672 */ 4673 if (!do_attach_ill) { 4674 /* 4675 * If the interface belongs to an interface group, 4676 * make sure the next possible interface in the group 4677 * is used. This encourages load spreading among 4678 * peers in an interface group. However, in the case 4679 * of multirouting, load spreading is not used, as we 4680 * actually want to replicate outgoing packets through 4681 * particular interfaces. 4682 * 4683 * Note: While we pick a dst_ill we are really only 4684 * interested in the ill for load spreading. 4685 * The source ipif is determined by source address 4686 * selection below. 4687 */ 4688 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4689 dst_ill = ire->ire_ipif->ipif_ill; 4690 /* For uniformity do a refhold */ 4691 ill_refhold(dst_ill); 4692 } else { 4693 /* 4694 * If we are here trying to create an IRE_CACHE 4695 * for an offlink destination and have the 4696 * IRE_CACHE for the next hop and the latter is 4697 * using virtual IP source address selection i.e 4698 * it's ire->ire_ipif is pointing to a virtual 4699 * network interface (vni) then 4700 * ip_newroute_get_dst_ll() will return the vni 4701 * interface as the dst_ill. 
Since the vni is 4702 * virtual i.e not associated with any physical 4703 * interface, it cannot be the dst_ill, hence 4704 * in such a case call ip_newroute_get_dst_ll() 4705 * with the stq_ill instead of the ire_ipif ILL. 4706 * The function returns a refheld ill. 4707 */ 4708 if ((ire->ire_type == IRE_CACHE) && 4709 IS_VNI(ire->ire_ipif->ipif_ill)) 4710 dst_ill = ip_newroute_get_dst_ill_v6( 4711 ire->ire_stq->q_ptr); 4712 else 4713 dst_ill = ip_newroute_get_dst_ill_v6( 4714 ire->ire_ipif->ipif_ill); 4715 } 4716 if (dst_ill == NULL) { 4717 if (ip_debug > 2) { 4718 pr_addr_dbg("ip_newroute_v6 : no dst " 4719 "ill for dst %s\n", 4720 AF_INET6, v6dstp); 4721 } 4722 goto icmp_err_ret; 4723 } else if (dst_ill->ill_group == NULL && ill != NULL && 4724 dst_ill != ill) { 4725 /* 4726 * If "ill" is not part of any group, we should 4727 * have found a route matching "ill" as we 4728 * called ire_ftable_lookup_v6 with 4729 * MATCH_IRE_ILL_GROUP. 4730 * Rather than asserting when there is a 4731 * mismatch, we just drop the packet. 4732 */ 4733 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4734 "dst_ill %s ill %s\n", 4735 dst_ill->ill_name, 4736 ill->ill_name)); 4737 goto icmp_err_ret; 4738 } 4739 } else { 4740 dst_ill = ire->ire_ipif->ipif_ill; 4741 /* For uniformity do refhold */ 4742 ill_refhold(dst_ill); 4743 /* 4744 * We should have found a route matching ill as we 4745 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4746 * Rather than asserting, while there is a mismatch, 4747 * we just drop the packet. 4748 */ 4749 if (dst_ill != ill) { 4750 ip0dbg(("ip_newroute_v6: Packet dropped as " 4751 "IP6I_ATTACH_IF ill is %s, " 4752 "ire->ire_ipif->ipif_ill is %s\n", 4753 ill->ill_name, 4754 dst_ill->ill_name)); 4755 goto icmp_err_ret; 4756 } 4757 } 4758 /* 4759 * Pick a source address which matches the scope of the 4760 * destination address. 4761 * For RTF_SETSRC routes, the source address is imposed by the 4762 * parent ire (sire). 
4763 */ 4764 ASSERT(src_ipif == NULL); 4765 if (ire->ire_type == IRE_IF_RESOLVER && 4766 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4767 ip6_asp_can_lookup()) { 4768 /* 4769 * The ire cache entry we're adding is for the 4770 * gateway itself. The source address in this case 4771 * is relative to the gateway's address. 4772 */ 4773 ip6_asp_table_held = B_TRUE; 4774 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4775 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4776 if (src_ipif != NULL) 4777 ire_marks |= IRE_MARK_USESRC_CHECK; 4778 } else { 4779 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4780 /* 4781 * Check that the ipif matching the requested 4782 * source address still exists. 4783 */ 4784 src_ipif = ipif_lookup_addr_v6( 4785 &sire->ire_src_addr_v6, NULL, zoneid, 4786 NULL, NULL, NULL, NULL); 4787 } 4788 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4789 uint_t restrict_ill = RESTRICT_TO_NONE; 4790 4791 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4792 & IP6I_ATTACH_IF) 4793 restrict_ill = RESTRICT_TO_ILL; 4794 ip6_asp_table_held = B_TRUE; 4795 src_ipif = ipif_select_source_v6(dst_ill, 4796 v6dstp, restrict_ill, 4797 IPV6_PREFER_SRC_DEFAULT, zoneid); 4798 if (src_ipif != NULL) 4799 ire_marks |= IRE_MARK_USESRC_CHECK; 4800 } 4801 } 4802 4803 if (src_ipif == NULL) { 4804 if (ip_debug > 2) { 4805 /* ip1dbg */ 4806 pr_addr_dbg("ip_newroute_v6: no src for " 4807 "dst %s\n, ", AF_INET6, v6dstp); 4808 printf("ip_newroute_v6: interface name %s\n", 4809 dst_ill->ill_name); 4810 } 4811 goto icmp_err_ret; 4812 } 4813 4814 if (ip_debug > 3) { 4815 /* ip2dbg */ 4816 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4817 AF_INET6, &v6gw); 4818 } 4819 ip2dbg(("\tire type %s (%d)\n", 4820 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4821 4822 /* 4823 * At this point in ip_newroute_v6(), ire is either the 4824 * IRE_CACHE of the next-hop gateway for an off-subnet 4825 * destination or an IRE_INTERFACE type that should be used 4826 * to resolve an 
on-subnet destination or an on-subnet 4827 * next-hop gateway. 4828 * 4829 * In the IRE_CACHE case, we have the following : 4830 * 4831 * 1) src_ipif - used for getting a source address. 4832 * 4833 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4834 * means packets using this IRE_CACHE will go out on dst_ill. 4835 * 4836 * 3) The IRE sire will point to the prefix that is the longest 4837 * matching route for the destination. These prefix types 4838 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4839 * IRE_HOST_REDIRECT. 4840 * 4841 * The newly created IRE_CACHE entry for the off-subnet 4842 * destination is tied to both the prefix route and the 4843 * interface route used to resolve the next-hop gateway 4844 * via the ire_phandle and ire_ihandle fields, respectively. 4845 * 4846 * In the IRE_INTERFACE case, we have the following : 4847 * 4848 * 1) src_ipif - used for getting a source address. 4849 * 4850 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4851 * means packets using the IRE_CACHE that we will build 4852 * here will go out on dst_ill. 4853 * 4854 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4855 * to be created will only be tied to the IRE_INTERFACE that 4856 * was derived from the ire_ihandle field. 4857 * 4858 * If sire is non-NULL, it means the destination is off-link 4859 * and we will first create the IRE_CACHE for the gateway. 4860 * Next time through ip_newroute_v6, we will create the 4861 * IRE_CACHE for the final destination as described above. 4862 */ 4863 save_ire = ire; 4864 switch (ire->ire_type) { 4865 case IRE_CACHE: { 4866 ire_t *ipif_ire; 4867 4868 ASSERT(sire != NULL); 4869 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4870 mutex_enter(&ire->ire_lock); 4871 v6gw = ire->ire_gateway_addr_v6; 4872 mutex_exit(&ire->ire_lock); 4873 } 4874 /* 4875 * We need 3 ire's to create a new cache ire for an 4876 * off-link destination from the cache ire of the 4877 * gateway. 4878 * 4879 * 1. 
The prefix ire 'sire' 4880 * 2. The cache ire of the gateway 'ire' 4881 * 3. The interface ire 'ipif_ire' 4882 * 4883 * We have (1) and (2). We lookup (3) below. 4884 * 4885 * If there is no interface route to the gateway, 4886 * it is a race condition, where we found the cache 4887 * but the inteface route has been deleted. 4888 */ 4889 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4890 if (ipif_ire == NULL) { 4891 ip1dbg(("ip_newroute_v6:" 4892 "ire_ihandle_lookup_offlink_v6 failed\n")); 4893 goto icmp_err_ret; 4894 } 4895 /* 4896 * Assume DL_UNITDATA_REQ is same for all physical 4897 * interfaces in the ifgrp. If it isn't, this code will 4898 * have to be seriously rewhacked to allow the 4899 * fastpath probing (such that I cache the link 4900 * header in the IRE_CACHE) to work over ifgrps. 4901 * We have what we need to build an IRE_CACHE. 4902 */ 4903 /* 4904 * Note: the new ire inherits RTF_SETSRC 4905 * and RTF_MULTIRT to propagate these flags from prefix 4906 * to cache. 4907 */ 4908 4909 /* 4910 * Check cached gateway IRE for any security 4911 * attributes; if found, associate the gateway 4912 * credentials group to the destination IRE. 
4913 */ 4914 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4915 mutex_enter(&attrp->igsa_lock); 4916 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4917 GCGRP_REFHOLD(gcgrp); 4918 mutex_exit(&attrp->igsa_lock); 4919 } 4920 4921 ire = ire_create_v6( 4922 v6dstp, /* dest address */ 4923 &ipv6_all_ones, /* mask */ 4924 &src_ipif->ipif_v6src_addr, /* source address */ 4925 &v6gw, /* gateway address */ 4926 &save_ire->ire_max_frag, 4927 NULL, /* Fast Path header */ 4928 dst_ill->ill_rq, /* recv-from queue */ 4929 dst_ill->ill_wq, /* send-to queue */ 4930 IRE_CACHE, 4931 NULL, 4932 src_ipif, 4933 &sire->ire_mask_v6, /* Parent mask */ 4934 sire->ire_phandle, /* Parent handle */ 4935 ipif_ire->ire_ihandle, /* Interface handle */ 4936 sire->ire_flags & /* flags if any */ 4937 (RTF_SETSRC | RTF_MULTIRT), 4938 &(sire->ire_uinfo), 4939 NULL, 4940 gcgrp); 4941 4942 if (ire == NULL) { 4943 if (gcgrp != NULL) { 4944 GCGRP_REFRELE(gcgrp); 4945 gcgrp = NULL; 4946 } 4947 ire_refrele(save_ire); 4948 ire_refrele(ipif_ire); 4949 break; 4950 } 4951 4952 /* reference now held by IRE */ 4953 gcgrp = NULL; 4954 4955 ire->ire_marks |= ire_marks; 4956 4957 /* 4958 * Prevent sire and ipif_ire from getting deleted. The 4959 * newly created ire is tied to both of them via the 4960 * phandle and ihandle respectively. 4961 */ 4962 IRB_REFHOLD(sire->ire_bucket); 4963 /* Has it been removed already ? */ 4964 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4965 IRB_REFRELE(sire->ire_bucket); 4966 ire_refrele(ipif_ire); 4967 ire_refrele(save_ire); 4968 break; 4969 } 4970 4971 IRB_REFHOLD(ipif_ire->ire_bucket); 4972 /* Has it been removed already ? 
*/ 4973 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4974 IRB_REFRELE(ipif_ire->ire_bucket); 4975 IRB_REFRELE(sire->ire_bucket); 4976 ire_refrele(ipif_ire); 4977 ire_refrele(save_ire); 4978 break; 4979 } 4980 4981 xmit_mp = first_mp; 4982 if (ire->ire_flags & RTF_MULTIRT) { 4983 copy_mp = copymsg(first_mp); 4984 if (copy_mp != NULL) { 4985 xmit_mp = copy_mp; 4986 MULTIRT_DEBUG_TAG(first_mp); 4987 } 4988 } 4989 ire_add_then_send(q, ire, xmit_mp); 4990 if (ip6_asp_table_held) { 4991 ip6_asp_table_refrele(); 4992 ip6_asp_table_held = B_FALSE; 4993 } 4994 ire_refrele(save_ire); 4995 4996 /* Assert that sire is not deleted yet. */ 4997 ASSERT(sire->ire_ptpn != NULL); 4998 IRB_REFRELE(sire->ire_bucket); 4999 5000 /* Assert that ipif_ire is not deleted yet. */ 5001 ASSERT(ipif_ire->ire_ptpn != NULL); 5002 IRB_REFRELE(ipif_ire->ire_bucket); 5003 ire_refrele(ipif_ire); 5004 5005 if (copy_mp != NULL) { 5006 /* 5007 * Search for the next unresolved 5008 * multirt route. 5009 */ 5010 copy_mp = NULL; 5011 ipif_ire = NULL; 5012 ire = NULL; 5013 /* re-enter the loop */ 5014 multirt_resolve_next = B_TRUE; 5015 continue; 5016 } 5017 ire_refrele(sire); 5018 ill_refrele(dst_ill); 5019 ipif_refrele(src_ipif); 5020 return; 5021 } 5022 case IRE_IF_NORESOLVER: 5023 /* 5024 * We have what we need to build an IRE_CACHE. 5025 * 5026 * Create a new dlureq_mp with the IPv6 gateway 5027 * address in destination address in the DLPI hdr 5028 * if the physical length is exactly 16 bytes. 
5029 */ 5030 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5031 const in6_addr_t *addr; 5032 5033 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5034 addr = &v6gw; 5035 else 5036 addr = v6dstp; 5037 5038 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5039 dst_ill->ill_phys_addr_length, 5040 dst_ill->ill_sap, 5041 dst_ill->ill_sap_length); 5042 } else { 5043 dlureq_mp = ill_dlur_gen(NULL, 5044 dst_ill->ill_phys_addr_length, 5045 dst_ill->ill_sap, 5046 dst_ill->ill_sap_length); 5047 } 5048 if (dlureq_mp == NULL) 5049 break; 5050 /* 5051 * TSol note: We are creating the ire cache for the 5052 * destination 'dst'. If 'dst' is offlink, going 5053 * through the first hop 'gw', the security attributes 5054 * of 'dst' must be set to point to the gateway 5055 * credentials of gateway 'gw'. If 'dst' is onlink, it 5056 * is possible that 'dst' is a potential gateway that is 5057 * referenced by some route that has some security 5058 * attributes. Thus in the former case, we need to do a 5059 * gcgrp_lookup of 'gw' while in the latter case we 5060 * need to do gcgrp_lookup of 'dst' itself. 5061 */ 5062 ga.ga_af = AF_INET6; 5063 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5064 ga.ga_addr = v6gw; 5065 else 5066 ga.ga_addr = *v6dstp; 5067 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5068 5069 /* 5070 * Note: the new ire inherits sire flags RTF_SETSRC 5071 * and RTF_MULTIRT to propagate those rules from prefix 5072 * to cache. 5073 */ 5074 ire = ire_create_v6( 5075 v6dstp, /* dest address */ 5076 &ipv6_all_ones, /* mask */ 5077 &src_ipif->ipif_v6src_addr, /* source address */ 5078 &v6gw, /* gateway address */ 5079 &save_ire->ire_max_frag, 5080 NULL, /* Fast Path header */ 5081 dst_ill->ill_rq, /* recv-from queue */ 5082 dst_ill->ill_wq, /* send-to queue */ 5083 IRE_CACHE, 5084 dlureq_mp, 5085 src_ipif, 5086 &save_ire->ire_mask_v6, /* Parent mask */ 5087 (sire != NULL) ? /* Parent handle */ 5088 sire->ire_phandle : 0, 5089 save_ire->ire_ihandle, /* Interface handle */ 5090 (sire != NULL) ? 
/* flags if any */ 5091 sire->ire_flags & 5092 (RTF_SETSRC | RTF_MULTIRT) : 0, 5093 &(save_ire->ire_uinfo), 5094 NULL, 5095 gcgrp); 5096 5097 freeb(dlureq_mp); 5098 5099 if (ire == NULL) { 5100 if (gcgrp != NULL) { 5101 GCGRP_REFRELE(gcgrp); 5102 gcgrp = NULL; 5103 } 5104 ire_refrele(save_ire); 5105 break; 5106 } 5107 5108 /* reference now held by IRE */ 5109 gcgrp = NULL; 5110 5111 ire->ire_marks |= ire_marks; 5112 5113 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5114 dst = v6gw; 5115 else 5116 dst = *v6dstp; 5117 err = ndp_noresolver(dst_ill, &dst); 5118 if (err != 0) { 5119 ire_refrele(save_ire); 5120 break; 5121 } 5122 5123 /* Prevent save_ire from getting deleted */ 5124 IRB_REFHOLD(save_ire->ire_bucket); 5125 /* Has it been removed already ? */ 5126 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5127 IRB_REFRELE(save_ire->ire_bucket); 5128 ire_refrele(save_ire); 5129 break; 5130 } 5131 5132 xmit_mp = first_mp; 5133 /* 5134 * In case of MULTIRT, a copy of the current packet 5135 * to send is made to further re-enter the 5136 * loop and attempt another route resolution 5137 */ 5138 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5139 copy_mp = copymsg(first_mp); 5140 if (copy_mp != NULL) { 5141 xmit_mp = copy_mp; 5142 MULTIRT_DEBUG_TAG(first_mp); 5143 } 5144 } 5145 ire_add_then_send(q, ire, xmit_mp); 5146 if (ip6_asp_table_held) { 5147 ip6_asp_table_refrele(); 5148 ip6_asp_table_held = B_FALSE; 5149 } 5150 5151 /* Assert that it is not deleted yet. */ 5152 ASSERT(save_ire->ire_ptpn != NULL); 5153 IRB_REFRELE(save_ire->ire_bucket); 5154 ire_refrele(save_ire); 5155 5156 if (copy_mp != NULL) { 5157 /* 5158 * If we found a (no)resolver, we ignore any 5159 * trailing top priority IRE_CACHE in 5160 * further loops. This ensures that we do not 5161 * omit any (no)resolver despite the priority 5162 * in this call. 5163 * IRE_CACHE, if any, will be processed 5164 * by another thread entering ip_newroute(), 5165 * (on resolver response, for example). 
5166 * We use this to force multiple parallel 5167 * resolution as soon as a packet needs to be 5168 * sent. The result is, after one packet 5169 * emission all reachable routes are generally 5170 * resolved. 5171 * Otherwise, complete resolution of MULTIRT 5172 * routes would require several emissions as 5173 * side effect. 5174 */ 5175 multirt_flags &= ~MULTIRT_CACHEGW; 5176 5177 /* 5178 * Search for the next unresolved multirt 5179 * route. 5180 */ 5181 copy_mp = NULL; 5182 save_ire = NULL; 5183 ire = NULL; 5184 /* re-enter the loop */ 5185 multirt_resolve_next = B_TRUE; 5186 continue; 5187 } 5188 5189 /* Don't need sire anymore */ 5190 if (sire != NULL) 5191 ire_refrele(sire); 5192 ill_refrele(dst_ill); 5193 ipif_refrele(src_ipif); 5194 return; 5195 5196 case IRE_IF_RESOLVER: 5197 /* 5198 * We can't build an IRE_CACHE yet, but at least we 5199 * found a resolver that can help. 5200 */ 5201 dst = *v6dstp; 5202 5203 /* 5204 * To be at this point in the code with a non-zero gw 5205 * means that dst is reachable through a gateway that 5206 * we have never resolved. By changing dst to the gw 5207 * addr we resolve the gateway first. When 5208 * ire_add_then_send() tries to put the IP dg to dst, 5209 * it will reenter ip_newroute() at which time we will 5210 * find the IRE_CACHE for the gw and create another 5211 * IRE_CACHE above (for dst itself). 5212 */ 5213 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5214 save_dst = dst; 5215 dst = v6gw; 5216 v6gw = ipv6_all_zeros; 5217 } 5218 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5219 /* 5220 * Ask the external resolver to do its thing. 
5221 * Make an mblk chain in the following form: 5222 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5223 */ 5224 mblk_t *ire_mp; 5225 mblk_t *areq_mp; 5226 areq_t *areq; 5227 in6_addr_t *addrp; 5228 5229 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5230 if (ip6_asp_table_held) { 5231 ip6_asp_table_refrele(); 5232 ip6_asp_table_held = B_FALSE; 5233 } 5234 ire = ire_create_mp_v6( 5235 &dst, /* dest address */ 5236 &ipv6_all_ones, /* mask */ 5237 &src_ipif->ipif_v6src_addr, 5238 /* source address */ 5239 &v6gw, /* gateway address */ 5240 NULL, /* Fast Path header */ 5241 dst_ill->ill_rq, /* recv-from queue */ 5242 dst_ill->ill_wq, /* send-to queue */ 5243 IRE_CACHE, 5244 NULL, 5245 src_ipif, 5246 &save_ire->ire_mask_v6, 5247 /* Parent mask */ 5248 0, 5249 save_ire->ire_ihandle, 5250 /* Interface handle */ 5251 0, /* flags if any */ 5252 &(save_ire->ire_uinfo), 5253 NULL, 5254 NULL); 5255 5256 ire_refrele(save_ire); 5257 if (ire == NULL) { 5258 ip1dbg(("ip_newroute_v6:" 5259 "ire is NULL\n")); 5260 break; 5261 } 5262 5263 if ((sire != NULL) && 5264 (sire->ire_flags & RTF_MULTIRT)) { 5265 /* 5266 * processing a copy of the packet to 5267 * send for further resolution loops 5268 */ 5269 copy_mp = copymsg(first_mp); 5270 if (copy_mp != NULL) 5271 MULTIRT_DEBUG_TAG(copy_mp); 5272 } 5273 ire->ire_marks |= ire_marks; 5274 ire_mp = ire->ire_mp; 5275 /* 5276 * Now create or find an nce for this interface. 5277 * The hw addr will need to to be set from 5278 * the reply to the AR_ENTRY_QUERY that 5279 * we're about to send. This will be done in 5280 * ire_add_v6(). 5281 */ 5282 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5283 switch (err) { 5284 case 0: 5285 /* 5286 * New cache entry created. 5287 * Break, then ask the external 5288 * resolver. 5289 */ 5290 break; 5291 case EINPROGRESS: 5292 /* 5293 * Resolution in progress; 5294 * packet has been queued by 5295 * ndp_resolver(). 
5296 */ 5297 ire_delete(ire); 5298 ire = NULL; 5299 /* 5300 * Check if another multirt 5301 * route must be resolved. 5302 */ 5303 if (copy_mp != NULL) { 5304 /* 5305 * If we found a resolver, we 5306 * ignore any trailing top 5307 * priority IRE_CACHE in 5308 * further loops. The reason is 5309 * the same as for noresolver. 5310 */ 5311 multirt_flags &= 5312 ~MULTIRT_CACHEGW; 5313 /* 5314 * Search for the next 5315 * unresolved multirt route. 5316 */ 5317 first_mp = copy_mp; 5318 copy_mp = NULL; 5319 mp = first_mp; 5320 if (mp->b_datap->db_type == 5321 M_CTL) { 5322 mp = mp->b_cont; 5323 } 5324 ASSERT(sire != NULL); 5325 dst = save_dst; 5326 /* 5327 * re-enter the loop 5328 */ 5329 multirt_resolve_next = 5330 B_TRUE; 5331 continue; 5332 } 5333 5334 if (sire != NULL) 5335 ire_refrele(sire); 5336 ill_refrele(dst_ill); 5337 ipif_refrele(src_ipif); 5338 return; 5339 default: 5340 /* 5341 * Transient error; packet will be 5342 * freed. 5343 */ 5344 ire_delete(ire); 5345 ire = NULL; 5346 break; 5347 } 5348 if (err != 0) 5349 break; 5350 /* 5351 * Now set up the AR_ENTRY_QUERY and send it. 5352 */ 5353 areq_mp = ill_arp_alloc(dst_ill, 5354 (uchar_t *)&ipv6_areq_template, 5355 (caddr_t)&dst); 5356 if (areq_mp == NULL) { 5357 ip1dbg(("ip_newroute_v6:" 5358 "areq_mp is NULL\n")); 5359 freemsg(ire_mp); 5360 break; 5361 } 5362 areq = (areq_t *)areq_mp->b_rptr; 5363 addrp = (in6_addr_t *)((char *)areq + 5364 areq->areq_target_addr_offset); 5365 *addrp = dst; 5366 addrp = (in6_addr_t *)((char *)areq + 5367 areq->areq_sender_addr_offset); 5368 *addrp = src_ipif->ipif_v6src_addr; 5369 /* 5370 * link the chain, then send up to the resolver. 5371 */ 5372 linkb(areq_mp, ire_mp); 5373 linkb(areq_mp, mp); 5374 ip1dbg(("ip_newroute_v6:" 5375 "putnext to resolver\n")); 5376 putnext(dst_ill->ill_rq, areq_mp); 5377 /* 5378 * Check if another multirt route 5379 * must be resolved. 
5380 */ 5381 ire = NULL; 5382 if (copy_mp != NULL) { 5383 /* 5384 * If we find a resolver, we ignore any 5385 * trailing top priority IRE_CACHE in 5386 * further loops. The reason is the 5387 * same as for noresolver. 5388 */ 5389 multirt_flags &= ~MULTIRT_CACHEGW; 5390 /* 5391 * Search for the next unresolved 5392 * multirt route. 5393 */ 5394 first_mp = copy_mp; 5395 copy_mp = NULL; 5396 mp = first_mp; 5397 if (mp->b_datap->db_type == M_CTL) { 5398 mp = mp->b_cont; 5399 } 5400 ASSERT(sire != NULL); 5401 dst = save_dst; 5402 /* 5403 * re-enter the loop 5404 */ 5405 multirt_resolve_next = B_TRUE; 5406 continue; 5407 } 5408 5409 if (sire != NULL) 5410 ire_refrele(sire); 5411 ill_refrele(dst_ill); 5412 ipif_refrele(src_ipif); 5413 return; 5414 } 5415 /* 5416 * Non-external resolver case. 5417 * 5418 * TSol note: Please see the note above the 5419 * IRE_IF_NORESOLVER case. 5420 */ 5421 ga.ga_af = AF_INET6; 5422 ga.ga_addr = dst; 5423 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5424 5425 ire = ire_create_v6( 5426 &dst, /* dest address */ 5427 &ipv6_all_ones, /* mask */ 5428 &src_ipif->ipif_v6src_addr, /* source address */ 5429 &v6gw, /* gateway address */ 5430 &save_ire->ire_max_frag, 5431 NULL, /* Fast Path header */ 5432 dst_ill->ill_rq, /* recv-from queue */ 5433 dst_ill->ill_wq, /* send-to queue */ 5434 IRE_CACHE, 5435 NULL, 5436 src_ipif, 5437 &save_ire->ire_mask_v6, /* Parent mask */ 5438 0, 5439 save_ire->ire_ihandle, /* Interface handle */ 5440 0, /* flags if any */ 5441 &(save_ire->ire_uinfo), 5442 NULL, 5443 gcgrp); 5444 5445 if (ire == NULL) { 5446 if (gcgrp != NULL) { 5447 GCGRP_REFRELE(gcgrp); 5448 gcgrp = NULL; 5449 } 5450 ire_refrele(save_ire); 5451 break; 5452 } 5453 5454 /* reference now held by IRE */ 5455 gcgrp = NULL; 5456 5457 if ((sire != NULL) && 5458 (sire->ire_flags & RTF_MULTIRT)) { 5459 copy_mp = copymsg(first_mp); 5460 if (copy_mp != NULL) 5461 MULTIRT_DEBUG_TAG(copy_mp); 5462 } 5463 5464 ire->ire_marks |= ire_marks; 5465 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5466 switch (err) { 5467 case 0: 5468 /* Prevent save_ire from getting deleted */ 5469 IRB_REFHOLD(save_ire->ire_bucket); 5470 /* Has it been removed already ? */ 5471 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5472 IRB_REFRELE(save_ire->ire_bucket); 5473 ire_refrele(save_ire); 5474 break; 5475 } 5476 5477 /* 5478 * We have a resolved cache entry, 5479 * add in the IRE. 5480 */ 5481 ire_add_then_send(q, ire, first_mp); 5482 if (ip6_asp_table_held) { 5483 ip6_asp_table_refrele(); 5484 ip6_asp_table_held = B_FALSE; 5485 } 5486 5487 /* Assert that it is not deleted yet. */ 5488 ASSERT(save_ire->ire_ptpn != NULL); 5489 IRB_REFRELE(save_ire->ire_bucket); 5490 ire_refrele(save_ire); 5491 /* 5492 * Check if another multirt route 5493 * must be resolved. 5494 */ 5495 ire = NULL; 5496 if (copy_mp != NULL) { 5497 /* 5498 * If we find a resolver, we ignore any 5499 * trailing top priority IRE_CACHE in 5500 * further loops. The reason is the 5501 * same as for noresolver. 5502 */ 5503 multirt_flags &= ~MULTIRT_CACHEGW; 5504 /* 5505 * Search for the next unresolved 5506 * multirt route. 5507 */ 5508 first_mp = copy_mp; 5509 copy_mp = NULL; 5510 mp = first_mp; 5511 if (mp->b_datap->db_type == M_CTL) { 5512 mp = mp->b_cont; 5513 } 5514 ASSERT(sire != NULL); 5515 dst = save_dst; 5516 /* 5517 * re-enter the loop 5518 */ 5519 multirt_resolve_next = B_TRUE; 5520 continue; 5521 } 5522 5523 if (sire != NULL) 5524 ire_refrele(sire); 5525 ill_refrele(dst_ill); 5526 ipif_refrele(src_ipif); 5527 return; 5528 5529 case EINPROGRESS: 5530 /* 5531 * mp was consumed - presumably queued. 5532 * No need for ire, presumably resolution is 5533 * in progress, and ire will be added when the 5534 * address is resolved. 
5535 */ 5536 if (ip6_asp_table_held) { 5537 ip6_asp_table_refrele(); 5538 ip6_asp_table_held = B_FALSE; 5539 } 5540 ASSERT(ire->ire_nce == NULL); 5541 ire_delete(ire); 5542 ire_refrele(save_ire); 5543 /* 5544 * Check if another multirt route 5545 * must be resolved. 5546 */ 5547 ire = NULL; 5548 if (copy_mp != NULL) { 5549 /* 5550 * If we find a resolver, we ignore any 5551 * trailing top priority IRE_CACHE in 5552 * further loops. The reason is the 5553 * same as for noresolver. 5554 */ 5555 multirt_flags &= ~MULTIRT_CACHEGW; 5556 /* 5557 * Search for the next unresolved 5558 * multirt route. 5559 */ 5560 first_mp = copy_mp; 5561 copy_mp = NULL; 5562 mp = first_mp; 5563 if (mp->b_datap->db_type == M_CTL) { 5564 mp = mp->b_cont; 5565 } 5566 ASSERT(sire != NULL); 5567 dst = save_dst; 5568 /* 5569 * re-enter the loop 5570 */ 5571 multirt_resolve_next = B_TRUE; 5572 continue; 5573 } 5574 if (sire != NULL) 5575 ire_refrele(sire); 5576 ill_refrele(dst_ill); 5577 ipif_refrele(src_ipif); 5578 return; 5579 default: 5580 /* Some transient error */ 5581 ASSERT(ire->ire_nce == NULL); 5582 ire_refrele(save_ire); 5583 break; 5584 } 5585 break; 5586 default: 5587 break; 5588 } 5589 if (ip6_asp_table_held) { 5590 ip6_asp_table_refrele(); 5591 ip6_asp_table_held = B_FALSE; 5592 } 5593 } while (multirt_resolve_next); 5594 5595 err_ret: 5596 ip1dbg(("ip_newroute_v6: dropped\n")); 5597 if (src_ipif != NULL) 5598 ipif_refrele(src_ipif); 5599 if (dst_ill != NULL) { 5600 need_rele = B_TRUE; 5601 ill = dst_ill; 5602 } 5603 if (ill != NULL) { 5604 if (mp->b_prev != NULL) { 5605 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5606 } else { 5607 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5608 } 5609 5610 if (need_rele) 5611 ill_refrele(ill); 5612 } else { 5613 if (mp->b_prev != NULL) { 5614 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5615 } else { 5616 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5617 } 5618 } 5619 /* Did this packet originate externally? 
*/ 5620 if (mp->b_prev) { 5621 mp->b_next = NULL; 5622 mp->b_prev = NULL; 5623 } 5624 if (copy_mp != NULL) { 5625 MULTIRT_DEBUG_UNTAG(copy_mp); 5626 freemsg(copy_mp); 5627 } 5628 MULTIRT_DEBUG_UNTAG(first_mp); 5629 freemsg(first_mp); 5630 if (ire != NULL) 5631 ire_refrele(ire); 5632 if (sire != NULL) 5633 ire_refrele(sire); 5634 return; 5635 5636 icmp_err_ret: 5637 if (ip6_asp_table_held) 5638 ip6_asp_table_refrele(); 5639 if (src_ipif != NULL) 5640 ipif_refrele(src_ipif); 5641 if (dst_ill != NULL) { 5642 need_rele = B_TRUE; 5643 ill = dst_ill; 5644 } 5645 ip1dbg(("ip_newroute_v6: no route\n")); 5646 if (sire != NULL) 5647 ire_refrele(sire); 5648 /* 5649 * We need to set sire to NULL to avoid double freeing if we 5650 * ever goto err_ret from below. 5651 */ 5652 sire = NULL; 5653 ip6h = (ip6_t *)mp->b_rptr; 5654 /* Skip ip6i_t header if present */ 5655 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5656 /* Make sure the IPv6 header is present */ 5657 if ((mp->b_wptr - (uchar_t *)ip6h) < 5658 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5659 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5660 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5661 goto err_ret; 5662 } 5663 } 5664 mp->b_rptr += sizeof (ip6i_t); 5665 ip6h = (ip6_t *)mp->b_rptr; 5666 } 5667 /* Did this packet originate externally? 
*/ 5668 if (mp->b_prev) { 5669 if (ill != NULL) { 5670 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5671 } else { 5672 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5673 } 5674 mp->b_next = NULL; 5675 mp->b_prev = NULL; 5676 q = WR(q); 5677 } else { 5678 if (ill != NULL) { 5679 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5680 } else { 5681 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5682 } 5683 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5684 /* Failed */ 5685 if (copy_mp != NULL) { 5686 MULTIRT_DEBUG_UNTAG(copy_mp); 5687 freemsg(copy_mp); 5688 } 5689 MULTIRT_DEBUG_UNTAG(first_mp); 5690 freemsg(first_mp); 5691 if (ire != NULL) 5692 ire_refrele(ire); 5693 if (need_rele) 5694 ill_refrele(ill); 5695 return; 5696 } 5697 } 5698 5699 if (need_rele) 5700 ill_refrele(ill); 5701 5702 /* 5703 * At this point we will have ire only if RTF_BLACKHOLE 5704 * or RTF_REJECT flags are set on the IRE. It will not 5705 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5706 */ 5707 if (ire != NULL) { 5708 if (ire->ire_flags & RTF_BLACKHOLE) { 5709 ire_refrele(ire); 5710 if (copy_mp != NULL) { 5711 MULTIRT_DEBUG_UNTAG(copy_mp); 5712 freemsg(copy_mp); 5713 } 5714 MULTIRT_DEBUG_UNTAG(first_mp); 5715 freemsg(first_mp); 5716 return; 5717 } 5718 ire_refrele(ire); 5719 } 5720 if (ip_debug > 3) { 5721 /* ip2dbg */ 5722 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5723 AF_INET6, v6dstp); 5724 } 5725 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5726 B_FALSE, B_FALSE, zoneid); 5727 } 5728 5729 /* 5730 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5731 * we need to send out a packet to a destination address for which we do not 5732 * have specific routing information. It is only used for multicast packets. 5733 * 5734 * If unspec_src we allow creating an IRE with source address zero. 5735 * ire_send_v6() will delete it after the packet is sent. 
5736 */ 5737 void 5738 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5739 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5740 { 5741 ire_t *ire = NULL; 5742 ipif_t *src_ipif = NULL; 5743 int err = 0; 5744 ill_t *dst_ill = NULL; 5745 ire_t *save_ire; 5746 ushort_t ire_marks = 0; 5747 ipsec_out_t *io; 5748 ill_t *attach_ill = NULL; 5749 ill_t *ill; 5750 ip6_t *ip6h; 5751 mblk_t *first_mp; 5752 boolean_t ip6i_present; 5753 ire_t *fire = NULL; 5754 mblk_t *copy_mp = NULL; 5755 boolean_t multirt_resolve_next; 5756 in6_addr_t *v6dstp = &v6dst; 5757 boolean_t ipif_held = B_FALSE; 5758 boolean_t ill_held = B_FALSE; 5759 boolean_t ip6_asp_table_held = B_FALSE; 5760 5761 /* 5762 * This loop is run only once in most cases. 5763 * We loop to resolve further routes only when the destination 5764 * can be reached through multiple RTF_MULTIRT-flagged ires. 5765 */ 5766 do { 5767 multirt_resolve_next = B_FALSE; 5768 if (dst_ill != NULL) { 5769 ill_refrele(dst_ill); 5770 dst_ill = NULL; 5771 } 5772 5773 if (src_ipif != NULL) { 5774 ipif_refrele(src_ipif); 5775 src_ipif = NULL; 5776 } 5777 ASSERT(ipif != NULL); 5778 ill = ipif->ipif_ill; 5779 5780 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5781 if (ip_debug > 2) { 5782 /* ip1dbg */ 5783 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5784 AF_INET6, v6dstp); 5785 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5786 ill->ill_name, ipif->ipif_isv6); 5787 } 5788 5789 first_mp = mp; 5790 if (mp->b_datap->db_type == M_CTL) { 5791 mp = mp->b_cont; 5792 io = (ipsec_out_t *)first_mp->b_rptr; 5793 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5794 } else { 5795 io = NULL; 5796 } 5797 5798 /* 5799 * If the interface is a pt-pt interface we look for an 5800 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5801 * local_address and the pt-pt destination address. 5802 * Otherwise we just match the local address. 
5803 */ 5804 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5805 goto err_ret; 5806 } 5807 /* 5808 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5809 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5810 * as it could be NULL. 5811 * 5812 * This information can appear either in an ip6i_t or an 5813 * IPSEC_OUT message. 5814 */ 5815 ip6h = (ip6_t *)mp->b_rptr; 5816 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5817 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5818 if (!ip6i_present || 5819 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5820 attach_ill = ip_grab_attach_ill(ill, first_mp, 5821 (ip6i_present ? 5822 ((ip6i_t *)ip6h)->ip6i_ifindex : 5823 io->ipsec_out_ill_index), B_TRUE); 5824 /* Failure case frees things for us. */ 5825 if (attach_ill == NULL) 5826 return; 5827 5828 /* 5829 * Check if we need an ire that will not be 5830 * looked up by anybody else i.e. HIDDEN. 5831 */ 5832 if (ill_is_probeonly(attach_ill)) 5833 ire_marks = IRE_MARK_HIDDEN; 5834 } 5835 } 5836 5837 /* 5838 * We check if an IRE_OFFSUBNET for the addr that goes through 5839 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5840 * RTF_MULTIRT flags must be honored. 5841 */ 5842 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5843 ip2dbg(("ip_newroute_ipif_v6: " 5844 "ipif_lookup_multi_ire_v6(" 5845 "ipif %p, dst %08x) = fire %p\n", 5846 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5847 (void *)fire)); 5848 5849 /* 5850 * If the application specified the ill (ifindex), we still 5851 * load spread. Only if the packets needs to go out specifically 5852 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5853 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5854 * multirouting, then we don't try to use a different ill for 5855 * load spreading. 
5856 */ 5857 if (attach_ill == NULL) { 5858 /* 5859 * If the interface belongs to an interface group, 5860 * make sure the next possible interface in the group 5861 * is used. This encourages load spreading among peers 5862 * in an interface group. 5863 * 5864 * Note: While we pick a dst_ill we are really only 5865 * interested in the ill for load spreading. The source 5866 * ipif is determined by source address selection below. 5867 */ 5868 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5869 dst_ill = ipif->ipif_ill; 5870 /* For uniformity do a refhold */ 5871 ill_refhold(dst_ill); 5872 } else { 5873 /* refheld by ip_newroute_get_dst_ill_v6 */ 5874 dst_ill = 5875 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5876 } 5877 if (dst_ill == NULL) { 5878 if (ip_debug > 2) { 5879 pr_addr_dbg("ip_newroute_ipif_v6: " 5880 "no dst ill for dst %s\n", 5881 AF_INET6, v6dstp); 5882 } 5883 goto err_ret; 5884 } 5885 } else { 5886 dst_ill = ipif->ipif_ill; 5887 /* 5888 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5889 * and IPV6_BOUND_PIF case. 5890 */ 5891 ASSERT(dst_ill == attach_ill); 5892 /* attach_ill is already refheld */ 5893 } 5894 /* 5895 * Pick a source address which matches the scope of the 5896 * destination address. 5897 * For RTF_SETSRC routes, the source address is imposed by the 5898 * parent ire (fire). 5899 */ 5900 ASSERT(src_ipif == NULL); 5901 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5902 /* 5903 * Check that the ipif matching the requested source 5904 * address still exists. 
5905 */ 5906 src_ipif = 5907 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5908 NULL, zoneid, NULL, NULL, NULL, NULL); 5909 } 5910 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5911 ip6_asp_table_held = B_TRUE; 5912 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5913 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5914 } 5915 5916 if (src_ipif == NULL) { 5917 if (!unspec_src) { 5918 if (ip_debug > 2) { 5919 /* ip1dbg */ 5920 pr_addr_dbg("ip_newroute_ipif_v6: " 5921 "no src for dst %s\n,", 5922 AF_INET6, v6dstp); 5923 printf(" through interface %s\n", 5924 dst_ill->ill_name); 5925 } 5926 goto err_ret; 5927 } 5928 src_ipif = ipif; 5929 ipif_refhold(src_ipif); 5930 } 5931 ire = ipif_to_ire_v6(ipif); 5932 if (ire == NULL) { 5933 if (ip_debug > 2) { 5934 /* ip1dbg */ 5935 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5936 AF_INET6, &ipif->ipif_v6lcl_addr); 5937 printf("ip_newroute_ipif_v6: " 5938 "if %s\n", dst_ill->ill_name); 5939 } 5940 goto err_ret; 5941 } 5942 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5943 goto err_ret; 5944 5945 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5946 5947 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5948 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5949 if (ip_debug > 2) { 5950 /* ip1dbg */ 5951 pr_addr_dbg(" address %s\n", 5952 AF_INET6, &ire->ire_src_addr_v6); 5953 } 5954 save_ire = ire; 5955 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5956 (void *)ire, (void *)ipif)); 5957 5958 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5959 /* 5960 * an IRE_OFFSUBET was looked up 5961 * on that interface. 5962 * this ire has RTF_MULTIRT flag, 5963 * so the resolution loop 5964 * will be re-entered to resolve 5965 * additional routes on other 5966 * interfaces. For that purpose, 5967 * a copy of the packet is 5968 * made at this point. 
5969 */ 5970 fire->ire_last_used_time = lbolt; 5971 copy_mp = copymsg(first_mp); 5972 if (copy_mp) { 5973 MULTIRT_DEBUG_TAG(copy_mp); 5974 } 5975 } 5976 5977 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5978 switch (ire->ire_type) { 5979 case IRE_IF_NORESOLVER: { 5980 /* We have what we need to build an IRE_CACHE. */ 5981 mblk_t *dlureq_mp; 5982 5983 /* 5984 * Create a new dlureq_mp with the 5985 * IPv6 gateway address in destination address in the 5986 * DLPI hdr if the physical length is exactly 16 bytes. 5987 */ 5988 ASSERT(dst_ill->ill_isv6); 5989 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5990 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5991 dst_ill->ill_phys_addr_length, 5992 dst_ill->ill_sap, 5993 dst_ill->ill_sap_length); 5994 } else { 5995 dlureq_mp = ill_dlur_gen(NULL, 5996 dst_ill->ill_phys_addr_length, 5997 dst_ill->ill_sap, 5998 dst_ill->ill_sap_length); 5999 } 6000 6001 if (dlureq_mp == NULL) 6002 break; 6003 /* 6004 * The newly created ire will inherit the flags of the 6005 * parent ire, if any. 6006 */ 6007 ire = ire_create_v6( 6008 v6dstp, /* dest address */ 6009 &ipv6_all_ones, /* mask */ 6010 &src_ipif->ipif_v6src_addr, /* source address */ 6011 NULL, /* gateway address */ 6012 &save_ire->ire_max_frag, 6013 NULL, /* Fast Path header */ 6014 dst_ill->ill_rq, /* recv-from queue */ 6015 dst_ill->ill_wq, /* send-to queue */ 6016 IRE_CACHE, 6017 dlureq_mp, 6018 src_ipif, 6019 NULL, 6020 (fire != NULL) ? /* Parent handle */ 6021 fire->ire_phandle : 0, 6022 save_ire->ire_ihandle, /* Interface handle */ 6023 (fire != NULL) ? 
6024 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6025 0, 6026 &ire_uinfo_null, 6027 NULL, 6028 NULL); 6029 6030 freeb(dlureq_mp); 6031 6032 if (ire == NULL) { 6033 ire_refrele(save_ire); 6034 break; 6035 } 6036 6037 ire->ire_marks |= ire_marks; 6038 6039 err = ndp_noresolver(dst_ill, v6dstp); 6040 if (err != 0) { 6041 ire_refrele(save_ire); 6042 break; 6043 } 6044 6045 /* Prevent save_ire from getting deleted */ 6046 IRB_REFHOLD(save_ire->ire_bucket); 6047 /* Has it been removed already ? */ 6048 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6049 IRB_REFRELE(save_ire->ire_bucket); 6050 ire_refrele(save_ire); 6051 break; 6052 } 6053 6054 ire_add_then_send(q, ire, first_mp); 6055 if (ip6_asp_table_held) { 6056 ip6_asp_table_refrele(); 6057 ip6_asp_table_held = B_FALSE; 6058 } 6059 6060 /* Assert that it is not deleted yet. */ 6061 ASSERT(save_ire->ire_ptpn != NULL); 6062 IRB_REFRELE(save_ire->ire_bucket); 6063 ire_refrele(save_ire); 6064 if (fire != NULL) { 6065 ire_refrele(fire); 6066 fire = NULL; 6067 } 6068 6069 /* 6070 * The resolution loop is re-entered if we 6071 * actually are in a multirouting case. 6072 */ 6073 if (copy_mp != NULL) { 6074 boolean_t need_resolve = 6075 ire_multirt_need_resolve_v6(v6dstp, 6076 MBLK_GETLABEL(copy_mp)); 6077 if (!need_resolve) { 6078 MULTIRT_DEBUG_UNTAG(copy_mp); 6079 freemsg(copy_mp); 6080 copy_mp = NULL; 6081 } else { 6082 /* 6083 * ipif_lookup_group_v6() calls 6084 * ire_lookup_multi_v6() that uses 6085 * ire_ftable_lookup_v6() to find 6086 * an IRE_INTERFACE for the group. 6087 * In the multirt case, 6088 * ire_lookup_multi_v6() then invokes 6089 * ire_multirt_lookup_v6() to find 6090 * the next resolvable ire. 6091 * As a result, we obtain a new 6092 * interface, derived from the 6093 * next ire. 
6094 */ 6095 if (ipif_held) { 6096 ipif_refrele(ipif); 6097 ipif_held = B_FALSE; 6098 } 6099 ipif = ipif_lookup_group_v6(v6dstp, 6100 zoneid); 6101 ip2dbg(("ip_newroute_ipif: " 6102 "multirt dst %08x, ipif %p\n", 6103 ntohl(V4_PART_OF_V6((*v6dstp))), 6104 (void *)ipif)); 6105 if (ipif != NULL) { 6106 ipif_held = B_TRUE; 6107 mp = copy_mp; 6108 copy_mp = NULL; 6109 multirt_resolve_next = 6110 B_TRUE; 6111 continue; 6112 } else { 6113 freemsg(copy_mp); 6114 } 6115 } 6116 } 6117 ill_refrele(dst_ill); 6118 if (ipif_held) { 6119 ipif_refrele(ipif); 6120 ipif_held = B_FALSE; 6121 } 6122 if (src_ipif != NULL) 6123 ipif_refrele(src_ipif); 6124 return; 6125 } 6126 case IRE_IF_RESOLVER: { 6127 6128 ASSERT(dst_ill->ill_isv6); 6129 6130 /* 6131 * We obtain a partial IRE_CACHE which we will pass 6132 * along with the resolver query. When the response 6133 * comes back it will be there ready for us to add. 6134 */ 6135 /* 6136 * the newly created ire will inherit the flags of the 6137 * parent ire, if any. 6138 */ 6139 ire = ire_create_v6( 6140 v6dstp, /* dest address */ 6141 &ipv6_all_ones, /* mask */ 6142 &src_ipif->ipif_v6src_addr, /* source address */ 6143 NULL, /* gateway address */ 6144 &save_ire->ire_max_frag, 6145 NULL, /* Fast Path header */ 6146 dst_ill->ill_rq, /* recv-from queue */ 6147 dst_ill->ill_wq, /* send-to queue */ 6148 IRE_CACHE, 6149 NULL, 6150 src_ipif, 6151 NULL, 6152 (fire != NULL) ? /* Parent handle */ 6153 fire->ire_phandle : 0, 6154 save_ire->ire_ihandle, /* Interface handle */ 6155 (fire != NULL) ? 
6156 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6157 0, 6158 &ire_uinfo_null, 6159 NULL, 6160 NULL); 6161 6162 if (ire == NULL) { 6163 ire_refrele(save_ire); 6164 break; 6165 } 6166 6167 ire->ire_marks |= ire_marks; 6168 6169 /* Resolve and add ire to the ctable */ 6170 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6171 switch (err) { 6172 case 0: 6173 /* Prevent save_ire from getting deleted */ 6174 IRB_REFHOLD(save_ire->ire_bucket); 6175 /* Has it been removed already ? */ 6176 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6177 IRB_REFRELE(save_ire->ire_bucket); 6178 ire_refrele(save_ire); 6179 break; 6180 } 6181 /* 6182 * We have a resolved cache entry, 6183 * add in the IRE. 6184 */ 6185 ire_add_then_send(q, ire, first_mp); 6186 if (ip6_asp_table_held) { 6187 ip6_asp_table_refrele(); 6188 ip6_asp_table_held = B_FALSE; 6189 } 6190 6191 /* Assert that it is not deleted yet. */ 6192 ASSERT(save_ire->ire_ptpn != NULL); 6193 IRB_REFRELE(save_ire->ire_bucket); 6194 ire_refrele(save_ire); 6195 if (fire != NULL) { 6196 ire_refrele(fire); 6197 fire = NULL; 6198 } 6199 6200 /* 6201 * The resolution loop is re-entered if we 6202 * actually are in a multirouting case. 6203 */ 6204 if (copy_mp != NULL) { 6205 boolean_t need_resolve = 6206 ire_multirt_need_resolve_v6(v6dstp, 6207 MBLK_GETLABEL(copy_mp)); 6208 if (!need_resolve) { 6209 MULTIRT_DEBUG_UNTAG(copy_mp); 6210 freemsg(copy_mp); 6211 copy_mp = NULL; 6212 } else { 6213 /* 6214 * ipif_lookup_group_v6() calls 6215 * ire_lookup_multi_v6() that 6216 * uses ire_ftable_lookup_v6() 6217 * to find an IRE_INTERFACE for 6218 * the group. In the multirt 6219 * case, ire_lookup_multi_v6() 6220 * then invokes 6221 * ire_multirt_lookup_v6() to 6222 * find the next resolvable ire. 6223 * As a result, we obtain a new 6224 * interface, derived from the 6225 * next ire. 
6226 */ 6227 if (ipif_held) { 6228 ipif_refrele(ipif); 6229 ipif_held = B_FALSE; 6230 } 6231 ipif = ipif_lookup_group_v6( 6232 v6dstp, zoneid); 6233 ip2dbg(("ip_newroute_ipif: " 6234 "multirt dst %08x, " 6235 "ipif %p\n", 6236 ntohl(V4_PART_OF_V6( 6237 (*v6dstp))), 6238 (void *)ipif)); 6239 if (ipif != NULL) { 6240 ipif_held = B_TRUE; 6241 mp = copy_mp; 6242 copy_mp = NULL; 6243 multirt_resolve_next = 6244 B_TRUE; 6245 continue; 6246 } else { 6247 freemsg(copy_mp); 6248 } 6249 } 6250 } 6251 ill_refrele(dst_ill); 6252 if (ipif_held) { 6253 ipif_refrele(ipif); 6254 ipif_held = B_FALSE; 6255 } 6256 if (src_ipif != NULL) 6257 ipif_refrele(src_ipif); 6258 return; 6259 6260 case EINPROGRESS: 6261 /* 6262 * mp was consumed - presumably queued. 6263 * No need for ire, presumably resolution is 6264 * in progress, and ire will be added when the 6265 * address is resolved. 6266 */ 6267 if (ip6_asp_table_held) { 6268 ip6_asp_table_refrele(); 6269 ip6_asp_table_held = B_FALSE; 6270 } 6271 ire_delete(ire); 6272 ire_refrele(save_ire); 6273 if (fire != NULL) { 6274 ire_refrele(fire); 6275 fire = NULL; 6276 } 6277 6278 /* 6279 * The resolution loop is re-entered if we 6280 * actually are in a multirouting case. 6281 */ 6282 if (copy_mp != NULL) { 6283 boolean_t need_resolve = 6284 ire_multirt_need_resolve_v6(v6dstp, 6285 MBLK_GETLABEL(copy_mp)); 6286 if (!need_resolve) { 6287 MULTIRT_DEBUG_UNTAG(copy_mp); 6288 freemsg(copy_mp); 6289 copy_mp = NULL; 6290 } else { 6291 /* 6292 * ipif_lookup_group_v6() calls 6293 * ire_lookup_multi_v6() that 6294 * uses ire_ftable_lookup_v6() 6295 * to find an IRE_INTERFACE for 6296 * the group. In the multirt 6297 * case, ire_lookup_multi_v6() 6298 * then invokes 6299 * ire_multirt_lookup_v6() to 6300 * find the next resolvable ire. 6301 * As a result, we obtain a new 6302 * interface, derived from the 6303 * next ire. 
6304 */ 6305 if (ipif_held) { 6306 ipif_refrele(ipif); 6307 ipif_held = B_FALSE; 6308 } 6309 ipif = ipif_lookup_group_v6( 6310 v6dstp, zoneid); 6311 ip2dbg(("ip_newroute_ipif: " 6312 "multirt dst %08x, " 6313 "ipif %p\n", 6314 ntohl(V4_PART_OF_V6( 6315 (*v6dstp))), 6316 (void *)ipif)); 6317 if (ipif != NULL) { 6318 ipif_held = B_TRUE; 6319 mp = copy_mp; 6320 copy_mp = NULL; 6321 multirt_resolve_next = 6322 B_TRUE; 6323 continue; 6324 } else { 6325 freemsg(copy_mp); 6326 } 6327 } 6328 } 6329 ill_refrele(dst_ill); 6330 if (ipif_held) { 6331 ipif_refrele(ipif); 6332 ipif_held = B_FALSE; 6333 } 6334 if (src_ipif != NULL) 6335 ipif_refrele(src_ipif); 6336 return; 6337 default: 6338 /* Some transient error */ 6339 ire_refrele(save_ire); 6340 break; 6341 } 6342 break; 6343 } 6344 default: 6345 break; 6346 } 6347 if (ip6_asp_table_held) { 6348 ip6_asp_table_refrele(); 6349 ip6_asp_table_held = B_FALSE; 6350 } 6351 } while (multirt_resolve_next); 6352 6353 err_ret: 6354 if (ip6_asp_table_held) 6355 ip6_asp_table_refrele(); 6356 if (ire != NULL) 6357 ire_refrele(ire); 6358 if (fire != NULL) 6359 ire_refrele(fire); 6360 if (ipif != NULL && ipif_held) 6361 ipif_refrele(ipif); 6362 if (src_ipif != NULL) 6363 ipif_refrele(src_ipif); 6364 /* Multicast - no point in trying to generate ICMP error */ 6365 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6366 if (dst_ill != NULL) { 6367 ill = dst_ill; 6368 ill_held = B_TRUE; 6369 } 6370 if (mp->b_prev || mp->b_next) { 6371 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6372 } else { 6373 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6374 } 6375 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6376 mp->b_next = NULL; 6377 mp->b_prev = NULL; 6378 freemsg(first_mp); 6379 if (ill_held) 6380 ill_refrele(ill); 6381 } 6382 6383 /* 6384 * Parse and process any hop-by-hop or destination options. 6385 * 6386 * Assumes that q is an ill read queue so that ICMP errors for link-local 6387 * destinations are sent out the correct interface. 
6388 * 6389 * Returns -1 if there was an error and mp has been consumed. 6390 * Returns 0 if no special action is needed. 6391 * Returns 1 if the packet contained a router alert option for this node 6392 * which is verified to be "interesting/known" for our implementation. 6393 * 6394 * XXX Note: In future as more hbh or dest options are defined, 6395 * it may be better to have different routines for hbh and dest 6396 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6397 * may have same value in different namespaces. Or is it same namespace ?? 6398 * Current code checks for each opt_type (other than pads) if it is in 6399 * the expected nexthdr (hbh or dest) 6400 */ 6401 static int 6402 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6403 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6404 { 6405 uint8_t opt_type; 6406 uint_t optused; 6407 int ret = 0; 6408 mblk_t *first_mp; 6409 const char *errtype; 6410 zoneid_t zoneid; 6411 ill_t *ill = q->q_ptr; 6412 6413 first_mp = mp; 6414 if (mp->b_datap->db_type == M_CTL) { 6415 mp = mp->b_cont; 6416 } 6417 6418 while (optlen != 0) { 6419 opt_type = *optptr; 6420 if (opt_type == IP6OPT_PAD1) { 6421 optused = 1; 6422 } else { 6423 if (optlen < 2) 6424 goto bad_opt; 6425 errtype = "malformed"; 6426 if (opt_type == ip6opt_ls) { 6427 optused = 2 + optptr[1]; 6428 if (optused > optlen) 6429 goto bad_opt; 6430 } else switch (opt_type) { 6431 case IP6OPT_PADN: 6432 /* 6433 * Note:We don't verify that (N-2) pad octets 6434 * are zero as required by spec. Adhere to 6435 * "be liberal in what you accept..." part of 6436 * implementation philosophy (RFC791,RFC1122) 6437 */ 6438 optused = 2 + optptr[1]; 6439 if (optused > optlen) 6440 goto bad_opt; 6441 break; 6442 6443 case IP6OPT_JUMBO: 6444 if (hdr_type != IPPROTO_HOPOPTS) 6445 goto opt_error; 6446 goto opt_error; /* XXX Not implemented! 
*/ 6447 6448 case IP6OPT_ROUTER_ALERT: { 6449 struct ip6_opt_router *or; 6450 6451 if (hdr_type != IPPROTO_HOPOPTS) 6452 goto opt_error; 6453 optused = 2 + optptr[1]; 6454 if (optused > optlen) 6455 goto bad_opt; 6456 or = (struct ip6_opt_router *)optptr; 6457 /* Check total length and alignment */ 6458 if (optused != sizeof (*or) || 6459 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6460 goto opt_error; 6461 /* Check value */ 6462 switch (*((uint16_t *)or->ip6or_value)) { 6463 case IP6_ALERT_MLD: 6464 case IP6_ALERT_RSVP: 6465 ret = 1; 6466 } 6467 break; 6468 } 6469 case IP6OPT_HOME_ADDRESS: { 6470 /* 6471 * Minimal support for the home address option 6472 * (which is required by all IPv6 nodes). 6473 * Implement by just swapping the home address 6474 * and source address. 6475 * XXX Note: this has IPsec implications since 6476 * AH needs to take this into account. 6477 * Also, when IPsec is used we need to ensure 6478 * that this is only processed once 6479 * in the received packet (to avoid swapping 6480 * back and forth). 6481 * NOTE:This option processing is considered 6482 * to be unsafe and prone to a denial of 6483 * service attack. 6484 * The current processing is not safe even with 6485 * IPsec secured IP packets. Since the home 6486 * address option processing requirement still 6487 * is in the IETF draft and in the process of 6488 * being redefined for its usage, it has been 6489 * decided to turn off the option by default. 6490 * If this section of code needs to be executed, 6491 * ndd variable ip6_ignore_home_address_opt 6492 * should be set to 0 at the user's own risk. 6493 */ 6494 struct ip6_opt_home_address *oh; 6495 in6_addr_t tmp; 6496 6497 if (ipv6_ignore_home_address_opt) 6498 goto opt_error; 6499 6500 if (hdr_type != IPPROTO_DSTOPTS) 6501 goto opt_error; 6502 optused = 2 + optptr[1]; 6503 if (optused > optlen) 6504 goto bad_opt; 6505 6506 /* 6507 * We did this dest. opt the first time 6508 * around (i.e. before AH processing). 
6509 * If we've done AH... stop now. 6510 */ 6511 if (first_mp != mp) { 6512 ipsec_in_t *ii; 6513 6514 ii = (ipsec_in_t *)first_mp->b_rptr; 6515 if (ii->ipsec_in_ah_sa != NULL) 6516 break; 6517 } 6518 6519 oh = (struct ip6_opt_home_address *)optptr; 6520 /* Check total length and alignment */ 6521 if (optused < sizeof (*oh) || 6522 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6523 goto opt_error; 6524 /* Swap ip6_src and the home address */ 6525 tmp = ip6h->ip6_src; 6526 /* XXX Note: only 8 byte alignment option */ 6527 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6528 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6529 break; 6530 } 6531 6532 case IP6OPT_TUNNEL_LIMIT: 6533 if (hdr_type != IPPROTO_DSTOPTS) { 6534 goto opt_error; 6535 } 6536 optused = 2 + optptr[1]; 6537 if (optused > optlen) { 6538 goto bad_opt; 6539 } 6540 if (optused != 3) { 6541 goto opt_error; 6542 } 6543 break; 6544 6545 default: 6546 errtype = "unknown"; 6547 /* FALLTHROUGH */ 6548 opt_error: 6549 /* Determine which zone should send error */ 6550 zoneid = ipif_lookup_addr_zoneid_v6( 6551 &ip6h->ip6_dst, ill); 6552 switch (IP6OPT_TYPE(opt_type)) { 6553 case IP6OPT_TYPE_SKIP: 6554 optused = 2 + optptr[1]; 6555 if (optused > optlen) 6556 goto bad_opt; 6557 ip1dbg(("ip_process_options_v6: %s " 6558 "opt 0x%x skipped\n", 6559 errtype, opt_type)); 6560 break; 6561 case IP6OPT_TYPE_DISCARD: 6562 ip1dbg(("ip_process_options_v6: %s " 6563 "opt 0x%x; packet dropped\n", 6564 errtype, opt_type)); 6565 freemsg(first_mp); 6566 return (-1); 6567 case IP6OPT_TYPE_ICMP: 6568 if (zoneid == ALL_ZONES) { 6569 freemsg(first_mp); 6570 return (-1); 6571 } 6572 icmp_param_problem_v6(WR(q), first_mp, 6573 ICMP6_PARAMPROB_OPTION, 6574 (uint32_t)(optptr - 6575 (uint8_t *)ip6h), 6576 B_FALSE, B_FALSE, zoneid); 6577 return (-1); 6578 case IP6OPT_TYPE_FORCEICMP: 6579 if (zoneid == ALL_ZONES) { 6580 freemsg(first_mp); 6581 return (-1); 6582 } 6583 icmp_param_problem_v6(WR(q), first_mp, 6584 ICMP6_PARAMPROB_OPTION, 6585 
(uint32_t)(optptr - 6586 (uint8_t *)ip6h), 6587 B_FALSE, B_TRUE, zoneid); 6588 return (-1); 6589 default: 6590 ASSERT(0); 6591 } 6592 } 6593 } 6594 optlen -= optused; 6595 optptr += optused; 6596 } 6597 return (ret); 6598 6599 bad_opt: 6600 /* Determine which zone should send error */ 6601 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 6602 if (zoneid == ALL_ZONES) { 6603 freemsg(first_mp); 6604 } else { 6605 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6606 (uint32_t)(optptr - (uint8_t *)ip6h), 6607 B_FALSE, B_FALSE, zoneid); 6608 } 6609 return (-1); 6610 } 6611 6612 /* 6613 * Process a routing header that is not yet empty. 6614 * Only handles type 0 routing headers. 6615 */ 6616 static void 6617 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6618 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6619 { 6620 ip6_rthdr0_t *rthdr; 6621 uint_t ehdrlen; 6622 uint_t numaddr; 6623 in6_addr_t *addrptr; 6624 in6_addr_t tmp; 6625 6626 ASSERT(rth->ip6r_segleft != 0); 6627 6628 if (!ipv6_forward_src_routed) { 6629 /* XXX Check for source routed out same interface? 
*/ 6630 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6631 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6632 freemsg(hada_mp); 6633 freemsg(mp); 6634 return; 6635 } 6636 6637 if (rth->ip6r_type != 0) { 6638 if (hada_mp != NULL) 6639 goto hada_drop; 6640 /* Sent by forwarding path, and router is global zone */ 6641 icmp_param_problem_v6(WR(q), mp, 6642 ICMP6_PARAMPROB_HEADER, 6643 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6644 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6645 return; 6646 } 6647 rthdr = (ip6_rthdr0_t *)rth; 6648 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6649 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6650 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6651 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6652 if (rthdr->ip6r0_len & 0x1) { 6653 /* An odd length is impossible */ 6654 if (hada_mp != NULL) 6655 goto hada_drop; 6656 /* Sent by forwarding path, and router is global zone */ 6657 icmp_param_problem_v6(WR(q), mp, 6658 ICMP6_PARAMPROB_HEADER, 6659 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6660 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6661 return; 6662 } 6663 numaddr = rthdr->ip6r0_len / 2; 6664 if (rthdr->ip6r0_segleft > numaddr) { 6665 /* segleft exceeds number of addresses in routing header */ 6666 if (hada_mp != NULL) 6667 goto hada_drop; 6668 /* Sent by forwarding path, and router is global zone */ 6669 icmp_param_problem_v6(WR(q), mp, 6670 ICMP6_PARAMPROB_HEADER, 6671 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6672 (uchar_t *)ip6h), 6673 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6674 return; 6675 } 6676 addrptr += (numaddr - rthdr->ip6r0_segleft); 6677 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6678 IN6_IS_ADDR_MULTICAST(addrptr)) { 6679 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6680 freemsg(hada_mp); 6681 freemsg(mp); 6682 return; 6683 } 6684 /* Swap */ 6685 tmp = *addrptr; 6686 *addrptr = ip6h->ip6_dst; 6687 ip6h->ip6_dst = tmp; 6688 rthdr->ip6r0_segleft--; 6689 /* Don't allow any mapped 
addresses - ip_wput_v6 can't handle them */ 6690 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6691 if (hada_mp != NULL) 6692 goto hada_drop; 6693 /* Sent by forwarding path, and router is global zone */ 6694 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6695 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6696 return; 6697 } 6698 if (ip_check_v6_mblk(mp, ill) == 0) { 6699 ip6h = (ip6_t *)mp->b_rptr; 6700 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6701 } 6702 return; 6703 hada_drop: 6704 /* IPsec kstats: bean counter? */ 6705 freemsg(hada_mp); 6706 freemsg(mp); 6707 } 6708 6709 /* 6710 * Read side put procedure for IPv6 module. 6711 */ 6712 void 6713 ip_rput_v6(queue_t *q, mblk_t *mp) 6714 { 6715 mblk_t *first_mp; 6716 mblk_t *hada_mp = NULL; 6717 ip6_t *ip6h; 6718 boolean_t ll_multicast = B_FALSE; 6719 boolean_t mctl_present = B_FALSE; 6720 ill_t *ill; 6721 struct iocblk *iocp; 6722 uint_t flags = 0; 6723 mblk_t *dl_mp; 6724 6725 ill = (ill_t *)q->q_ptr; 6726 if (ill->ill_state_flags & ILL_CONDEMNED) { 6727 union DL_primitives *dl; 6728 6729 dl = (union DL_primitives *)mp->b_rptr; 6730 /* 6731 * Things are opening or closing - only accept DLPI 6732 * ack messages. If the stream is closing and ip_wsrv 6733 * has completed, ip_close is out of the qwait, but has 6734 * not yet completed qprocsoff. Don't proceed any further 6735 * because the ill has been cleaned up and things hanging 6736 * off the ill have been freed. 6737 */ 6738 if ((mp->b_datap->db_type != M_PCPROTO) || 6739 (dl->dl_primitive == DL_UNITDATA_IND)) { 6740 inet_freemsg(mp); 6741 return; 6742 } 6743 } 6744 6745 dl_mp = NULL; 6746 switch (mp->b_datap->db_type) { 6747 case M_DATA: { 6748 int hlen; 6749 uchar_t *ucp; 6750 struct ether_header *eh; 6751 dl_unitdata_ind_t *dui; 6752 6753 /* 6754 * This is a work-around for CR 6451644, a bug in Nemo. It 6755 * should be removed when that problem is fixed. 
6756 */ 6757 if (ill->ill_mactype == DL_ETHER && 6758 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6759 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6760 ucp[-2] == (IP6_DL_SAP >> 8)) { 6761 if (hlen >= sizeof (struct ether_vlan_header) && 6762 ucp[-5] == 0 && ucp[-6] == 0x81) 6763 ucp -= sizeof (struct ether_vlan_header); 6764 else 6765 ucp -= sizeof (struct ether_header); 6766 /* 6767 * If it's a group address, then fabricate a 6768 * DL_UNITDATA_IND message. 6769 */ 6770 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6771 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6772 BPRI_HI)) != NULL) { 6773 eh = (struct ether_header *)ucp; 6774 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6775 DB_TYPE(dl_mp) = M_PROTO; 6776 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6777 dui->dl_primitive = DL_UNITDATA_IND; 6778 dui->dl_dest_addr_length = 8; 6779 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6780 dui->dl_src_addr_length = 8; 6781 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6782 8; 6783 dui->dl_group_address = 1; 6784 ucp = (uchar_t *)(dui + 1); 6785 if (ill->ill_sap_length > 0) 6786 ucp += ill->ill_sap_length; 6787 bcopy(&eh->ether_dhost, ucp, 6); 6788 bcopy(&eh->ether_shost, ucp + 8, 6); 6789 ucp = (uchar_t *)(dui + 1); 6790 if (ill->ill_sap_length < 0) 6791 ucp += 8 + ill->ill_sap_length; 6792 bcopy(&eh->ether_type, ucp, 2); 6793 bcopy(&eh->ether_type, ucp + 8, 2); 6794 } 6795 } 6796 break; 6797 } 6798 6799 case M_PROTO: 6800 case M_PCPROTO: 6801 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6802 DL_UNITDATA_IND) { 6803 /* Go handle anything other than data elsewhere. */ 6804 ip_rput_dlpi(q, mp); 6805 return; 6806 } 6807 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6808 ll_multicast = dlur->dl_group_address; 6809 #undef dlur 6810 /* Save the DLPI header. 
*/ 6811 dl_mp = mp; 6812 mp = mp->b_cont; 6813 dl_mp->b_cont = NULL; 6814 break; 6815 case M_BREAK: 6816 panic("ip_rput_v6: got an M_BREAK"); 6817 /*NOTREACHED*/ 6818 case M_IOCACK: 6819 iocp = (struct iocblk *)mp->b_rptr; 6820 switch (iocp->ioc_cmd) { 6821 case DL_IOC_HDR_INFO: 6822 ill = (ill_t *)q->q_ptr; 6823 ill_fastpath_ack(ill, mp); 6824 return; 6825 case SIOCSTUNPARAM: 6826 case SIOCGTUNPARAM: 6827 case OSIOCSTUNPARAM: 6828 case OSIOCGTUNPARAM: 6829 /* Go through qwriter */ 6830 break; 6831 default: 6832 putnext(q, mp); 6833 return; 6834 } 6835 /* FALLTHRU */ 6836 case M_ERROR: 6837 case M_HANGUP: 6838 mutex_enter(&ill->ill_lock); 6839 if (ill->ill_state_flags & ILL_CONDEMNED) { 6840 mutex_exit(&ill->ill_lock); 6841 freemsg(mp); 6842 return; 6843 } 6844 ill_refhold_locked(ill); 6845 mutex_exit(&ill->ill_lock); 6846 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6847 return; 6848 case M_CTL: 6849 if ((MBLKL(mp) > sizeof (int)) && 6850 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6851 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6852 mctl_present = B_TRUE; 6853 break; 6854 } 6855 putnext(q, mp); 6856 return; 6857 case M_IOCNAK: 6858 iocp = (struct iocblk *)mp->b_rptr; 6859 switch (iocp->ioc_cmd) { 6860 case DL_IOC_HDR_INFO: 6861 case SIOCSTUNPARAM: 6862 case SIOCGTUNPARAM: 6863 case OSIOCSTUNPARAM: 6864 case OSIOCGTUNPARAM: 6865 mutex_enter(&ill->ill_lock); 6866 if (ill->ill_state_flags & ILL_CONDEMNED) { 6867 mutex_exit(&ill->ill_lock); 6868 freemsg(mp); 6869 return; 6870 } 6871 ill_refhold_locked(ill); 6872 mutex_exit(&ill->ill_lock); 6873 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6874 B_FALSE); 6875 return; 6876 default: 6877 break; 6878 } 6879 /* FALLTHRU */ 6880 default: 6881 putnext(q, mp); 6882 return; 6883 } 6884 6885 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6886 /* 6887 * if db_ref > 1 then copymsg and free original. 
Packet may be 6888 * changed and do not want other entity who has a reference to this 6889 * message to trip over the changes. This is a blind change because 6890 * trying to catch all places that might change packet is too 6891 * difficult (since it may be a module above this one). 6892 */ 6893 if (mp->b_datap->db_ref > 1) { 6894 mblk_t *mp1; 6895 6896 mp1 = copymsg(mp); 6897 freemsg(mp); 6898 if (mp1 == NULL) { 6899 first_mp = NULL; 6900 goto discard; 6901 } 6902 mp = mp1; 6903 } 6904 first_mp = mp; 6905 if (mctl_present) { 6906 hada_mp = first_mp; 6907 mp = first_mp->b_cont; 6908 } 6909 6910 if (ip_check_v6_mblk(mp, ill) == -1) 6911 return; 6912 6913 ip6h = (ip6_t *)mp->b_rptr; 6914 6915 DTRACE_PROBE4(ip6__physical__in__start, 6916 ill_t *, ill, ill_t *, NULL, 6917 ip6_t *, ip6h, mblk_t *, first_mp); 6918 6919 FW_HOOKS6(ip6_physical_in_event, ipv6firewall_physical_in, 6920 MSG_FWCOOKED_IN, ill, NULL, ip6h, first_mp, mp); 6921 6922 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6923 6924 if (first_mp == NULL) 6925 return; 6926 6927 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6928 IPV6_DEFAULT_VERS_AND_FLOW) { 6929 /* 6930 * It may be a bit too expensive to do this mapped address 6931 * check here, but in the interest of robustness, it seems 6932 * like the correct place. 6933 * TODO: Avoid this check for e.g. connected TCP sockets 6934 */ 6935 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6936 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6937 goto discard; 6938 } 6939 6940 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6941 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6942 goto discard; 6943 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6944 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6945 goto discard; 6946 } 6947 6948 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6949 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6950 } else { 6951 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6952 goto discard; 6953 } 6954 freemsg(dl_mp); 6955 return; 6956 6957 discard: 6958 if (dl_mp != NULL) 6959 freeb(dl_mp); 6960 freemsg(first_mp); 6961 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6962 } 6963 6964 /* 6965 * Walk through the IPv6 packet in mp and see if there's an AH header 6966 * in it. See if the AH header needs to get done before other headers in 6967 * the packet. (Worker function for ipsec_early_ah_v6().) 6968 */ 6969 #define IPSEC_HDR_DONT_PROCESS 0 6970 #define IPSEC_HDR_PROCESS 1 6971 #define IPSEC_MEMORY_ERROR 2 6972 static int 6973 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6974 { 6975 uint_t length; 6976 uint_t ehdrlen; 6977 uint8_t *whereptr; 6978 uint8_t *endptr; 6979 uint8_t *nexthdrp; 6980 ip6_dest_t *desthdr; 6981 ip6_rthdr_t *rthdr; 6982 ip6_t *ip6h; 6983 6984 /* 6985 * For now just pullup everything. In general, the less pullups, 6986 * the better, but there's so much squirrelling through anyway, 6987 * it's just easier this way. 6988 */ 6989 if (!pullupmsg(mp, -1)) { 6990 return (IPSEC_MEMORY_ERROR); 6991 } 6992 6993 ip6h = (ip6_t *)mp->b_rptr; 6994 length = IPV6_HDR_LEN; 6995 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6996 endptr = mp->b_wptr; 6997 6998 /* 6999 * We can't just use the argument nexthdr in the place 7000 * of nexthdrp becaue we don't dereference nexthdrp 7001 * till we confirm whether it is a valid address. 7002 */ 7003 nexthdrp = &ip6h->ip6_nxt; 7004 while (whereptr < endptr) { 7005 /* Is there enough left for len + nexthdr? 
*/ 7006 if (whereptr + MIN_EHDR_LEN > endptr) 7007 return (IPSEC_MEMORY_ERROR); 7008 7009 switch (*nexthdrp) { 7010 case IPPROTO_HOPOPTS: 7011 case IPPROTO_DSTOPTS: 7012 /* Assumes the headers are identical for hbh and dst */ 7013 desthdr = (ip6_dest_t *)whereptr; 7014 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7015 if ((uchar_t *)desthdr + ehdrlen > endptr) 7016 return (IPSEC_MEMORY_ERROR); 7017 /* 7018 * Return DONT_PROCESS because of potential Mobile IPv6 7019 * cruft for destination options. 7020 */ 7021 if (*nexthdrp == IPPROTO_DSTOPTS) 7022 return (IPSEC_HDR_DONT_PROCESS); 7023 nexthdrp = &desthdr->ip6d_nxt; 7024 break; 7025 case IPPROTO_ROUTING: 7026 rthdr = (ip6_rthdr_t *)whereptr; 7027 7028 /* 7029 * If there's more hops left on the routing header, 7030 * return now with DON'T PROCESS. 7031 */ 7032 if (rthdr->ip6r_segleft > 0) 7033 return (IPSEC_HDR_DONT_PROCESS); 7034 7035 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7036 if ((uchar_t *)rthdr + ehdrlen > endptr) 7037 return (IPSEC_MEMORY_ERROR); 7038 nexthdrp = &rthdr->ip6r_nxt; 7039 break; 7040 case IPPROTO_FRAGMENT: 7041 /* Wait for reassembly */ 7042 return (IPSEC_HDR_DONT_PROCESS); 7043 case IPPROTO_AH: 7044 *nexthdr = IPPROTO_AH; 7045 return (IPSEC_HDR_PROCESS); 7046 case IPPROTO_NONE: 7047 /* No next header means we're finished */ 7048 default: 7049 return (IPSEC_HDR_DONT_PROCESS); 7050 } 7051 length += ehdrlen; 7052 whereptr += ehdrlen; 7053 } 7054 panic("ipsec_needs_processing_v6"); 7055 /*NOTREACHED*/ 7056 } 7057 7058 /* 7059 * Path for AH if options are present. If this is the first time we are 7060 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7061 * Otherwise, just fanout. Return value answers the boolean question: 7062 * "Did I consume the mblk you sent me?" 7063 * 7064 * Sometimes AH needs to be done before other IPv6 headers for security 7065 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7066 * indicates if that is so, and fans out to the appropriate IPsec protocol 7067 * for the datagram passed in. 7068 */ 7069 static boolean_t 7070 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7071 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7072 { 7073 mblk_t *mp; 7074 uint8_t nexthdr; 7075 ipsec_in_t *ii = NULL; 7076 ah_t *ah; 7077 ipsec_status_t ipsec_rc; 7078 7079 ASSERT((hada_mp == NULL) || (!mctl_present)); 7080 7081 switch (ipsec_needs_processing_v6( 7082 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7083 case IPSEC_MEMORY_ERROR: 7084 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7085 freemsg(hada_mp); 7086 freemsg(first_mp); 7087 return (B_TRUE); 7088 case IPSEC_HDR_DONT_PROCESS: 7089 return (B_FALSE); 7090 } 7091 7092 /* Default means send it to AH! */ 7093 ASSERT(nexthdr == IPPROTO_AH); 7094 if (!mctl_present) { 7095 mp = first_mp; 7096 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7097 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7098 "allocation failure.\n")); 7099 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7100 freemsg(hada_mp); 7101 freemsg(mp); 7102 return (B_TRUE); 7103 } 7104 /* 7105 * Store the ill_index so that when we come back 7106 * from IPSEC we ride on the same queue. 7107 */ 7108 ii = (ipsec_in_t *)first_mp->b_rptr; 7109 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7110 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7111 first_mp->b_cont = mp; 7112 } 7113 /* 7114 * Cache hardware acceleration info. 
7115 */ 7116 if (hada_mp != NULL) { 7117 ASSERT(ii != NULL); 7118 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7119 "caching data attr.\n")); 7120 ii->ipsec_in_accelerated = B_TRUE; 7121 ii->ipsec_in_da = hada_mp; 7122 } 7123 7124 if (!ipsec_loaded()) { 7125 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7126 return (B_TRUE); 7127 } 7128 7129 ah = ipsec_inbound_ah_sa(first_mp); 7130 if (ah == NULL) 7131 return (B_TRUE); 7132 ASSERT(ii->ipsec_in_ah_sa != NULL); 7133 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7134 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7135 7136 switch (ipsec_rc) { 7137 case IPSEC_STATUS_SUCCESS: 7138 /* we're done with IPsec processing, send it up */ 7139 ip_fanout_proto_again(first_mp, ill, ill, ire); 7140 break; 7141 case IPSEC_STATUS_FAILED: 7142 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7143 break; 7144 case IPSEC_STATUS_PENDING: 7145 /* no action needed */ 7146 break; 7147 } 7148 return (B_TRUE); 7149 } 7150 7151 /* 7152 * Validate the IPv6 mblk for alignment. 7153 */ 7154 int 7155 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7156 { 7157 int pkt_len, ip6_len; 7158 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7159 7160 /* check for alignment and full IPv6 header */ 7161 if (!OK_32PTR((uchar_t *)ip6h) || 7162 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7163 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7164 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7165 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7166 freemsg(mp); 7167 return (-1); 7168 } 7169 ip6h = (ip6_t *)mp->b_rptr; 7170 } 7171 7172 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7173 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7174 7175 if (mp->b_cont == NULL) 7176 pkt_len = mp->b_wptr - mp->b_rptr; 7177 else 7178 pkt_len = msgdsize(mp); 7179 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7180 7181 /* 7182 * Check for bogus (too short packet) and packet which 7183 * was padded by the link layer. 
7184 */ 7185 if (ip6_len != pkt_len) { 7186 ssize_t diff; 7187 7188 if (ip6_len > pkt_len) { 7189 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7190 ip6_len, pkt_len)); 7191 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7192 freemsg(mp); 7193 return (-1); 7194 } 7195 diff = (ssize_t)(pkt_len - ip6_len); 7196 7197 if (!adjmsg(mp, -diff)) { 7198 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7199 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7200 freemsg(mp); 7201 return (-1); 7202 } 7203 } 7204 return (0); 7205 } 7206 7207 /* 7208 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7209 * ip_rput_v6 has already verified alignment, the min length, the version, 7210 * and db_ref = 1. 7211 * 7212 * The ill passed in (the arg named inill) is the ill that the packet 7213 * actually arrived on. We need to remember this when saving the 7214 * input interface index into potential IPV6_PKTINFO data in 7215 * ip_add_info_v6(). 7216 * 7217 * This routine doesn't free dl_mp; that's the caller's responsibility on 7218 * return. (Note that the callers are complex enough that there's no tail 7219 * recursion here anyway.) 
7220 */ 7221 void 7222 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7223 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7224 { 7225 ire_t *ire = NULL; 7226 queue_t *rq; 7227 ill_t *ill = inill; 7228 ill_t *outill; 7229 ipif_t *ipif; 7230 uint8_t *whereptr; 7231 uint8_t nexthdr; 7232 uint16_t remlen; 7233 uint_t prev_nexthdr_offset; 7234 uint_t used; 7235 size_t pkt_len; 7236 uint16_t ip6_len; 7237 uint_t hdr_len; 7238 boolean_t mctl_present; 7239 mblk_t *first_mp; 7240 mblk_t *first_mp1; 7241 boolean_t no_forward; 7242 ip6_hbh_t *hbhhdr; 7243 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7244 conn_t *connp; 7245 ilm_t *ilm; 7246 uint32_t ports; 7247 uint_t ipif_id = 0; 7248 zoneid_t zoneid = GLOBAL_ZONEID; 7249 uint16_t hck_flags, reass_hck_flags; 7250 uint32_t reass_sum; 7251 boolean_t cksum_err; 7252 mblk_t *mp1; 7253 7254 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7255 7256 if (hada_mp != NULL) { 7257 /* 7258 * It's an IPsec accelerated packet. 7259 * Keep a pointer to the data attributes around until 7260 * we allocate the ipsecinfo structure. 7261 */ 7262 IPSECHW_DEBUG(IPSECHW_PKT, 7263 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7264 hada_mp->b_cont = NULL; 7265 /* 7266 * Since it is accelerated, it came directly from 7267 * the ill. 
7268 */ 7269 ASSERT(mctl_present == B_FALSE); 7270 ASSERT(mp->b_datap->db_type != M_CTL); 7271 } 7272 7273 ip6h = (ip6_t *)mp->b_rptr; 7274 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7275 pkt_len = ip6_len; 7276 7277 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7278 hck_flags = DB_CKSUMFLAGS(mp); 7279 else 7280 hck_flags = 0; 7281 7282 /* Clear checksum flags in case we need to forward */ 7283 DB_CKSUMFLAGS(mp) = 0; 7284 reass_sum = reass_hck_flags = 0; 7285 7286 nexthdr = ip6h->ip6_nxt; 7287 7288 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7289 (uchar_t *)ip6h); 7290 whereptr = (uint8_t *)&ip6h[1]; 7291 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7292 7293 /* Process hop by hop header options */ 7294 if (nexthdr == IPPROTO_HOPOPTS) { 7295 uint_t ehdrlen; 7296 uint8_t *optptr; 7297 7298 if (remlen < MIN_EHDR_LEN) 7299 goto pkt_too_short; 7300 if (mp->b_cont != NULL && 7301 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7302 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7303 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7304 freemsg(hada_mp); 7305 freemsg(first_mp); 7306 return; 7307 } 7308 ip6h = (ip6_t *)mp->b_rptr; 7309 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7310 } 7311 hbhhdr = (ip6_hbh_t *)whereptr; 7312 nexthdr = hbhhdr->ip6h_nxt; 7313 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7314 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7315 7316 if (remlen < ehdrlen) 7317 goto pkt_too_short; 7318 if (mp->b_cont != NULL && 7319 whereptr + ehdrlen > mp->b_wptr) { 7320 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7321 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7322 freemsg(hada_mp); 7323 freemsg(first_mp); 7324 return; 7325 } 7326 ip6h = (ip6_t *)mp->b_rptr; 7327 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7328 hbhhdr = (ip6_hbh_t *)whereptr; 7329 } 7330 7331 optptr = whereptr + 2; 7332 whereptr += ehdrlen; 7333 remlen -= ehdrlen; 7334 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7335 
ehdrlen - 2, IPPROTO_HOPOPTS)) { 7336 case -1: 7337 /* 7338 * Packet has been consumed and any 7339 * needed ICMP messages sent. 7340 */ 7341 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7342 freemsg(hada_mp); 7343 return; 7344 case 0: 7345 /* no action needed */ 7346 break; 7347 case 1: 7348 /* Known router alert */ 7349 goto ipv6forus; 7350 } 7351 } 7352 7353 /* 7354 * Attach any necessary label information to this packet. 7355 */ 7356 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7357 if (ip6opt_ls != 0) 7358 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7359 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7360 freemsg(hada_mp); 7361 freemsg(first_mp); 7362 return; 7363 } 7364 7365 /* 7366 * On incoming v6 multicast packets we will bypass the ire table, 7367 * and assume that the read queue corresponds to the targetted 7368 * interface. 7369 * 7370 * The effect of this is the same as the IPv4 original code, but is 7371 * much cleaner I think. See ip_rput for how that was done. 7372 */ 7373 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7374 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7375 /* 7376 * XXX TODO Give to mrouted to for multicast forwarding. 
7377 */ 7378 ILM_WALKER_HOLD(ill); 7379 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7380 ILM_WALKER_RELE(ill); 7381 if (ilm == NULL) { 7382 if (ip_debug > 3) { 7383 /* ip2dbg */ 7384 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7385 " which is not for us: %s\n", AF_INET6, 7386 &ip6h->ip6_dst); 7387 } 7388 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7389 freemsg(hada_mp); 7390 freemsg(first_mp); 7391 return; 7392 } 7393 if (ip_debug > 3) { 7394 /* ip2dbg */ 7395 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7396 AF_INET6, &ip6h->ip6_dst); 7397 } 7398 rq = ill->ill_rq; 7399 zoneid = GLOBAL_ZONEID; 7400 goto ipv6forus; 7401 } 7402 7403 ipif = ill->ill_ipif; 7404 7405 /* 7406 * If a packet was received on an interface that is a 6to4 tunnel, 7407 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7408 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7409 * the 6to4 prefix of the address configured on the receiving interface. 7410 * Otherwise, the packet was delivered to this interface in error and 7411 * the packet must be dropped. 7412 */ 7413 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7414 7415 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7416 &ip6h->ip6_dst)) { 7417 if (ip_debug > 2) { 7418 /* ip1dbg */ 7419 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7420 "addressed packet which is not for us: " 7421 "%s\n", AF_INET6, &ip6h->ip6_dst); 7422 } 7423 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7424 freemsg(first_mp); 7425 return; 7426 } 7427 } 7428 7429 /* 7430 * Find an ire that matches destination. For link-local addresses 7431 * we have to match the ill. 7432 * TBD for site local addresses. 
7433 */ 7434 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7435 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7436 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7437 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7438 } else { 7439 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7440 MBLK_GETLABEL(mp)); 7441 } 7442 if (ire == NULL) { 7443 /* 7444 * No matching IRE found. Mark this packet as having 7445 * originated externally. 7446 */ 7447 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7448 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7449 if (!(ill->ill_flags & ILLF_ROUTER)) 7450 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7451 freemsg(hada_mp); 7452 freemsg(first_mp); 7453 return; 7454 } 7455 if (ip6h->ip6_hops <= 1) { 7456 if (hada_mp != NULL) 7457 goto hada_drop; 7458 /* Sent by forwarding path, and router is global zone */ 7459 icmp_time_exceeded_v6(WR(q), first_mp, 7460 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7461 GLOBAL_ZONEID); 7462 return; 7463 } 7464 /* 7465 * Per RFC 3513 section 2.5.2, we must not forward packets with 7466 * an unspecified source address. 7467 */ 7468 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7469 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7470 freemsg(hada_mp); 7471 freemsg(first_mp); 7472 return; 7473 } 7474 mp->b_prev = (mblk_t *)(uintptr_t) 7475 ill->ill_phyint->phyint_ifindex; 7476 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7477 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7478 ALL_ZONES); 7479 return; 7480 } 7481 ipif_id = ire->ire_ipif->ipif_seqid; 7482 /* we have a matching IRE */ 7483 if (ire->ire_stq != NULL) { 7484 ill_group_t *ill_group; 7485 ill_group_t *ire_group; 7486 7487 /* 7488 * To be quicker, we may wish not to chase pointers 7489 * (ire->ire_ipif->ipif_ill...) and instead store the 7490 * forwarding policy in the ire. An unfortunate side- 7491 * effect of this would be requiring an ire flush whenever 7492 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7493 * once and store in the boolean no_forward. 7494 * 7495 * This appears twice to keep it out of the non-forwarding, 7496 * yes-it's-for-us-on-the-right-interface case. 7497 */ 7498 no_forward = ((ill->ill_flags & 7499 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7500 7501 7502 ASSERT(first_mp == mp); 7503 /* 7504 * This ire has a send-to queue - forward the packet. 7505 */ 7506 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7507 freemsg(hada_mp); 7508 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7509 if (no_forward) 7510 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7511 freemsg(mp); 7512 ire_refrele(ire); 7513 return; 7514 } 7515 if (ip6h->ip6_hops <= 1) { 7516 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7517 /* Sent by forwarding path, and router is global zone */ 7518 icmp_time_exceeded_v6(WR(q), mp, 7519 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7520 GLOBAL_ZONEID); 7521 ire_refrele(ire); 7522 return; 7523 } 7524 /* 7525 * Per RFC 3513 section 2.5.2, we must not forward packets with 7526 * an unspecified source address. 
7527 */ 7528 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7529 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7530 freemsg(mp); 7531 ire_refrele(ire); 7532 return; 7533 } 7534 7535 if (is_system_labeled()) { 7536 mblk_t *mp1; 7537 7538 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7539 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7540 freemsg(mp); 7541 ire_refrele(ire); 7542 return; 7543 } 7544 /* Size may have changed */ 7545 mp = mp1; 7546 ip6h = (ip6_t *)mp->b_rptr; 7547 pkt_len = msgdsize(mp); 7548 } 7549 7550 if (pkt_len > ire->ire_max_frag) { 7551 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7552 /* Sent by forwarding path, and router is global zone */ 7553 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7554 ll_multicast, B_TRUE, GLOBAL_ZONEID); 7555 ire_refrele(ire); 7556 return; 7557 } 7558 7559 /* 7560 * Check to see if we're forwarding the packet to a 7561 * different link from which it came. If so, check the 7562 * source and destination addresses since routers must not 7563 * forward any packets with link-local source or 7564 * destination addresses to other links. Otherwise (if 7565 * we're forwarding onto the same link), conditionally send 7566 * a redirect message. 7567 */ 7568 ill_group = ill->ill_group; 7569 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7570 if (ire->ire_rfq != q && (ill_group == NULL || 7571 ill_group != ire_group)) { 7572 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7573 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7574 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7575 freemsg(mp); 7576 ire_refrele(ire); 7577 return; 7578 } 7579 /* TBD add site-local check at site boundary? */ 7580 } else if (ipv6_send_redirects) { 7581 in6_addr_t *v6targ; 7582 in6_addr_t gw_addr_v6; 7583 ire_t *src_ire_v6 = NULL; 7584 7585 /* 7586 * Don't send a redirect when forwarding a source 7587 * routed packet. 
7588 */ 7589 if (ip_source_routed_v6(ip6h, mp)) 7590 goto forward; 7591 7592 mutex_enter(&ire->ire_lock); 7593 gw_addr_v6 = ire->ire_gateway_addr_v6; 7594 mutex_exit(&ire->ire_lock); 7595 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7596 v6targ = &gw_addr_v6; 7597 /* 7598 * We won't send redirects to a router 7599 * that doesn't have a link local 7600 * address, but will forward. 7601 */ 7602 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7603 BUMP_MIB(ill->ill_ip6_mib, 7604 ipv6InAddrErrors); 7605 goto forward; 7606 } 7607 } else { 7608 v6targ = &ip6h->ip6_dst; 7609 } 7610 7611 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7612 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7613 ALL_ZONES, 0, NULL, 7614 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7615 7616 if (src_ire_v6 != NULL) { 7617 /* 7618 * The source is directly connected. 7619 */ 7620 mp1 = copymsg(mp); 7621 if (mp1 != NULL) { 7622 icmp_send_redirect_v6(WR(q), 7623 mp1, v6targ, &ip6h->ip6_dst, 7624 ill, B_FALSE); 7625 } 7626 ire_refrele(src_ire_v6); 7627 } 7628 } 7629 7630 forward: 7631 /* Hoplimit verified above */ 7632 ip6h->ip6_hops--; 7633 7634 outill = ire->ire_ipif->ipif_ill; 7635 7636 DTRACE_PROBE4(ip6__forwarding__start, 7637 ill_t *, inill, ill_t *, outill, 7638 ip6_t *, ip6h, mblk_t *, mp); 7639 7640 FW_HOOKS6(ip6_forwarding_event, ipv6firewall_forwarding, 7641 MSG_FWCOOKED_FORWARD, inill, outill, ip6h, mp, mp); 7642 7643 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7644 7645 if (mp != NULL) { 7646 UPDATE_IB_PKT_COUNT(ire); 7647 ire->ire_last_used_time = lbolt; 7648 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7649 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7650 } 7651 IRE_REFRELE(ire); 7652 return; 7653 } 7654 rq = ire->ire_rfq; 7655 7656 /* 7657 * Need to put on correct queue for reassembly to find it. 7658 * No need to use put() since reassembly has its own locks. 
7659 * Note: multicast packets and packets destined to addresses 7660 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7661 * the arriving ill. 7662 */ 7663 if (rq != q) { 7664 boolean_t check_multi = B_TRUE; 7665 ill_group_t *ill_group = NULL; 7666 ill_group_t *ire_group = NULL; 7667 ill_t *ire_ill = NULL; 7668 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7669 7670 /* 7671 * To be quicker, we may wish not to chase pointers 7672 * (ire->ire_ipif->ipif_ill...) and instead store the 7673 * forwarding policy in the ire. An unfortunate side- 7674 * effect of this would be requiring an ire flush whenever 7675 * the ILLF_ROUTER flag changes. For now, chase pointers 7676 * once and store in the boolean no_forward. 7677 */ 7678 no_forward = ((ill->ill_flags & 7679 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7680 7681 ill_group = ill->ill_group; 7682 if (rq != NULL) { 7683 ire_ill = (ill_t *)(rq->q_ptr); 7684 ire_group = ire_ill->ill_group; 7685 } 7686 7687 /* 7688 * If it's part of the same IPMP group, or if it's a legal 7689 * address on the 'usesrc' interface, then bypass strict 7690 * checks. 7691 */ 7692 if (ill_group != NULL && ill_group == ire_group) { 7693 check_multi = B_FALSE; 7694 } else if (ill_ifindex != 0 && ire_ill != NULL && 7695 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7696 check_multi = B_FALSE; 7697 } 7698 7699 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7700 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7701 /* 7702 * This packet came in on an interface other than the 7703 * one associated with the destination address 7704 * and we are strict about matches. 7705 * 7706 * As long as the ills belong to the same group, 7707 * we don't consider them to arriving on the wrong 7708 * interface. Thus, when the switch is doing inbound 7709 * load spreading, we won't drop packets when we 7710 * are doing strict multihoming checks. 
7711 */ 7712 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7713 freemsg(hada_mp); 7714 freemsg(first_mp); 7715 ire_refrele(ire); 7716 return; 7717 } 7718 7719 if (rq != NULL) 7720 q = rq; 7721 7722 ill = (ill_t *)q->q_ptr; 7723 ASSERT(ill); 7724 } 7725 7726 zoneid = ire->ire_zoneid; 7727 UPDATE_IB_PKT_COUNT(ire); 7728 ire->ire_last_used_time = lbolt; 7729 /* Don't use the ire after this point. */ 7730 ire_refrele(ire); 7731 ipv6forus: 7732 /* 7733 * Looks like this packet is for us one way or another. 7734 * This is where we'll process destination headers etc. 7735 */ 7736 for (; ; ) { 7737 switch (nexthdr) { 7738 case IPPROTO_TCP: { 7739 uint16_t *up; 7740 uint32_t sum; 7741 int offset; 7742 7743 hdr_len = pkt_len - remlen; 7744 7745 if (hada_mp != NULL) { 7746 ip0dbg(("tcp hada drop\n")); 7747 goto hada_drop; 7748 } 7749 7750 7751 /* TCP needs all of the TCP header */ 7752 if (remlen < TCP_MIN_HEADER_LENGTH) 7753 goto pkt_too_short; 7754 if (mp->b_cont != NULL && 7755 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7756 if (!pullupmsg(mp, 7757 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7758 BUMP_MIB(ill->ill_ip6_mib, 7759 ipv6InDiscards); 7760 freemsg(first_mp); 7761 return; 7762 } 7763 hck_flags = 0; 7764 ip6h = (ip6_t *)mp->b_rptr; 7765 whereptr = (uint8_t *)ip6h + hdr_len; 7766 } 7767 /* 7768 * Extract the offset field from the TCP header. 7769 */ 7770 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7771 if (offset != 5) { 7772 if (offset < 5) { 7773 ip1dbg(("ip_rput_data_v6: short " 7774 "TCP data offset")); 7775 BUMP_MIB(ill->ill_ip6_mib, 7776 ipv6InDiscards); 7777 freemsg(first_mp); 7778 return; 7779 } 7780 /* 7781 * There must be TCP options. 7782 * Make sure we can grab them. 
7783 */ 7784 offset <<= 2; 7785 if (remlen < offset) 7786 goto pkt_too_short; 7787 if (mp->b_cont != NULL && 7788 whereptr + offset > mp->b_wptr) { 7789 if (!pullupmsg(mp, 7790 hdr_len + offset)) { 7791 BUMP_MIB(ill->ill_ip6_mib, 7792 ipv6InDiscards); 7793 freemsg(first_mp); 7794 return; 7795 } 7796 hck_flags = 0; 7797 ip6h = (ip6_t *)mp->b_rptr; 7798 whereptr = (uint8_t *)ip6h + hdr_len; 7799 } 7800 } 7801 7802 up = (uint16_t *)&ip6h->ip6_src; 7803 /* 7804 * TCP checksum calculation. First sum up the 7805 * pseudo-header fields: 7806 * - Source IPv6 address 7807 * - Destination IPv6 address 7808 * - TCP payload length 7809 * - TCP protocol ID 7810 */ 7811 sum = htons(IPPROTO_TCP + remlen) + 7812 up[0] + up[1] + up[2] + up[3] + 7813 up[4] + up[5] + up[6] + up[7] + 7814 up[8] + up[9] + up[10] + up[11] + 7815 up[12] + up[13] + up[14] + up[15]; 7816 7817 /* Fold initial sum */ 7818 sum = (sum & 0xffff) + (sum >> 16); 7819 7820 mp1 = mp->b_cont; 7821 7822 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7823 IP6_STAT(ip6_in_sw_cksum); 7824 7825 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7826 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7827 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7828 mp, mp1, cksum_err); 7829 7830 if (cksum_err) { 7831 BUMP_MIB(&ip_mib, tcpInErrs); 7832 7833 if (hck_flags & HCK_FULLCKSUM) 7834 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7835 else if (hck_flags & HCK_PARTIALCKSUM) 7836 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7837 else 7838 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7839 7840 freemsg(first_mp); 7841 return; 7842 } 7843 tcp_fanout: 7844 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7845 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7846 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7847 return; 7848 } 7849 case IPPROTO_SCTP: 7850 { 7851 sctp_hdr_t *sctph; 7852 uint32_t calcsum, pktsum; 7853 uint_t hdr_len = pkt_len - remlen; 7854 7855 /* SCTP needs all of the SCTP header */ 7856 if (remlen < sizeof (*sctph)) { 7857 goto pkt_too_short; 
7858 } 7859 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7860 ASSERT(mp->b_cont != NULL); 7861 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7862 BUMP_MIB(ill->ill_ip6_mib, 7863 ipv6InDiscards); 7864 freemsg(mp); 7865 return; 7866 } 7867 ip6h = (ip6_t *)mp->b_rptr; 7868 whereptr = (uint8_t *)ip6h + hdr_len; 7869 } 7870 7871 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7872 /* checksum */ 7873 pktsum = sctph->sh_chksum; 7874 sctph->sh_chksum = 0; 7875 calcsum = sctp_cksum(mp, hdr_len); 7876 if (calcsum != pktsum) { 7877 BUMP_MIB(&sctp_mib, sctpChecksumError); 7878 freemsg(mp); 7879 return; 7880 } 7881 sctph->sh_chksum = pktsum; 7882 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7883 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7884 ports, ipif_id, zoneid, mp)) == NULL) { 7885 ip_fanout_sctp_raw(first_mp, ill, 7886 (ipha_t *)ip6h, B_FALSE, ports, 7887 mctl_present, 7888 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7889 B_TRUE, ipif_id, zoneid); 7890 return; 7891 } 7892 BUMP_MIB(&ip_mib, ipInDelivers); 7893 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7894 B_FALSE, mctl_present); 7895 return; 7896 } 7897 case IPPROTO_UDP: { 7898 uint16_t *up; 7899 uint32_t sum; 7900 7901 hdr_len = pkt_len - remlen; 7902 7903 if (hada_mp != NULL) { 7904 ip0dbg(("udp hada drop\n")); 7905 goto hada_drop; 7906 } 7907 7908 /* Verify that at least the ports are present */ 7909 if (remlen < UDPH_SIZE) 7910 goto pkt_too_short; 7911 if (mp->b_cont != NULL && 7912 whereptr + UDPH_SIZE > mp->b_wptr) { 7913 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7914 BUMP_MIB(ill->ill_ip6_mib, 7915 ipv6InDiscards); 7916 freemsg(first_mp); 7917 return; 7918 } 7919 hck_flags = 0; 7920 ip6h = (ip6_t *)mp->b_rptr; 7921 whereptr = (uint8_t *)ip6h + hdr_len; 7922 } 7923 7924 /* 7925 * Before going through the regular checksum 7926 * calculation, make sure the received checksum 7927 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7928 * in a UDP packet (within IPv6 packet) is invalid 7929 * and should be replaced by 0xffff. This makes 7930 * sense as regular checksum calculation will 7931 * pass for both the cases i.e. 0x0000 and 0xffff. 7932 * Removing one of the case makes error detection 7933 * stronger. 7934 */ 7935 7936 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7937 /* 0x0000 checksum is invalid */ 7938 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7939 "checksum value 0x0000\n")); 7940 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7941 freemsg(first_mp); 7942 return; 7943 } 7944 7945 up = (uint16_t *)&ip6h->ip6_src; 7946 7947 /* 7948 * UDP checksum calculation. First sum up the 7949 * pseudo-header fields: 7950 * - Source IPv6 address 7951 * - Destination IPv6 address 7952 * - UDP payload length 7953 * - UDP protocol ID 7954 */ 7955 7956 sum = htons(IPPROTO_UDP + remlen) + 7957 up[0] + up[1] + up[2] + up[3] + 7958 up[4] + up[5] + up[6] + up[7] + 7959 up[8] + up[9] + up[10] + up[11] + 7960 up[12] + up[13] + up[14] + up[15]; 7961 7962 /* Fold initial sum */ 7963 sum = (sum & 0xffff) + (sum >> 16); 7964 7965 if (reass_hck_flags != 0) { 7966 hck_flags = reass_hck_flags; 7967 7968 IP_CKSUM_RECV_REASS(hck_flags, 7969 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7970 sum, reass_sum, cksum_err); 7971 } else { 7972 mp1 = mp->b_cont; 7973 7974 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7975 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7976 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7977 mp, mp1, cksum_err); 7978 } 7979 7980 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7981 IP6_STAT(ip6_in_sw_cksum); 7982 7983 if (cksum_err) { 7984 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7985 7986 if (hck_flags & HCK_FULLCKSUM) 7987 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7988 else if (hck_flags & HCK_PARTIALCKSUM) 7989 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7990 else 7991 IP6_STAT(ip6_udp_in_sw_cksum_err); 7992 7993 freemsg(first_mp); 7994 return; 7995 } 7996 
goto udp_fanout; 7997 } 7998 case IPPROTO_ICMPV6: { 7999 uint16_t *up; 8000 uint32_t sum; 8001 uint_t hdr_len = pkt_len - remlen; 8002 8003 if (hada_mp != NULL) { 8004 ip0dbg(("icmp hada drop\n")); 8005 goto hada_drop; 8006 } 8007 8008 up = (uint16_t *)&ip6h->ip6_src; 8009 sum = htons(IPPROTO_ICMPV6 + remlen) + 8010 up[0] + up[1] + up[2] + up[3] + 8011 up[4] + up[5] + up[6] + up[7] + 8012 up[8] + up[9] + up[10] + up[11] + 8013 up[12] + up[13] + up[14] + up[15]; 8014 sum = (sum & 0xffff) + (sum >> 16); 8015 sum = IP_CSUM(mp, hdr_len, sum); 8016 if (sum != 0) { 8017 /* IPv6 ICMP checksum failed */ 8018 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8019 "failed %x\n", 8020 sum)); 8021 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8022 BUMP_MIB(ill->ill_icmp6_mib, 8023 ipv6IfIcmpInErrors); 8024 freemsg(first_mp); 8025 return; 8026 } 8027 8028 icmp_fanout: 8029 /* Check variable for testing applications */ 8030 if (ipv6_drop_inbound_icmpv6) { 8031 freemsg(first_mp); 8032 return; 8033 } 8034 /* 8035 * Assume that there is always at least one conn for 8036 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8037 * where there is no conn. 8038 */ 8039 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8040 ASSERT(!(ill->ill_phyint->phyint_flags & 8041 PHYI_LOOPBACK)); 8042 /* 8043 * In the multicast case, applications may have 8044 * joined the group from different zones, so we 8045 * need to deliver the packet to each of them. 8046 * Loop through the multicast memberships 8047 * structures (ilm) on the receive ill and send 8048 * a copy of the packet up each matching one. 
8049 */ 8050 ILM_WALKER_HOLD(ill); 8051 for (ilm = ill->ill_ilm; ilm != NULL; 8052 ilm = ilm->ilm_next) { 8053 if (ilm->ilm_flags & ILM_DELETED) 8054 continue; 8055 if (!IN6_ARE_ADDR_EQUAL( 8056 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8057 continue; 8058 if (!ipif_lookup_zoneid(ill, 8059 ilm->ilm_zoneid, IPIF_UP, NULL)) 8060 continue; 8061 8062 first_mp1 = ip_copymsg(first_mp); 8063 if (first_mp1 == NULL) 8064 continue; 8065 icmp_inbound_v6(q, first_mp1, ill, 8066 hdr_len, mctl_present, 0, 8067 ilm->ilm_zoneid, dl_mp); 8068 } 8069 ILM_WALKER_RELE(ill); 8070 } else { 8071 first_mp1 = ip_copymsg(first_mp); 8072 if (first_mp1 != NULL) 8073 icmp_inbound_v6(q, first_mp1, ill, 8074 hdr_len, mctl_present, 0, zoneid, 8075 dl_mp); 8076 } 8077 } 8078 /* FALLTHRU */ 8079 default: { 8080 /* 8081 * Handle protocols with which IPv6 is less intimate. 8082 */ 8083 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 8084 8085 if (hada_mp != NULL) { 8086 ip0dbg(("default hada drop\n")); 8087 goto hada_drop; 8088 } 8089 8090 /* 8091 * Enable sending ICMP for "Unknown" nexthdr 8092 * case. i.e. where we did not FALLTHRU from 8093 * IPPROTO_ICMPV6 processing case above. 8094 * If we did FALLTHRU, then the packet has already been 8095 * processed for IPPF, don't process it again in 8096 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8097 * flags 8098 */ 8099 if (nexthdr != IPPROTO_ICMPV6) 8100 proto_flags |= IP_FF_SEND_ICMP; 8101 else 8102 proto_flags |= IP6_NO_IPPOLICY; 8103 8104 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8105 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8106 mctl_present, zoneid); 8107 return; 8108 } 8109 8110 case IPPROTO_DSTOPTS: { 8111 uint_t ehdrlen; 8112 uint8_t *optptr; 8113 ip6_dest_t *desthdr; 8114 8115 /* Check if AH is present. 
*/ 8116 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8117 ire, hada_mp, zoneid)) { 8118 ip0dbg(("dst early hada drop\n")); 8119 return; 8120 } 8121 8122 /* 8123 * Reinitialize pointers, as ipsec_early_ah_v6() does 8124 * complete pullups. We don't have to do more pullups 8125 * as a result. 8126 */ 8127 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8128 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8129 ip6h = (ip6_t *)mp->b_rptr; 8130 8131 if (remlen < MIN_EHDR_LEN) 8132 goto pkt_too_short; 8133 8134 desthdr = (ip6_dest_t *)whereptr; 8135 nexthdr = desthdr->ip6d_nxt; 8136 prev_nexthdr_offset = (uint_t)(whereptr - 8137 (uint8_t *)ip6h); 8138 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8139 if (remlen < ehdrlen) 8140 goto pkt_too_short; 8141 optptr = whereptr + 2; 8142 /* 8143 * Note: XXX This code does not seem to make 8144 * distinction between Destination Options Header 8145 * being before/after Routing Header which can 8146 * happen if we are at the end of source route. 8147 * This may become significant in future. 8148 * (No real significant Destination Options are 8149 * defined/implemented yet ). 8150 */ 8151 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8152 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8153 case -1: 8154 /* 8155 * Packet has been consumed and any needed 8156 * ICMP errors sent. 
8157 */ 8158 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8159 freemsg(hada_mp); 8160 return; 8161 case 0: 8162 /* No action needed continue */ 8163 break; 8164 case 1: 8165 /* 8166 * Unnexpected return value 8167 * (Router alert is a Hop-by-Hop option) 8168 */ 8169 #ifdef DEBUG 8170 panic("ip_rput_data_v6: router " 8171 "alert hbh opt indication in dest opt"); 8172 /*NOTREACHED*/ 8173 #else 8174 freemsg(hada_mp); 8175 freemsg(first_mp); 8176 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8177 return; 8178 #endif 8179 } 8180 used = ehdrlen; 8181 break; 8182 } 8183 case IPPROTO_FRAGMENT: { 8184 ip6_frag_t *fraghdr; 8185 size_t no_frag_hdr_len; 8186 8187 if (hada_mp != NULL) { 8188 ip0dbg(("frag hada drop\n")); 8189 goto hada_drop; 8190 } 8191 8192 ASSERT(first_mp == mp); 8193 if (remlen < sizeof (ip6_frag_t)) 8194 goto pkt_too_short; 8195 8196 if (mp->b_cont != NULL && 8197 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8198 if (!pullupmsg(mp, 8199 pkt_len - remlen + sizeof (ip6_frag_t))) { 8200 BUMP_MIB(ill->ill_ip6_mib, 8201 ipv6InDiscards); 8202 freemsg(mp); 8203 return; 8204 } 8205 hck_flags = 0; 8206 ip6h = (ip6_t *)mp->b_rptr; 8207 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8208 } 8209 8210 fraghdr = (ip6_frag_t *)whereptr; 8211 used = (uint_t)sizeof (ip6_frag_t); 8212 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8213 8214 /* 8215 * Invoke the CGTP (multirouting) filtering module to 8216 * process the incoming packet. Packets identified as 8217 * duplicates must be discarded. Filtering is active 8218 * only if the the ip_cgtp_filter ndd variable is 8219 * non-zero. 
8220 */ 8221 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8222 int cgtp_flt_pkt = 8223 ip_cgtp_filter_ops->cfo_filter_v6( 8224 inill->ill_rq, ip6h, fraghdr); 8225 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8226 freemsg(mp); 8227 return; 8228 } 8229 } 8230 8231 /* Restore the flags */ 8232 DB_CKSUMFLAGS(mp) = hck_flags; 8233 8234 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8235 remlen - used, &prev_nexthdr_offset, 8236 &reass_sum, &reass_hck_flags); 8237 if (mp == NULL) { 8238 /* Reassembly is still pending */ 8239 return; 8240 } 8241 /* The first mblk are the headers before the frag hdr */ 8242 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8243 8244 first_mp = mp; /* mp has most likely changed! */ 8245 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8246 ip6h = (ip6_t *)mp->b_rptr; 8247 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8248 whereptr = mp->b_rptr + no_frag_hdr_len; 8249 remlen = ntohs(ip6h->ip6_plen) + 8250 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8251 pkt_len = msgdsize(mp); 8252 used = 0; 8253 break; 8254 } 8255 case IPPROTO_HOPOPTS: 8256 if (hada_mp != NULL) { 8257 ip0dbg(("hop hada drop\n")); 8258 goto hada_drop; 8259 } 8260 /* 8261 * Illegal header sequence. 8262 * (Hop-by-hop headers are processed above 8263 * and required to immediately follow IPv6 header) 8264 */ 8265 icmp_param_problem_v6(WR(q), first_mp, 8266 ICMP6_PARAMPROB_NEXTHEADER, 8267 prev_nexthdr_offset, 8268 B_FALSE, B_FALSE, zoneid); 8269 return; 8270 8271 case IPPROTO_ROUTING: { 8272 uint_t ehdrlen; 8273 ip6_rthdr_t *rthdr; 8274 8275 /* Check if AH is present. */ 8276 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8277 ire, hada_mp, zoneid)) { 8278 ip0dbg(("routing hada drop\n")); 8279 return; 8280 } 8281 8282 /* 8283 * Reinitialize pointers, as ipsec_early_ah_v6() does 8284 * complete pullups. We don't have to do more pullups 8285 * as a result. 
8286 */ 8287 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8288 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8289 ip6h = (ip6_t *)mp->b_rptr; 8290 8291 if (remlen < MIN_EHDR_LEN) 8292 goto pkt_too_short; 8293 rthdr = (ip6_rthdr_t *)whereptr; 8294 nexthdr = rthdr->ip6r_nxt; 8295 prev_nexthdr_offset = (uint_t)(whereptr - 8296 (uint8_t *)ip6h); 8297 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8298 if (remlen < ehdrlen) 8299 goto pkt_too_short; 8300 if (rthdr->ip6r_segleft != 0) { 8301 /* Not end of source route */ 8302 if (ll_multicast) { 8303 BUMP_MIB(ill->ill_ip6_mib, 8304 ipv6ForwProhibits); 8305 freemsg(hada_mp); 8306 freemsg(mp); 8307 return; 8308 } 8309 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8310 flags, hada_mp, dl_mp); 8311 return; 8312 } 8313 used = ehdrlen; 8314 break; 8315 } 8316 case IPPROTO_AH: 8317 case IPPROTO_ESP: { 8318 /* 8319 * Fast path for AH/ESP. If this is the first time 8320 * we are sending a datagram to AH/ESP, allocate 8321 * a IPSEC_IN message and prepend it. Otherwise, 8322 * just fanout. 8323 */ 8324 8325 ipsec_in_t *ii; 8326 int ipsec_rc; 8327 8328 if (!mctl_present) { 8329 ASSERT(first_mp == mp); 8330 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8331 NULL) { 8332 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8333 "allocation failure.\n")); 8334 BUMP_MIB(ill->ill_ip6_mib, 8335 ipv6InDiscards); 8336 freemsg(mp); 8337 return; 8338 } 8339 /* 8340 * Store the ill_index so that when we come back 8341 * from IPSEC we ride on the same queue. 8342 */ 8343 ii = (ipsec_in_t *)first_mp->b_rptr; 8344 ii->ipsec_in_ill_index = 8345 ill->ill_phyint->phyint_ifindex; 8346 ii->ipsec_in_rill_index = 8347 ii->ipsec_in_ill_index; 8348 first_mp->b_cont = mp; 8349 /* 8350 * Cache hardware acceleration info. 
8351 */ 8352 if (hada_mp != NULL) { 8353 IPSECHW_DEBUG(IPSECHW_PKT, 8354 ("ip_rput_data_v6: " 8355 "caching data attr.\n")); 8356 ii->ipsec_in_accelerated = B_TRUE; 8357 ii->ipsec_in_da = hada_mp; 8358 hada_mp = NULL; 8359 } 8360 } else { 8361 ii = (ipsec_in_t *)first_mp->b_rptr; 8362 } 8363 8364 if (!ipsec_loaded()) { 8365 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8366 ire->ire_zoneid); 8367 return; 8368 } 8369 8370 /* select inbound SA and have IPsec process the pkt */ 8371 if (nexthdr == IPPROTO_ESP) { 8372 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8373 if (esph == NULL) 8374 return; 8375 ASSERT(ii->ipsec_in_esp_sa != NULL); 8376 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8377 NULL); 8378 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8379 first_mp, esph); 8380 } else { 8381 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8382 if (ah == NULL) 8383 return; 8384 ASSERT(ii->ipsec_in_ah_sa != NULL); 8385 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8386 NULL); 8387 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8388 first_mp, ah); 8389 } 8390 8391 switch (ipsec_rc) { 8392 case IPSEC_STATUS_SUCCESS: 8393 break; 8394 case IPSEC_STATUS_FAILED: 8395 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8396 /* FALLTHRU */ 8397 case IPSEC_STATUS_PENDING: 8398 return; 8399 } 8400 /* we're done with IPsec processing, send it up */ 8401 ip_fanout_proto_again(first_mp, ill, inill, ire); 8402 return; 8403 } 8404 case IPPROTO_NONE: 8405 /* All processing is done. Count as "delivered". 
*/ 8406 freemsg(hada_mp); 8407 freemsg(first_mp); 8408 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8409 return; 8410 } 8411 whereptr += used; 8412 ASSERT(remlen >= used); 8413 remlen -= used; 8414 } 8415 /* NOTREACHED */ 8416 8417 pkt_too_short: 8418 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8419 ip6_len, pkt_len, remlen)); 8420 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8421 freemsg(hada_mp); 8422 freemsg(first_mp); 8423 return; 8424 udp_fanout: 8425 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8426 connp = NULL; 8427 } else { 8428 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8429 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8430 CONN_DEC_REF(connp); 8431 connp = NULL; 8432 } 8433 } 8434 8435 if (connp == NULL) { 8436 uint32_t ports; 8437 8438 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8439 UDP_PORTS_OFFSET); 8440 IP6_STAT(ip6_udp_slow_path); 8441 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8442 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8443 zoneid); 8444 return; 8445 } 8446 8447 if (CONN_UDP_FLOWCTLD(connp)) { 8448 freemsg(first_mp); 8449 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8450 CONN_DEC_REF(connp); 8451 return; 8452 } 8453 8454 /* Initiate IPPF processing */ 8455 if (IP6_IN_IPP(flags)) { 8456 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8457 if (mp == NULL) { 8458 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8459 CONN_DEC_REF(connp); 8460 return; 8461 } 8462 } 8463 8464 if (connp->conn_ipv6_recvpktinfo || 8465 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8466 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8467 if (mp == NULL) { 8468 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8469 CONN_DEC_REF(connp); 8470 return; 8471 } 8472 } 8473 8474 IP6_STAT(ip6_udp_fast_path); 8475 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8476 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8477 8478 /* Send it upstream */ 8479 CONN_UDP_RECV(connp, mp); 8480 8481 CONN_DEC_REF(connp); 
8482 freemsg(hada_mp); 8483 return; 8484 8485 hada_drop: 8486 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8487 /* IPsec kstats: bump counter here */ 8488 freemsg(hada_mp); 8489 freemsg(first_mp); 8490 } 8491 8492 /* 8493 * Reassemble fragment. 8494 * When it returns a completed message the first mblk will only contain 8495 * the headers prior to the fragment header. 8496 * 8497 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8498 * of the preceding header. This is needed to patch the previous header's 8499 * nexthdr field when reassembly completes. 8500 */ 8501 static mblk_t * 8502 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8503 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8504 uint32_t *cksum_val, uint16_t *cksum_flags) 8505 { 8506 ill_t *ill = (ill_t *)q->q_ptr; 8507 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8508 uint16_t offset; 8509 boolean_t more_frags; 8510 uint8_t nexthdr = fraghdr->ip6f_nxt; 8511 in6_addr_t *v6dst_ptr; 8512 in6_addr_t *v6src_ptr; 8513 uint_t end; 8514 uint_t hdr_length; 8515 size_t count; 8516 ipf_t *ipf; 8517 ipf_t **ipfp; 8518 ipfb_t *ipfb; 8519 mblk_t *mp1; 8520 uint8_t ecn_info = 0; 8521 size_t msg_len; 8522 mblk_t *tail_mp; 8523 mblk_t *t_mp; 8524 boolean_t pruned = B_FALSE; 8525 uint32_t sum_val; 8526 uint16_t sum_flags; 8527 8528 8529 if (cksum_val != NULL) 8530 *cksum_val = 0; 8531 if (cksum_flags != NULL) 8532 *cksum_flags = 0; 8533 8534 /* 8535 * We utilize hardware computed checksum info only for UDP since 8536 * IP fragmentation is a normal occurence for the protocol. In 8537 * addition, checksum offload support for IP fragments carrying 8538 * UDP payload is commonly implemented across network adapters. 
8539 */ 8540 ASSERT(ill != NULL); 8541 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8542 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8543 mblk_t *mp1 = mp->b_cont; 8544 int32_t len; 8545 8546 /* Record checksum information from the packet */ 8547 sum_val = (uint32_t)DB_CKSUM16(mp); 8548 sum_flags = DB_CKSUMFLAGS(mp); 8549 8550 /* fragmented payload offset from beginning of mblk */ 8551 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8552 8553 if ((sum_flags & HCK_PARTIALCKSUM) && 8554 (mp1 == NULL || mp1->b_cont == NULL) && 8555 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8556 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8557 uint32_t adj; 8558 /* 8559 * Partial checksum has been calculated by hardware 8560 * and attached to the packet; in addition, any 8561 * prepended extraneous data is even byte aligned. 8562 * If any such data exists, we adjust the checksum; 8563 * this would also handle any postpended data. 8564 */ 8565 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8566 mp, mp1, len, adj); 8567 8568 /* One's complement subtract extraneous checksum */ 8569 if (adj >= sum_val) 8570 sum_val = ~(adj - sum_val) & 0xFFFF; 8571 else 8572 sum_val -= adj; 8573 } 8574 } else { 8575 sum_val = 0; 8576 sum_flags = 0; 8577 } 8578 8579 /* Clear hardware checksumming flag */ 8580 DB_CKSUMFLAGS(mp) = 0; 8581 8582 /* 8583 * Note: Fragment offset in header is in 8-octet units. 8584 * Clearing least significant 3 bits not only extracts 8585 * it but also gets it in units of octets. 8586 */ 8587 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8588 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8589 8590 /* 8591 * Is the more frags flag on and the payload length not a multiple 8592 * of eight? 
8593 */ 8594 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8595 zoneid_t zoneid; 8596 8597 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8598 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8599 if (zoneid == ALL_ZONES) { 8600 freemsg(mp); 8601 return (NULL); 8602 } 8603 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8604 (uint32_t)((char *)&ip6h->ip6_plen - 8605 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8606 return (NULL); 8607 } 8608 8609 v6src_ptr = &ip6h->ip6_src; 8610 v6dst_ptr = &ip6h->ip6_dst; 8611 end = remlen; 8612 8613 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8614 end += offset; 8615 8616 /* 8617 * Would fragment cause reassembled packet to have a payload length 8618 * greater than IP_MAXPACKET - the max payload size? 8619 */ 8620 if (end > IP_MAXPACKET) { 8621 zoneid_t zoneid; 8622 8623 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8624 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8625 if (zoneid == ALL_ZONES) { 8626 freemsg(mp); 8627 return (NULL); 8628 } 8629 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8630 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8631 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8632 return (NULL); 8633 } 8634 8635 /* 8636 * This packet just has one fragment. Reassembly not 8637 * needed. 8638 */ 8639 if (!more_frags && offset == 0) { 8640 goto reass_done; 8641 } 8642 8643 /* 8644 * Drop the fragmented as early as possible, if 8645 * we don't have resource(s) to re-assemble. 8646 */ 8647 if (ip_reass_queue_bytes == 0) { 8648 freemsg(mp); 8649 return (NULL); 8650 } 8651 8652 /* Record the ECN field info. */ 8653 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8654 /* 8655 * If this is not the first fragment, dump the unfragmentable 8656 * portion of the packet. 8657 */ 8658 if (offset) 8659 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8660 8661 /* 8662 * Fragmentation reassembly. 
Each ILL has a hash table for 8663 * queueing packets undergoing reassembly for all IPIFs 8664 * associated with the ILL. The hash is based on the packet 8665 * IP ident field. The ILL frag hash table was allocated 8666 * as a timer block at the time the ILL was created. Whenever 8667 * there is anything on the reassembly queue, the timer will 8668 * be running. 8669 */ 8670 msg_len = MBLKSIZE(mp); 8671 tail_mp = mp; 8672 while (tail_mp->b_cont != NULL) { 8673 tail_mp = tail_mp->b_cont; 8674 msg_len += MBLKSIZE(tail_mp); 8675 } 8676 /* 8677 * If the reassembly list for this ILL will get too big 8678 * prune it. 8679 */ 8680 8681 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8682 ip_reass_queue_bytes) { 8683 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8684 : (ip_reass_queue_bytes - msg_len)); 8685 pruned = B_TRUE; 8686 } 8687 8688 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8689 mutex_enter(&ipfb->ipfb_lock); 8690 8691 ipfp = &ipfb->ipfb_ipf; 8692 /* Try to find an existing fragment queue for this packet. */ 8693 for (;;) { 8694 ipf = ipfp[0]; 8695 if (ipf) { 8696 /* 8697 * It has to match on ident, source address, and 8698 * dest address. 8699 */ 8700 if (ipf->ipf_ident == ident && 8701 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8702 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8703 8704 /* 8705 * If we have received too many 8706 * duplicate fragments for this packet 8707 * free it. 8708 */ 8709 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8710 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8711 freemsg(mp); 8712 mutex_exit(&ipfb->ipfb_lock); 8713 return (NULL); 8714 } 8715 8716 break; 8717 } 8718 ipfp = &ipf->ipf_hash_next; 8719 continue; 8720 } 8721 8722 8723 /* 8724 * If we pruned the list, do we want to store this new 8725 * fragment?. We apply an optimization here based on the 8726 * fact that most fragments will be received in order. 
8727 * So if the offset of this incoming fragment is zero, 8728 * it is the first fragment of a new packet. We will 8729 * keep it. Otherwise drop the fragment, as we have 8730 * probably pruned the packet already (since the 8731 * packet cannot be found). 8732 */ 8733 8734 if (pruned && offset != 0) { 8735 mutex_exit(&ipfb->ipfb_lock); 8736 freemsg(mp); 8737 return (NULL); 8738 } 8739 8740 /* New guy. Allocate a frag message. */ 8741 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8742 if (!mp1) { 8743 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8744 freemsg(mp); 8745 partial_reass_done: 8746 mutex_exit(&ipfb->ipfb_lock); 8747 return (NULL); 8748 } 8749 8750 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8751 /* 8752 * Too many fragmented packets in this hash bucket. 8753 * Free the oldest. 8754 */ 8755 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8756 } 8757 8758 mp1->b_cont = mp; 8759 8760 /* Initialize the fragment header. */ 8761 ipf = (ipf_t *)mp1->b_rptr; 8762 ipf->ipf_mp = mp1; 8763 ipf->ipf_ptphn = ipfp; 8764 ipfp[0] = ipf; 8765 ipf->ipf_hash_next = NULL; 8766 ipf->ipf_ident = ident; 8767 ipf->ipf_v6src = *v6src_ptr; 8768 ipf->ipf_v6dst = *v6dst_ptr; 8769 /* Record reassembly start time. 
*/ 8770 ipf->ipf_timestamp = gethrestime_sec(); 8771 /* Record ipf generation and account for frag header */ 8772 ipf->ipf_gen = ill->ill_ipf_gen++; 8773 ipf->ipf_count = MBLKSIZE(mp1); 8774 ipf->ipf_protocol = nexthdr; 8775 ipf->ipf_nf_hdr_len = 0; 8776 ipf->ipf_prev_nexthdr_offset = 0; 8777 ipf->ipf_last_frag_seen = B_FALSE; 8778 ipf->ipf_ecn = ecn_info; 8779 ipf->ipf_num_dups = 0; 8780 ipfb->ipfb_frag_pkts++; 8781 ipf->ipf_checksum = 0; 8782 ipf->ipf_checksum_flags = 0; 8783 8784 /* Store checksum value in fragment header */ 8785 if (sum_flags != 0) { 8786 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8787 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8788 ipf->ipf_checksum = sum_val; 8789 ipf->ipf_checksum_flags = sum_flags; 8790 } 8791 8792 /* 8793 * We handle reassembly two ways. In the easy case, 8794 * where all the fragments show up in order, we do 8795 * minimal bookkeeping, and just clip new pieces on 8796 * the end. If we ever see a hole, then we go off 8797 * to ip_reassemble which has to mark the pieces and 8798 * keep track of the number of holes, etc. Obviously, 8799 * the point of having both mechanisms is so we can 8800 * handle the easy case as efficiently as possible. 8801 */ 8802 if (offset == 0) { 8803 /* Easy case, in-order reassembly so far. */ 8804 /* Update the byte count */ 8805 ipf->ipf_count += msg_len; 8806 ipf->ipf_tail_mp = tail_mp; 8807 /* 8808 * Keep track of next expected offset in 8809 * ipf_end. 8810 */ 8811 ipf->ipf_end = end; 8812 ipf->ipf_nf_hdr_len = hdr_length; 8813 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8814 } else { 8815 /* Hard case, hole at the beginning. */ 8816 ipf->ipf_tail_mp = NULL; 8817 /* 8818 * ipf_end == 0 means that we have given up 8819 * on easy reassembly. 8820 */ 8821 ipf->ipf_end = 0; 8822 8823 /* Forget checksum offload from now on */ 8824 ipf->ipf_checksum_flags = 0; 8825 8826 /* 8827 * ipf_hole_cnt is set by ip_reassemble. 8828 * ipf_count is updated by ip_reassemble. 
8829 * No need to check for return value here 8830 * as we don't expect reassembly to complete or 8831 * fail for the first fragment itself. 8832 */ 8833 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8834 msg_len); 8835 } 8836 /* Update per ipfb and ill byte counts */ 8837 ipfb->ipfb_count += ipf->ipf_count; 8838 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8839 ill->ill_frag_count += ipf->ipf_count; 8840 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8841 /* If the frag timer wasn't already going, start it. */ 8842 mutex_enter(&ill->ill_lock); 8843 ill_frag_timer_start(ill); 8844 mutex_exit(&ill->ill_lock); 8845 goto partial_reass_done; 8846 } 8847 8848 /* 8849 * If the packet's flag has changed (it could be coming up 8850 * from an interface different than the previous, therefore 8851 * possibly different checksum capability), then forget about 8852 * any stored checksum states. Otherwise add the value to 8853 * the existing one stored in the fragment header. 8854 */ 8855 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8856 sum_val += ipf->ipf_checksum; 8857 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8858 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8859 ipf->ipf_checksum = sum_val; 8860 } else if (ipf->ipf_checksum_flags != 0) { 8861 /* Forget checksum offload from now on */ 8862 ipf->ipf_checksum_flags = 0; 8863 } 8864 8865 /* 8866 * We have a new piece of a datagram which is already being 8867 * reassembled. Update the ECN info if all IP fragments 8868 * are ECN capable. If there is one which is not, clear 8869 * all the info. If there is at least one which has CE 8870 * code point, IP needs to report that up to transport. 
8871 */ 8872 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8873 if (ecn_info == IPH_ECN_CE) 8874 ipf->ipf_ecn = IPH_ECN_CE; 8875 } else { 8876 ipf->ipf_ecn = IPH_ECN_NECT; 8877 } 8878 8879 if (offset && ipf->ipf_end == offset) { 8880 /* The new fragment fits at the end */ 8881 ipf->ipf_tail_mp->b_cont = mp; 8882 /* Update the byte count */ 8883 ipf->ipf_count += msg_len; 8884 /* Update per ipfb and ill byte counts */ 8885 ipfb->ipfb_count += msg_len; 8886 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8887 ill->ill_frag_count += msg_len; 8888 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8889 if (more_frags) { 8890 /* More to come. */ 8891 ipf->ipf_end = end; 8892 ipf->ipf_tail_mp = tail_mp; 8893 goto partial_reass_done; 8894 } 8895 } else { 8896 /* 8897 * Go do the hard cases. 8898 * Call ip_reassemble(). 8899 */ 8900 int ret; 8901 8902 if (offset == 0) { 8903 if (ipf->ipf_prev_nexthdr_offset == 0) { 8904 ipf->ipf_nf_hdr_len = hdr_length; 8905 ipf->ipf_prev_nexthdr_offset = 8906 *prev_nexthdr_offset; 8907 } 8908 } 8909 /* Save current byte count */ 8910 count = ipf->ipf_count; 8911 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8912 8913 /* Count of bytes added and subtracted (freeb()ed) */ 8914 count = ipf->ipf_count - count; 8915 if (count) { 8916 /* Update per ipfb and ill byte counts */ 8917 ipfb->ipfb_count += count; 8918 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8919 ill->ill_frag_count += count; 8920 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8921 } 8922 if (ret == IP_REASS_PARTIAL) { 8923 goto partial_reass_done; 8924 } else if (ret == IP_REASS_FAILED) { 8925 /* Reassembly failed. 
Free up all resources */ 8926 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8927 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8928 IP_REASS_SET_START(t_mp, 0); 8929 IP_REASS_SET_END(t_mp, 0); 8930 } 8931 freemsg(mp); 8932 goto partial_reass_done; 8933 } 8934 8935 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8936 } 8937 /* 8938 * We have completed reassembly. Unhook the frag header from 8939 * the reassembly list. 8940 * 8941 * Grab the unfragmentable header length next header value out 8942 * of the first fragment 8943 */ 8944 ASSERT(ipf->ipf_nf_hdr_len != 0); 8945 hdr_length = ipf->ipf_nf_hdr_len; 8946 8947 /* 8948 * Before we free the frag header, record the ECN info 8949 * to report back to the transport. 8950 */ 8951 ecn_info = ipf->ipf_ecn; 8952 8953 /* 8954 * Store the nextheader field in the header preceding the fragment 8955 * header 8956 */ 8957 nexthdr = ipf->ipf_protocol; 8958 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8959 ipfp = ipf->ipf_ptphn; 8960 8961 /* We need to supply these to caller */ 8962 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8963 sum_val = ipf->ipf_checksum; 8964 else 8965 sum_val = 0; 8966 8967 mp1 = ipf->ipf_mp; 8968 count = ipf->ipf_count; 8969 ipf = ipf->ipf_hash_next; 8970 if (ipf) 8971 ipf->ipf_ptphn = ipfp; 8972 ipfp[0] = ipf; 8973 ill->ill_frag_count -= count; 8974 ASSERT(ipfb->ipfb_count >= count); 8975 ipfb->ipfb_count -= count; 8976 ipfb->ipfb_frag_pkts--; 8977 mutex_exit(&ipfb->ipfb_lock); 8978 /* Ditch the frag header. */ 8979 mp = mp1->b_cont; 8980 freeb(mp1); 8981 8982 /* 8983 * Make sure the packet is good by doing some sanity 8984 * check. If bad we can silentely drop the packet. 
8985 */ 8986 reass_done: 8987 if (hdr_length < sizeof (ip6_frag_t)) { 8988 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8989 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8990 freemsg(mp); 8991 return (NULL); 8992 } 8993 8994 /* 8995 * Remove the fragment header from the initial header by 8996 * splitting the mblk into the non-fragmentable header and 8997 * everthing after the fragment extension header. This has the 8998 * side effect of putting all the headers that need destination 8999 * processing into the b_cont block-- on return this fact is 9000 * used in order to avoid having to look at the extensions 9001 * already processed. 9002 * 9003 * Note that this code assumes that the unfragmentable portion 9004 * of the header is in the first mblk and increments 9005 * the read pointer past it. If this assumption is broken 9006 * this code fails badly. 9007 */ 9008 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9009 mblk_t *nmp; 9010 9011 if (!(nmp = dupb(mp))) { 9012 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 9013 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9014 freemsg(mp); 9015 return (NULL); 9016 } 9017 nmp->b_cont = mp->b_cont; 9018 mp->b_cont = nmp; 9019 nmp->b_rptr += hdr_length; 9020 } 9021 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9022 9023 ip6h = (ip6_t *)mp->b_rptr; 9024 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9025 9026 /* Restore original IP length in header. */ 9027 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9028 /* Record the ECN info. */ 9029 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9030 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9031 9032 /* Reassembly is successful; return checksum information if needed */ 9033 if (cksum_val != NULL) 9034 *cksum_val = sum_val; 9035 if (cksum_flags != NULL) 9036 *cksum_flags = sum_flags; 9037 9038 return (mp); 9039 } 9040 9041 /* 9042 * Walk through the options to see if there is a routing header. 
9043 * If present get the destination which is the last address of 9044 * the option. 9045 */ 9046 in6_addr_t 9047 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9048 { 9049 uint8_t nexthdr; 9050 uint8_t *whereptr; 9051 ip6_hbh_t *hbhhdr; 9052 ip6_dest_t *dsthdr; 9053 ip6_rthdr0_t *rthdr; 9054 ip6_frag_t *fraghdr; 9055 int ehdrlen; 9056 int left; 9057 in6_addr_t *ap, rv; 9058 9059 if (is_fragment != NULL) 9060 *is_fragment = B_FALSE; 9061 9062 rv = ip6h->ip6_dst; 9063 9064 nexthdr = ip6h->ip6_nxt; 9065 whereptr = (uint8_t *)&ip6h[1]; 9066 for (;;) { 9067 9068 ASSERT(nexthdr != IPPROTO_RAW); 9069 switch (nexthdr) { 9070 case IPPROTO_HOPOPTS: 9071 hbhhdr = (ip6_hbh_t *)whereptr; 9072 nexthdr = hbhhdr->ip6h_nxt; 9073 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9074 break; 9075 case IPPROTO_DSTOPTS: 9076 dsthdr = (ip6_dest_t *)whereptr; 9077 nexthdr = dsthdr->ip6d_nxt; 9078 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9079 break; 9080 case IPPROTO_ROUTING: 9081 rthdr = (ip6_rthdr0_t *)whereptr; 9082 nexthdr = rthdr->ip6r0_nxt; 9083 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9084 9085 left = rthdr->ip6r0_segleft; 9086 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9087 rv = *(ap + left - 1); 9088 /* 9089 * If the caller doesn't care whether the packet 9090 * is a fragment or not, we can stop here since 9091 * we have our destination. 9092 */ 9093 if (is_fragment == NULL) 9094 goto done; 9095 break; 9096 case IPPROTO_FRAGMENT: 9097 fraghdr = (ip6_frag_t *)whereptr; 9098 nexthdr = fraghdr->ip6f_nxt; 9099 ehdrlen = sizeof (ip6_frag_t); 9100 if (is_fragment != NULL) 9101 *is_fragment = B_TRUE; 9102 goto done; 9103 default : 9104 goto done; 9105 } 9106 whereptr += ehdrlen; 9107 } 9108 9109 done: 9110 return (rv); 9111 } 9112 9113 /* 9114 * ip_source_routed_v6: 9115 * This function is called by redirect code in ip_rput_data_v6 to 9116 * know whether this packet is source routed through this node i.e 9117 * whether this node (router) is part of the journey. 
This 9118 * function is called under two cases : 9119 * 9120 * case 1 : Routing header was processed by this node and 9121 * ip_process_rthdr replaced ip6_dst with the next hop 9122 * and we are forwarding the packet to the next hop. 9123 * 9124 * case 2 : Routing header was not processed by this node and we 9125 * are just forwarding the packet. 9126 * 9127 * For case (1) we don't want to send redirects. For case(2) we 9128 * want to send redirects. 9129 */ 9130 static boolean_t 9131 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9132 { 9133 uint8_t nexthdr; 9134 in6_addr_t *addrptr; 9135 ip6_rthdr0_t *rthdr; 9136 uint8_t numaddr; 9137 ip6_hbh_t *hbhhdr; 9138 uint_t ehdrlen; 9139 uint8_t *byteptr; 9140 9141 ip2dbg(("ip_source_routed_v6\n")); 9142 nexthdr = ip6h->ip6_nxt; 9143 ehdrlen = IPV6_HDR_LEN; 9144 9145 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9146 while (nexthdr == IPPROTO_HOPOPTS || 9147 nexthdr == IPPROTO_DSTOPTS) { 9148 byteptr = (uint8_t *)ip6h + ehdrlen; 9149 /* 9150 * Check if we have already processed 9151 * packets or we are just a forwarding 9152 * router which only pulled up msgs up 9153 * to IPV6HDR and one HBH ext header 9154 */ 9155 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9156 ip2dbg(("ip_source_routed_v6: Extension" 9157 " headers not processed\n")); 9158 return (B_FALSE); 9159 } 9160 hbhhdr = (ip6_hbh_t *)byteptr; 9161 nexthdr = hbhhdr->ip6h_nxt; 9162 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9163 } 9164 switch (nexthdr) { 9165 case IPPROTO_ROUTING: 9166 byteptr = (uint8_t *)ip6h + ehdrlen; 9167 /* 9168 * If for some reason, we haven't pulled up 9169 * the routing hdr data mblk, then we must 9170 * not have processed it at all. So for sure 9171 * we are not part of the source routed journey. 
9172 */ 9173 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9174 ip2dbg(("ip_source_routed_v6: Routing" 9175 " header not processed\n")); 9176 return (B_FALSE); 9177 } 9178 rthdr = (ip6_rthdr0_t *)byteptr; 9179 /* 9180 * Either we are an intermediate router or the 9181 * last hop before destination and we have 9182 * already processed the routing header. 9183 * If segment_left is greater than or equal to zero, 9184 * then we must be the (numaddr - segleft) entry 9185 * of the routing header. Although ip6r0_segleft 9186 * is a unit8_t variable, we still check for zero 9187 * or greater value, if in case the data type 9188 * is changed someday in future. 9189 */ 9190 if (rthdr->ip6r0_segleft > 0 || 9191 rthdr->ip6r0_segleft == 0) { 9192 ire_t *ire = NULL; 9193 9194 numaddr = rthdr->ip6r0_len / 2; 9195 addrptr = (in6_addr_t *)((char *)rthdr + 9196 sizeof (*rthdr)); 9197 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9198 if (addrptr != NULL) { 9199 ire = ire_ctable_lookup_v6(addrptr, NULL, 9200 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9201 MATCH_IRE_TYPE); 9202 if (ire != NULL) { 9203 ire_refrele(ire); 9204 return (B_TRUE); 9205 } 9206 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9207 } 9208 } 9209 /* FALLTHRU */ 9210 default: 9211 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9212 return (B_FALSE); 9213 } 9214 } 9215 9216 /* 9217 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9218 * Assumes that the following set of headers appear in the first 9219 * mblk: 9220 * ip6i_t (if present) CAN also appear as a separate mblk. 9221 * ip6_t 9222 * Any extension headers 9223 * TCP/UDP/SCTP header (if present) 9224 * The routine can handle an ICMPv6 header that is not in the first mblk. 9225 * 9226 * The order to determine the outgoing interface is as follows: 9227 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9228 * 2. If conn_nofailover_ill is set then use that ill. 9229 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9230 * 4. If q is an ill queue and (link local or multicast destination) then 9231 * use that ill. 9232 * 5. If IPV6_BOUND_IF has been set use that ill. 9233 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9234 * look for the best IRE match for the unspecified group to determine 9235 * the ill. 9236 * 7. For unicast: Just do an IRE lookup for the best match. 9237 * 9238 * arg2 is always a queue_t *. 9239 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9240 * the zoneid. 9241 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9242 */ 9243 void 9244 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9245 { 9246 conn_t *connp = NULL; 9247 queue_t *q = (queue_t *)arg2; 9248 ire_t *ire = NULL; 9249 ire_t *sctp_ire = NULL; 9250 ip6_t *ip6h; 9251 in6_addr_t *v6dstp; 9252 ill_t *ill = NULL; 9253 ipif_t *ipif; 9254 ip6i_t *ip6i; 9255 int cksum_request; /* -1 => normal. */ 9256 /* 1 => Skip TCP/UDP/SCTP checksum */ 9257 /* Otherwise contains insert offset for checksum */ 9258 int unspec_src; 9259 boolean_t do_outrequests; /* Increment OutRequests? */ 9260 mib2_ipv6IfStatsEntry_t *mibptr; 9261 int match_flags = MATCH_IRE_ILL_GROUP; 9262 boolean_t attach_if = B_FALSE; 9263 mblk_t *first_mp; 9264 boolean_t mctl_present; 9265 ipsec_out_t *io; 9266 boolean_t drop_if_delayed = B_FALSE; 9267 boolean_t multirt_need_resolve = B_FALSE; 9268 mblk_t *copy_mp = NULL; 9269 int err; 9270 int ip6i_flags = 0; 9271 zoneid_t zoneid; 9272 ill_t *saved_ill = NULL; 9273 boolean_t conn_lock_held; 9274 boolean_t need_decref = B_FALSE; 9275 9276 /* 9277 * Highest bit in version field is Reachability Confirmation bit 9278 * used by NUD in ip_xmit_v6(). 
9279 */ 9280 #ifdef _BIG_ENDIAN 9281 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9282 #else 9283 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9284 #endif 9285 9286 /* 9287 * M_CTL comes from 5 places 9288 * 9289 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9290 * both V4 and V6 datagrams. 9291 * 9292 * 2) AH/ESP sends down M_CTL after doing their job with both 9293 * V4 and V6 datagrams. 9294 * 9295 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9296 * attached. 9297 * 9298 * 4) Notifications from an external resolver (for XRESOLV ifs) 9299 * 9300 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9301 * IPsec hardware acceleration support. 9302 * 9303 * We need to handle (1)'s IPv6 case and (3) here. For the 9304 * IPv4 case in (1), and (2), IPSEC processing has already 9305 * started. The code in ip_wput() already knows how to handle 9306 * continuing IPSEC processing (for IPv4 and IPv6). All other 9307 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9308 * for handling. 9309 */ 9310 first_mp = mp; 9311 mctl_present = B_FALSE; 9312 io = NULL; 9313 9314 /* Multidata transmit? */ 9315 if (DB_TYPE(mp) == M_MULTIDATA) { 9316 /* 9317 * We should never get here, since all Multidata messages 9318 * originating from tcp should have been directed over to 9319 * tcp_multisend() in the first place. 9320 */ 9321 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9322 freemsg(mp); 9323 return; 9324 } else if (DB_TYPE(mp) == M_CTL) { 9325 uint32_t mctltype = 0; 9326 uint32_t mlen = MBLKL(first_mp); 9327 9328 mp = mp->b_cont; 9329 mctl_present = B_TRUE; 9330 io = (ipsec_out_t *)first_mp->b_rptr; 9331 9332 /* 9333 * Validate this M_CTL message. The only three types of 9334 * M_CTL messages we expect to see in this code path are 9335 * ipsec_out_t or ipsec_in_t structures (allocated as 9336 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9337 * The ipsec_out_type and ipsec_in_type overlap in the two 9338 * data structures, and they are either set to IPSEC_OUT 9339 * or IPSEC_IN depending on which data structure it is. 9340 * ipsec_ctl_t is an IPSEC_CTL. 9341 * 9342 * All other M_CTL messages are sent to ip_wput_nondata() 9343 * for handling. 9344 */ 9345 if (mlen >= sizeof (io->ipsec_out_type)) 9346 mctltype = io->ipsec_out_type; 9347 9348 if ((mlen == sizeof (ipsec_ctl_t)) && 9349 (mctltype == IPSEC_CTL)) { 9350 ip_output(arg, first_mp, arg2, caller); 9351 return; 9352 } 9353 9354 if ((mlen < sizeof (ipsec_info_t)) || 9355 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9356 mp == NULL) { 9357 ip_wput_nondata(NULL, q, first_mp, NULL); 9358 return; 9359 } 9360 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9361 if (q->q_next == NULL) { 9362 ip6h = (ip6_t *)mp->b_rptr; 9363 /* 9364 * For a freshly-generated TCP dgram that needs IPV6 9365 * processing, don't call ip_wput immediately. We can 9366 * tell this by the ipsec_out_proc_begin. In-progress 9367 * IPSEC_OUT messages have proc_begin set to TRUE, 9368 * and we want to send all IPSEC_IN messages to 9369 * ip_wput() for IPsec processing or finishing. 9370 */ 9371 if (mctltype == IPSEC_IN || 9372 IPVER(ip6h) != IPV6_VERSION || 9373 io->ipsec_out_proc_begin) { 9374 mibptr = &ip6_mib; 9375 goto notv6; 9376 } 9377 } 9378 } else if (DB_TYPE(mp) != M_DATA) { 9379 ip_wput_nondata(NULL, q, mp, NULL); 9380 return; 9381 } 9382 9383 ip6h = (ip6_t *)mp->b_rptr; 9384 9385 if (IPVER(ip6h) != IPV6_VERSION) { 9386 mibptr = &ip6_mib; 9387 goto notv6; 9388 } 9389 9390 if (q->q_next != NULL) { 9391 ill = (ill_t *)q->q_ptr; 9392 /* 9393 * We don't know if this ill will be used for IPv6 9394 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9395 * ipif_set_values() sets the ill_isv6 flag to true if 9396 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9397 * just drop the packet. 
9398 */ 9399 if (!ill->ill_isv6) { 9400 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9401 "ILLF_IPV6 was set\n")); 9402 freemsg(first_mp); 9403 return; 9404 } 9405 /* For uniformity do a refhold */ 9406 mutex_enter(&ill->ill_lock); 9407 if (!ILL_CAN_LOOKUP(ill)) { 9408 mutex_exit(&ill->ill_lock); 9409 freemsg(first_mp); 9410 return; 9411 } 9412 ill_refhold_locked(ill); 9413 mutex_exit(&ill->ill_lock); 9414 mibptr = ill->ill_ip6_mib; 9415 /* 9416 * ill_ip6_mib is allocated by ipif_set_values() when 9417 * ill_isv6 is set. Thus if ill_isv6 is true, 9418 * ill_ip6_mib had better not be NULL. 9419 */ 9420 ASSERT(mibptr != NULL); 9421 unspec_src = 0; 9422 BUMP_MIB(mibptr, ipv6OutRequests); 9423 do_outrequests = B_FALSE; 9424 zoneid = (zoneid_t)(uintptr_t)arg; 9425 } else { 9426 connp = (conn_t *)arg; 9427 ASSERT(connp != NULL); 9428 zoneid = connp->conn_zoneid; 9429 9430 /* is queue flow controlled? */ 9431 if ((q->q_first || connp->conn_draining) && 9432 (caller == IP_WPUT)) { 9433 /* 9434 * 1) TCP sends down M_CTL for detached connections. 9435 * 2) AH/ESP sends down M_CTL. 9436 * 9437 * We don't flow control either of the above. Only 9438 * UDP and others are flow controlled for which we 9439 * can't have a M_CTL. 9440 */ 9441 ASSERT(first_mp == mp); 9442 (void) putq(q, mp); 9443 return; 9444 } 9445 mibptr = &ip6_mib; 9446 unspec_src = connp->conn_unspec_src; 9447 do_outrequests = B_TRUE; 9448 if (mp->b_flag & MSGHASREF) { 9449 mp->b_flag &= ~MSGHASREF; 9450 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9451 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9452 need_decref = B_TRUE; 9453 } 9454 9455 /* 9456 * If there is a policy, try to attach an ipsec_out in 9457 * the front. At the end, first_mp either points to a 9458 * M_DATA message or IPSEC_OUT message linked to a 9459 * M_DATA message. We have to do it now as we might 9460 * lose the "conn" if we go through ip_newroute. 
9461 */ 9462 if (!mctl_present && 9463 (connp->conn_out_enforce_policy || 9464 connp->conn_latch != NULL)) { 9465 ASSERT(first_mp == mp); 9466 /* XXX Any better way to get the protocol fast ? */ 9467 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9468 connp->conn_ulp)) == NULL)) { 9469 if (need_decref) 9470 CONN_DEC_REF(connp); 9471 return; 9472 } else { 9473 ASSERT(mp->b_datap->db_type == M_CTL); 9474 first_mp = mp; 9475 mp = mp->b_cont; 9476 mctl_present = B_TRUE; 9477 io = (ipsec_out_t *)first_mp->b_rptr; 9478 } 9479 } 9480 } 9481 9482 /* check for alignment and full IPv6 header */ 9483 if (!OK_32PTR((uchar_t *)ip6h) || 9484 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9485 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9486 if (do_outrequests) 9487 BUMP_MIB(mibptr, ipv6OutRequests); 9488 BUMP_MIB(mibptr, ipv6OutDiscards); 9489 freemsg(first_mp); 9490 if (ill != NULL) 9491 ill_refrele(ill); 9492 if (need_decref) 9493 CONN_DEC_REF(connp); 9494 return; 9495 } 9496 v6dstp = &ip6h->ip6_dst; 9497 cksum_request = -1; 9498 ip6i = NULL; 9499 9500 /* 9501 * Once neighbor discovery has completed, ndp_process() will provide 9502 * locally generated packets for which processing can be reattempted. 9503 * In these cases, connp is NULL and the original zone is part of a 9504 * prepended ipsec_out_t. 9505 */ 9506 if (io != NULL) { 9507 /* 9508 * When coming from icmp_input_v6, the zoneid might not match 9509 * for the loopback case, because inside icmp_input_v6 the 9510 * queue_t is a conn queue from the sending side. 9511 */ 9512 zoneid = io->ipsec_out_zoneid; 9513 ASSERT(zoneid != ALL_ZONES); 9514 } 9515 9516 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9517 /* 9518 * This is an ip6i_t header followed by an ip6_hdr. 9519 * Check which fields are set. 9520 * 9521 * When the packet comes from a transport we should have 9522 * all needed headers in the first mblk. 
However, when 9523 * going through ip_newroute*_v6 the ip6i might be in 9524 * a separate mblk when we return here. In that case 9525 * we pullup everything to ensure that extension and transport 9526 * headers "stay" in the first mblk. 9527 */ 9528 ip6i = (ip6i_t *)ip6h; 9529 ip6i_flags = ip6i->ip6i_flags; 9530 9531 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9532 ((mp->b_wptr - (uchar_t *)ip6i) >= 9533 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9534 9535 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9536 if (!pullupmsg(mp, -1)) { 9537 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9538 if (do_outrequests) 9539 BUMP_MIB(mibptr, ipv6OutRequests); 9540 BUMP_MIB(mibptr, ipv6OutDiscards); 9541 freemsg(first_mp); 9542 if (ill != NULL) 9543 ill_refrele(ill); 9544 if (need_decref) 9545 CONN_DEC_REF(connp); 9546 return; 9547 } 9548 ip6h = (ip6_t *)mp->b_rptr; 9549 v6dstp = &ip6h->ip6_dst; 9550 ip6i = (ip6i_t *)ip6h; 9551 } 9552 ip6h = (ip6_t *)&ip6i[1]; 9553 9554 /* 9555 * Advance rptr past the ip6i_t to get ready for 9556 * transmitting the packet. However, if the packet gets 9557 * passed to ip_newroute*_v6 then rptr is moved back so 9558 * that the ip6i_t header can be inspected when the 9559 * packet comes back here after passing through 9560 * ire_add_then_send. 9561 */ 9562 mp->b_rptr = (uchar_t *)ip6h; 9563 9564 /* 9565 * IP6I_ATTACH_IF is set in this function when we had a 9566 * conn and it was either bound to the IPFF_NOFAILOVER address 9567 * or IPV6_BOUND_PIF was set. These options override other 9568 * options that set the ifindex. We come here with 9569 * IP6I_ATTACH_IF set when we can't find the ire and 9570 * ip_newroute_v6 is feeding the packet for second time. 
9571 */ 9572 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9573 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9574 ASSERT(ip6i->ip6i_ifindex != 0); 9575 if (ill != NULL) 9576 ill_refrele(ill); 9577 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9578 NULL, NULL, NULL, NULL); 9579 if (ill == NULL) { 9580 if (do_outrequests) 9581 BUMP_MIB(mibptr, ipv6OutRequests); 9582 BUMP_MIB(mibptr, ipv6OutDiscards); 9583 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9584 ip6i->ip6i_ifindex)); 9585 if (need_decref) 9586 CONN_DEC_REF(connp); 9587 freemsg(first_mp); 9588 return; 9589 } 9590 mibptr = ill->ill_ip6_mib; 9591 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9592 /* 9593 * Preserve the index so that when we return 9594 * from IPSEC processing, we know where to 9595 * send the packet. 9596 */ 9597 if (mctl_present) { 9598 ASSERT(io != NULL); 9599 io->ipsec_out_ill_index = 9600 ip6i->ip6i_ifindex; 9601 } 9602 } 9603 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9604 /* 9605 * This is a multipathing probe packet that has 9606 * been delayed in ND resolution. 
Drop the 9607 * packet for the reasons mentioned in 9608 * nce_queue_mp() 9609 */ 9610 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9611 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9612 freemsg(first_mp); 9613 ill_refrele(ill); 9614 if (need_decref) 9615 CONN_DEC_REF(connp); 9616 return; 9617 } 9618 } 9619 } 9620 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9621 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9622 9623 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9624 if (secpolicy_net_rawaccess(cr) != 0) { 9625 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9626 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9627 NULL, zoneid, NULL, 9628 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9629 if (ire == NULL) { 9630 if (do_outrequests) 9631 BUMP_MIB(mibptr, 9632 ipv6OutRequests); 9633 BUMP_MIB(mibptr, ipv6OutDiscards); 9634 ip1dbg(("ip_wput_v6: bad source " 9635 "addr\n")); 9636 freemsg(first_mp); 9637 if (ill != NULL) 9638 ill_refrele(ill); 9639 if (need_decref) 9640 CONN_DEC_REF(connp); 9641 return; 9642 } 9643 ire_refrele(ire); 9644 } 9645 /* No need to verify again when using ip_newroute */ 9646 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9647 } 9648 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9649 /* 9650 * Make sure they match since ip_newroute*_v6 etc might 9651 * (unknown to them) inspect ip6i_nexthop when 9652 * they think they access ip6_dst. 9653 */ 9654 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9655 } 9656 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9657 cksum_request = 1; 9658 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9659 cksum_request = ip6i->ip6i_checksum_off; 9660 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9661 unspec_src = 1; 9662 9663 if (do_outrequests && ill != NULL) { 9664 BUMP_MIB(mibptr, ipv6OutRequests); 9665 do_outrequests = B_FALSE; 9666 } 9667 /* 9668 * Store ip6i_t info that we need after we come back 9669 * from IPSEC processing. 
9670 */ 9671 if (mctl_present) { 9672 ASSERT(io != NULL); 9673 io->ipsec_out_unspec_src = unspec_src; 9674 } 9675 } 9676 if (connp != NULL && connp->conn_dontroute) 9677 ip6h->ip6_hops = 1; 9678 9679 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9680 goto ipv6multicast; 9681 9682 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9683 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9684 ill_t *conn_outgoing_pill; 9685 9686 conn_outgoing_pill = conn_get_held_ill(connp, 9687 &connp->conn_outgoing_pill, &err); 9688 if (err == ILL_LOOKUP_FAILED) { 9689 if (ill != NULL) 9690 ill_refrele(ill); 9691 if (need_decref) 9692 CONN_DEC_REF(connp); 9693 freemsg(first_mp); 9694 return; 9695 } 9696 if (conn_outgoing_pill != NULL) { 9697 if (ill != NULL) 9698 ill_refrele(ill); 9699 ill = conn_outgoing_pill; 9700 attach_if = B_TRUE; 9701 match_flags = MATCH_IRE_ILL; 9702 mibptr = ill->ill_ip6_mib; 9703 9704 /* 9705 * Check if we need an ire that will not be 9706 * looked up by anybody else i.e. HIDDEN. 9707 */ 9708 if (ill_is_probeonly(ill)) 9709 match_flags |= MATCH_IRE_MARK_HIDDEN; 9710 goto send_from_ill; 9711 } 9712 } 9713 9714 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9715 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9716 ill_t *conn_nofailover_ill; 9717 9718 conn_nofailover_ill = conn_get_held_ill(connp, 9719 &connp->conn_nofailover_ill, &err); 9720 if (err == ILL_LOOKUP_FAILED) { 9721 if (ill != NULL) 9722 ill_refrele(ill); 9723 if (need_decref) 9724 CONN_DEC_REF(connp); 9725 freemsg(first_mp); 9726 return; 9727 } 9728 if (conn_nofailover_ill != NULL) { 9729 if (ill != NULL) 9730 ill_refrele(ill); 9731 ill = conn_nofailover_ill; 9732 attach_if = B_TRUE; 9733 /* 9734 * Assumes that ipc_nofailover_ill is used only for 9735 * multipathing probe packets. These packets are better 9736 * dropped, if they are delayed in ND resolution, for 9737 * the reasons described in nce_queue_mp(). 
9738 * IP6I_DROP_IFDELAYED will be set later on in this 9739 * function for this packet. 9740 */ 9741 drop_if_delayed = B_TRUE; 9742 match_flags = MATCH_IRE_ILL; 9743 mibptr = ill->ill_ip6_mib; 9744 9745 /* 9746 * Check if we need an ire that will not be 9747 * looked up by anybody else i.e. HIDDEN. 9748 */ 9749 if (ill_is_probeonly(ill)) 9750 match_flags |= MATCH_IRE_MARK_HIDDEN; 9751 goto send_from_ill; 9752 } 9753 } 9754 9755 /* 9756 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9757 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9758 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9759 */ 9760 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9761 ASSERT(ip6i->ip6i_ifindex != 0); 9762 attach_if = B_TRUE; 9763 ASSERT(ill != NULL); 9764 match_flags = MATCH_IRE_ILL; 9765 9766 /* 9767 * Check if we need an ire that will not be 9768 * looked up by anybody else i.e. HIDDEN. 9769 */ 9770 if (ill_is_probeonly(ill)) 9771 match_flags |= MATCH_IRE_MARK_HIDDEN; 9772 goto send_from_ill; 9773 } 9774 9775 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9776 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9777 ASSERT(ill != NULL); 9778 goto send_from_ill; 9779 } 9780 9781 /* 9782 * 4. If q is an ill queue and (link local or multicast destination) 9783 * then use that ill. 9784 */ 9785 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9786 goto send_from_ill; 9787 } 9788 9789 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9790 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9791 ill_t *conn_outgoing_ill; 9792 9793 conn_outgoing_ill = conn_get_held_ill(connp, 9794 &connp->conn_outgoing_ill, &err); 9795 if (err == ILL_LOOKUP_FAILED) { 9796 if (ill != NULL) 9797 ill_refrele(ill); 9798 if (need_decref) 9799 CONN_DEC_REF(connp); 9800 freemsg(first_mp); 9801 return; 9802 } 9803 if (ill != NULL) 9804 ill_refrele(ill); 9805 ill = conn_outgoing_ill; 9806 mibptr = ill->ill_ip6_mib; 9807 goto send_from_ill; 9808 } 9809 9810 /* 9811 * 6. For unicast: Just do an IRE lookup for the best match. 9812 * If we get here for a link-local address it is rather random 9813 * what interface we pick on a multihomed host. 9814 * *If* there is an IRE_CACHE (and the link-local address 9815 * isn't duplicated on multi links) this will find the IRE_CACHE. 9816 * Otherwise it will use one of the matching IRE_INTERFACE routes 9817 * for the link-local prefix. Hence, applications 9818 * *should* be encouraged to specify an outgoing interface when sending 9819 * to a link local address. 9820 */ 9821 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9822 !connp->conn_fully_bound)) { 9823 /* 9824 * We cache IRE_CACHEs to avoid lookups. We don't do 9825 * this for the tcp global queue and listen end point 9826 * as it does not really have a real destination to 9827 * talk to. 9828 */ 9829 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9830 } else { 9831 /* 9832 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9833 * grab a lock here to check for CONDEMNED as it is okay 9834 * to send a packet or two with the IRE_CACHE that is going 9835 * away. 9836 */ 9837 mutex_enter(&connp->conn_lock); 9838 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9839 if (ire != NULL && 9840 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9841 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9842 9843 IRE_REFHOLD(ire); 9844 mutex_exit(&connp->conn_lock); 9845 9846 } else { 9847 boolean_t cached = B_FALSE; 9848 9849 connp->conn_ire_cache = NULL; 9850 mutex_exit(&connp->conn_lock); 9851 /* Release the old ire */ 9852 if (ire != NULL && sctp_ire == NULL) 9853 IRE_REFRELE_NOTR(ire); 9854 9855 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9856 MBLK_GETLABEL(mp)); 9857 if (ire != NULL) { 9858 IRE_REFHOLD_NOTR(ire); 9859 9860 mutex_enter(&connp->conn_lock); 9861 if (!(connp->conn_state_flags & CONN_CLOSING) && 9862 (connp->conn_ire_cache == NULL)) { 9863 rw_enter(&ire->ire_bucket->irb_lock, 9864 RW_READER); 9865 if (!(ire->ire_marks & 9866 IRE_MARK_CONDEMNED)) { 9867 connp->conn_ire_cache = ire; 9868 cached = B_TRUE; 9869 } 9870 rw_exit(&ire->ire_bucket->irb_lock); 9871 } 9872 mutex_exit(&connp->conn_lock); 9873 9874 /* 9875 * We can continue to use the ire but since it 9876 * was not cached, we should drop the extra 9877 * reference. 9878 */ 9879 if (!cached) 9880 IRE_REFRELE_NOTR(ire); 9881 } 9882 } 9883 } 9884 9885 if (ire != NULL) { 9886 if (do_outrequests) { 9887 /* Handle IRE_LOCAL's that might appear here */ 9888 if (ire->ire_type == IRE_CACHE) { 9889 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9890 ill_ip6_mib; 9891 } else { 9892 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9893 } 9894 BUMP_MIB(mibptr, ipv6OutRequests); 9895 } 9896 ASSERT(!attach_if); 9897 9898 /* 9899 * Check if the ire has the RTF_MULTIRT flag, inherited 9900 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9901 */ 9902 if (ire->ire_flags & RTF_MULTIRT) { 9903 /* 9904 * Force hop limit of multirouted packets if required. 9905 * The hop limit of such packets is bounded by the 9906 * ip_multirt_ttl ndd variable. 9907 * NDP packets must have a hop limit of 255; don't 9908 * change the hop limit in that case. 
9909 */ 9910 if ((ip_multirt_ttl > 0) && 9911 (ip6h->ip6_hops > ip_multirt_ttl) && 9912 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9913 if (ip_debug > 3) { 9914 ip2dbg(("ip_wput_v6: forcing multirt " 9915 "hop limit to %d (was %d) ", 9916 ip_multirt_ttl, ip6h->ip6_hops)); 9917 pr_addr_dbg("v6dst %s\n", AF_INET6, 9918 &ire->ire_addr_v6); 9919 } 9920 ip6h->ip6_hops = ip_multirt_ttl; 9921 } 9922 9923 /* 9924 * We look at this point if there are pending 9925 * unresolved routes. ire_multirt_need_resolve_v6() 9926 * checks in O(n) that all IRE_OFFSUBNET ire 9927 * entries for the packet's destination and 9928 * flagged RTF_MULTIRT are currently resolved. 9929 * If some remain unresolved, we do a copy 9930 * of the current message. It will be used 9931 * to initiate additional route resolutions. 9932 */ 9933 multirt_need_resolve = 9934 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9935 MBLK_GETLABEL(first_mp)); 9936 ip2dbg(("ip_wput_v6: ire %p, " 9937 "multirt_need_resolve %d, first_mp %p\n", 9938 (void *)ire, multirt_need_resolve, 9939 (void *)first_mp)); 9940 if (multirt_need_resolve) { 9941 copy_mp = copymsg(first_mp); 9942 if (copy_mp != NULL) { 9943 MULTIRT_DEBUG_TAG(copy_mp); 9944 } 9945 } 9946 } 9947 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9948 connp, caller, 0, ip6i_flags, zoneid); 9949 if (need_decref) { 9950 CONN_DEC_REF(connp); 9951 connp = NULL; 9952 } 9953 IRE_REFRELE(ire); 9954 9955 /* 9956 * Try to resolve another multiroute if 9957 * ire_multirt_need_resolve_v6() deemed it necessary. 9958 * copy_mp will be consumed (sent or freed) by 9959 * ip_newroute_v6(). 9960 */ 9961 if (copy_mp != NULL) { 9962 if (mctl_present) { 9963 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9964 } else { 9965 ip6h = (ip6_t *)copy_mp->b_rptr; 9966 } 9967 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9968 &ip6h->ip6_src, NULL, zoneid); 9969 } 9970 if (ill != NULL) 9971 ill_refrele(ill); 9972 return; 9973 } 9974 9975 /* 9976 * No full IRE for this destination. 
Send it to 9977 * ip_newroute_v6 to see if anything else matches. 9978 * Mark this packet as having originated on this 9979 * machine. 9980 * Update rptr if there was an ip6i_t header. 9981 */ 9982 mp->b_prev = NULL; 9983 mp->b_next = NULL; 9984 if (ip6i != NULL) 9985 mp->b_rptr -= sizeof (ip6i_t); 9986 9987 if (unspec_src) { 9988 if (ip6i == NULL) { 9989 /* 9990 * Add ip6i_t header to carry unspec_src 9991 * until the packet comes back in ip_wput_v6. 9992 */ 9993 mp = ip_add_info_v6(mp, NULL, v6dstp); 9994 if (mp == NULL) { 9995 if (do_outrequests) 9996 BUMP_MIB(mibptr, ipv6OutRequests); 9997 BUMP_MIB(mibptr, ipv6OutDiscards); 9998 if (mctl_present) 9999 freeb(first_mp); 10000 if (ill != NULL) 10001 ill_refrele(ill); 10002 if (need_decref) 10003 CONN_DEC_REF(connp); 10004 return; 10005 } 10006 ip6i = (ip6i_t *)mp->b_rptr; 10007 10008 if (mctl_present) { 10009 ASSERT(first_mp != mp); 10010 first_mp->b_cont = mp; 10011 } else { 10012 first_mp = mp; 10013 } 10014 10015 if ((mp->b_wptr - (uchar_t *)ip6i) == 10016 sizeof (ip6i_t)) { 10017 /* 10018 * ndp_resolver called from ip_newroute_v6 10019 * expects pulled up message. 
10020 */ 10021 if (!pullupmsg(mp, -1)) { 10022 ip1dbg(("ip_wput_v6: pullupmsg" 10023 " failed\n")); 10024 if (do_outrequests) { 10025 BUMP_MIB(mibptr, 10026 ipv6OutRequests); 10027 } 10028 BUMP_MIB(mibptr, ipv6OutDiscards); 10029 freemsg(first_mp); 10030 if (ill != NULL) 10031 ill_refrele(ill); 10032 if (need_decref) 10033 CONN_DEC_REF(connp); 10034 return; 10035 } 10036 ip6i = (ip6i_t *)mp->b_rptr; 10037 } 10038 ip6h = (ip6_t *)&ip6i[1]; 10039 v6dstp = &ip6h->ip6_dst; 10040 } 10041 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10042 if (mctl_present) { 10043 ASSERT(io != NULL); 10044 io->ipsec_out_unspec_src = unspec_src; 10045 } 10046 } 10047 if (do_outrequests) 10048 BUMP_MIB(mibptr, ipv6OutRequests); 10049 if (need_decref) 10050 CONN_DEC_REF(connp); 10051 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 10052 if (ill != NULL) 10053 ill_refrele(ill); 10054 return; 10055 10056 10057 /* 10058 * Handle multicast packets with or without an conn. 10059 * Assumes that the transports set ip6_hops taking 10060 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10061 * into account. 10062 */ 10063 ipv6multicast: 10064 ip2dbg(("ip_wput_v6: multicast\n")); 10065 10066 /* 10067 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10068 * 2. If conn_nofailover_ill is set then use that ill. 10069 * 10070 * Hold the conn_lock till we refhold the ill of interest that is 10071 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10072 * while holding any locks, postpone the refrele until after the 10073 * conn_lock is dropped. 
10074 */ 10075 if (connp != NULL) { 10076 mutex_enter(&connp->conn_lock); 10077 conn_lock_held = B_TRUE; 10078 } else { 10079 conn_lock_held = B_FALSE; 10080 } 10081 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10082 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10083 if (err == ILL_LOOKUP_FAILED) { 10084 ip1dbg(("ip_output_v6: multicast" 10085 " conn_outgoing_pill no ipif\n")); 10086 multicast_discard: 10087 ASSERT(saved_ill == NULL); 10088 if (conn_lock_held) 10089 mutex_exit(&connp->conn_lock); 10090 if (ill != NULL) 10091 ill_refrele(ill); 10092 freemsg(first_mp); 10093 if (do_outrequests) 10094 BUMP_MIB(mibptr, ipv6OutDiscards); 10095 if (need_decref) 10096 CONN_DEC_REF(connp); 10097 return; 10098 } 10099 saved_ill = ill; 10100 ill = connp->conn_outgoing_pill; 10101 attach_if = B_TRUE; 10102 match_flags = MATCH_IRE_ILL; 10103 mibptr = ill->ill_ip6_mib; 10104 10105 /* 10106 * Check if we need an ire that will not be 10107 * looked up by anybody else i.e. HIDDEN. 10108 */ 10109 if (ill_is_probeonly(ill)) 10110 match_flags |= MATCH_IRE_MARK_HIDDEN; 10111 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10112 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10113 if (err == ILL_LOOKUP_FAILED) { 10114 ip1dbg(("ip_output_v6: multicast" 10115 " conn_nofailover_ill no ipif\n")); 10116 goto multicast_discard; 10117 } 10118 saved_ill = ill; 10119 ill = connp->conn_nofailover_ill; 10120 attach_if = B_TRUE; 10121 match_flags = MATCH_IRE_ILL; 10122 10123 /* 10124 * Check if we need an ire that will not be 10125 * looked up by anybody else i.e. HIDDEN. 10126 */ 10127 if (ill_is_probeonly(ill)) 10128 match_flags |= MATCH_IRE_MARK_HIDDEN; 10129 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10130 /* 10131 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10132 * we should have an ip6i_t with IP6I_ATTACH_IF if 10133 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10134 * used on this endpoint. 10135 */ 10136 ASSERT(ip6i->ip6i_ifindex != 0); 10137 attach_if = B_TRUE; 10138 ASSERT(ill != NULL); 10139 match_flags = MATCH_IRE_ILL; 10140 10141 /* 10142 * Check if we need an ire that will not be 10143 * looked up by anybody else i.e. HIDDEN. 10144 */ 10145 if (ill_is_probeonly(ill)) 10146 match_flags |= MATCH_IRE_MARK_HIDDEN; 10147 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10148 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10149 10150 ASSERT(ill != NULL); 10151 } else if (ill != NULL) { 10152 /* 10153 * 4. If q is an ill queue and (link local or multicast 10154 * destination) then use that ill. 10155 * We don't need the ipif initialization here. 10156 * This useless assert below is just to prevent lint from 10157 * reporting a null body if statement. 10158 */ 10159 ASSERT(ill != NULL); 10160 } else if (connp != NULL) { 10161 /* 10162 * 5. If IPV6_BOUND_IF has been set use that ill. 10163 * 10164 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10165 * Otherwise look for the best IRE match for the unspecified 10166 * group to determine the ill. 10167 * 10168 * conn_multicast_ill is used for only IPv6 packets. 10169 * conn_multicast_ipif is used for only IPv4 packets. 10170 * Thus a PF_INET6 socket send both IPv4 and IPv6 10171 * multicast packets using different IP*_MULTICAST_IF 10172 * interfaces. 
10173 */ 10174 if (connp->conn_outgoing_ill != NULL) { 10175 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10176 if (err == ILL_LOOKUP_FAILED) { 10177 ip1dbg(("ip_output_v6: multicast" 10178 " conn_outgoing_ill no ipif\n")); 10179 goto multicast_discard; 10180 } 10181 ill = connp->conn_outgoing_ill; 10182 } else if (connp->conn_multicast_ill != NULL) { 10183 err = ill_check_and_refhold(connp->conn_multicast_ill); 10184 if (err == ILL_LOOKUP_FAILED) { 10185 ip1dbg(("ip_output_v6: multicast" 10186 " conn_multicast_ill no ipif\n")); 10187 goto multicast_discard; 10188 } 10189 ill = connp->conn_multicast_ill; 10190 } else { 10191 mutex_exit(&connp->conn_lock); 10192 conn_lock_held = B_FALSE; 10193 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10194 if (ipif == NULL) { 10195 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10196 goto multicast_discard; 10197 } 10198 /* 10199 * We have a ref to this ipif, so we can safely 10200 * access ipif_ill. 10201 */ 10202 ill = ipif->ipif_ill; 10203 mutex_enter(&ill->ill_lock); 10204 if (!ILL_CAN_LOOKUP(ill)) { 10205 mutex_exit(&ill->ill_lock); 10206 ipif_refrele(ipif); 10207 ill = NULL; 10208 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10209 goto multicast_discard; 10210 } 10211 ill_refhold_locked(ill); 10212 mutex_exit(&ill->ill_lock); 10213 ipif_refrele(ipif); 10214 /* 10215 * Save binding until IPV6_MULTICAST_IF 10216 * changes it 10217 */ 10218 mutex_enter(&connp->conn_lock); 10219 connp->conn_multicast_ill = ill; 10220 connp->conn_orig_multicast_ifindex = 10221 ill->ill_phyint->phyint_ifindex; 10222 mutex_exit(&connp->conn_lock); 10223 } 10224 } 10225 if (conn_lock_held) 10226 mutex_exit(&connp->conn_lock); 10227 10228 if (saved_ill != NULL) 10229 ill_refrele(saved_ill); 10230 10231 ASSERT(ill != NULL); 10232 /* 10233 * For multicast loopback interfaces replace the multicast address 10234 * with a unicast address for the ire lookup. 
10235 */ 10236 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10237 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10238 10239 mibptr = ill->ill_ip6_mib; 10240 if (do_outrequests) { 10241 BUMP_MIB(mibptr, ipv6OutRequests); 10242 do_outrequests = B_FALSE; 10243 } 10244 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10245 10246 /* 10247 * As we may lose the conn by the time we reach ip_wput_ire_v6 10248 * we copy conn_multicast_loop and conn_dontroute on to an 10249 * ipsec_out. In case if this datagram goes out secure, 10250 * we need the ill_index also. Copy that also into the 10251 * ipsec_out. 10252 */ 10253 if (mctl_present) { 10254 io = (ipsec_out_t *)first_mp->b_rptr; 10255 ASSERT(first_mp->b_datap->db_type == M_CTL); 10256 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10257 } else { 10258 ASSERT(mp == first_mp); 10259 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10260 BUMP_MIB(mibptr, ipv6OutDiscards); 10261 freemsg(mp); 10262 if (ill != NULL) 10263 ill_refrele(ill); 10264 if (need_decref) 10265 CONN_DEC_REF(connp); 10266 return; 10267 } 10268 io = (ipsec_out_t *)first_mp->b_rptr; 10269 /* This is not a secure packet */ 10270 io->ipsec_out_secure = B_FALSE; 10271 io->ipsec_out_use_global_policy = B_TRUE; 10272 io->ipsec_out_zoneid = 10273 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10274 first_mp->b_cont = mp; 10275 mctl_present = B_TRUE; 10276 } 10277 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10278 io->ipsec_out_unspec_src = unspec_src; 10279 if (connp != NULL) 10280 io->ipsec_out_dontroute = connp->conn_dontroute; 10281 10282 send_from_ill: 10283 ASSERT(ill != NULL); 10284 ASSERT(mibptr == ill->ill_ip6_mib); 10285 if (do_outrequests) { 10286 BUMP_MIB(mibptr, ipv6OutRequests); 10287 do_outrequests = B_FALSE; 10288 } 10289 10290 if (io != NULL) 10291 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10292 10293 /* 10294 * When a specific ill is specified (using IPV6_PKTINFO, 10295 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10296 * on routing entries (ftable and ctable) that have a matching 10297 * ire->ire_ipif->ipif_ill. Thus this can only be used 10298 * for destinations that are on-link for the specific ill 10299 * and that can appear on multiple links. Thus it is useful 10300 * for multicast destinations, link-local destinations, and 10301 * at some point perhaps for site-local destinations (if the 10302 * node sits at a site boundary). 10303 * We create the cache entries in the regular ctable since 10304 * it can not "confuse" things for other destinations. 10305 * table. 10306 * 10307 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10308 * It is used only when ire_cache_lookup is used above. 10309 */ 10310 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10311 zoneid, MBLK_GETLABEL(mp), match_flags); 10312 if (ire != NULL) { 10313 /* 10314 * Check if the ire has the RTF_MULTIRT flag, inherited 10315 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10316 */ 10317 if (ire->ire_flags & RTF_MULTIRT) { 10318 /* 10319 * Force hop limit of multirouted packets if required. 10320 * The hop limit of such packets is bounded by the 10321 * ip_multirt_ttl ndd variable. 
10322 * NDP packets must have a hop limit of 255; don't 10323 * change the hop limit in that case. 10324 */ 10325 if ((ip_multirt_ttl > 0) && 10326 (ip6h->ip6_hops > ip_multirt_ttl) && 10327 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10328 if (ip_debug > 3) { 10329 ip2dbg(("ip_wput_v6: forcing multirt " 10330 "hop limit to %d (was %d) ", 10331 ip_multirt_ttl, ip6h->ip6_hops)); 10332 pr_addr_dbg("v6dst %s\n", AF_INET6, 10333 &ire->ire_addr_v6); 10334 } 10335 ip6h->ip6_hops = ip_multirt_ttl; 10336 } 10337 10338 /* 10339 * We look at this point if there are pending 10340 * unresolved routes. ire_multirt_need_resolve_v6() 10341 * checks in O(n) that all IRE_OFFSUBNET ire 10342 * entries for the packet's destination and 10343 * flagged RTF_MULTIRT are currently resolved. 10344 * If some remain unresolved, we make a copy 10345 * of the current message. It will be used 10346 * to initiate additional route resolutions. 10347 */ 10348 multirt_need_resolve = 10349 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10350 MBLK_GETLABEL(first_mp)); 10351 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10352 "multirt_need_resolve %d, first_mp %p\n", 10353 (void *)ire, multirt_need_resolve, 10354 (void *)first_mp)); 10355 if (multirt_need_resolve) { 10356 copy_mp = copymsg(first_mp); 10357 if (copy_mp != NULL) { 10358 MULTIRT_DEBUG_TAG(copy_mp); 10359 } 10360 } 10361 } 10362 10363 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10364 ill->ill_name, (void *)ire, 10365 ill->ill_phyint->phyint_ifindex)); 10366 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10367 connp, caller, 10368 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10369 ip6i_flags, zoneid); 10370 ire_refrele(ire); 10371 if (need_decref) { 10372 CONN_DEC_REF(connp); 10373 connp = NULL; 10374 } 10375 10376 /* 10377 * Try to resolve another multiroute if 10378 * ire_multirt_need_resolve_v6() deemed it necessary. 10379 * copy_mp will be consumed (sent or freed) by 10380 * ip_newroute_[ipif_]v6(). 
10381 */ 10382 if (copy_mp != NULL) { 10383 if (mctl_present) { 10384 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10385 } else { 10386 ip6h = (ip6_t *)copy_mp->b_rptr; 10387 } 10388 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10389 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10390 zoneid); 10391 if (ipif == NULL) { 10392 ip1dbg(("ip_wput_v6: No ipif for " 10393 "multicast\n")); 10394 MULTIRT_DEBUG_UNTAG(copy_mp); 10395 freemsg(copy_mp); 10396 return; 10397 } 10398 ip_newroute_ipif_v6(q, copy_mp, ipif, 10399 ip6h->ip6_dst, unspec_src, zoneid); 10400 ipif_refrele(ipif); 10401 } else { 10402 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10403 &ip6h->ip6_src, ill, zoneid); 10404 } 10405 } 10406 ill_refrele(ill); 10407 return; 10408 } 10409 if (need_decref) { 10410 CONN_DEC_REF(connp); 10411 connp = NULL; 10412 } 10413 10414 /* Update rptr if there was an ip6i_t header. */ 10415 if (ip6i != NULL) 10416 mp->b_rptr -= sizeof (ip6i_t); 10417 if (unspec_src || attach_if) { 10418 if (ip6i == NULL) { 10419 /* 10420 * Add ip6i_t header to carry unspec_src 10421 * or attach_if until the packet comes back in 10422 * ip_wput_v6. 10423 */ 10424 if (mctl_present) { 10425 first_mp->b_cont = 10426 ip_add_info_v6(mp, NULL, v6dstp); 10427 mp = first_mp->b_cont; 10428 if (mp == NULL) 10429 freeb(first_mp); 10430 } else { 10431 first_mp = mp = ip_add_info_v6(mp, NULL, 10432 v6dstp); 10433 } 10434 if (mp == NULL) { 10435 BUMP_MIB(mibptr, ipv6OutDiscards); 10436 ill_refrele(ill); 10437 return; 10438 } 10439 ip6i = (ip6i_t *)mp->b_rptr; 10440 if ((mp->b_wptr - (uchar_t *)ip6i) == 10441 sizeof (ip6i_t)) { 10442 /* 10443 * ndp_resolver called from ip_newroute_v6 10444 * expects a pulled up message. 
10445 */ 10446 if (!pullupmsg(mp, -1)) { 10447 ip1dbg(("ip_wput_v6: pullupmsg" 10448 " failed\n")); 10449 BUMP_MIB(mibptr, ipv6OutDiscards); 10450 freemsg(first_mp); 10451 return; 10452 } 10453 ip6i = (ip6i_t *)mp->b_rptr; 10454 } 10455 ip6h = (ip6_t *)&ip6i[1]; 10456 v6dstp = &ip6h->ip6_dst; 10457 } 10458 if (unspec_src) 10459 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10460 if (attach_if) { 10461 /* 10462 * Bind to nofailover/BOUND_PIF overrides ifindex. 10463 */ 10464 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10465 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10466 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10467 if (drop_if_delayed) { 10468 /* This is a multipathing probe packet */ 10469 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10470 } 10471 } 10472 if (mctl_present) { 10473 ASSERT(io != NULL); 10474 io->ipsec_out_unspec_src = unspec_src; 10475 } 10476 } 10477 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10478 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10479 unspec_src, zoneid); 10480 } else { 10481 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10482 zoneid); 10483 } 10484 ill_refrele(ill); 10485 return; 10486 10487 notv6: 10488 /* 10489 * XXX implement a IPv4 and IPv6 packet counter per conn and 10490 * switch when ratio exceeds e.g. 10:1 10491 */ 10492 if (q->q_next == NULL) { 10493 connp = Q_TO_CONN(q); 10494 10495 if (IPCL_IS_TCP(connp)) { 10496 /* change conn_send for the tcp_v4_connections */ 10497 connp->conn_send = ip_output; 10498 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10499 /* The 'q' is the default SCTP queue */ 10500 connp = (conn_t *)arg; 10501 } else { 10502 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10503 } 10504 } 10505 BUMP_MIB(mibptr, ipv6OutIPv4); 10506 (void) ip_output(arg, first_mp, arg2, caller); 10507 if (ill != NULL) 10508 ill_refrele(ill); 10509 } 10510 10511 /* 10512 * If this is a conn_t queue, then we pass in the conn. This includes the 10513 * zoneid. 
10514 * Otherwise, this is a message for an ill_t queue, 10515 * in which case we use the global zoneid since those are all part of 10516 * the global zone. 10517 */ 10518 static void 10519 ip_wput_v6(queue_t *q, mblk_t *mp) 10520 { 10521 if (CONN_Q(q)) 10522 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10523 else 10524 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10525 } 10526 10527 static void 10528 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10529 { 10530 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10531 io->ipsec_out_attach_if = B_TRUE; 10532 io->ipsec_out_ill_index = attach_index; 10533 } 10534 10535 /* 10536 * NULL send-to queue - packet is to be delivered locally. 10537 */ 10538 void 10539 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10540 ire_t *ire, int fanout_flags) 10541 { 10542 uint32_t ports; 10543 mblk_t *mp = first_mp, *first_mp1; 10544 boolean_t mctl_present; 10545 uint8_t nexthdr; 10546 uint16_t hdr_length; 10547 ipsec_out_t *io; 10548 mib2_ipv6IfStatsEntry_t *mibptr; 10549 ilm_t *ilm; 10550 uint_t nexthdr_offset; 10551 10552 if (DB_TYPE(mp) == M_CTL) { 10553 io = (ipsec_out_t *)mp->b_rptr; 10554 if (!io->ipsec_out_secure) { 10555 mp = mp->b_cont; 10556 freeb(first_mp); 10557 first_mp = mp; 10558 mctl_present = B_FALSE; 10559 } else { 10560 mctl_present = B_TRUE; 10561 mp = first_mp->b_cont; 10562 ipsec_out_to_in(first_mp); 10563 } 10564 } else { 10565 mctl_present = B_FALSE; 10566 } 10567 10568 nexthdr = ip6h->ip6_nxt; 10569 mibptr = ill->ill_ip6_mib; 10570 10571 /* Fastpath */ 10572 switch (nexthdr) { 10573 case IPPROTO_TCP: 10574 case IPPROTO_UDP: 10575 case IPPROTO_ICMPV6: 10576 case IPPROTO_SCTP: 10577 hdr_length = IPV6_HDR_LEN; 10578 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10579 (uchar_t *)ip6h); 10580 break; 10581 default: { 10582 uint8_t *nexthdrp; 10583 10584 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10585 &hdr_length, &nexthdrp)) { 10586 /* Malformed packet */ 10587 BUMP_MIB(mibptr, 
ipv6OutDiscards); 10588 freemsg(first_mp); 10589 return; 10590 } 10591 nexthdr = *nexthdrp; 10592 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10593 break; 10594 } 10595 } 10596 10597 10598 DTRACE_PROBE4(ip6__loopback__in__start, 10599 ill_t *, ill, ill_t *, NULL, 10600 ip6_t *, ip6h, mblk_t *, first_mp); 10601 10602 FW_HOOKS6(ip6_loopback_in_event, ipv6firewall_loopback_in, 10603 MSG_FWCOOKED_IN, ill, NULL, ip6h, first_mp, mp); 10604 10605 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10606 10607 if (first_mp == NULL) 10608 return; 10609 10610 nexthdr = ip6h->ip6_nxt; 10611 10612 UPDATE_OB_PKT_COUNT(ire); 10613 ire->ire_last_used_time = lbolt; 10614 10615 /* 10616 * Remove reacability confirmation bit from version field 10617 * before looping back the packet. 10618 */ 10619 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10620 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10621 } 10622 10623 switch (nexthdr) { 10624 case IPPROTO_TCP: 10625 if (DB_TYPE(mp) == M_DATA) { 10626 /* 10627 * M_DATA mblk, so init mblk (chain) for 10628 * no struio(). 
10629 */ 10630 mblk_t *mp1 = mp; 10631 10632 do { 10633 mp1->b_datap->db_struioflag = 0; 10634 } while ((mp1 = mp1->b_cont) != NULL); 10635 } 10636 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10637 TCP_PORTS_OFFSET); 10638 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10639 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10640 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10641 hdr_length, mctl_present, ire->ire_zoneid); 10642 return; 10643 10644 case IPPROTO_UDP: 10645 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10646 UDP_PORTS_OFFSET); 10647 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10648 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10649 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10650 return; 10651 10652 case IPPROTO_SCTP: 10653 { 10654 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10655 10656 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10657 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10658 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10659 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10660 ire->ire_zoneid); 10661 return; 10662 } 10663 case IPPROTO_ICMPV6: { 10664 icmp6_t *icmp6; 10665 10666 /* check for full IPv6+ICMPv6 header */ 10667 if ((mp->b_wptr - mp->b_rptr) < 10668 (hdr_length + ICMP6_MINLEN)) { 10669 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10670 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10671 " failed\n")); 10672 BUMP_MIB(mibptr, ipv6OutDiscards); 10673 freemsg(first_mp); 10674 return; 10675 } 10676 ip6h = (ip6_t *)mp->b_rptr; 10677 } 10678 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10679 10680 /* Update output mib stats */ 10681 icmp_update_out_mib_v6(ill, icmp6); 10682 10683 /* Check variable for testing applications */ 10684 if (ipv6_drop_inbound_icmpv6) { 10685 freemsg(first_mp); 10686 return; 10687 } 10688 /* 10689 * Assume that there is always at least one conn for 10690 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10691 * where there is no conn. 
10692 */ 10693 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10694 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10695 /* 10696 * In the multicast case, applications may have 10697 * joined the group from different zones, so we 10698 * need to deliver the packet to each of them. 10699 * Loop through the multicast memberships 10700 * structures (ilm) on the receive ill and send 10701 * a copy of the packet up each matching one. 10702 * However, we don't do this for multicasts sent 10703 * on the loopback interface (PHYI_LOOPBACK flag 10704 * set) as they must stay in the sender's zone. 10705 */ 10706 ILM_WALKER_HOLD(ill); 10707 for (ilm = ill->ill_ilm; ilm != NULL; 10708 ilm = ilm->ilm_next) { 10709 if (ilm->ilm_flags & ILM_DELETED) 10710 continue; 10711 if (!IN6_ARE_ADDR_EQUAL( 10712 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10713 continue; 10714 if ((fanout_flags & 10715 IP_FF_NO_MCAST_LOOP) && 10716 ilm->ilm_zoneid == ire->ire_zoneid) 10717 continue; 10718 if (!ipif_lookup_zoneid(ill, 10719 ilm->ilm_zoneid, IPIF_UP, NULL)) 10720 continue; 10721 10722 first_mp1 = ip_copymsg(first_mp); 10723 if (first_mp1 == NULL) 10724 continue; 10725 icmp_inbound_v6(q, first_mp1, ill, 10726 hdr_length, mctl_present, 10727 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10728 NULL); 10729 } 10730 ILM_WALKER_RELE(ill); 10731 } else { 10732 first_mp1 = ip_copymsg(first_mp); 10733 if (first_mp1 != NULL) 10734 icmp_inbound_v6(q, first_mp1, ill, 10735 hdr_length, mctl_present, 10736 IP6_NO_IPPOLICY, ire->ire_zoneid, 10737 NULL); 10738 } 10739 } 10740 /* FALLTHRU */ 10741 default: { 10742 /* 10743 * Handle protocols with which IPv6 is less intimate. 10744 */ 10745 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10746 10747 /* 10748 * Enable sending ICMP for "Unknown" nexthdr 10749 * case. i.e. where we did not FALLTHRU from 10750 * IPPROTO_ICMPV6 processing case above. 
10751 */ 10752 if (nexthdr != IPPROTO_ICMPV6) 10753 fanout_flags |= IP_FF_SEND_ICMP; 10754 /* 10755 * Note: There can be more than one stream bound 10756 * to a particular protocol. When this is the case, 10757 * each one gets a copy of any incoming packets. 10758 */ 10759 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10760 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10761 mctl_present, ire->ire_zoneid); 10762 return; 10763 } 10764 } 10765 } 10766 10767 /* 10768 * Send packet using IRE. 10769 * Checksumming is controlled by cksum_request: 10770 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10771 * 1 => Skip TCP/UDP/SCTP checksum 10772 * Otherwise => checksum_request contains insert offset for checksum 10773 * 10774 * Assumes that the following set of headers appear in the first 10775 * mblk: 10776 * ip6_t 10777 * Any extension headers 10778 * TCP/UDP/SCTP header (if present) 10779 * The routine can handle an ICMPv6 header that is not in the first mblk. 10780 * 10781 * NOTE : This function does not ire_refrele the ire passed in as the 10782 * argument unlike ip_wput_ire where the REFRELE is done. 10783 * Refer to ip_wput_ire for more on this. 10784 */ 10785 static void 10786 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10787 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10788 zoneid_t zoneid) 10789 { 10790 ip6_t *ip6h; 10791 uint8_t nexthdr; 10792 uint16_t hdr_length; 10793 uint_t reachable = 0x0; 10794 ill_t *ill; 10795 mib2_ipv6IfStatsEntry_t *mibptr; 10796 mblk_t *first_mp; 10797 boolean_t mctl_present; 10798 ipsec_out_t *io; 10799 boolean_t conn_dontroute; /* conn value for multicast */ 10800 boolean_t conn_multicast_loop; /* conn value for multicast */ 10801 boolean_t multicast_forward; /* Should we forward ? 
*/ 10802 int max_frag; 10803 10804 ill = ire_to_ill(ire); 10805 first_mp = mp; 10806 multicast_forward = B_FALSE; 10807 10808 if (mp->b_datap->db_type != M_CTL) { 10809 ip6h = (ip6_t *)first_mp->b_rptr; 10810 } else { 10811 io = (ipsec_out_t *)first_mp->b_rptr; 10812 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10813 /* 10814 * Grab the zone id now because the M_CTL can be discarded by 10815 * ip_wput_ire_parse_ipsec_out() below. 10816 */ 10817 ASSERT(zoneid == io->ipsec_out_zoneid); 10818 ASSERT(zoneid != ALL_ZONES); 10819 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10820 /* 10821 * For the multicast case, ipsec_out carries conn_dontroute and 10822 * conn_multicast_loop as conn may not be available here. We 10823 * need this for multicast loopback and forwarding which is done 10824 * later in the code. 10825 */ 10826 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10827 conn_dontroute = io->ipsec_out_dontroute; 10828 conn_multicast_loop = io->ipsec_out_multicast_loop; 10829 /* 10830 * If conn_dontroute is not set or conn_multicast_loop 10831 * is set, we need to do forwarding/loopback. For 10832 * datagrams from ip_wput_multicast, conn_dontroute is 10833 * set to B_TRUE and conn_multicast_loop is set to 10834 * B_FALSE so that we neither do forwarding nor 10835 * loopback. 10836 */ 10837 if (!conn_dontroute || conn_multicast_loop) 10838 multicast_forward = B_TRUE; 10839 } 10840 } 10841 10842 /* 10843 * If the sender didn't supply the hop limit and there is a default 10844 * unicast hop limit associated with the output interface, we use 10845 * that if the packet is unicast. Interface specific unicast hop 10846 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10847 */ 10848 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10849 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10850 ip6h->ip6_hops = ill->ill_max_hops; 10851 } 10852 10853 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10854 ire->ire_zoneid != ALL_ZONES) { 10855 /* 10856 * When a zone sends a packet to another zone, we try to deliver 10857 * the packet under the same conditions as if the destination 10858 * was a real node on the network. To do so, we look for a 10859 * matching route in the forwarding table. 10860 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10861 * ip_newroute_v6() does. 10862 * Note that IRE_LOCAL are special, since they are used 10863 * when the zoneid doesn't match in some cases. This means that 10864 * we need to handle ipha_src differently since ire_src_addr 10865 * belongs to the receiving zone instead of the sending zone. 10866 * When ip_restrict_interzone_loopback is set, then 10867 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10868 * for loopback between zones when the logical "Ethernet" would 10869 * have looped them back. 
10870 */ 10871 ire_t *src_ire; 10872 10873 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10874 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10875 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10876 if (src_ire != NULL && 10877 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10878 (!ip_restrict_interzone_loopback || 10879 ire_local_same_ill_group(ire, src_ire))) { 10880 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10881 !unspec_src) { 10882 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10883 } 10884 ire_refrele(src_ire); 10885 } else { 10886 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10887 if (src_ire != NULL) { 10888 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10889 ire_refrele(src_ire); 10890 freemsg(first_mp); 10891 return; 10892 } 10893 ire_refrele(src_ire); 10894 } 10895 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10896 /* Failed */ 10897 freemsg(first_mp); 10898 return; 10899 } 10900 icmp_unreachable_v6(q, first_mp, 10901 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10902 zoneid); 10903 return; 10904 } 10905 } 10906 10907 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10908 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10909 connp, unspec_src, zoneid); 10910 if (mp == NULL) { 10911 return; 10912 } 10913 } 10914 10915 first_mp = mp; 10916 if (mp->b_datap->db_type == M_CTL) { 10917 io = (ipsec_out_t *)mp->b_rptr; 10918 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10919 mp = mp->b_cont; 10920 mctl_present = B_TRUE; 10921 } else { 10922 mctl_present = B_FALSE; 10923 } 10924 10925 ip6h = (ip6_t *)mp->b_rptr; 10926 nexthdr = ip6h->ip6_nxt; 10927 mibptr = ill->ill_ip6_mib; 10928 10929 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10930 ipif_t *ipif; 10931 10932 /* 10933 * Select the source address using ipif_select_source_v6. 
10934 */ 10935 if (attach_index != 0) { 10936 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10937 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10938 } else { 10939 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10940 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10941 } 10942 if (ipif == NULL) { 10943 if (ip_debug > 2) { 10944 /* ip1dbg */ 10945 pr_addr_dbg("ip_wput_ire_v6: no src for " 10946 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10947 printf("ip_wput_ire_v6: interface name %s\n", 10948 ill->ill_name); 10949 } 10950 freemsg(first_mp); 10951 return; 10952 } 10953 ip6h->ip6_src = ipif->ipif_v6src_addr; 10954 ipif_refrele(ipif); 10955 } 10956 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10957 if ((connp != NULL && connp->conn_multicast_loop) || 10958 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10959 ilm_t *ilm; 10960 10961 ILM_WALKER_HOLD(ill); 10962 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10963 ILM_WALKER_RELE(ill); 10964 if (ilm != NULL) { 10965 mblk_t *nmp; 10966 int fanout_flags = 0; 10967 10968 if (connp != NULL && 10969 !connp->conn_multicast_loop) { 10970 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10971 } 10972 ip1dbg(("ip_wput_ire_v6: " 10973 "Loopback multicast\n")); 10974 nmp = ip_copymsg(first_mp); 10975 if (nmp != NULL) { 10976 ip6_t *nip6h; 10977 mblk_t *mp_ip6h; 10978 10979 if (mctl_present) { 10980 nip6h = (ip6_t *) 10981 nmp->b_cont->b_rptr; 10982 mp_ip6h = nmp->b_cont; 10983 } else { 10984 nip6h = (ip6_t *)nmp->b_rptr; 10985 mp_ip6h = nmp; 10986 } 10987 10988 DTRACE_PROBE4( 10989 ip6__loopback__out__start, 10990 ill_t *, NULL, 10991 ill_t *, ill, 10992 ip6_t *, nip6h, 10993 mblk_t *, nmp); 10994 10995 FW_HOOKS6(ip6_loopback_out_event, 10996 ipv6firewall_loopback_out, 10997 MSG_FWCOOKED_OUT, NULL, ill, 10998 nip6h, nmp, mp_ip6h); 10999 11000 DTRACE_PROBE1( 11001 ip6__loopback__out__end, 11002 mblk_t *, nmp); 11003 11004 if (nmp != NULL) { 11005 /* 11006 * Deliver locally and to 11007 * every local zone, except 
11008 * the sending zone when 11009 * IPV6_MULTICAST_LOOP is 11010 * disabled. 11011 */ 11012 ip_wput_local_v6(RD(q), ill, 11013 nip6h, nmp, 11014 ire, fanout_flags); 11015 } 11016 } else { 11017 BUMP_MIB(mibptr, ipv6OutDiscards); 11018 ip1dbg(("ip_wput_ire_v6: " 11019 "copymsg failed\n")); 11020 } 11021 } 11022 } 11023 if (ip6h->ip6_hops == 0 || 11024 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11025 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11026 /* 11027 * Local multicast or just loopback on loopback 11028 * interface. 11029 */ 11030 BUMP_MIB(mibptr, ipv6OutMcastPkts); 11031 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11032 freemsg(first_mp); 11033 return; 11034 } 11035 } 11036 11037 if (ire->ire_stq != NULL) { 11038 uint32_t sum; 11039 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11040 ill_phyint->phyint_ifindex; 11041 queue_t *dev_q = ire->ire_stq->q_next; 11042 11043 /* 11044 * non-NULL send-to queue - packet is to be sent 11045 * out an interface. 11046 */ 11047 11048 /* Driver is flow-controlling? */ 11049 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11050 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11051 /* 11052 * Queue packet if we have an conn to give back 11053 * pressure. We can't queue packets intended for 11054 * hardware acceleration since we've tossed that 11055 * state already. If the packet is being fed back 11056 * from ire_send_v6, we don't know the position in 11057 * the queue to enqueue the packet and we discard 11058 * the packet. 11059 */ 11060 if (ip_output_queue && connp != NULL && 11061 !mctl_present && caller != IRE_SEND) { 11062 if (caller == IP_WSRV) { 11063 connp->conn_did_putbq = 1; 11064 (void) putbq(connp->conn_wq, mp); 11065 conn_drain_insert(connp); 11066 /* 11067 * caller == IP_WSRV implies we are 11068 * the service thread, and the 11069 * queue is already noenabled. 11070 * The check for canput and 11071 * the putbq is not atomic. 11072 * So we need to check again. 
11073 */ 11074 if (canput(dev_q)) 11075 connp->conn_did_putbq = 0; 11076 } else { 11077 (void) putq(connp->conn_wq, mp); 11078 } 11079 return; 11080 } 11081 BUMP_MIB(mibptr, ipv6OutDiscards); 11082 freemsg(first_mp); 11083 return; 11084 } 11085 11086 /* 11087 * Look for reachability confirmations from the transport. 11088 */ 11089 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11090 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11091 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11092 if (mctl_present) 11093 io->ipsec_out_reachable = B_TRUE; 11094 } 11095 /* Fastpath */ 11096 switch (nexthdr) { 11097 case IPPROTO_TCP: 11098 case IPPROTO_UDP: 11099 case IPPROTO_ICMPV6: 11100 case IPPROTO_SCTP: 11101 hdr_length = IPV6_HDR_LEN; 11102 break; 11103 default: { 11104 uint8_t *nexthdrp; 11105 11106 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11107 &hdr_length, &nexthdrp)) { 11108 /* Malformed packet */ 11109 BUMP_MIB(mibptr, ipv6OutDiscards); 11110 freemsg(first_mp); 11111 return; 11112 } 11113 nexthdr = *nexthdrp; 11114 break; 11115 } 11116 } 11117 11118 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11119 uint16_t *up; 11120 uint16_t *insp; 11121 11122 /* 11123 * The packet header is processed once for all, even 11124 * in the multirouting case. We disable hardware 11125 * checksum if the packet is multirouted, as it will be 11126 * replicated via several interfaces, and not all of 11127 * them may have this capability. 11128 */ 11129 if (cksum_request == 1 && 11130 !(ire->ire_flags & RTF_MULTIRT)) { 11131 /* Skip the transport checksum */ 11132 goto cksum_done; 11133 } 11134 /* 11135 * Do user-configured raw checksum. 
11136 * Compute checksum and insert at offset "cksum_request" 11137 */ 11138 11139 /* check for enough headers for checksum */ 11140 cksum_request += hdr_length; /* offset from rptr */ 11141 if ((mp->b_wptr - mp->b_rptr) < 11142 (cksum_request + sizeof (int16_t))) { 11143 if (!pullupmsg(mp, 11144 cksum_request + sizeof (int16_t))) { 11145 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11146 " failed\n")); 11147 BUMP_MIB(mibptr, ipv6OutDiscards); 11148 freemsg(first_mp); 11149 return; 11150 } 11151 ip6h = (ip6_t *)mp->b_rptr; 11152 } 11153 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11154 ASSERT(((uintptr_t)insp & 0x1) == 0); 11155 up = (uint16_t *)&ip6h->ip6_src; 11156 /* 11157 * icmp has placed length and routing 11158 * header adjustment in *insp. 11159 */ 11160 sum = htons(nexthdr) + 11161 up[0] + up[1] + up[2] + up[3] + 11162 up[4] + up[5] + up[6] + up[7] + 11163 up[8] + up[9] + up[10] + up[11] + 11164 up[12] + up[13] + up[14] + up[15]; 11165 sum = (sum & 0xffff) + (sum >> 16); 11166 *insp = IP_CSUM(mp, hdr_length, sum); 11167 if (*insp == 0) 11168 *insp = 0xFFFF; 11169 } else if (nexthdr == IPPROTO_TCP) { 11170 uint16_t *up; 11171 11172 /* 11173 * Check for full IPv6 header + enough TCP header 11174 * to get at the checksum field. 11175 */ 11176 if ((mp->b_wptr - mp->b_rptr) < 11177 (hdr_length + TCP_CHECKSUM_OFFSET + 11178 TCP_CHECKSUM_SIZE)) { 11179 if (!pullupmsg(mp, hdr_length + 11180 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11181 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11182 " failed\n")); 11183 BUMP_MIB(mibptr, ipv6OutDiscards); 11184 freemsg(first_mp); 11185 return; 11186 } 11187 ip6h = (ip6_t *)mp->b_rptr; 11188 } 11189 11190 up = (uint16_t *)&ip6h->ip6_src; 11191 /* 11192 * Note: The TCP module has stored the length value 11193 * into the tcp checksum field, so we don't 11194 * need to explicitly sum it in here. 
11195 */ 11196 sum = up[0] + up[1] + up[2] + up[3] + 11197 up[4] + up[5] + up[6] + up[7] + 11198 up[8] + up[9] + up[10] + up[11] + 11199 up[12] + up[13] + up[14] + up[15]; 11200 11201 /* Fold the initial sum */ 11202 sum = (sum & 0xffff) + (sum >> 16); 11203 11204 up = (uint16_t *)(((uchar_t *)ip6h) + 11205 hdr_length + TCP_CHECKSUM_OFFSET); 11206 11207 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11208 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11209 ire->ire_max_frag, mctl_present, sum); 11210 11211 /* Software checksum? */ 11212 if (DB_CKSUMFLAGS(mp) == 0) { 11213 IP6_STAT(ip6_out_sw_cksum); 11214 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11215 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11216 hdr_length); 11217 } 11218 } else if (nexthdr == IPPROTO_UDP) { 11219 uint16_t *up; 11220 11221 /* 11222 * check for full IPv6 header + enough UDP header 11223 * to get at the UDP checksum field 11224 */ 11225 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11226 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11227 if (!pullupmsg(mp, hdr_length + 11228 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11229 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11230 " failed\n")); 11231 BUMP_MIB(mibptr, ipv6OutDiscards); 11232 freemsg(first_mp); 11233 return; 11234 } 11235 ip6h = (ip6_t *)mp->b_rptr; 11236 } 11237 up = (uint16_t *)&ip6h->ip6_src; 11238 /* 11239 * Note: The UDP module has stored the length value 11240 * into the udp checksum field, so we don't 11241 * need to explicitly sum it in here. 
11242 */ 11243 sum = up[0] + up[1] + up[2] + up[3] + 11244 up[4] + up[5] + up[6] + up[7] + 11245 up[8] + up[9] + up[10] + up[11] + 11246 up[12] + up[13] + up[14] + up[15]; 11247 11248 /* Fold the initial sum */ 11249 sum = (sum & 0xffff) + (sum >> 16); 11250 11251 up = (uint16_t *)(((uchar_t *)ip6h) + 11252 hdr_length + UDP_CHECKSUM_OFFSET); 11253 11254 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11255 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11256 ire->ire_max_frag, mctl_present, sum); 11257 11258 /* Software checksum? */ 11259 if (DB_CKSUMFLAGS(mp) == 0) { 11260 IP6_STAT(ip6_out_sw_cksum); 11261 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11262 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11263 hdr_length); 11264 } 11265 } else if (nexthdr == IPPROTO_ICMPV6) { 11266 uint16_t *up; 11267 icmp6_t *icmp6; 11268 11269 /* check for full IPv6+ICMPv6 header */ 11270 if ((mp->b_wptr - mp->b_rptr) < 11271 (hdr_length + ICMP6_MINLEN)) { 11272 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11273 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11274 " failed\n")); 11275 BUMP_MIB(mibptr, ipv6OutDiscards); 11276 freemsg(first_mp); 11277 return; 11278 } 11279 ip6h = (ip6_t *)mp->b_rptr; 11280 } 11281 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11282 up = (uint16_t *)&ip6h->ip6_src; 11283 /* 11284 * icmp has placed length and routing 11285 * header adjustment in icmp6_cksum. 
11286 */ 11287 sum = htons(IPPROTO_ICMPV6) + 11288 up[0] + up[1] + up[2] + up[3] + 11289 up[4] + up[5] + up[6] + up[7] + 11290 up[8] + up[9] + up[10] + up[11] + 11291 up[12] + up[13] + up[14] + up[15]; 11292 sum = (sum & 0xffff) + (sum >> 16); 11293 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11294 if (icmp6->icmp6_cksum == 0) 11295 icmp6->icmp6_cksum = 0xFFFF; 11296 11297 /* Update output mib stats */ 11298 icmp_update_out_mib_v6(ill, icmp6); 11299 } else if (nexthdr == IPPROTO_SCTP) { 11300 sctp_hdr_t *sctph; 11301 11302 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11303 if (!pullupmsg(mp, hdr_length + 11304 sizeof (*sctph))) { 11305 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11306 " failed\n")); 11307 BUMP_MIB(ill->ill_ip6_mib, 11308 ipv6OutDiscards); 11309 freemsg(mp); 11310 return; 11311 } 11312 ip6h = (ip6_t *)mp->b_rptr; 11313 } 11314 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11315 sctph->sh_chksum = 0; 11316 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11317 } 11318 11319 cksum_done: 11320 /* 11321 * We force the insertion of a fragment header using the 11322 * IPH_FRAG_HDR flag in two cases: 11323 * - after reception of an ICMPv6 "packet too big" message 11324 * with a MTU < 1280 (cf. RFC 2460 section 5) 11325 * - for multirouted IPv6 packets, so that the receiver can 11326 * discard duplicates according to their fragment identifier 11327 * 11328 * Two flags modifed from the API can modify this behavior. 11329 * The first is IPV6_USE_MIN_MTU. With this API the user 11330 * can specify how to manage PMTUD for unicast and multicast. 11331 * 11332 * IPV6_DONTFRAG disallows fragmentation. 
11333 */ 11334 max_frag = ire->ire_max_frag; 11335 switch (IP6I_USE_MIN_MTU_API(flags)) { 11336 case IPV6_USE_MIN_MTU_DEFAULT: 11337 case IPV6_USE_MIN_MTU_UNICAST: 11338 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11339 max_frag = IPV6_MIN_MTU; 11340 } 11341 break; 11342 11343 case IPV6_USE_MIN_MTU_NEVER: 11344 max_frag = IPV6_MIN_MTU; 11345 break; 11346 } 11347 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11348 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11349 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11350 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11351 max_frag, B_FALSE, B_TRUE, zoneid); 11352 return; 11353 } 11354 11355 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11356 (mp->b_cont ? msgdsize(mp) : 11357 mp->b_wptr - (uchar_t *)ip6h)) { 11358 ip0dbg(("Packet length mismatch: %d, %ld\n", 11359 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11360 msgdsize(mp))); 11361 freemsg(first_mp); 11362 return; 11363 } 11364 /* Do IPSEC processing first */ 11365 if (mctl_present) { 11366 if (attach_index != 0) 11367 ipsec_out_attach_if(io, attach_index); 11368 ipsec_out_process(q, first_mp, ire, ill_index); 11369 return; 11370 } 11371 ASSERT(mp->b_prev == NULL); 11372 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11373 ntohs(ip6h->ip6_plen) + 11374 IPV6_HDR_LEN, max_frag)); 11375 ASSERT(mp == first_mp); 11376 /* Initiate IPPF processing */ 11377 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11378 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11379 if (mp == NULL) { 11380 return; 11381 } 11382 } 11383 ip_wput_frag_v6(mp, ire, reachable, connp, 11384 caller, max_frag); 11385 return; 11386 } 11387 /* Do IPSEC processing first */ 11388 if (mctl_present) { 11389 int extra_len = ipsec_out_extra_length(first_mp); 11390 11391 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11392 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11393 /* 11394 * IPsec headers will push the packet over the 11395 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11396 * message for this packet if the upper-layer 11397 * that issued this packet will be able to 11398 * react to the icmp_pkt2big_v6() that we'll 11399 * generate. 11400 */ 11401 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11402 max_frag, B_FALSE, B_TRUE, zoneid); 11403 return; 11404 } 11405 if (attach_index != 0) 11406 ipsec_out_attach_if(io, attach_index); 11407 ipsec_out_process(q, first_mp, ire, ill_index); 11408 return; 11409 } 11410 /* 11411 * XXX multicast: add ip_mforward_v6() here. 11412 * Check conn_dontroute 11413 */ 11414 #ifdef lint 11415 /* 11416 * XXX The only purpose of this statement is to avoid lint 11417 * errors. See the above "XXX multicast". When that gets 11418 * fixed, remove this whole #ifdef lint section. 11419 */ 11420 ip3dbg(("multicast forward is %s.\n", 11421 (multicast_forward ? "TRUE" : "FALSE"))); 11422 #endif 11423 11424 UPDATE_OB_PKT_COUNT(ire); 11425 ire->ire_last_used_time = lbolt; 11426 ASSERT(mp == first_mp); 11427 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11428 } else { 11429 DTRACE_PROBE4(ip6__loopback__out__start, 11430 ill_t *, NULL, ill_t *, ill, 11431 ip6_t *, ip6h, mblk_t *, first_mp); 11432 FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, 11433 MSG_FWCOOKED_OUT, NULL, ill, ip6h, first_mp, mp); 11434 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11435 if (first_mp != NULL) 11436 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11437 } 11438 } 11439 11440 /* 11441 * Outbound IPv6 fragmentation routine using MDT. 
11442 */ 11443 static void 11444 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11445 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11446 { 11447 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11448 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11449 mblk_t *hdr_mp, *md_mp = NULL; 11450 int i1; 11451 multidata_t *mmd; 11452 unsigned char *hdr_ptr, *pld_ptr; 11453 ip_pdescinfo_t pdi; 11454 uint32_t ident; 11455 size_t len; 11456 uint16_t offset; 11457 queue_t *stq = ire->ire_stq; 11458 ill_t *ill = (ill_t *)stq->q_ptr; 11459 11460 ASSERT(DB_TYPE(mp) == M_DATA); 11461 ASSERT(MBLKL(mp) > unfragmentable_len); 11462 11463 /* 11464 * Move read ptr past unfragmentable portion, we don't want this part 11465 * of the data in our fragments. 11466 */ 11467 mp->b_rptr += unfragmentable_len; 11468 11469 /* Calculate how many packets we will send out */ 11470 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11471 pkts = (i1 + max_chunk - 1) / max_chunk; 11472 ASSERT(pkts > 1); 11473 11474 /* Allocate a message block which will hold all the IP Headers. */ 11475 wroff = ip_wroff_extra; 11476 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11477 11478 i1 = pkts * hdr_chunk_len; 11479 /* 11480 * Create the header buffer, Multidata and destination address 11481 * and SAP attribute that should be associated with it. 
11482 */ 11483 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11484 ((hdr_mp->b_wptr += i1), 11485 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11486 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11487 freemsg(mp); 11488 if (md_mp == NULL) { 11489 freemsg(hdr_mp); 11490 } else { 11491 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11492 freemsg(md_mp); 11493 } 11494 IP6_STAT(ip6_frag_mdt_allocfail); 11495 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11496 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11497 return; 11498 } 11499 IP6_STAT(ip6_frag_mdt_allocd); 11500 11501 /* 11502 * Add a payload buffer to the Multidata; this operation must not 11503 * fail, or otherwise our logic in this routine is broken. There 11504 * is no memory allocation done by the routine, so any returned 11505 * failure simply tells us that we've done something wrong. 11506 * 11507 * A failure tells us that either we're adding the same payload 11508 * buffer more than once, or we're trying to add more buffers than 11509 * allowed. None of the above cases should happen, and we panic 11510 * because either there's horrible heap corruption, and/or 11511 * programming mistake. 11512 */ 11513 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11514 goto pbuf_panic; 11515 } 11516 11517 hdr_ptr = hdr_mp->b_rptr; 11518 pld_ptr = mp->b_rptr; 11519 11520 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11521 11522 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11523 11524 /* 11525 * len is the total length of the fragmentable data in this 11526 * datagram. For each fragment sent, we will decrement len 11527 * by the amount of fragmentable data sent in that fragment 11528 * until len reaches zero. 
11529 */ 11530 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11531 11532 offset = 0; 11533 prev_nexthdr_offset += wroff; 11534 11535 while (len != 0) { 11536 size_t mlen; 11537 ip6_t *fip6h; 11538 ip6_frag_t *fraghdr; 11539 int error; 11540 11541 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11542 mlen = MIN(len, max_chunk); 11543 len -= mlen; 11544 11545 fip6h = (ip6_t *)(hdr_ptr + wroff); 11546 ASSERT(OK_32PTR(fip6h)); 11547 bcopy(ip6h, fip6h, unfragmentable_len); 11548 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11549 11550 fip6h->ip6_plen = htons((uint16_t)(mlen + 11551 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11552 11553 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11554 unfragmentable_len); 11555 fraghdr->ip6f_nxt = nexthdr; 11556 fraghdr->ip6f_reserved = 0; 11557 fraghdr->ip6f_offlg = htons(offset) | 11558 ((len != 0) ? IP6F_MORE_FRAG : 0); 11559 fraghdr->ip6f_ident = ident; 11560 11561 /* 11562 * Record offset and size of header and data of the next packet 11563 * in the multidata message. 11564 */ 11565 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11566 unfragmentable_len + sizeof (ip6_frag_t), 0); 11567 PDESC_PLD_INIT(&pdi); 11568 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11569 ASSERT(i1 > 0); 11570 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11571 if (i1 == mlen) { 11572 pld_ptr += mlen; 11573 } else { 11574 i1 = mlen - i1; 11575 mp = mp->b_cont; 11576 ASSERT(mp != NULL); 11577 ASSERT(MBLKL(mp) >= i1); 11578 /* 11579 * Attach the next payload message block to the 11580 * multidata message. 
11581 */ 11582 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11583 goto pbuf_panic; 11584 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11585 pld_ptr = mp->b_rptr + i1; 11586 } 11587 11588 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11589 KM_NOSLEEP)) == NULL) { 11590 /* 11591 * Any failure other than ENOMEM indicates that we 11592 * have passed in invalid pdesc info or parameters 11593 * to mmd_addpdesc, which must not happen. 11594 * 11595 * EINVAL is a result of failure on boundary checks 11596 * against the pdesc info contents. It should not 11597 * happen, and we panic because either there's 11598 * horrible heap corruption, and/or programming 11599 * mistake. 11600 */ 11601 if (error != ENOMEM) { 11602 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11603 "pdesc logic error detected for " 11604 "mmd %p pinfo %p (%d)\n", 11605 (void *)mmd, (void *)&pdi, error); 11606 /* NOTREACHED */ 11607 } 11608 IP6_STAT(ip6_frag_mdt_addpdescfail); 11609 /* Free unattached payload message blocks as well */ 11610 md_mp->b_cont = mp->b_cont; 11611 goto free_mmd; 11612 } 11613 11614 /* Advance fragment offset. */ 11615 offset += mlen; 11616 11617 /* Advance to location for next header in the buffer. */ 11618 hdr_ptr += hdr_chunk_len; 11619 11620 /* Did we reach the next payload message block? */ 11621 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11622 mp = mp->b_cont; 11623 /* 11624 * Attach the next message block with payload 11625 * data to the multidata message. 
11626 */ 11627 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11628 goto pbuf_panic; 11629 pld_ptr = mp->b_rptr; 11630 } 11631 } 11632 11633 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11634 ASSERT(mp->b_wptr == pld_ptr); 11635 11636 /* Update IP statistics */ 11637 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11638 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11639 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11640 11641 ire->ire_ob_pkt_count += pkts; 11642 if (ire->ire_ipif != NULL) 11643 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11644 11645 ire->ire_last_used_time = lbolt; 11646 /* Send it down */ 11647 putnext(stq, md_mp); 11648 return; 11649 11650 pbuf_panic: 11651 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11652 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11653 pbuf_idx); 11654 /* NOTREACHED */ 11655 } 11656 11657 /* 11658 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11659 * We have not optimized this in terms of number of mblks 11660 * allocated. For instance, for each fragment sent we always allocate a 11661 * mblk to hold the IPv6 header and fragment header. 11662 * 11663 * Assumes that all the extension headers are contained in the first mblk. 11664 * 11665 * The fragment header is inserted after an hop-by-hop options header 11666 * and after [an optional destinations header followed by] a routing header. 11667 * 11668 * NOTE : This function does not ire_refrele the ire passed in as 11669 * the argument. 
11670 */ 11671 void 11672 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11673 int caller, int max_frag) 11674 { 11675 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11676 ip6_t *fip6h; 11677 mblk_t *hmp; 11678 mblk_t *hmp0; 11679 mblk_t *dmp; 11680 ip6_frag_t *fraghdr; 11681 size_t unfragmentable_len; 11682 size_t len; 11683 size_t mlen; 11684 size_t max_chunk; 11685 uint32_t ident; 11686 uint16_t off_flags; 11687 uint16_t offset = 0; 11688 ill_t *ill; 11689 uint8_t nexthdr; 11690 uint_t prev_nexthdr_offset; 11691 uint8_t *ptr; 11692 11693 ASSERT(ire->ire_type == IRE_CACHE); 11694 ill = (ill_t *)ire->ire_stq->q_ptr; 11695 11696 /* 11697 * Determine the length of the unfragmentable portion of this 11698 * datagram. This consists of the IPv6 header, a potential 11699 * hop-by-hop options header, a potential pre-routing-header 11700 * destination options header, and a potential routing header. 11701 */ 11702 nexthdr = ip6h->ip6_nxt; 11703 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11704 ptr = (uint8_t *)&ip6h[1]; 11705 11706 if (nexthdr == IPPROTO_HOPOPTS) { 11707 ip6_hbh_t *hbh_hdr; 11708 uint_t hdr_len; 11709 11710 hbh_hdr = (ip6_hbh_t *)ptr; 11711 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11712 nexthdr = hbh_hdr->ip6h_nxt; 11713 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11714 - (uint8_t *)ip6h; 11715 ptr += hdr_len; 11716 } 11717 if (nexthdr == IPPROTO_DSTOPTS) { 11718 ip6_dest_t *dest_hdr; 11719 uint_t hdr_len; 11720 11721 dest_hdr = (ip6_dest_t *)ptr; 11722 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11723 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11724 nexthdr = dest_hdr->ip6d_nxt; 11725 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11726 - (uint8_t *)ip6h; 11727 ptr += hdr_len; 11728 } 11729 } 11730 if (nexthdr == IPPROTO_ROUTING) { 11731 ip6_rthdr_t *rthdr; 11732 uint_t hdr_len; 11733 11734 rthdr = (ip6_rthdr_t *)ptr; 11735 nexthdr = rthdr->ip6r_nxt; 11736 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11737 - (uint8_t *)ip6h; 11738 hdr_len = 8 * (rthdr->ip6r_len + 1); 11739 ptr += hdr_len; 11740 } 11741 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11742 11743 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11744 sizeof (ip6_frag_t)) & ~7; 11745 11746 /* Check if we can use MDT to send out the frags. */ 11747 ASSERT(!IRE_IS_LOCAL(ire)); 11748 if (ip_multidata_outbound && reachable == 0 && 11749 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11750 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11751 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11752 nexthdr, prev_nexthdr_offset); 11753 return; 11754 } 11755 11756 /* 11757 * Allocate an mblk with enough room for the link-layer 11758 * header, the unfragmentable part of the datagram, and the 11759 * fragment header. This (or a copy) will be used as the 11760 * first mblk for each fragment we send. 11761 */ 11762 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11763 BPRI_HI); 11764 if (hmp == NULL) { 11765 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11766 freemsg(mp); 11767 return; 11768 } 11769 hmp->b_rptr += ip_wroff_extra; 11770 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11771 11772 fip6h = (ip6_t *)hmp->b_rptr; 11773 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11774 11775 bcopy(ip6h, fip6h, unfragmentable_len); 11776 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11777 11778 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11779 11780 fraghdr->ip6f_nxt = nexthdr; 11781 fraghdr->ip6f_reserved = 0; 11782 fraghdr->ip6f_offlg = 0; 11783 fraghdr->ip6f_ident = htonl(ident); 11784 11785 /* 11786 * len is the total length of the fragmentable data in this 11787 * datagram. For each fragment sent, we will decrement len 11788 * by the amount of fragmentable data sent in that fragment 11789 * until len reaches zero. 
11790 */ 11791 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11792 11793 /* 11794 * Move read ptr past unfragmentable portion, we don't want this part 11795 * of the data in our fragments. 11796 */ 11797 mp->b_rptr += unfragmentable_len; 11798 11799 while (len != 0) { 11800 mlen = MIN(len, max_chunk); 11801 len -= mlen; 11802 if (len != 0) { 11803 /* Not last */ 11804 hmp0 = copyb(hmp); 11805 if (hmp0 == NULL) { 11806 freeb(hmp); 11807 freemsg(mp); 11808 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11809 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11810 return; 11811 } 11812 off_flags = IP6F_MORE_FRAG; 11813 } else { 11814 /* Last fragment */ 11815 hmp0 = hmp; 11816 hmp = NULL; 11817 off_flags = 0; 11818 } 11819 fip6h = (ip6_t *)(hmp0->b_rptr); 11820 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11821 11822 fip6h->ip6_plen = htons((uint16_t)(mlen + 11823 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11824 /* 11825 * Note: Optimization alert. 11826 * In IPv6 (and IPv4) protocol header, Fragment Offset 11827 * ("offset") is 13 bits wide and in 8-octet units. 11828 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11829 * it occupies the most significant 13 bits. 11830 * (least significant 13 bits in IPv4). 11831 * We do not do any shifts here. Not shifting is same effect 11832 * as taking offset value in octet units, dividing by 8 and 11833 * then shifting 3 bits left to line it up in place in proper 11834 * place protocol header. 
11835 */ 11836 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11837 11838 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11839 /* mp has already been freed by ip_carve_mp() */ 11840 if (hmp != NULL) 11841 freeb(hmp); 11842 freeb(hmp0); 11843 ip1dbg(("ip_carve_mp: failed\n")); 11844 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11845 return; 11846 } 11847 hmp0->b_cont = dmp; 11848 /* Get the priority marking, if any */ 11849 hmp0->b_band = dmp->b_band; 11850 UPDATE_OB_PKT_COUNT(ire); 11851 ire->ire_last_used_time = lbolt; 11852 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11853 caller, NULL); 11854 reachable = 0; /* No need to redo state machine in loop */ 11855 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11856 offset += mlen; 11857 } 11858 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11859 } 11860 11861 /* 11862 * Determine if the ill and multicast aspects of that packets 11863 * "matches" the conn. 11864 */ 11865 boolean_t 11866 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11867 zoneid_t zoneid) 11868 { 11869 ill_t *in_ill; 11870 boolean_t wantpacket = B_TRUE; 11871 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11872 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11873 11874 /* 11875 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11876 * unicast and multicast reception to conn_incoming_ill. 11877 * conn_wantpacket_v6 is called both for unicast and 11878 * multicast. 11879 * 11880 * 1) The unicast copy of the packet can come anywhere in 11881 * the ill group if it is part of the group. Thus, we 11882 * need to check to see whether the ill group matches 11883 * if in_ill is part of a group. 11884 * 11885 * 2) ip_rput does not suppress duplicate multicast packets. 
11886 * If there are two interfaces in a ill group and we have 11887 * 2 applications (conns) joined a multicast group G on 11888 * both the interfaces, ilm_lookup_ill filter in ip_rput 11889 * will give us two packets because we join G on both the 11890 * interfaces rather than nominating just one interface 11891 * for receiving multicast like broadcast above. So, 11892 * we have to call ilg_lookup_ill to filter out duplicate 11893 * copies, if ill is part of a group, to supress duplicates. 11894 */ 11895 in_ill = connp->conn_incoming_ill; 11896 if (in_ill != NULL) { 11897 mutex_enter(&connp->conn_lock); 11898 in_ill = connp->conn_incoming_ill; 11899 mutex_enter(&ill->ill_lock); 11900 /* 11901 * No IPMP, and the packet did not arrive on conn_incoming_ill 11902 * OR, IPMP in use and the packet arrived on an IPMP group 11903 * different from the conn_incoming_ill's IPMP group. 11904 * Reject the packet. 11905 */ 11906 if ((in_ill->ill_group == NULL && in_ill != ill) || 11907 (in_ill->ill_group != NULL && 11908 in_ill->ill_group != ill->ill_group)) { 11909 wantpacket = B_FALSE; 11910 } 11911 mutex_exit(&ill->ill_lock); 11912 mutex_exit(&connp->conn_lock); 11913 if (!wantpacket) 11914 return (B_FALSE); 11915 } 11916 11917 if (connp->conn_multi_router) 11918 return (B_TRUE); 11919 11920 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11921 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11922 /* 11923 * Unicast case: we match the conn only if it's in the specified 11924 * zone. 11925 */ 11926 return (IPCL_ZONE_MATCH(connp, zoneid)); 11927 } 11928 11929 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11930 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11931 /* 11932 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11933 * disabled, therefore we don't dispatch the multicast packet to 11934 * the sending zone. 
11935 */ 11936 return (B_FALSE); 11937 } 11938 11939 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11940 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11941 /* 11942 * Multicast packet on the loopback interface: we only match 11943 * conns who joined the group in the specified zone. 11944 */ 11945 return (B_FALSE); 11946 } 11947 11948 mutex_enter(&connp->conn_lock); 11949 wantpacket = 11950 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11951 mutex_exit(&connp->conn_lock); 11952 11953 return (wantpacket); 11954 } 11955 11956 11957 /* 11958 * Transmit a packet and update any NUD state based on the flags 11959 * XXX need to "recover" any ip6i_t when doing putq! 11960 * 11961 * NOTE : This function does not ire_refrele the ire passed in as the 11962 * argument. 11963 */ 11964 void 11965 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11966 int caller, ipsec_out_t *io) 11967 { 11968 mblk_t *mp1; 11969 nce_t *nce = ire->ire_nce; 11970 ill_t *ill; 11971 ill_t *out_ill; 11972 uint64_t delta; 11973 ip6_t *ip6h; 11974 queue_t *stq = ire->ire_stq; 11975 ire_t *ire1 = NULL; 11976 ire_t *save_ire = ire; 11977 boolean_t multirt_send = B_FALSE; 11978 mblk_t *next_mp = NULL; 11979 11980 ip6h = (ip6_t *)mp->b_rptr; 11981 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11982 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11983 ASSERT(nce != NULL); 11984 ASSERT(mp->b_datap->db_type == M_DATA); 11985 ASSERT(stq != NULL); 11986 11987 ill = ire_to_ill(ire); 11988 if (!ill) { 11989 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11990 freemsg(mp); 11991 return; 11992 } 11993 11994 /* 11995 * If a packet is to be sent out an interface that is a 6to4 11996 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11997 * destination, must be checked to have a 6to4 prefix 11998 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11999 * address configured on the sending interface. 
Otherwise, 12000 * the packet was delivered to this interface in error and the 12001 * packet must be dropped. 12002 */ 12003 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12004 ipif_t *ipif = ill->ill_ipif; 12005 12006 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12007 &ip6h->ip6_dst)) { 12008 if (ip_debug > 2) { 12009 /* ip1dbg */ 12010 pr_addr_dbg("ip_xmit_v6: attempting to " 12011 "send 6to4 addressed IPv6 " 12012 "destination (%s) out the wrong " 12013 "interface.\n", AF_INET6, 12014 &ip6h->ip6_dst); 12015 } 12016 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12017 freemsg(mp); 12018 return; 12019 } 12020 } 12021 12022 /* Flow-control check has been done in ip_wput_ire_v6 */ 12023 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12024 caller == IP_WSRV || canput(stq->q_next)) { 12025 uint32_t ill_index; 12026 12027 /* 12028 * In most cases, the emission loop below is entered only 12029 * once. Only in the case where the ire holds the 12030 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12031 * flagged ires in the bucket, and send the packet 12032 * through all crossed RTF_MULTIRT routes. 12033 */ 12034 if (ire->ire_flags & RTF_MULTIRT) { 12035 /* 12036 * Multirouting case. The bucket where ire is stored 12037 * probably holds other RTF_MULTIRT flagged ires 12038 * to the destination. In this call to ip_xmit_v6, 12039 * we attempt to send the packet through all 12040 * those ires. Thus, we first ensure that ire is the 12041 * first RTF_MULTIRT ire in the bucket, 12042 * before walking the ire list. 12043 */ 12044 ire_t *first_ire; 12045 irb_t *irb = ire->ire_bucket; 12046 ASSERT(irb != NULL); 12047 multirt_send = B_TRUE; 12048 12049 /* Make sure we do not omit any multiroute ire. 
*/ 12050 IRB_REFHOLD(irb); 12051 for (first_ire = irb->irb_ire; 12052 first_ire != NULL; 12053 first_ire = first_ire->ire_next) { 12054 if ((first_ire->ire_flags & RTF_MULTIRT) && 12055 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12056 &ire->ire_addr_v6)) && 12057 !(first_ire->ire_marks & 12058 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12059 break; 12060 } 12061 12062 if ((first_ire != NULL) && (first_ire != ire)) { 12063 IRE_REFHOLD(first_ire); 12064 /* ire will be released by the caller */ 12065 ire = first_ire; 12066 nce = ire->ire_nce; 12067 stq = ire->ire_stq; 12068 ill = ire_to_ill(ire); 12069 } 12070 IRB_REFRELE(irb); 12071 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12072 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12073 ILL_MDT_USABLE(ill)) { 12074 /* 12075 * This tcp connection was marked as MDT-capable, but 12076 * it has been turned off due changes in the interface. 12077 * Now that the interface support is back, turn it on 12078 * by notifying tcp. We don't directly modify tcp_mdt, 12079 * since we leave all the details to the tcp code that 12080 * knows better. 12081 */ 12082 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12083 12084 if (mdimp == NULL) { 12085 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12086 "connp %p (ENOMEM)\n", (void *)connp)); 12087 } else { 12088 CONN_INC_REF(connp); 12089 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12090 connp, SQTAG_TCP_INPUT_MCTL); 12091 } 12092 } 12093 12094 do { 12095 mblk_t *mp_ip6h; 12096 12097 if (multirt_send) { 12098 irb_t *irb; 12099 /* 12100 * We are in a multiple send case, need to get 12101 * the next ire and make a duplicate of the 12102 * packet. ire1 holds here the next ire to 12103 * process in the bucket. If multirouting is 12104 * expected, any non-RTF_MULTIRT ire that has 12105 * the right destination address is ignored. 
12106 */ 12107 irb = ire->ire_bucket; 12108 ASSERT(irb != NULL); 12109 12110 IRB_REFHOLD(irb); 12111 for (ire1 = ire->ire_next; 12112 ire1 != NULL; 12113 ire1 = ire1->ire_next) { 12114 if (!(ire1->ire_flags & RTF_MULTIRT)) 12115 continue; 12116 if (!IN6_ARE_ADDR_EQUAL( 12117 &ire1->ire_addr_v6, 12118 &ire->ire_addr_v6)) 12119 continue; 12120 if (ire1->ire_marks & 12121 (IRE_MARK_CONDEMNED| 12122 IRE_MARK_HIDDEN)) 12123 continue; 12124 12125 /* Got one */ 12126 if (ire1 != save_ire) { 12127 IRE_REFHOLD(ire1); 12128 } 12129 break; 12130 } 12131 IRB_REFRELE(irb); 12132 12133 if (ire1 != NULL) { 12134 next_mp = copyb(mp); 12135 if ((next_mp == NULL) || 12136 ((mp->b_cont != NULL) && 12137 ((next_mp->b_cont = 12138 dupmsg(mp->b_cont)) == 12139 NULL))) { 12140 freemsg(next_mp); 12141 next_mp = NULL; 12142 ire_refrele(ire1); 12143 ire1 = NULL; 12144 } 12145 } 12146 12147 /* Last multiroute ire; don't loop anymore. */ 12148 if (ire1 == NULL) { 12149 multirt_send = B_FALSE; 12150 } 12151 } 12152 12153 ill_index = 12154 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12155 12156 /* Initiate IPPF processing */ 12157 if (IP6_OUT_IPP(flags)) { 12158 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12159 if (mp == NULL) { 12160 BUMP_MIB(ill->ill_ip6_mib, 12161 ipv6OutDiscards); 12162 if (next_mp != NULL) 12163 freemsg(next_mp); 12164 if (ire != save_ire) { 12165 ire_refrele(ire); 12166 } 12167 return; 12168 } 12169 ip6h = (ip6_t *)mp->b_rptr; 12170 } 12171 mp_ip6h = mp; 12172 12173 /* 12174 * Check for fastpath, we need to hold nce_lock to 12175 * prevent fastpath update from chaining nce_fp_mp. 
12176 */ 12177 12178 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12179 mutex_enter(&nce->nce_lock); 12180 if ((mp1 = nce->nce_fp_mp) != NULL) { 12181 uint32_t hlen; 12182 uchar_t *rptr; 12183 12184 hlen = MBLKL(mp1); 12185 rptr = mp->b_rptr - hlen; 12186 /* 12187 * make sure there is room for the fastpath 12188 * datalink header 12189 */ 12190 if (rptr < mp->b_datap->db_base) { 12191 mp1 = copyb(mp1); 12192 mutex_exit(&nce->nce_lock); 12193 if (mp1 == NULL) { 12194 BUMP_MIB(ill->ill_ip6_mib, 12195 ipv6OutDiscards); 12196 freemsg(mp); 12197 if (next_mp != NULL) 12198 freemsg(next_mp); 12199 if (ire != save_ire) { 12200 ire_refrele(ire); 12201 } 12202 return; 12203 } 12204 mp1->b_cont = mp; 12205 12206 /* Get the priority marking, if any */ 12207 mp1->b_band = mp->b_band; 12208 mp = mp1; 12209 } else { 12210 mp->b_rptr = rptr; 12211 /* 12212 * fastpath - pre-pend datalink 12213 * header 12214 */ 12215 bcopy(mp1->b_rptr, rptr, hlen); 12216 mutex_exit(&nce->nce_lock); 12217 } 12218 } else { 12219 /* 12220 * Get the DL_UNITDATA_REQ. 12221 */ 12222 mp1 = nce->nce_res_mp; 12223 if (mp1 == NULL) { 12224 mutex_exit(&nce->nce_lock); 12225 ip1dbg(("ip_xmit_v6: No resolution " 12226 "block ire = %p\n", (void *)ire)); 12227 freemsg(mp); 12228 if (next_mp != NULL) 12229 freemsg(next_mp); 12230 if (ire != save_ire) { 12231 ire_refrele(ire); 12232 } 12233 return; 12234 } 12235 /* 12236 * Prepend the DL_UNITDATA_REQ. 
12237 */ 12238 mp1 = copyb(mp1); 12239 mutex_exit(&nce->nce_lock); 12240 if (mp1 == NULL) { 12241 BUMP_MIB(ill->ill_ip6_mib, 12242 ipv6OutDiscards); 12243 freemsg(mp); 12244 if (next_mp != NULL) 12245 freemsg(next_mp); 12246 if (ire != save_ire) { 12247 ire_refrele(ire); 12248 } 12249 return; 12250 } 12251 mp1->b_cont = mp; 12252 12253 /* Get the priority marking, if any */ 12254 mp1->b_band = mp->b_band; 12255 mp = mp1; 12256 } 12257 12258 out_ill = (ill_t *)stq->q_ptr; 12259 12260 DTRACE_PROBE4(ip6__physical__out__start, 12261 ill_t *, NULL, ill_t *, out_ill, 12262 ip6_t *, ip6h, mblk_t *, mp); 12263 12264 FW_HOOKS6(ip6_physical_out_event, 12265 ipv6firewall_physical_out, MSG_FWCOOKED_OUT, 12266 NULL, out_ill, ip6h, mp, mp_ip6h); 12267 12268 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12269 12270 if (mp == NULL) { 12271 if (multirt_send) { 12272 ASSERT(ire1 != NULL); 12273 if (ire != save_ire) { 12274 ire_refrele(ire); 12275 } 12276 /* 12277 * Proceed with the next RTF_MULTIRT 12278 * ire, also set up the send-to queue 12279 * accordingly. 12280 */ 12281 ire = ire1; 12282 ire1 = NULL; 12283 stq = ire->ire_stq; 12284 nce = ire->ire_nce; 12285 ill = ire_to_ill(ire); 12286 mp = next_mp; 12287 next_mp = NULL; 12288 continue; 12289 } else { 12290 ASSERT(next_mp == NULL); 12291 ASSERT(ire1 == NULL); 12292 break; 12293 } 12294 } 12295 12296 /* 12297 * Update ire counters; for save_ire, this has been 12298 * done by the caller. 12299 */ 12300 if (ire != save_ire) { 12301 UPDATE_OB_PKT_COUNT(ire); 12302 ire->ire_last_used_time = lbolt; 12303 } 12304 12305 /* 12306 * Send it down. XXX Do we want to flow control AH/ESP 12307 * packets that carry TCP payloads? We don't flow 12308 * control TCP packets, but we should also not 12309 * flow-control TCP packets that have been protected. 12310 * We don't have an easy way to find out if an AH/ESP 12311 * packet was originally TCP or not currently. 
12312 */ 12313 if (io == NULL) { 12314 putnext(stq, mp); 12315 } else { 12316 /* 12317 * Safety Pup says: make sure this is 12318 * going to the right interface! 12319 */ 12320 if (io->ipsec_out_capab_ill_index != 12321 ill_index) { 12322 /* IPsec kstats: bump lose counter */ 12323 freemsg(mp1); 12324 } else { 12325 ipsec_hw_putnext(stq, mp); 12326 } 12327 } 12328 12329 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12330 if (ire != save_ire) { 12331 ire_refrele(ire); 12332 } 12333 if (multirt_send) { 12334 ASSERT(ire1 != NULL); 12335 /* 12336 * Proceed with the next RTF_MULTIRT 12337 * ire, also set up the send-to queue 12338 * accordingly. 12339 */ 12340 ire = ire1; 12341 ire1 = NULL; 12342 stq = ire->ire_stq; 12343 nce = ire->ire_nce; 12344 ill = ire_to_ill(ire); 12345 mp = next_mp; 12346 next_mp = NULL; 12347 continue; 12348 } 12349 ASSERT(next_mp == NULL); 12350 ASSERT(ire1 == NULL); 12351 return; 12352 } 12353 12354 ASSERT(nce->nce_state != ND_INCOMPLETE); 12355 12356 /* 12357 * Check for upper layer advice 12358 */ 12359 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12360 /* 12361 * It should be o.k. to check the state without 12362 * a lock here, at most we lose an advice. 12363 */ 12364 nce->nce_last = TICK_TO_MSEC(lbolt64); 12365 if (nce->nce_state != ND_REACHABLE) { 12366 12367 mutex_enter(&nce->nce_lock); 12368 nce->nce_state = ND_REACHABLE; 12369 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12370 mutex_exit(&nce->nce_lock); 12371 (void) untimeout(nce->nce_timeout_id); 12372 if (ip_debug > 2) { 12373 /* ip1dbg */ 12374 pr_addr_dbg("ip_xmit_v6: state" 12375 " for %s changed to" 12376 " REACHABLE\n", AF_INET6, 12377 &ire->ire_addr_v6); 12378 } 12379 } 12380 if (ire != save_ire) { 12381 ire_refrele(ire); 12382 } 12383 if (multirt_send) { 12384 ASSERT(ire1 != NULL); 12385 /* 12386 * Proceed with the next RTF_MULTIRT 12387 * ire, also set up the send-to queue 12388 * accordingly. 
12389 */ 12390 ire = ire1; 12391 ire1 = NULL; 12392 stq = ire->ire_stq; 12393 nce = ire->ire_nce; 12394 ill = ire_to_ill(ire); 12395 mp = next_mp; 12396 next_mp = NULL; 12397 continue; 12398 } 12399 ASSERT(next_mp == NULL); 12400 ASSERT(ire1 == NULL); 12401 return; 12402 } 12403 12404 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12405 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12406 " ill_reachable_time = %d \n", delta, 12407 ill->ill_reachable_time)); 12408 if (delta > (uint64_t)ill->ill_reachable_time) { 12409 nce = ire->ire_nce; 12410 mutex_enter(&nce->nce_lock); 12411 switch (nce->nce_state) { 12412 case ND_REACHABLE: 12413 case ND_STALE: 12414 /* 12415 * ND_REACHABLE is identical to 12416 * ND_STALE in this specific case. If 12417 * reachable time has expired for this 12418 * neighbor (delta is greater than 12419 * reachable time), conceptually, the 12420 * neighbor cache is no longer in 12421 * REACHABLE state, but already in 12422 * STALE state. So the correct 12423 * transition here is to ND_DELAY. 12424 */ 12425 nce->nce_state = ND_DELAY; 12426 mutex_exit(&nce->nce_lock); 12427 NDP_RESTART_TIMER(nce, 12428 delay_first_probe_time); 12429 if (ip_debug > 3) { 12430 /* ip2dbg */ 12431 pr_addr_dbg("ip_xmit_v6: state" 12432 " for %s changed to" 12433 " DELAY\n", AF_INET6, 12434 &ire->ire_addr_v6); 12435 } 12436 break; 12437 case ND_DELAY: 12438 case ND_PROBE: 12439 mutex_exit(&nce->nce_lock); 12440 /* Timers have already started */ 12441 break; 12442 case ND_UNREACHABLE: 12443 /* 12444 * ndp timer has detected that this nce 12445 * is unreachable and initiated deleting 12446 * this nce and all its associated IREs. 12447 * This is a race where we found the 12448 * ire before it was deleted and have 12449 * just sent out a packet using this 12450 * unreachable nce. 
12451 */ 12452 mutex_exit(&nce->nce_lock); 12453 break; 12454 default: 12455 ASSERT(0); 12456 } 12457 } 12458 12459 if (multirt_send) { 12460 ASSERT(ire1 != NULL); 12461 /* 12462 * Proceed with the next RTF_MULTIRT ire, 12463 * Also set up the send-to queue accordingly. 12464 */ 12465 if (ire != save_ire) { 12466 ire_refrele(ire); 12467 } 12468 ire = ire1; 12469 ire1 = NULL; 12470 stq = ire->ire_stq; 12471 nce = ire->ire_nce; 12472 ill = ire_to_ill(ire); 12473 mp = next_mp; 12474 next_mp = NULL; 12475 } 12476 } while (multirt_send); 12477 /* 12478 * In the multirouting case, release the last ire used for 12479 * emission. save_ire will be released by the caller. 12480 */ 12481 if (ire != save_ire) { 12482 ire_refrele(ire); 12483 } 12484 } else { 12485 /* 12486 * Queue packet if we have an conn to give back pressure. 12487 * We can't queue packets intended for hardware acceleration 12488 * since we've tossed that state already. If the packet is 12489 * being fed back from ire_send_v6, we don't know the 12490 * position in the queue to enqueue the packet and we discard 12491 * the packet. 12492 */ 12493 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12494 (caller != IRE_SEND)) { 12495 if (caller == IP_WSRV) { 12496 connp->conn_did_putbq = 1; 12497 (void) putbq(connp->conn_wq, mp); 12498 conn_drain_insert(connp); 12499 /* 12500 * caller == IP_WSRV implies we are 12501 * the service thread, and the 12502 * queue is already noenabled. 12503 * The check for canput and 12504 * the putbq is not atomic. 12505 * So we need to check again. 12506 */ 12507 if (canput(stq->q_next)) 12508 connp->conn_did_putbq = 0; 12509 } else { 12510 (void) putq(connp->conn_wq, mp); 12511 } 12512 return; 12513 } 12514 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12515 freemsg(mp); 12516 return; 12517 } 12518 } 12519 12520 /* 12521 * pr_addr_dbg function provides the needed buffer space to call 12522 * inet_ntop() function's 3rd argument. 
This function should be 12523 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12524 * stack buffer space in it's own stack frame. This function uses 12525 * a buffer from it's own stack and prints the information. 12526 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12527 * 12528 * Note: This function can call inet_ntop() once. 12529 */ 12530 void 12531 pr_addr_dbg(char *fmt1, int af, const void *addr) 12532 { 12533 char buf[INET6_ADDRSTRLEN]; 12534 12535 if (fmt1 == NULL) { 12536 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12537 return; 12538 } 12539 12540 /* 12541 * This does not compare debug level and just prints 12542 * out. Thus it is the responsibility of the caller 12543 * to check the appropriate debug-level before calling 12544 * this function. 12545 */ 12546 if (ip_debug > 0) { 12547 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12548 } 12549 12550 12551 } 12552 12553 12554 /* 12555 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12556 * if needed and extension headers) that will be needed based on the 12557 * ip6_pkt_t structure passed by the caller. 12558 * 12559 * The returned length does not include the length of the upper level 12560 * protocol (ULP) header. 
12561 */ 12562 int 12563 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12564 { 12565 int len; 12566 12567 len = IPV6_HDR_LEN; 12568 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12569 len += sizeof (ip6i_t); 12570 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12571 ASSERT(ipp->ipp_hopoptslen != 0); 12572 len += ipp->ipp_hopoptslen; 12573 } 12574 if (ipp->ipp_fields & IPPF_RTHDR) { 12575 ASSERT(ipp->ipp_rthdrlen != 0); 12576 len += ipp->ipp_rthdrlen; 12577 } 12578 /* 12579 * En-route destination options 12580 * Only do them if there's a routing header as well 12581 */ 12582 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12583 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12584 ASSERT(ipp->ipp_rtdstoptslen != 0); 12585 len += ipp->ipp_rtdstoptslen; 12586 } 12587 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12588 ASSERT(ipp->ipp_dstoptslen != 0); 12589 len += ipp->ipp_dstoptslen; 12590 } 12591 return (len); 12592 } 12593 12594 /* 12595 * All-purpose routine to build a header chain of an IPv6 header 12596 * followed by any required extension headers and a proto header, 12597 * preceeded (where necessary) by an ip6i_t private header. 12598 * 12599 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12600 * will be filled in appropriately. 12601 * Thus the caller must fill in the rest of the IPv6 header, such as 12602 * traffic class/flowid, source address (if not set here), hoplimit (if not 12603 * set here) and destination address. 12604 * 12605 * The extension headers and ip6i_t header will all be fully filled in. 12606 */ 12607 void 12608 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12609 ip6_pkt_t *ipp, uint8_t protocol) 12610 { 12611 uint8_t *nxthdr_ptr; 12612 uint8_t *cp; 12613 ip6i_t *ip6i; 12614 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12615 12616 /* 12617 * If sending private ip6i_t header down (checksum info, nexthop, 12618 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12619 * then fill it in. (The checksum info will be filled in by icmp). 
12620 */ 12621 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12622 ip6i = (ip6i_t *)ip6h; 12623 ip6h = (ip6_t *)&ip6i[1]; 12624 12625 ip6i->ip6i_flags = 0; 12626 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12627 if (ipp->ipp_fields & IPPF_IFINDEX || 12628 ipp->ipp_fields & IPPF_SCOPE_ID) { 12629 ASSERT(ipp->ipp_ifindex != 0); 12630 ip6i->ip6i_flags |= IP6I_IFINDEX; 12631 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12632 } 12633 if (ipp->ipp_fields & IPPF_ADDR) { 12634 /* 12635 * Enable per-packet source address verification if 12636 * IPV6_PKTINFO specified the source address. 12637 * ip6_src is set in the transport's _wput function. 12638 */ 12639 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12640 &ipp->ipp_addr)); 12641 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12642 } 12643 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12644 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12645 /* 12646 * We need to set this flag so that IP doesn't 12647 * rewrite the IPv6 header's hoplimit with the 12648 * current default value. 12649 */ 12650 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12651 } 12652 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12653 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12654 &ipp->ipp_nexthop)); 12655 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12656 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12657 } 12658 /* 12659 * tell IP this is an ip6i_t private header 12660 */ 12661 ip6i->ip6i_nxt = IPPROTO_RAW; 12662 } 12663 /* Initialize IPv6 header */ 12664 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12665 if (ipp->ipp_fields & IPPF_TCLASS) { 12666 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12667 (ipp->ipp_tclass << 20); 12668 } 12669 if (ipp->ipp_fields & IPPF_ADDR) 12670 ip6h->ip6_src = ipp->ipp_addr; 12671 12672 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12673 cp = (uint8_t *)&ip6h[1]; 12674 /* 12675 * Here's where we have to start stringing together 12676 * any extension headers in the right order: 12677 * Hop-by-hop, destination, routing, and final destination opts. 
12678 */ 12679 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12680 /* Hop-by-hop options */ 12681 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12682 12683 *nxthdr_ptr = IPPROTO_HOPOPTS; 12684 nxthdr_ptr = &hbh->ip6h_nxt; 12685 12686 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12687 cp += ipp->ipp_hopoptslen; 12688 } 12689 /* 12690 * En-route destination options 12691 * Only do them if there's a routing header as well 12692 */ 12693 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12694 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12695 ip6_dest_t *dst = (ip6_dest_t *)cp; 12696 12697 *nxthdr_ptr = IPPROTO_DSTOPTS; 12698 nxthdr_ptr = &dst->ip6d_nxt; 12699 12700 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12701 cp += ipp->ipp_rtdstoptslen; 12702 } 12703 /* 12704 * Routing header next 12705 */ 12706 if (ipp->ipp_fields & IPPF_RTHDR) { 12707 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12708 12709 *nxthdr_ptr = IPPROTO_ROUTING; 12710 nxthdr_ptr = &rt->ip6r_nxt; 12711 12712 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12713 cp += ipp->ipp_rthdrlen; 12714 } 12715 /* 12716 * Do ultimate destination options 12717 */ 12718 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12719 ip6_dest_t *dest = (ip6_dest_t *)cp; 12720 12721 *nxthdr_ptr = IPPROTO_DSTOPTS; 12722 nxthdr_ptr = &dest->ip6d_nxt; 12723 12724 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12725 cp += ipp->ipp_dstoptslen; 12726 } 12727 /* 12728 * Now set the last header pointer to the proto passed in 12729 */ 12730 *nxthdr_ptr = protocol; 12731 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12732 } 12733 12734 /* 12735 * Return a pointer to the routing header extension header 12736 * in the IPv6 header(s) chain passed in. 
12737 * If none found, return NULL 12738 * Assumes that all extension headers are in same mblk as the v6 header 12739 */ 12740 ip6_rthdr_t * 12741 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12742 { 12743 ip6_dest_t *desthdr; 12744 ip6_frag_t *fraghdr; 12745 uint_t hdrlen; 12746 uint8_t nexthdr; 12747 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12748 12749 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12750 return ((ip6_rthdr_t *)ptr); 12751 12752 /* 12753 * The routing header will precede all extension headers 12754 * other than the hop-by-hop and destination options 12755 * extension headers, so if we see anything other than those, 12756 * we're done and didn't find it. 12757 * We could see a destination options header alone but no 12758 * routing header, in which case we'll return NULL as soon as 12759 * we see anything after that. 12760 * Hop-by-hop and destination option headers are identical, 12761 * so we can use either one we want as a template. 12762 */ 12763 nexthdr = ip6h->ip6_nxt; 12764 while (ptr < endptr) { 12765 /* Is there enough left for len + nexthdr? */ 12766 if (ptr + MIN_EHDR_LEN > endptr) 12767 return (NULL); 12768 12769 switch (nexthdr) { 12770 case IPPROTO_HOPOPTS: 12771 case IPPROTO_DSTOPTS: 12772 /* Assumes the headers are identical for hbh and dst */ 12773 desthdr = (ip6_dest_t *)ptr; 12774 hdrlen = 8 * (desthdr->ip6d_len + 1); 12775 nexthdr = desthdr->ip6d_nxt; 12776 break; 12777 12778 case IPPROTO_ROUTING: 12779 return ((ip6_rthdr_t *)ptr); 12780 12781 case IPPROTO_FRAGMENT: 12782 fraghdr = (ip6_frag_t *)ptr; 12783 hdrlen = sizeof (ip6_frag_t); 12784 nexthdr = fraghdr->ip6f_nxt; 12785 break; 12786 12787 default: 12788 return (NULL); 12789 } 12790 ptr += hdrlen; 12791 } 12792 return (NULL); 12793 } 12794 12795 /* 12796 * Called for source-routed packets originating on this node. 
12797 * Manipulates the original routing header by moving every entry up 12798 * one slot, placing the first entry in the v6 header's v6_dst field, 12799 * and placing the ultimate destination in the routing header's last 12800 * slot. 12801 * 12802 * Returns the checksum diference between the ultimate destination 12803 * (last hop in the routing header when the packet is sent) and 12804 * the first hop (ip6_dst when the packet is sent) 12805 */ 12806 uint32_t 12807 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12808 { 12809 uint_t numaddr; 12810 uint_t i; 12811 in6_addr_t *addrptr; 12812 in6_addr_t tmp; 12813 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12814 uint32_t cksm; 12815 uint32_t addrsum = 0; 12816 uint16_t *ptr; 12817 12818 /* 12819 * Perform any processing needed for source routing. 12820 * We know that all extension headers will be in the same mblk 12821 * as the IPv6 header. 12822 */ 12823 12824 /* 12825 * If no segments left in header, or the header length field is zero, 12826 * don't move hop addresses around; 12827 * Checksum difference is zero. 12828 */ 12829 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12830 return (0); 12831 12832 ptr = (uint16_t *)&ip6h->ip6_dst; 12833 cksm = 0; 12834 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12835 cksm += ptr[i]; 12836 } 12837 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12838 12839 /* 12840 * Here's where the fun begins - we have to 12841 * move all addresses up one spot, take the 12842 * first hop and make it our first ip6_dst, 12843 * and place the ultimate destination in the 12844 * newly-opened last slot. 
12845 */ 12846 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12847 numaddr = rthdr->ip6r0_len / 2; 12848 tmp = *addrptr; 12849 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12850 *addrptr = addrptr[1]; 12851 } 12852 *addrptr = ip6h->ip6_dst; 12853 ip6h->ip6_dst = tmp; 12854 12855 /* 12856 * From the checksummed ultimate destination subtract the checksummed 12857 * current ip6_dst (the first hop address). Return that number. 12858 * (In the v4 case, the second part of this is done in each routine 12859 * that calls ip_massage_options(). We do it all in this one place 12860 * for v6). 12861 */ 12862 ptr = (uint16_t *)&ip6h->ip6_dst; 12863 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12864 addrsum += ptr[i]; 12865 } 12866 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12867 if ((int)cksm < 0) 12868 cksm--; 12869 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12870 12871 return (cksm); 12872 } 12873 12874 /* 12875 * See if the upper-level protocol indicated by 'proto' will be able 12876 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12877 * ICMP6_PACKET_TOO_BIG (IPv6). 12878 */ 12879 static boolean_t 12880 ip_ulp_cando_pkt2big(int proto) 12881 { 12882 /* 12883 * For now, only TCP can handle this. 12884 * Tunnels may be able to also, but since tun isn't working over 12885 * IPv6 yet, don't worry about it for now. 12886 */ 12887 return (proto == IPPROTO_TCP); 12888 } 12889 12890 12891 /* 12892 * Propagate a multicast group membership operation (join/leave) (*fn) on 12893 * all interfaces crossed by the related multirt routes. 12894 * The call is considered successful if the operation succeeds 12895 * on at least one interface. 12896 * The function is called if the destination address in the packet to send 12897 * is multirouted. 
12898 */ 12899 int 12900 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12901 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12902 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12903 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12904 { 12905 ire_t *ire_gw; 12906 irb_t *irb; 12907 int index, error = 0; 12908 opt_restart_t *or; 12909 12910 irb = ire->ire_bucket; 12911 ASSERT(irb != NULL); 12912 12913 ASSERT(DB_TYPE(first_mp) == M_CTL); 12914 or = (opt_restart_t *)first_mp->b_rptr; 12915 12916 IRB_REFHOLD(irb); 12917 for (; ire != NULL; ire = ire->ire_next) { 12918 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12919 continue; 12920 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12921 continue; 12922 12923 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12924 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12925 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12926 /* No resolver exists for the gateway; skip this ire. */ 12927 if (ire_gw == NULL) 12928 continue; 12929 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12930 /* 12931 * A resolver exists: we can get the interface on which we have 12932 * to apply the operation. 12933 */ 12934 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12935 first_mp); 12936 if (error == 0) 12937 or->or_private = CGTP_MCAST_SUCCESS; 12938 12939 if (ip_debug > 0) { 12940 ulong_t off; 12941 char *ksym; 12942 12943 ksym = kobj_getsymname((uintptr_t)fn, &off); 12944 ip2dbg(("ip_multirt_apply_membership_v6: " 12945 "called %s, multirt group 0x%08x via itf 0x%08x, " 12946 "error %d [success %u]\n", 12947 ksym ? 
ksym : "?", 12948 ntohl(V4_PART_OF_V6((*v6grp))), 12949 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12950 error, or->or_private)); 12951 } 12952 12953 ire_refrele(ire_gw); 12954 if (error == EINPROGRESS) { 12955 IRB_REFRELE(irb); 12956 return (error); 12957 } 12958 } 12959 IRB_REFRELE(irb); 12960 /* 12961 * Consider the call as successful if we succeeded on at least 12962 * one interface. Otherwise, return the last encountered error. 12963 */ 12964 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12965 } 12966 12967 void 12968 ip6_kstat_init(void) 12969 { 12970 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12971 "net", KSTAT_TYPE_NAMED, 12972 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12973 KSTAT_FLAG_VIRTUAL)) != NULL) { 12974 ip6_kstat->ks_data = &ip6_statistics; 12975 kstat_install(ip6_kstat); 12976 } 12977 } 12978 12979 /* 12980 * The following two functions set and get the value for the 12981 * IPV6_SRC_PREFERENCES socket option. 12982 */ 12983 int 12984 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12985 { 12986 /* 12987 * We only support preferences that are covered by 12988 * IPV6_PREFER_SRC_MASK. 12989 */ 12990 if (prefs & ~IPV6_PREFER_SRC_MASK) 12991 return (EINVAL); 12992 12993 /* 12994 * Look for conflicting preferences or default preferences. If 12995 * both bits of a related pair are clear, the application wants the 12996 * system's default value for that pair. Both bits in a pair can't 12997 * be set. 
12998 */ 12999 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13000 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13001 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13002 IPV6_PREFER_SRC_MIPMASK) { 13003 return (EINVAL); 13004 } 13005 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13006 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13007 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13008 IPV6_PREFER_SRC_TMPMASK) { 13009 return (EINVAL); 13010 } 13011 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13012 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13013 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13014 IPV6_PREFER_SRC_CGAMASK) { 13015 return (EINVAL); 13016 } 13017 13018 connp->conn_src_preferences = prefs; 13019 return (0); 13020 } 13021 13022 size_t 13023 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13024 { 13025 *val = connp->conn_src_preferences; 13026 return (sizeof (connp->conn_src_preferences)); 13027 } 13028 13029 int 13030 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13031 { 13032 ill_t *ill; 13033 ire_t *ire; 13034 int error; 13035 13036 /* 13037 * Verify the source address and ifindex. Privileged users can use 13038 * any source address. For ancillary data the source address is 13039 * checked in ip_wput_v6. 13040 */ 13041 if (pkti->ipi6_ifindex != 0) { 13042 ASSERT(connp != NULL); 13043 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13044 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 13045 if (ill == NULL) { 13046 /* 13047 * We just want to know if the interface exists, we 13048 * don't really care about the ill pointer itself. 
13049 */ 13050 if (error != EINPROGRESS) 13051 return (error); 13052 error = 0; /* Ensure we don't use it below */ 13053 } else { 13054 ill_refrele(ill); 13055 } 13056 } 13057 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13058 secpolicy_net_rawaccess(cr) != 0) { 13059 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13060 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13061 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 13062 if (ire != NULL) 13063 ire_refrele(ire); 13064 else 13065 return (ENXIO); 13066 } 13067 return (0); 13068 } 13069 13070 /* 13071 * Get the size of the IP options (including the IP headers size) 13072 * without including the AH header's size. If till_ah is B_FALSE, 13073 * and if AH header is present, dest options beyond AH header will 13074 * also be included in the returned size. 13075 */ 13076 int 13077 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13078 { 13079 ip6_t *ip6h; 13080 uint8_t nexthdr; 13081 uint8_t *whereptr; 13082 ip6_hbh_t *hbhhdr; 13083 ip6_dest_t *dsthdr; 13084 ip6_rthdr_t *rthdr; 13085 int ehdrlen; 13086 int size; 13087 ah_t *ah; 13088 13089 ip6h = (ip6_t *)mp->b_rptr; 13090 size = IPV6_HDR_LEN; 13091 nexthdr = ip6h->ip6_nxt; 13092 whereptr = (uint8_t *)&ip6h[1]; 13093 for (;;) { 13094 /* Assume IP has already stripped it */ 13095 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13096 switch (nexthdr) { 13097 case IPPROTO_HOPOPTS: 13098 hbhhdr = (ip6_hbh_t *)whereptr; 13099 nexthdr = hbhhdr->ip6h_nxt; 13100 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13101 break; 13102 case IPPROTO_DSTOPTS: 13103 dsthdr = (ip6_dest_t *)whereptr; 13104 nexthdr = dsthdr->ip6d_nxt; 13105 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13106 break; 13107 case IPPROTO_ROUTING: 13108 rthdr = (ip6_rthdr_t *)whereptr; 13109 nexthdr = rthdr->ip6r_nxt; 13110 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13111 break; 13112 default : 13113 if (till_ah) { 13114 ASSERT(nexthdr == IPPROTO_AH); 13115 return (size); 13116 } 13117 /* 13118 * If we don't have a AH 
header to traverse, 13119 * return now. This happens normally for 13120 * outbound datagrams where we have not inserted 13121 * the AH header. 13122 */ 13123 if (nexthdr != IPPROTO_AH) { 13124 return (size); 13125 } 13126 13127 /* 13128 * We don't include the AH header's size 13129 * to be symmetrical with other cases where 13130 * we either don't have a AH header (outbound) 13131 * or peek into the AH header yet (inbound and 13132 * not pulled up yet). 13133 */ 13134 ah = (ah_t *)whereptr; 13135 nexthdr = ah->ah_nexthdr; 13136 ehdrlen = (ah->ah_length << 2) + 8; 13137 13138 if (nexthdr == IPPROTO_DSTOPTS) { 13139 if (whereptr + ehdrlen >= mp->b_wptr) { 13140 /* 13141 * The destination options header 13142 * is not part of the first mblk. 13143 */ 13144 whereptr = mp->b_cont->b_rptr; 13145 } else { 13146 whereptr += ehdrlen; 13147 } 13148 13149 dsthdr = (ip6_dest_t *)whereptr; 13150 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13151 size += ehdrlen; 13152 } 13153 return (size); 13154 } 13155 whereptr += ehdrlen; 13156 size += ehdrlen; 13157 } 13158 } 13159